mirror of
https://github.com/ArchiveBox/ArchiveBox.git
synced 2026-04-05 15:27:53 +10:00
way better plugin hooks system wip
This commit is contained in:
@@ -19,7 +19,6 @@ import os
|
||||
import re
|
||||
import sqlite3
|
||||
import sys
|
||||
from datetime import datetime, timezone
|
||||
from pathlib import Path
|
||||
|
||||
import rich_click as click
|
||||
@@ -139,7 +138,6 @@ def index_in_sqlite(snapshot_id: str, texts: list[str]) -> None:
|
||||
def main(url: str, snapshot_id: str):
|
||||
"""Index snapshot content in SQLite FTS5."""
|
||||
|
||||
start_ts = datetime.now(timezone.utc)
|
||||
output = None
|
||||
status = 'failed'
|
||||
error = ''
|
||||
@@ -149,18 +147,10 @@ def main(url: str, snapshot_id: str):
|
||||
# Check if this backend is enabled (permanent skips - don't retry)
|
||||
backend = get_env('SEARCH_BACKEND_ENGINE', 'sqlite')
|
||||
if backend != 'sqlite':
|
||||
print(f'Skipping SQLite indexing (SEARCH_BACKEND_ENGINE={backend})')
|
||||
print(f'START_TS={start_ts.isoformat()}')
|
||||
print(f'END_TS={datetime.now(timezone.utc).isoformat()}')
|
||||
print(f'STATUS=skipped')
|
||||
print(f'RESULT_JSON={json.dumps({"extractor": EXTRACTOR_NAME, "status": "skipped", "url": url, "snapshot_id": snapshot_id})}')
|
||||
print(f'Skipping SQLite indexing (SEARCH_BACKEND_ENGINE={backend})', file=sys.stderr)
|
||||
sys.exit(0) # Permanent skip - different backend selected
|
||||
if not get_env_bool('USE_INDEXING_BACKEND', True):
|
||||
print('Skipping indexing (USE_INDEXING_BACKEND=False)')
|
||||
print(f'START_TS={start_ts.isoformat()}')
|
||||
print(f'END_TS={datetime.now(timezone.utc).isoformat()}')
|
||||
print(f'STATUS=skipped')
|
||||
print(f'RESULT_JSON={json.dumps({"extractor": EXTRACTOR_NAME, "status": "skipped", "url": url, "snapshot_id": snapshot_id})}')
|
||||
print('Skipping indexing (USE_INDEXING_BACKEND=False)', file=sys.stderr)
|
||||
sys.exit(0) # Permanent skip - indexing disabled
|
||||
else:
|
||||
contents = find_indexable_content()
|
||||
@@ -168,46 +158,22 @@ def main(url: str, snapshot_id: str):
|
||||
|
||||
if not contents:
|
||||
status = 'skipped'
|
||||
print('No indexable content found')
|
||||
print('No indexable content found', file=sys.stderr)
|
||||
else:
|
||||
texts = [content for _, content in contents]
|
||||
index_in_sqlite(snapshot_id, texts)
|
||||
status = 'succeeded'
|
||||
output = OUTPUT_DIR
|
||||
print(f'SQLite FTS indexed {len(texts)} documents')
|
||||
print(f'Sources: {", ".join(indexed_sources)}')
|
||||
|
||||
except Exception as e:
|
||||
error = f'{type(e).__name__}: {e}'
|
||||
status = 'failed'
|
||||
|
||||
end_ts = datetime.now(timezone.utc)
|
||||
duration = (end_ts - start_ts).total_seconds()
|
||||
|
||||
print(f'START_TS={start_ts.isoformat()}')
|
||||
print(f'END_TS={end_ts.isoformat()}')
|
||||
print(f'DURATION={duration:.2f}')
|
||||
if output:
|
||||
print(f'OUTPUT={output}')
|
||||
print(f'STATUS={status}')
|
||||
|
||||
if error:
|
||||
print(f'ERROR={error}', file=sys.stderr)
|
||||
|
||||
result_json = {
|
||||
'extractor': EXTRACTOR_NAME,
|
||||
'url': url,
|
||||
'snapshot_id': snapshot_id,
|
||||
'status': status,
|
||||
'start_ts': start_ts.isoformat(),
|
||||
'end_ts': end_ts.isoformat(),
|
||||
'duration': round(duration, 2),
|
||||
'output': output,
|
||||
'indexed_sources': indexed_sources,
|
||||
'error': error or None,
|
||||
}
|
||||
print(f'RESULT_JSON={json.dumps(result_json)}')
|
||||
print(f'ERROR: {error}', file=sys.stderr)
|
||||
|
||||
# Search indexing hooks don't emit ArchiveResult - they're utility hooks
|
||||
# Exit code indicates success/failure
|
||||
sys.exit(0 if status == 'succeeded' else 1)
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user