mirror of
https://github.com/ArchiveBox/ArchiveBox.git
synced 2026-04-05 15:27:53 +10:00
Update Python/JS hooks to clean JSONL format + add audit report
Phase 4 Plugin Audit Progress:
- Audited all 6 Dependency hooks (all already compliant)
- Audited all 11 Crawl Validate hooks (all already compliant)
- Updated 8 Python Snapshot hooks to clean JSONL format
- Updated 1 JS Snapshot hook (title.js) to clean JSONL format
Snapshot hooks updated to remove:
- RESULT_JSON= prefix
- Extra output lines (START_TS=, END_TS=, DURATION=, VERSION=, OUTPUT=, STATUS=)
Now output clean JSONL:
{"type": "ArchiveResult", "status": "...", "output_str": "..."}
Added implementation report to TODO_hook_architecture.md documenting:
- All completed phases (1, 3, 6, 7)
- Plugin audit results with status tables
- Remaining 13 JS hooks that need updating
- Files modified list
This commit is contained in:
@@ -245,23 +245,15 @@ def main(url: str, snapshot_id: str):
|
||||
try:
|
||||
# Check if SingleFile is enabled
|
||||
if not get_env_bool('SAVE_SINGLEFILE', True):
|
||||
print('Skipping SingleFile (SAVE_SINGLEFILE=False)')
|
||||
status = 'skipped'
|
||||
end_ts = datetime.now(timezone.utc)
|
||||
print(f'START_TS={start_ts.isoformat()}')
|
||||
print(f'END_TS={end_ts.isoformat()}')
|
||||
print(f'STATUS={status}')
|
||||
print(f'RESULT_JSON={json.dumps({"extractor": EXTRACTOR_NAME, "status": status, "url": url, "snapshot_id": snapshot_id})}')
|
||||
print('Skipping SingleFile (SAVE_SINGLEFILE=False)', file=sys.stderr)
|
||||
print(json.dumps({'type': 'ArchiveResult', 'status': 'skipped', 'output_str': 'SAVE_SINGLEFILE=False'}))
|
||||
sys.exit(0)
|
||||
|
||||
# Check if staticfile extractor already handled this (permanent skip)
|
||||
if has_staticfile_output():
|
||||
print(f'Skipping SingleFile - staticfile extractor already downloaded this')
|
||||
print(f'START_TS={start_ts.isoformat()}')
|
||||
print(f'END_TS={datetime.now(timezone.utc).isoformat()}')
|
||||
print(f'STATUS=skipped')
|
||||
print(f'RESULT_JSON={json.dumps({"extractor": EXTRACTOR_NAME, "status": "skipped", "url": url, "snapshot_id": snapshot_id})}')
|
||||
sys.exit(0) # Permanent skip - staticfile already handled
|
||||
print('Skipping SingleFile - staticfile extractor already downloaded this', file=sys.stderr)
|
||||
print(json.dumps({'type': 'ArchiveResult', 'status': 'skipped', 'output_str': 'staticfile already exists'}))
|
||||
sys.exit(0)
|
||||
|
||||
# Find binary
|
||||
binary = find_singlefile()
|
||||
@@ -287,38 +279,23 @@ def main(url: str, snapshot_id: str):
|
||||
error = f'{type(e).__name__}: {e}'
|
||||
status = 'failed'
|
||||
|
||||
# Print results
|
||||
# Calculate duration
|
||||
end_ts = datetime.now(timezone.utc)
|
||||
duration = (end_ts - start_ts).total_seconds()
|
||||
|
||||
print(f'START_TS={start_ts.isoformat()}')
|
||||
print(f'END_TS={end_ts.isoformat()}')
|
||||
print(f'DURATION={duration:.2f}')
|
||||
if cmd_str:
|
||||
print(f'CMD={cmd_str}')
|
||||
if version:
|
||||
print(f'VERSION={version}')
|
||||
if output:
|
||||
print(f'OUTPUT={output}')
|
||||
print(f'STATUS={status}')
|
||||
|
||||
if error:
|
||||
print(f'ERROR={error}', file=sys.stderr)
|
||||
print(f'ERROR: {error}', file=sys.stderr)
|
||||
|
||||
# Print JSON result
|
||||
result_json = {
|
||||
'extractor': EXTRACTOR_NAME,
|
||||
'url': url,
|
||||
'snapshot_id': snapshot_id,
|
||||
# Output clean JSONL (no RESULT_JSON= prefix)
|
||||
result = {
|
||||
'type': 'ArchiveResult',
|
||||
'status': status,
|
||||
'start_ts': start_ts.isoformat(),
|
||||
'end_ts': end_ts.isoformat(),
|
||||
'duration': round(duration, 2),
|
||||
'cmd_version': version,
|
||||
'output': output,
|
||||
'error': error or None,
|
||||
'output_str': output or error or '',
|
||||
}
|
||||
print(f'RESULT_JSON={json.dumps(result_json)}')
|
||||
if binary:
|
||||
result['cmd'] = [binary, '--browser-headless', url, OUTPUT_FILE]
|
||||
if version:
|
||||
result['cmd_version'] = version
|
||||
print(json.dumps(result))
|
||||
|
||||
sys.exit(0 if status == 'succeeded' else 1)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user