Files
ArchiveBox/archivebox/plugins/mercury/on_Crawl__00_validate_mercury.py
2025-12-26 20:39:56 -08:00

79 lines
2.2 KiB
Python
Executable File

#!/usr/bin/env python3
"""
Validation hook for postlight-parser binary.
Runs at crawl start to verify postlight-parser is available.
Outputs JSONL for InstalledBinary and Machine config updates.
"""
import sys
import json
def find_mercury() -> dict | None:
"""Find postlight-parser binary."""
try:
from abx_pkg import Binary, NpmProvider, EnvProvider
binary = Binary(name='postlight-parser', binproviders=[NpmProvider(), EnvProvider()])
loaded = binary.load()
if loaded and loaded.abspath:
return {
'name': 'postlight-parser',
'abspath': str(loaded.abspath),
'version': str(loaded.version) if loaded.version else None,
'sha256': loaded.sha256 if hasattr(loaded, 'sha256') else None,
'binprovider': loaded.binprovider.name if loaded.binprovider else 'env',
}
except Exception:
pass
return None
def main():
    """Report the postlight-parser lookup result as JSON lines and exit.

    Binary found: prints an ``InstalledBinary`` record, a ``Machine`` update
    for ``config/MERCURY_BINARY`` and, when a version is known, a ``Machine``
    update for ``config/MERCURY_VERSION``; then exits with status 0.

    Binary not found: prints a ``Dependency`` record for postlight-parser,
    writes a message to stderr, and exits with status 1.
    """
    found = find_mercury()

    # Guard clause: handle the "missing binary" case first.
    if not found or not found.get('abspath'):
        # postlight-parser is installed as @postlight/parser in npm
        dependency = {
            'type': 'Dependency',
            'bin_name': 'postlight-parser',
            'bin_providers': 'npm,env',
            'overrides': {
                'npm': {'packages': ['@postlight/parser']}
            }
        }
        print(json.dumps(dependency))
        print("postlight-parser binary not found", file=sys.stderr)
        sys.exit(1)

    abspath = found['abspath']
    version = found['version']

    records = [
        {
            'type': 'InstalledBinary',
            'name': found['name'],
            'abspath': abspath,
            'version': version,
            'sha256': found['sha256'],
            'binprovider': found['binprovider'],
        },
        {
            'type': 'Machine',
            '_method': 'update',
            'key': 'config/MERCURY_BINARY',
            'value': abspath,
        },
    ]
    # The version record is only emitted when a version string is available.
    if version:
        records.append({
            'type': 'Machine',
            '_method': 'update',
            'key': 'config/MERCURY_VERSION',
            'value': version,
        })

    # One JSON object per line on stdout.
    for record in records:
        print(json.dumps(record))
    sys.exit(0)
# Run the validation only when this file is executed as a script.
if __name__ == "__main__":
    main()