Fix orchestrator state machine and Process from ArchiveResult migrations

This commit is contained in:
Nick Sweeting
2026-01-01 16:43:02 -08:00
parent 876feac522
commit 60422adc87
13 changed files with 378 additions and 96 deletions

View File

@@ -100,8 +100,10 @@ def fetch_content(url: str) -> str:
@click.command()
@click.option('--url', required=True, help='URL to parse (file:// or https://)')
@click.option('--snapshot-id', required=False, help='Snapshot UUID (unused but required by hook runner)')
def main(url: str, snapshot_id: str = None):
@click.option('--snapshot-id', required=False, help='Parent Snapshot UUID')
@click.option('--crawl-id', required=False, help='Crawl UUID')
@click.option('--depth', type=int, default=0, help='Current depth level')
def main(url: str, snapshot_id: str = None, crawl_id: str = None, depth: int = 0):
"""Parse plain text and extract URLs."""
try:
@@ -123,9 +125,12 @@ def main(url: str, snapshot_id: str = None):
'type': 'Snapshot',
'url': found_url,
'plugin': PLUGIN_NAME,
'depth': depth + 1,
}
if snapshot_id:
record['parent_snapshot_id'] = snapshot_id
if crawl_id:
record['crawl_id'] = crawl_id
print(json.dumps(record))
# Emit ArchiveResult record to mark completion