Mirror of https://github.com/ArchiveBox/ArchiveBox.git
(last synced 2026-01-04 09:55:33 +10:00)
Fix orchestrator state machine and Process-from-ArchiveResult migrations
This commit is contained in:
@@ -100,8 +100,10 @@ def fetch_content(url: str) -> str:
|
||||
|
||||
@click.command()
|
||||
@click.option('--url', required=True, help='URL to parse (file:// or https://)')
|
||||
@click.option('--snapshot-id', required=False, help='Snapshot UUID (unused but required by hook runner)')
|
||||
def main(url: str, snapshot_id: str = None):
|
||||
@click.option('--snapshot-id', required=False, help='Parent Snapshot UUID')
|
||||
@click.option('--crawl-id', required=False, help='Crawl UUID')
|
||||
@click.option('--depth', type=int, default=0, help='Current depth level')
|
||||
def main(url: str, snapshot_id: str = None, crawl_id: str = None, depth: int = 0):
|
||||
"""Parse plain text and extract URLs."""
|
||||
|
||||
try:
|
||||
@@ -123,9 +125,12 @@ def main(url: str, snapshot_id: str = None):
|
||||
'type': 'Snapshot',
|
||||
'url': found_url,
|
||||
'plugin': PLUGIN_NAME,
|
||||
'depth': depth + 1,
|
||||
}
|
||||
if snapshot_id:
|
||||
record['parent_snapshot_id'] = snapshot_id
|
||||
if crawl_id:
|
||||
record['crawl_id'] = crawl_id
|
||||
print(json.dumps(record))
|
||||
|
||||
# Emit ArchiveResult record to mark completion
|
||||
|
||||
Reference in New Issue
Block a user