mirror of
https://github.com/ArchiveBox/ArchiveBox.git
synced 2026-04-04 06:47:57 +10:00
Update views, API, and exports for new ArchiveResult output fields
Replace old `output` field with new fields across the codebase:

- output_str: Human-readable output summary
- output_json: Structured metadata (optional)
- output_files: Dict of output files with metadata
- output_size: Total size in bytes
- output_mimetypes: CSV of file mimetypes

Files updated:

- api/v1_core.py: Update MinimalArchiveResultSchema to expose new fields
- api/v1_core.py: Update ArchiveResultFilterSchema to search output_str
- cli/archivebox_extract.py: Use output_str in CLI output
- core/admin_archiveresults.py: Update admin fields, search, and fieldsets
- core/admin_archiveresults.py: Fix output_html variable name bug in output_summary
- misc/jsonl.py: Update archiveresult_to_jsonl() to include new fields
- plugins/extractor_utils.py: Update ExtractorResult helper class

The embed_path() method already uses output_files and output_str, so the snapshot detail page and template tags work correctly.
This commit is contained in:
@@ -105,7 +105,7 @@ class ExtractorResult:
|
||||
|
||||
# ... do extraction ...
|
||||
|
||||
result.output = 'example.com/index.html'
|
||||
result.output_str = 'example.com/index.html'
|
||||
result.status = 'succeeded'
|
||||
result.finish()
|
||||
|
||||
@@ -121,7 +121,7 @@ class ExtractorResult:
|
||||
|
||||
self.cmd: list[str] = []
|
||||
self.version: str = ''
|
||||
self.output: str | Path | None = None
|
||||
self.output_str: str = '' # Human-readable output summary
|
||||
self.status: str = 'failed' # 'succeeded', 'failed', 'skipped'
|
||||
|
||||
self.stdout: str = ''
|
||||
@@ -174,8 +174,8 @@ class ExtractorResult:
|
||||
print(f"VERSION={self.version}")
|
||||
|
||||
# Print output path
|
||||
if self.output:
|
||||
print(f"OUTPUT={self.output}")
|
||||
if self.output_str:
|
||||
print(f"OUTPUT={self.output_str}")
|
||||
|
||||
# Print status
|
||||
print(f"STATUS={self.status}")
|
||||
@@ -192,22 +192,17 @@ class ExtractorResult:
|
||||
for hint in self.hints:
|
||||
print(f"HINT={hint}", file=sys.stderr)
|
||||
|
||||
# Print JSON result for structured parsing
|
||||
# Print clean JSONL result for hooks.py to parse
|
||||
result_json = {
|
||||
'extractor': self.name,
|
||||
'url': self.url,
|
||||
'snapshot_id': self.snapshot_id,
|
||||
'type': 'ArchiveResult',
|
||||
'status': self.status,
|
||||
'start_ts': self.start_ts.isoformat(),
|
||||
'end_ts': self.end_ts.isoformat() if self.end_ts else None,
|
||||
'duration': round(self.duration, 2),
|
||||
'cmd': self.cmd,
|
||||
'cmd_version': self.version,
|
||||
'output': str(self.output) if self.output else None,
|
||||
'returncode': self.returncode,
|
||||
'error': self.error or None,
|
||||
'output_str': self.output_str or self.error or '',
|
||||
}
|
||||
print(f"RESULT_JSON={json.dumps(result_json)}")
|
||||
if self.cmd:
|
||||
result_json['cmd'] = self.cmd
|
||||
if self.version:
|
||||
result_json['cmd_version'] = self.version
|
||||
print(json.dumps(result_json))
|
||||
|
||||
|
||||
def run_shell_command(
|
||||
|
||||
Reference in New Issue
Block a user