feat: add schema_version to JSONL outputs and remove dead code

- Add schema_version (archivebox.VERSION) to all to_jsonl() outputs:
  - Snapshot.to_jsonl()
  - ArchiveResult.to_jsonl()
  - Binary.to_jsonl()
  - Process.to_jsonl()

- Update CLI commands to use model methods directly:
  - archivebox_snapshot.py: snapshot.to_jsonl()
  - archivebox_extract.py: result.to_jsonl()

- Remove dead helper functions from misc/jsonl.py (thin to_jsonl() wrappers, plus the inline machine_to_jsonl() converter):
  - snapshot_to_jsonl()
  - archiveresult_to_jsonl()
  - binary_to_jsonl()
  - process_to_jsonl()
  - machine_to_jsonl()

- Update tests to use model methods directly
This commit is contained in:
Claude
2025-12-30 19:24:53 +00:00
parent a5206e7648
commit bc273c5a7f
6 changed files with 66 additions and 95 deletions

View File

@@ -154,22 +154,6 @@ def filter_by_type(records: Iterator[Dict[str, Any]], record_type: str) -> Itera
yield record
def snapshot_to_jsonl(snapshot) -> Dict[str, Any]:
    """
    Serialize a Snapshot model instance into a JSONL record dict.

    Thin convenience wrapper: the record is produced entirely by the
    instance's own ``to_jsonl()`` method and handed back unchanged.
    """
    record = snapshot.to_jsonl()
    return record
def archiveresult_to_jsonl(result) -> Dict[str, Any]:
    """
    Serialize an ArchiveResult model instance into a JSONL record dict.

    Thin convenience wrapper: the record is produced entirely by the
    instance's own ``to_jsonl()`` method and handed back unchanged.
    """
    record = result.to_jsonl()
    return record
def tag_to_jsonl(tag) -> Dict[str, Any]:
"""
Convert a Tag model instance to a JSONL record.
@@ -196,39 +180,6 @@ def crawl_to_jsonl(crawl) -> Dict[str, Any]:
}
def binary_to_jsonl(binary) -> Dict[str, Any]:
    """
    Serialize a Binary model instance into a JSONL record dict.

    Thin convenience wrapper: the record is produced entirely by the
    instance's own ``to_jsonl()`` method and handed back unchanged.
    """
    record = binary.to_jsonl()
    return record
def process_to_jsonl(process) -> Dict[str, Any]:
    """
    Serialize a Process model instance into a JSONL record dict.

    Thin convenience wrapper: the record is produced entirely by the
    instance's own ``to_jsonl()`` method and handed back unchanged.
    """
    record = process.to_jsonl()
    return record
def machine_to_jsonl(machine) -> Dict[str, Any]:
    """
    Build the JSONL record dict for a Machine model instance.

    The record carries the TYPE_MACHINE type tag, the stringified ``id``, and
    the ``guid``, ``hostname`` and ``os_*`` attributes copied from the
    instance as-is.
    """
    # Machine.to_jsonl() is not implemented yet, so the record is assembled here.
    copied_attrs = (
        'guid',
        'hostname',
        'os_arch',
        'os_family',
        'os_platform',
        'os_release',
    )
    record: Dict[str, Any] = {'type': TYPE_MACHINE, 'id': str(machine.id)}
    for attr_name in copied_attrs:
        record[attr_name] = getattr(machine, attr_name)
    return record
def process_records(
records: Iterator[Dict[str, Any]],
handlers: Dict[str, Callable[[Dict[str, Any]], Optional[Dict[str, Any]]]]