Implement hook architecture with JSONL output support

Phase 1: Database migration for new ArchiveResult fields
- Add output_str (TextField) for human-readable summary
- Add output_json (JSONField) for structured metadata
- Add output_files (JSONField) for dict of {relative_path: {}}
- Add output_size (BigIntegerField) for total bytes
- Add output_mimetypes (CharField) for CSV of mimetypes
- Add binary FK to InstalledBinary (optional)
- Migrate existing 'output' field to new split fields

Phase 3: Update run_hook() for JSONL parsing
- Support new JSONL format (any line with {type: 'ModelName', ...})
- Maintain backwards compatibility with RESULT_JSON= format
- Add plugin metadata to each parsed record
- Detect background hooks with .bg. suffix in filename
- Add find_binary_for_cmd() helper function
- Add create_model_record() for processing side-effect records

Phase 6: Update ArchiveResult.run()
- Handle background hooks (return immediately when result is None)
- Process 'records' from HookResult for side-effect models
- Use new output fields (output_str, output_json, output_files, etc.)
- Call create_model_record() for InstalledBinary, Machine updates

Phase 7: Add background hook support
- Add is_background_hook() method to ArchiveResult
- Add check_background_completed() to check if process exited
- Add finalize_background_hook() to collect results from completed hooks
- Update SnapshotMachine.is_finished() to check/finalize background hooks
- Update _populate_output_fields() to walk directory and populate stats

Also updated references to old 'output' field in:
- admin_archiveresults.py
- statemachines.py
- templatetags/core_tags.py
This commit is contained in:
Claude
2025-12-27 08:38:49 +00:00
parent 35dd9acafe
commit 3d985fa8c8
7 changed files with 633 additions and 55 deletions

View File

@@ -68,6 +68,8 @@ class HookResult(TypedDict, total=False):
output_files: List[str]
duration_ms: int
hook: str
# New fields for JSONL parsing
records: List[Dict[str, Any]] # Parsed JSONL records with 'type' field
def discover_hooks(event_name: str) -> List[Path]:
@@ -268,7 +270,9 @@ def run_hook(
files_before = set(output_dir.rglob('*')) if output_dir.exists() else set()
# Detect if this is a background hook (long-running daemon)
is_background = '__background' in script.stem
# New convention: .bg. suffix (e.g., on_Snapshot__21_consolelog.bg.js)
# Old convention: __background in stem (for backwards compatibility)
is_background = '.bg.' in script.name or '__background' in script.stem
# Set up output files for ALL hooks (useful for debugging)
stdout_file = output_dir / 'stdout.log'
@@ -322,13 +326,44 @@ def run_hook(
# Exclude the log files themselves from new_files
new_files = [f for f in new_files if f not in ('stdout.log', 'stderr.log', 'hook.pid')]
# Parse RESULT_JSON from stdout
# Parse JSONL output from stdout
# Supports both new JSONL format (any line starting with { that has 'type')
# and legacy RESULT_JSON= format for backwards compatibility
output_json = None
records = []
plugin_name = script.parent.name # Plugin directory name (e.g., 'wget')
for line in stdout.splitlines():
if line.startswith('RESULT_JSON='):
line = line.strip()
if not line:
continue
# New JSONL format: any line starting with { that has 'type' field
if line.startswith('{'):
try:
output_json = json.loads(line[len('RESULT_JSON='):])
break
data = json.loads(line)
if 'type' in data:
# Add plugin metadata to every record
data['plugin'] = plugin_name
data['plugin_hook'] = str(script)
records.append(data)
# For backwards compatibility, also set output_json for first ArchiveResult
if data.get('type') == 'ArchiveResult' and output_json is None:
output_json = data
except json.JSONDecodeError:
pass
# Legacy format: RESULT_JSON=...
elif line.startswith('RESULT_JSON='):
try:
data = json.loads(line[len('RESULT_JSON='):])
if output_json is None:
output_json = data
# Convert legacy format to new format
data['type'] = 'ArchiveResult'
data['plugin'] = plugin_name
data['plugin_hook'] = str(script)
records.append(data)
except json.JSONDecodeError:
pass
@@ -348,6 +383,7 @@ def run_hook(
output_files=new_files,
duration_ms=duration_ms,
hook=str(script),
records=records,
)
except Exception as e:
@@ -360,6 +396,7 @@ def run_hook(
output_files=[],
duration_ms=duration_ms,
hook=str(script),
records=[],
)
@@ -1104,3 +1141,112 @@ def discover_plugin_templates() -> Dict[str, Dict[str, str]]:
return templates
# =============================================================================
# Hook Result Processing Helpers
# =============================================================================
def find_binary_for_cmd(cmd: List[str], machine_id: str) -> Optional[str]:
"""
Find InstalledBinary for a command, trying abspath first then name.
Only matches binaries on the current machine.
Args:
cmd: Command list (e.g., ['/usr/bin/wget', '-p', 'url'])
machine_id: Current machine ID
Returns:
Binary ID as string if found, None otherwise
"""
if not cmd:
return None
from machine.models import InstalledBinary
bin_path_or_name = cmd[0] if isinstance(cmd, list) else cmd
# Try matching by absolute path first
binary = InstalledBinary.objects.filter(
abspath=bin_path_or_name,
machine_id=machine_id
).first()
if binary:
return str(binary.id)
# Fallback: match by binary name
bin_name = Path(bin_path_or_name).name
binary = InstalledBinary.objects.filter(
name=bin_name,
machine_id=machine_id
).first()
return str(binary.id) if binary else None
def create_model_record(record: Dict[str, Any]) -> Any:
"""
Generic helper to create/update model instances from hook JSONL output.
Args:
record: Dict with 'type' field and model data
Returns:
Created/updated model instance, or None if type unknown
"""
from machine.models import InstalledBinary, Machine
record_type = record.pop('type', None)
if not record_type:
return None
# Remove plugin metadata (not model fields)
record.pop('plugin', None)
record.pop('plugin_hook', None)
if record_type == 'InstalledBinary':
# InstalledBinary requires machine FK
machine = Machine.current()
record.setdefault('machine', machine)
# Required fields check
name = record.get('name')
abspath = record.get('abspath')
if not name or not abspath:
return None
obj, created = InstalledBinary.objects.update_or_create(
machine=machine,
name=name,
defaults={
'abspath': abspath,
'version': record.get('version', ''),
'sha256': record.get('sha256', ''),
'binprovider': record.get('binprovider', 'env'),
}
)
return obj
elif record_type == 'Machine':
# Machine config update (special _method handling)
method = record.pop('_method', None)
if method == 'update':
key = record.get('key')
value = record.get('value')
if key and value:
machine = Machine.current()
if not machine.config:
machine.config = {}
machine.config[key] = value
machine.save(update_fields=['config'])
return machine
return None
# Add more types as needed (Dependency, Snapshot, etc.)
else:
# Unknown type - log warning but don't fail
import sys
print(f"Warning: Unknown record type '{record_type}' from hook output", file=sys.stderr)
return None