Merge remote-tracking branch 'origin/dev' into claude/improve-test-suite-xm6Bh

This commit is contained in:
Claude
2025-12-27 05:53:06 +00:00
24 changed files with 1101 additions and 894 deletions

View File

@@ -1,65 +0,0 @@
# Generated by Django 6.0 on 2025-12-25 09:34
import django.db.models.deletion
from archivebox import uuid_compat
from django.db import migrations, models
class Migration(migrations.Migration):
    """
    Re-declare field definitions for the machine app.

    Touches the 'dependency', 'installedbinary', 'machine' and
    'networkinterface' models. Operations are applied in the order listed.
    """

    dependencies = [
        ('machine', '0001_squashed'),
    ]

    operations = [
        # dependency: descriptive fields
        migrations.AlterField(
            model_name='dependency',
            name='bin_name',
            field=models.CharField(
                db_index=True,
                help_text='Binary executable name (e.g., wget, yt-dlp, chromium)',
                max_length=63,
                unique=True,
            ),
        ),
        migrations.AlterField(
            model_name='dependency',
            name='bin_providers',
            field=models.CharField(
                default='*',
                help_text='Comma-separated list of allowed providers: apt,brew,pip,npm,gem,nix,custom or * for any',
                max_length=127,
            ),
        ),
        migrations.AlterField(
            model_name='dependency',
            name='config',
            field=models.JSONField(
                blank=True,
                default=dict,
                help_text='JSON map of env var config to use during install',
            ),
        ),
        migrations.AlterField(
            model_name='dependency',
            name='custom_cmds',
            field=models.JSONField(
                blank=True,
                default=dict,
                help_text="JSON map of provider -> custom install command (e.g., {'apt': 'apt install -y wget'})",
            ),
        ),
        # dependency: primary key defaults to uuid_compat.uuid7
        migrations.AlterField(
            model_name='dependency',
            name='id',
            field=models.UUIDField(
                default=uuid_compat.uuid7,
                editable=False,
                primary_key=True,
                serialize=False,
                unique=True,
            ),
        ),
        # installedbinary: nullable link back to its dependency, plus primary key
        migrations.AlterField(
            model_name='installedbinary',
            name='dependency',
            field=models.ForeignKey(
                blank=True,
                help_text='The Dependency this binary satisfies',
                null=True,
                on_delete=django.db.models.deletion.SET_NULL,
                related_name='installedbinary_set',
                to='machine.dependency',
            ),
        ),
        migrations.AlterField(
            model_name='installedbinary',
            name='id',
            field=models.UUIDField(
                default=uuid_compat.uuid7,
                editable=False,
                primary_key=True,
                serialize=False,
                unique=True,
            ),
        ),
        # machine: config overrides and primary key
        migrations.AlterField(
            model_name='machine',
            name='config',
            field=models.JSONField(
                blank=True,
                default=dict,
                help_text='Machine-specific config overrides (e.g., resolved binary paths like WGET_BINARY)',
            ),
        ),
        migrations.AlterField(
            model_name='machine',
            name='id',
            field=models.UUIDField(
                default=uuid_compat.uuid7,
                editable=False,
                primary_key=True,
                serialize=False,
                unique=True,
            ),
        ),
        # networkinterface: primary key
        migrations.AlterField(
            model_name='networkinterface',
            name='id',
            field=models.UUIDField(
                default=uuid_compat.uuid7,
                editable=False,
                primary_key=True,
                serialize=False,
                unique=True,
            ),
        ),
    ]

View File

@@ -0,0 +1,38 @@
# Generated manually on 2025-12-26
from django.db import migrations, models
class Migration(migrations.Migration):
    """
    Rename the 'dependency' model's custom_cmds field to overrides.

    After the rename, the bin_name, bin_providers, overrides and config
    fields are re-declared with their current definitions and help texts.
    """

    dependencies = [
        ('machine', '0001_squashed'),
    ]

    operations = [
        # The rename must come first: the AlterField on 'overrides' below
        # refers to the field by its new name.
        migrations.RenameField(
            model_name='dependency',
            old_name='custom_cmds',
            new_name='overrides',
        ),
        migrations.AlterField(
            model_name='dependency',
            name='bin_name',
            field=models.CharField(
                db_index=True,
                help_text='Binary executable name (e.g., wget, yt-dlp, chromium)',
                max_length=63,
                unique=True,
            ),
        ),
        migrations.AlterField(
            model_name='dependency',
            name='bin_providers',
            field=models.CharField(
                default='*',
                help_text='Comma-separated list of allowed providers: apt,brew,pip,npm,gem,nix,custom or * for any',
                max_length=127,
            ),
        ),
        migrations.AlterField(
            model_name='dependency',
            name='overrides',
            field=models.JSONField(
                blank=True,
                default=dict,
                help_text="JSON map matching abx-pkg Binary.overrides format: {'pip': {'packages': ['pkg']}, 'apt': {'packages': ['pkg']}}",
            ),
        ),
        migrations.AlterField(
            model_name='dependency',
            name='config',
            field=models.JSONField(
                blank=True,
                default=dict,
                help_text='JSON map of env var config to use during install',
            ),
        ),
    ]

View File

@@ -109,13 +109,13 @@ class NetworkInterface(ModelWithHealthStats):
class DependencyManager(models.Manager):
def get_or_create_for_extractor(self, bin_name: str, bin_providers: str = '*', custom_cmds: dict = None, config: dict = None) -> 'Dependency':
def get_or_create_for_extractor(self, bin_name: str, bin_providers: str = '*', overrides: dict = None, config: dict = None) -> 'Dependency':
"""Get or create a Dependency for an extractor's binary."""
dependency, created = self.get_or_create(
bin_name=bin_name,
defaults={
'bin_providers': bin_providers,
'custom_cmds': custom_cmds or {},
'overrides': overrides or {},
'config': config or {},
}
)
@@ -132,11 +132,11 @@ class Dependency(models.Model):
Example:
Dependency.objects.get_or_create(
bin_name='wget',
bin_providers='apt,brew,nix,custom',
custom_cmds={
'apt': 'apt install -y --no-install-recommends wget',
'brew': 'brew install wget',
'custom': 'curl https://example.com/get-wget.sh | bash',
bin_providers='apt,brew,pip,env',
overrides={
'apt': {'packages': ['wget']},
'brew': {'packages': ['wget']},
'pip': {'packages': ['wget']},
}
)
"""
@@ -161,8 +161,8 @@ class Dependency(models.Model):
help_text="Binary executable name (e.g., wget, yt-dlp, chromium)")
bin_providers = models.CharField(max_length=127, default='*',
help_text="Comma-separated list of allowed providers: apt,brew,pip,npm,gem,nix,custom or * for any")
custom_cmds = models.JSONField(default=dict, blank=True,
help_text="JSON map of provider -> custom install command (e.g., {'apt': 'apt install -y wget'})")
overrides = models.JSONField(default=dict, blank=True,
help_text="JSON map matching abx-pkg Binary.overrides format: {'pip': {'packages': ['pkg']}, 'apt': {'packages': ['pkg']}}")
config = models.JSONField(default=dict, blank=True,
help_text="JSON map of env var config to use during install")
@@ -181,9 +181,9 @@ class Dependency(models.Model):
return True
return provider in self.bin_providers.split(',')
def get_install_cmd(self, provider: str) -> str | None:
"""Get the install command for a provider, or None for default."""
return self.custom_cmds.get(provider)
def get_overrides_for_provider(self, provider: str) -> dict | None:
"""Get the overrides for a provider, or None if not specified."""
return self.overrides.get(provider)
@property
def installed_binaries(self):
@@ -195,6 +195,85 @@ class Dependency(models.Model):
"""Check if at least one valid InstalledBinary exists for this dependency."""
return self.installed_binaries.filter(abspath__isnull=False).exclude(abspath='').exists()
def run(self):
    """
    Execute dependency installation by running all on_Dependency hooks.

    Each hook checks if it can handle this dependency and installs if possible.
    Hook stdout is read as JSONL: every object of type 'InstalledBinary' that
    carries a name, abspath and version is upserted as an InstalledBinary row
    for the current machine and linked to this dependency.

    Returns:
        The InstalledBinary record on success (an existing one if the
        dependency was already installed), or None if no hook produced a
        usable binary.
    """
    import json
    from pathlib import Path
    from django.conf import settings

    # Already installed: prefer a record that has a usable abspath over
    # whichever row happens to sort first, falling back to the old behavior.
    if self.is_installed:
        usable = self.installed_binaries.filter(abspath__isnull=False).exclude(abspath='')
        return usable.first() or self.installed_binaries.first()

    # Import here to avoid circular dependency
    from archivebox.hooks import run_hooks

    # Scratch directory handed to the hooks as their output dir
    data_dir = getattr(settings, 'DATA_DIR', Path.cwd())
    output_dir = Path(data_dir) / 'tmp' / f'dependency_{self.id}'
    output_dir.mkdir(parents=True, exist_ok=True)

    # Build kwargs for hooks - overrides are passed as a JSON string
    hook_kwargs = {
        'dependency_id': str(self.id),
        'bin_name': self.bin_name,
        'bin_providers': self.bin_providers,
        'overrides': json.dumps(self.overrides) if self.overrides else None,
    }

    # Run all on_Dependency hooks - each decides if it can handle this
    results = run_hooks(
        event_name='Dependency',
        output_dir=output_dir,
        timeout=600,
        **hook_kwargs
    )

    machine = None  # resolved lazily, once, on the first usable record

    for result in results:
        if result['returncode'] != 0:
            continue

        # Split on '\n' only: str.splitlines() would also break on characters
        # such as U+2028 that may appear unescaped inside a JSON string.
        for raw_line in result['stdout'].strip().split('\n'):
            line = raw_line.strip()
            if not line:
                continue

            # Only the parse is guarded; database errors must not be mistaken
            # for malformed hook output.
            try:
                obj = json.loads(line)
            except json.JSONDecodeError:
                continue

            # A line can be valid JSON without being an object (e.g. a bare
            # number or string printed by a hook); skip it instead of
            # crashing on .get().
            if not isinstance(obj, dict) or obj.get('type') != 'InstalledBinary':
                continue

            # Records missing any of the required fields are ignored
            if not (obj.get('name') and obj.get('abspath') and obj.get('version')):
                continue

            if machine is None:
                machine = Machine.current()

            installed_binary, _ = InstalledBinary.objects.update_or_create(
                machine=machine,
                name=obj['name'],
                defaults={
                    'abspath': obj['abspath'],
                    'version': obj['version'],
                    'sha256': obj.get('sha256') or '',
                    'binprovider': obj.get('binprovider') or 'env',
                    'dependency': self,
                }
            )

            # Success! Return the installed binary
            if self.is_installed:
                return installed_binary

    # Failed to install with any hook
    return None
class InstalledBinaryManager(models.Manager):
def get_from_db_or_cache(self, name: str, abspath: str = '', version: str = '', sha256: str = '', binprovider: str = 'env') -> 'InstalledBinary':