Mirror of https://github.com/ArchiveBox/ArchiveBox.git, synced 2026-04-05 15:27:53 +10:00.
Commit: much better tests and add page UI
This commit is contained in:
@@ -4,7 +4,7 @@ from django.contrib import admin
|
||||
from django.utils.html import format_html
|
||||
|
||||
from archivebox.base_models.admin import BaseModelAdmin, ConfigEditorMixin
|
||||
from archivebox.machine.models import Machine, NetworkInterface, Binary
|
||||
from archivebox.machine.models import Machine, NetworkInterface, Binary, Process
|
||||
|
||||
|
||||
class MachineAdmin(ConfigEditorMixin, BaseModelAdmin):
|
||||
@@ -143,7 +143,87 @@ class BinaryAdmin(BaseModelAdmin):
|
||||
)
|
||||
|
||||
|
||||
class ProcessAdmin(BaseModelAdmin):
    """Admin UI for Process records: command, execution results, and links
    to the owning Machine / Binary / ArchiveResult."""

    list_display = ('id', 'created_at', 'machine_info', 'archiveresult_link', 'cmd_str', 'status', 'exit_code', 'pid', 'binary_info', 'health')
    sort_fields = ('id', 'created_at', 'status', 'exit_code', 'pid')
    search_fields = ('id', 'machine__id', 'binary__name', 'cmd', 'pwd', 'stdout', 'stderr')

    readonly_fields = ('created_at', 'modified_at', 'machine', 'binary', 'iface', 'archiveresult_link')

    fieldsets = (
        ('Process Info', {
            'fields': ('machine', 'archiveresult_link', 'status', 'retry_at'),
            'classes': ('card',),
        }),
        ('Command', {
            'fields': ('cmd', 'pwd', 'env', 'timeout'),
            'classes': ('card', 'wide'),
        }),
        ('Execution', {
            'fields': ('binary', 'iface', 'pid', 'exit_code', 'url'),
            'classes': ('card',),
        }),
        ('Timing', {
            'fields': ('started_at', 'ended_at'),
            'classes': ('card',),
        }),
        ('Output', {
            'fields': ('stdout', 'stderr'),
            'classes': ('card', 'wide', 'collapse'),
        }),
        ('Usage', {
            'fields': ('num_uses_succeeded', 'num_uses_failed'),
            'classes': ('card',),
        }),
        ('Timestamps', {
            'fields': ('created_at', 'modified_at'),
            'classes': ('card',),
        }),
    )

    # NOTE(review): 'machine_id' is the DB column attname, not the model field
    # name — Django list_filter normally expects 'machine'; confirm this resolves.
    list_filter = ('status', 'exit_code', 'machine_id')
    ordering = ['-created_at']
    list_per_page = 100
    actions = ["delete_selected"]

    @admin.display(description='Machine', ordering='machine__id')
    def machine_info(self, process):
        """Link to the owning Machine, showing a short id prefix + hostname."""
        return format_html(
            '<a href="/admin/machine/machine/{}/change"><b><code>[{}]</code></b> {}</a>',
            process.machine.id, str(process.machine.id)[:8], process.machine.hostname,
        )

    @admin.display(description='Binary', ordering='binary__name')
    def binary_info(self, process):
        """Link to the associated Binary (or '-' when none is set)."""
        if not process.binary:
            return '-'
        return format_html(
            '<a href="/admin/machine/binary/{}/change"><code>{}</code> v{}</a>',
            process.binary.id, process.binary.name, process.binary.version,
        )

    @admin.display(description='ArchiveResult')
    def archiveresult_link(self, process):
        """Link to the related ArchiveResult (or '-' for standalone processes)."""
        if not hasattr(process, 'archiveresult'):
            return '-'
        ar = process.archiveresult
        return format_html(
            '<a href="/admin/core/archiveresult/{}/change"><code>{}</code> → {}</a>',
            ar.id, ar.plugin, ar.snapshot.url[:50],
        )

    @admin.display(description='Command')
    def cmd_str(self, process):
        """Render the command as truncated monospace text.

        Fix: only append the ' ...' suffix when cmd is a *list* with more
        than 3 arguments. Previously `len(process.cmd) > 3` also fired for
        any string command longer than 3 characters.
        """
        if not process.cmd:
            return '-'
        if isinstance(process.cmd, list):
            cmd = ' '.join(process.cmd[:3])
            if len(process.cmd) > 3:
                cmd += ' ...'
        else:
            cmd = str(process.cmd)
        return format_html('<code style="font-size: 0.9em;">{}</code>', cmd[:80])
|
||||
|
||||
|
||||
def register_admin(admin_site):
    """Attach every machine-app ModelAdmin to the given admin site."""
    registrations = (
        (Machine, MachineAdmin),
        (NetworkInterface, NetworkInterfaceAdmin),
        (Binary, BinaryAdmin),
        (Process, ProcessAdmin),
    )
    for model, admin_class in registrations:
        admin_site.register(model, admin_class)
||||
|
||||
@@ -12,7 +12,11 @@ class MachineConfig(AppConfig):
|
||||
|
||||
def ready(self):
    """Import models to register state machines with the registry.

    Fix: the models module was previously imported unconditionally before
    the `makemigrations` guard, which defeated the guard entirely. The
    import now happens only when not running makemigrations, avoiding
    premature state machine access.
    """
    import sys

    # Skip during makemigrations to avoid premature state machine access
    if 'makemigrations' not in sys.argv:
        from archivebox.machine import models  # noqa: F401
||||
|
||||
|
||||
def register_admin(admin_site):
|
||||
|
||||
143
archivebox/machine/migrations/0001_initial.py
Normal file
143
archivebox/machine/migrations/0001_initial.py
Normal file
@@ -0,0 +1,143 @@
|
||||
# Generated by hand on 2025-12-29
|
||||
# Creates Machine, Binary, NetworkInterface, and Process tables using raw SQL
|
||||
|
||||
from django.db import migrations
|
||||
|
||||
|
||||
class Migration(migrations.Migration):
    """
    Initial migration for the `machine` app.

    Creates the machine_machine, machine_networkinterface, machine_binary,
    and machine_process tables with hand-written SQL (RunSQL) instead of
    Django CreateModel operations, so the exact schema is explicit.
    """

    initial = True

    dependencies = [
    ]

    operations = [
        migrations.RunSQL(
            # Forward SQL
            sql="""
            -- Create machine_machine table
            CREATE TABLE IF NOT EXISTS machine_machine (
                id TEXT PRIMARY KEY NOT NULL,
                created_at DATETIME NOT NULL,
                modified_at DATETIME NOT NULL,
                num_uses_succeeded INTEGER NOT NULL DEFAULT 0,
                num_uses_failed INTEGER NOT NULL DEFAULT 0,

                guid VARCHAR(64) NOT NULL UNIQUE,
                hostname VARCHAR(63) NOT NULL,
                hw_in_docker BOOLEAN NOT NULL DEFAULT 0,
                hw_in_vm BOOLEAN NOT NULL DEFAULT 0,
                hw_manufacturer VARCHAR(63) NOT NULL,
                hw_product VARCHAR(63) NOT NULL,
                hw_uuid VARCHAR(255) NOT NULL,

                os_arch VARCHAR(15) NOT NULL,
                os_family VARCHAR(15) NOT NULL,
                os_platform VARCHAR(63) NOT NULL,
                os_release VARCHAR(63) NOT NULL,
                os_kernel VARCHAR(255) NOT NULL,

                stats TEXT,
                config TEXT
            );
            CREATE INDEX IF NOT EXISTS machine_machine_guid_idx ON machine_machine(guid);

            -- Create machine_networkinterface table
            CREATE TABLE IF NOT EXISTS machine_networkinterface (
                id TEXT PRIMARY KEY NOT NULL,
                created_at DATETIME NOT NULL,
                modified_at DATETIME NOT NULL,
                num_uses_succeeded INTEGER NOT NULL DEFAULT 0,
                num_uses_failed INTEGER NOT NULL DEFAULT 0,

                machine_id TEXT NOT NULL,
                iface VARCHAR(15) NOT NULL,
                ip_public VARCHAR(39) NOT NULL,
                ip_local VARCHAR(39) NOT NULL,
                mac_address VARCHAR(17) NOT NULL,
                dns_server VARCHAR(39) NOT NULL,
                hostname VARCHAR(256) NOT NULL,
                isp VARCHAR(256) NOT NULL,
                city VARCHAR(100) NOT NULL,
                region VARCHAR(100) NOT NULL,
                country VARCHAR(100) NOT NULL,

                FOREIGN KEY (machine_id) REFERENCES machine_machine(id) ON DELETE CASCADE
            );
            CREATE INDEX IF NOT EXISTS machine_networkinterface_machine_id_idx ON machine_networkinterface(machine_id);

            -- Create machine_binary table
            CREATE TABLE IF NOT EXISTS machine_binary (
                id TEXT PRIMARY KEY NOT NULL,
                created_at DATETIME NOT NULL,
                modified_at DATETIME NOT NULL,
                num_uses_succeeded INTEGER NOT NULL DEFAULT 0,
                num_uses_failed INTEGER NOT NULL DEFAULT 0,

                machine_id TEXT NOT NULL,
                name VARCHAR(63) NOT NULL,
                binproviders VARCHAR(127) NOT NULL DEFAULT 'env',
                overrides TEXT NOT NULL DEFAULT '{}',

                binprovider VARCHAR(31) NOT NULL DEFAULT '',
                abspath VARCHAR(255) NOT NULL DEFAULT '',
                version VARCHAR(32) NOT NULL DEFAULT '',
                sha256 VARCHAR(64) NOT NULL DEFAULT '',

                status VARCHAR(16) NOT NULL DEFAULT 'queued',
                retry_at DATETIME,
                output_dir VARCHAR(255) NOT NULL DEFAULT '',

                FOREIGN KEY (machine_id) REFERENCES machine_machine(id) ON DELETE CASCADE,
                UNIQUE(machine_id, name, abspath, version, sha256)
            );
            CREATE INDEX IF NOT EXISTS machine_binary_machine_id_idx ON machine_binary(machine_id);
            CREATE INDEX IF NOT EXISTS machine_binary_name_idx ON machine_binary(name);
            CREATE INDEX IF NOT EXISTS machine_binary_status_idx ON machine_binary(status);
            CREATE INDEX IF NOT EXISTS machine_binary_retry_at_idx ON machine_binary(retry_at);

            -- Create machine_process table
            CREATE TABLE IF NOT EXISTS machine_process (
                id TEXT PRIMARY KEY NOT NULL,
                created_at DATETIME NOT NULL,
                modified_at DATETIME NOT NULL,
                num_uses_succeeded INTEGER NOT NULL DEFAULT 0,
                num_uses_failed INTEGER NOT NULL DEFAULT 0,

                machine_id TEXT NOT NULL,
                binary_id TEXT,
                network_interface_id TEXT,

                cmd TEXT NOT NULL,
                pwd VARCHAR(256),
                env TEXT,
                stdin TEXT,
                timeout INTEGER NOT NULL DEFAULT 60,

                pid INTEGER,
                started_at DATETIME,
                ended_at DATETIME,
                exit_code INTEGER,
                stdout TEXT NOT NULL DEFAULT '',
                stderr TEXT NOT NULL DEFAULT '',

                status VARCHAR(15) NOT NULL DEFAULT 'queued',
                retry_at DATETIME,

                FOREIGN KEY (machine_id) REFERENCES machine_machine(id) ON DELETE CASCADE,
                FOREIGN KEY (binary_id) REFERENCES machine_binary(id) ON DELETE SET NULL,
                FOREIGN KEY (network_interface_id) REFERENCES machine_networkinterface(id) ON DELETE SET NULL
            );
            CREATE INDEX IF NOT EXISTS machine_process_status_idx ON machine_process(status);
            CREATE INDEX IF NOT EXISTS machine_process_retry_at_idx ON machine_process(retry_at);
            CREATE INDEX IF NOT EXISTS machine_process_machine_id_idx ON machine_process(machine_id);
            """,
            # Reverse SQL: drop child tables before their FK parents
            reverse_sql="""
            DROP TABLE IF EXISTS machine_process;
            DROP TABLE IF EXISTS machine_binary;
            DROP TABLE IF EXISTS machine_networkinterface;
            DROP TABLE IF EXISTS machine_machine;
            """
        ),
    ]
|
||||
@@ -1,102 +0,0 @@
|
||||
# Squashed migration: replaces 0001-0004
|
||||
# For fresh installs: creates final schema
|
||||
# For dev users with 0001-0004 applied: marked as applied (no-op)
|
||||
|
||||
from uuid import uuid4
|
||||
from django.db import migrations, models
|
||||
import django.db.models.deletion
|
||||
import django.utils.timezone
|
||||
|
||||
|
||||
class Migration(migrations.Migration):
    """
    Squashed migration replacing the historical 0001-0004 chain.

    Fresh installs create the final schema directly; dev installs that
    already applied 0001-0004 have this marked as applied (no-op).
    """

    initial = True

    # Historical migrations this squash replaces (Django marks it applied
    # automatically when all of these are already applied).
    replaces = [
        ('machine', '0001_initial'),
        ('machine', '0002_alter_machine_stats_installedbinary'),
        ('machine', '0003_alter_installedbinary_options_and_more'),
        ('machine', '0004_alter_installedbinary_abspath_and_more'),
    ]

    dependencies = []

    operations = [
        migrations.CreateModel(
            name='Machine',
            fields=[
                ('num_uses_failed', models.PositiveIntegerField(default=0)),
                ('num_uses_succeeded', models.PositiveIntegerField(default=0)),
                ('id', models.UUIDField(default=uuid4, editable=False, primary_key=True, serialize=False, unique=True)),
                ('created_at', models.DateTimeField(db_index=True, default=django.utils.timezone.now)),
                ('modified_at', models.DateTimeField(auto_now=True)),
                ('guid', models.CharField(default=None, editable=False, max_length=64, unique=True)),
                ('hostname', models.CharField(default=None, max_length=63)),
                ('hw_in_docker', models.BooleanField(default=False)),
                ('hw_in_vm', models.BooleanField(default=False)),
                ('hw_manufacturer', models.CharField(default=None, max_length=63)),
                ('hw_product', models.CharField(default=None, max_length=63)),
                ('hw_uuid', models.CharField(default=None, max_length=255)),
                ('os_arch', models.CharField(default=None, max_length=15)),
                ('os_family', models.CharField(default=None, max_length=15)),
                ('os_platform', models.CharField(default=None, max_length=63)),
                ('os_release', models.CharField(default=None, max_length=63)),
                ('os_kernel', models.CharField(default=None, max_length=255)),
                ('stats', models.JSONField(default=dict)),
                ('config', models.JSONField(blank=True, default=dict)),
            ],
        ),
        migrations.CreateModel(
            name='NetworkInterface',
            fields=[
                ('num_uses_failed', models.PositiveIntegerField(default=0)),
                ('num_uses_succeeded', models.PositiveIntegerField(default=0)),
                ('id', models.UUIDField(default=uuid4, editable=False, primary_key=True, serialize=False, unique=True)),
                ('created_at', models.DateTimeField(db_index=True, default=django.utils.timezone.now)),
                ('modified_at', models.DateTimeField(auto_now=True)),
                ('mac_address', models.CharField(default=None, editable=False, max_length=17)),
                ('ip_public', models.GenericIPAddressField(default=None, editable=False)),
                ('ip_local', models.GenericIPAddressField(default=None, editable=False)),
                ('dns_server', models.GenericIPAddressField(default=None, editable=False)),
                ('hostname', models.CharField(default=None, max_length=63)),
                ('iface', models.CharField(default=None, max_length=15)),
                ('isp', models.CharField(default=None, max_length=63)),
                ('city', models.CharField(default=None, max_length=63)),
                ('region', models.CharField(default=None, max_length=63)),
                ('country', models.CharField(default=None, max_length=63)),
                ('machine', models.ForeignKey(default=None, on_delete=django.db.models.deletion.CASCADE, to='machine.machine')),
            ],
            options={
                'unique_together': {('machine', 'ip_public', 'ip_local', 'mac_address', 'dns_server')},
            },
        ),
        # Dependency model removed - not needed anymore
        migrations.CreateModel(
            name='Binary',
            fields=[
                ('num_uses_failed', models.PositiveIntegerField(default=0)),
                ('num_uses_succeeded', models.PositiveIntegerField(default=0)),
                ('id', models.UUIDField(default=uuid4, editable=False, primary_key=True, serialize=False, unique=True)),
                ('created_at', models.DateTimeField(db_index=True, default=django.utils.timezone.now)),
                ('modified_at', models.DateTimeField(auto_now=True)),
                ('name', models.CharField(blank=True, db_index=True, default=None, max_length=63)),
                ('binprovider', models.CharField(blank=True, default=None, max_length=31)),
                ('abspath', models.CharField(blank=True, default=None, max_length=255)),
                ('version', models.CharField(blank=True, default=None, max_length=32)),
                ('sha256', models.CharField(blank=True, default=None, max_length=64)),
                ('machine', models.ForeignKey(blank=True, default=None, on_delete=django.db.models.deletion.CASCADE, to='machine.machine')),
                # Fields added in migration 0005 (included here for fresh installs)
                ('binproviders', models.CharField(blank=True, default='env', max_length=127)),
                ('output_dir', models.CharField(blank=True, default='', max_length=255)),
                ('overrides', models.JSONField(blank=True, default=dict)),
                ('retry_at', models.DateTimeField(blank=True, db_index=True, default=django.utils.timezone.now, null=True)),
                ('status', models.CharField(choices=[('queued', 'Queued'), ('started', 'Started'), ('succeeded', 'Succeeded'), ('failed', 'Failed')], db_index=True, default='queued', max_length=16)),
                # dependency FK removed - Dependency model deleted
            ],
            options={
                'verbose_name': 'Binary',
                'verbose_name_plural': 'Binaries',
                'unique_together': {('machine', 'name', 'abspath', 'version', 'sha256')},
            },
        ),
    ]
|
||||
@@ -1,16 +0,0 @@
|
||||
# Generated manually on 2025-12-26
|
||||
# NOTE: This migration is intentionally empty but kept for dependency chain
|
||||
# The Dependency model was removed in 0004, so all operations have been stripped
|
||||
|
||||
from django.db import migrations
|
||||
|
||||
|
||||
class Migration(migrations.Migration):
    """Stub migration kept only to preserve the dependency chain.

    The Dependency model was removed in 0004, so every operation this
    migration once contained has been stripped.
    """

    dependencies = [
        ('machine', '0001_squashed'),
    ]

    operations = [
        # All Dependency operations removed - model deleted in 0004
    ]
|
||||
@@ -1,17 +0,0 @@
|
||||
# Generated by Django 6.0 on 2025-12-28 05:12
|
||||
# NOTE: This migration is intentionally empty but kept for dependency chain
|
||||
# The Dependency model was removed in 0004, all operations stripped
|
||||
|
||||
from django.db import migrations
|
||||
|
||||
|
||||
class Migration(migrations.Migration):
    """Stub migration kept only to preserve the dependency chain.

    The Dependency model was removed in 0004; all operations were stripped.
    Exists solely so users upgrading from old dev versions keep a valid
    migration history.
    """

    dependencies = [
        ('machine', '0002_rename_custom_cmds_to_overrides'),
    ]

    operations = [
        # All operations removed - Dependency model deleted in 0004
        # This is a stub migration for users upgrading from old dev versions
    ]
|
||||
@@ -1,28 +0,0 @@
|
||||
# Generated migration - removes Dependency model entirely
|
||||
# NOTE: This is a cleanup migration for users upgrading from old dev versions
|
||||
# that had the Dependency model. Fresh installs never create this table.
|
||||
|
||||
from django.db import migrations
|
||||
|
||||
|
||||
def drop_dependency_table(apps, schema_editor):
    """
    Drop old Dependency table if it exists (from dev versions that had it).
    Safe to run multiple times, safe if table doesn't exist.

    Does NOT touch machine_binary - that's our current Binary model table!
    """
    # machine_dependency first, then the old InstalledBinary table in case
    # it somehow still exists as well.
    stale_tables = ('machine_dependency', 'machine_installedbinary')
    for table_name in stale_tables:
        schema_editor.execute(f'DROP TABLE IF EXISTS {table_name}')
|
||||
|
||||
|
||||
class Migration(migrations.Migration):
    """Cleanup migration: drops leftover Dependency / InstalledBinary tables
    for users upgrading from old dev versions. No-op on fresh installs."""

    dependencies = [
        ('machine', '0003_alter_dependency_id_alter_installedbinary_dependency_and_more'),
    ]

    operations = [
        # Reverse is a no-op: the dropped tables are never recreated.
        migrations.RunPython(drop_dependency_table, migrations.RunPython.noop),
    ]
|
||||
@@ -1,104 +0,0 @@
|
||||
# Generated by Django 6.0 on 2025-12-29 06:45
|
||||
|
||||
import django.db.models.deletion
|
||||
import django.utils.timezone
|
||||
from archivebox.uuid_compat import uuid7
|
||||
from django.db import migrations, models
|
||||
|
||||
|
||||
class Migration(migrations.Migration):
    """State-only migration: aligns Django's in-memory migration state with
    the schema the raw-SQL migrations already created. No database changes."""

    dependencies = [
        ('machine', '0004_drop_dependency_table'),
    ]

    operations = [
        # Update Django's state only - database already has correct schema
        migrations.SeparateDatabaseAndState(
            state_operations=[
                migrations.AddField(
                    model_name='binary',
                    name='binproviders',
                    field=models.CharField(blank=True, default='env', help_text='Comma-separated list of allowed providers: apt,brew,pip,npm,env', max_length=127),
                ),
                migrations.AddField(
                    model_name='binary',
                    name='output_dir',
                    field=models.CharField(blank=True, default='', help_text='Directory where installation hook logs are stored', max_length=255),
                ),
                migrations.AddField(
                    model_name='binary',
                    name='overrides',
                    field=models.JSONField(blank=True, default=dict, help_text="Provider-specific overrides: {'apt': {'packages': ['pkg']}, ...}"),
                ),
                migrations.AddField(
                    model_name='binary',
                    name='retry_at',
                    field=models.DateTimeField(blank=True, db_index=True, default=django.utils.timezone.now, help_text='When to retry this binary installation', null=True),
                ),
                migrations.AddField(
                    model_name='binary',
                    name='status',
                    field=models.CharField(choices=[('queued', 'Queued'), ('started', 'Started'), ('succeeded', 'Succeeded'), ('failed', 'Failed')], db_index=True, default='queued', max_length=16),
                ),
                migrations.AlterField(
                    model_name='binary',
                    name='abspath',
                    field=models.CharField(blank=True, default='', max_length=255),
                ),
                migrations.AlterField(
                    model_name='binary',
                    name='binprovider',
                    field=models.CharField(blank=True, default='', help_text='Provider that successfully installed this binary', max_length=31),
                ),
                migrations.AlterField(
                    model_name='binary',
                    name='id',
                    field=models.UUIDField(default=uuid7, editable=False, primary_key=True, serialize=False, unique=True),
                ),
                migrations.AlterField(
                    model_name='binary',
                    name='machine',
                    field=models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='machine.machine'),
                ),
                migrations.AlterField(
                    model_name='binary',
                    name='name',
                    field=models.CharField(blank=True, db_index=True, default='', max_length=63),
                ),
                migrations.AlterField(
                    model_name='binary',
                    name='sha256',
                    field=models.CharField(blank=True, default='', max_length=64),
                ),
                migrations.AlterField(
                    model_name='binary',
                    name='version',
                    field=models.CharField(blank=True, default='', max_length=32),
                ),
                migrations.AlterField(
                    model_name='machine',
                    name='config',
                    field=models.JSONField(blank=True, default=dict, help_text='Machine-specific config overrides (e.g., resolved binary paths like WGET_BINARY)', null=True),
                ),
                migrations.AlterField(
                    model_name='machine',
                    name='id',
                    field=models.UUIDField(default=uuid7, editable=False, primary_key=True, serialize=False, unique=True),
                ),
                migrations.AlterField(
                    model_name='machine',
                    name='stats',
                    field=models.JSONField(blank=True, default=dict, null=True),
                ),
                migrations.AlterField(
                    model_name='networkinterface',
                    name='id',
                    field=models.UUIDField(default=uuid7, editable=False, primary_key=True, serialize=False, unique=True),
                ),
            ],
            database_operations=[
                # No database changes - schema already correct from previous migrations
            ],
        ),
    ]
|
||||
@@ -433,6 +433,190 @@ class Binary(ModelWithHealthStats):
|
||||
kill_process(pid_file)
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Process Model
|
||||
# =============================================================================
|
||||
|
||||
class ProcessManager(models.Manager):
    """Manager for Process model."""

    def create_for_archiveresult(self, archiveresult, **kwargs):
        """
        Create a Process record for an ArchiveResult.

        Called during migration and when creating new ArchiveResults.

        Missing or falsy `pwd`/`cmd` values fall back to defaults derived
        from the ArchiveResult. (Previously `defaults.update(kwargs)` ran
        *after* computing the fallbacks, so an explicit falsy value like
        `cmd=None` or `pwd=''` clobbered them and produced invalid records.)

        Returns the newly created Process.
        """
        params = {
            'machine': Machine.current(),
            'status': 'queued',
            'timeout': 120,
            'env': {},
        }
        params.update(kwargs)

        # Apply ArchiveResult-derived fallbacks AFTER merging kwargs so a
        # falsy caller value can no longer override them.
        if not params.get('pwd'):
            params['pwd'] = str(archiveresult.snapshot.output_dir / archiveresult.plugin)
        if not params.get('cmd'):
            params['cmd'] = []

        return self.create(**params)
|
||||
|
||||
|
||||
class Process(ModelWithHealthStats):
    """
    Tracks a single OS process execution.

    Process represents the actual subprocess spawned to execute a hook.
    One Process can optionally be associated with an ArchiveResult (via OneToOne),
    but Process can also exist standalone for internal operations.

    Follows the unified state machine pattern:
    - queued: Process ready to launch
    - running: Process actively executing
    - exited: Process completed (check exit_code for success/failure)

    State machine calls launch() to spawn the process and monitors its lifecycle.
    """

    class StatusChoices(models.TextChoices):
        QUEUED = 'queued', 'Queued'
        RUNNING = 'running', 'Running'
        EXITED = 'exited', 'Exited'

    # Primary fields
    id = models.UUIDField(primary_key=True, default=uuid7, editable=False, unique=True)
    created_at = models.DateTimeField(default=timezone.now, db_index=True)
    modified_at = models.DateTimeField(auto_now=True)

    # Machine FK - required (every process runs on a machine)
    machine = models.ForeignKey(
        Machine,
        on_delete=models.CASCADE,
        null=False,
        related_name='processes',
        help_text='Machine where this process executed'
    )

    # Execution metadata
    pwd = models.CharField(max_length=512, default='', null=False, blank=True,
                           help_text='Working directory for process execution')
    cmd = models.JSONField(default=list, null=False, blank=True,
                           help_text='Command as array of arguments')
    env = models.JSONField(default=dict, null=False, blank=True,
                           help_text='Environment variables for process')
    timeout = models.IntegerField(default=120, null=False,
                                  help_text='Timeout in seconds')

    # Process results
    pid = models.IntegerField(default=None, null=True, blank=True,
                              help_text='OS process ID')
    exit_code = models.IntegerField(default=None, null=True, blank=True,
                                    help_text='Process exit code (0 = success)')
    stdout = models.TextField(default='', null=False, blank=True,
                              help_text='Standard output from process')
    stderr = models.TextField(default='', null=False, blank=True,
                              help_text='Standard error from process')

    # Timing
    started_at = models.DateTimeField(default=None, null=True, blank=True,
                                      help_text='When process was launched')
    ended_at = models.DateTimeField(default=None, null=True, blank=True,
                                    help_text='When process completed/terminated')

    # Optional FKs
    binary = models.ForeignKey(
        Binary,
        on_delete=models.SET_NULL,
        null=True, blank=True,
        related_name='processes',
        help_text='Binary used by this process'
    )
    iface = models.ForeignKey(
        NetworkInterface,
        on_delete=models.SET_NULL,
        null=True, blank=True,
        related_name='processes',
        help_text='Network interface used by this process'
    )

    # Optional connection URL (for CDP, sonic, etc.)
    url = models.URLField(max_length=2048, default=None, null=True, blank=True,
                          help_text='Connection URL (CDP endpoint, sonic server, etc.)')

    # Reverse relation to ArchiveResult (OneToOne from AR side)
    # archiveresult: OneToOneField defined on ArchiveResult model

    # State machine fields
    status = models.CharField(
        max_length=16,
        choices=StatusChoices.choices,
        default=StatusChoices.QUEUED,
        db_index=True
    )
    # retry_at defaults to "now" so freshly created processes are picked up
    # immediately by the worker loop; set to None when no retry is pending.
    retry_at = models.DateTimeField(
        default=timezone.now,
        null=True, blank=True,
        db_index=True,
        help_text='When to retry this process'
    )

    # Health stats
    num_uses_failed = models.PositiveIntegerField(default=0)
    num_uses_succeeded = models.PositiveIntegerField(default=0)

    # Dotted path used by the state-machine registry to find our machine class
    state_machine_name: str = 'archivebox.machine.models.ProcessMachine'

    objects: ProcessManager = ProcessManager()

    class Meta:
        app_label = 'machine'
        verbose_name = 'Process'
        verbose_name_plural = 'Processes'
        indexes = [
            models.Index(fields=['machine', 'status', 'retry_at']),
            models.Index(fields=['binary', 'exit_code']),
        ]

    def __str__(self) -> str:
        # Show only the first 3 args to keep log lines short
        cmd_str = ' '.join(self.cmd[:3]) if self.cmd else '(no cmd)'
        return f'Process[{self.id}] {cmd_str} ({self.status})'

    # Properties that delegate to related objects
    @property
    def cmd_version(self) -> str:
        """Get version from associated binary."""
        return self.binary.version if self.binary else ''

    @property
    def bin_abspath(self) -> str:
        """Get absolute path from associated binary."""
        return self.binary.abspath if self.binary else ''

    @property
    def plugin(self) -> str:
        """Get plugin name from associated ArchiveResult (if any)."""
        if hasattr(self, 'archiveresult'):
            # Inline import to avoid circular dependency
            return self.archiveresult.plugin
        return ''

    @property
    def hook_name(self) -> str:
        """Get hook name from associated ArchiveResult (if any)."""
        if hasattr(self, 'archiveresult'):
            return self.archiveresult.hook_name
        return ''

    def update_and_requeue(self, **kwargs):
        """
        Update process fields and requeue for worker state machine.
        Sets modified_at to ensure workers pick up changes.
        """
        for key, value in kwargs.items():
            setattr(self, key, value)
        self.modified_at = timezone.now()
        self.save()
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Binary State Machine
|
||||
# =============================================================================
|
||||
@@ -550,11 +734,119 @@ class BinaryMachine(BaseStateMachine, strict_states=True):
|
||||
self.binary.increment_health_stats(success=False)
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Process State Machine
|
||||
# =============================================================================
|
||||
|
||||
class ProcessMachine(BaseStateMachine, strict_states=True):
    """
    State machine for managing Process (OS subprocess) lifecycle.

    Process Lifecycle:
    ┌─────────────────────────────────────────────────────────────┐
    │ QUEUED State                                                │
    │ • Process ready to launch, waiting for resources            │
    └─────────────────────────────────────────────────────────────┘
              ↓ tick() when can_start()
    ┌─────────────────────────────────────────────────────────────┐
    │ RUNNING State → enter_running()                             │
    │ 1. process.launch()                                         │
    │    • Spawn subprocess with cmd, pwd, env, timeout           │
    │    • Set pid, started_at                                    │
    │    • Process runs in background or foreground               │
    │ 2. Monitor process completion                               │
    │    • Check exit code when process completes                 │
    └─────────────────────────────────────────────────────────────┘
              ↓ tick() checks is_exited()
    ┌─────────────────────────────────────────────────────────────┐
    │ EXITED State                                                │
    │ • Process completed (exit_code set)                         │
    │ • Health stats incremented                                  │
    │ • stdout/stderr captured                                    │
    └─────────────────────────────────────────────────────────────┘

    Note: This is a simpler state machine than ArchiveResult.
    Process is just about execution lifecycle. ArchiveResult handles
    the archival-specific logic (status, output parsing, etc.).
    """

    # Attribute name on the machine instance that holds the Process model
    model_attr_name = 'process'

    # States
    queued = State(value=Process.StatusChoices.QUEUED, initial=True)
    running = State(value=Process.StatusChoices.RUNNING)
    exited = State(value=Process.StatusChoices.EXITED, final=True)

    # Tick Event - transitions based on conditions
    tick = (
        queued.to.itself(unless='can_start') |
        queued.to(running, cond='can_start') |
        running.to.itself(unless='is_exited') |
        running.to(exited, cond='is_exited')
    )

    # Additional events (for explicit control).
    # NOTE: unlike tick, these transitions carry no cond/unless guards.
    launch = queued.to(running)
    kill = running.to(exited)

    def can_start(self) -> bool:
        """Check if process can start (has cmd and machine)."""
        return bool(self.process.cmd and self.process.machine)

    def is_exited(self) -> bool:
        """Check if process has exited (exit_code is set)."""
        return self.process.exit_code is not None

    @queued.enter
    def enter_queued(self):
        """Process is queued for execution."""
        self.process.update_and_requeue(
            retry_at=timezone.now(),
            status=Process.StatusChoices.QUEUED,
        )

    @running.enter
    def enter_running(self):
        """Start process execution."""
        # Lock the process while it runs: retry_at is pushed past the
        # timeout so workers don't pick it up again mid-run
        self.process.update_and_requeue(
            retry_at=timezone.now() + timedelta(seconds=self.process.timeout),
            status=Process.StatusChoices.RUNNING,
            started_at=timezone.now(),
        )

        # Launch the subprocess
        # NOTE: This is a placeholder - actual launch logic would
        # be implemented based on how hooks currently spawn processes
        # For now, Process is a data model that tracks execution metadata
        # The actual subprocess spawning is still handled by run_hook()

        # Mark as immediately exited for now (until we refactor run_hook)
        # In the future, this would actually spawn the subprocess
        self.process.exit_code = 0  # Placeholder
        self.process.save()

    @exited.enter
    def enter_exited(self):
        """Process has exited."""
        # exit_code == 0 counts as success for health-stat purposes
        success = self.process.exit_code == 0

        self.process.update_and_requeue(
            retry_at=None,
            status=Process.StatusChoices.EXITED,
            ended_at=timezone.now(),
        )

        # Increment health stats based on exit code
        self.process.increment_health_stats(success=success)
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# State Machine Registration
|
||||
# =============================================================================
|
||||
|
||||
# Manually register state machines with python-statemachine registry
# NOTE(review): registration appears to be needed so these strict_states
# machines are discoverable by name — confirm against BaseStateMachine usage.
registry.register(BinaryMachine)
registry.register(ProcessMachine)
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user