much better tests and add page ui

This commit is contained in:
Nick Sweeting
2025-12-29 04:02:11 -08:00
parent 9487f8a0de
commit 30c60eef76
93 changed files with 2998 additions and 2712 deletions

View File

@@ -4,7 +4,7 @@ from django.contrib import admin
from django.utils.html import format_html
from archivebox.base_models.admin import BaseModelAdmin, ConfigEditorMixin
from archivebox.machine.models import Machine, NetworkInterface, Binary
from archivebox.machine.models import Machine, NetworkInterface, Binary, Process
class MachineAdmin(ConfigEditorMixin, BaseModelAdmin):
@@ -143,7 +143,87 @@ class BinaryAdmin(BaseModelAdmin):
)
class ProcessAdmin(BaseModelAdmin):
    """Admin views for ``Process`` rows.

    The changelist shows links to the related machine, binary and
    ArchiveResult plus a truncated command preview; the detail page groups the
    model fields into cards.
    """

    list_display = ('id', 'created_at', 'machine_info', 'archiveresult_link', 'cmd_str', 'status', 'exit_code', 'pid', 'binary_info', 'health')
    sort_fields = ('id', 'created_at', 'status', 'exit_code', 'pid')
    search_fields = ('id', 'machine__id', 'binary__name', 'cmd', 'pwd', 'stdout', 'stderr')

    readonly_fields = ('created_at', 'modified_at', 'machine', 'binary', 'iface', 'archiveresult_link')

    fieldsets = (
        ('Process Info', {
            'fields': ('machine', 'archiveresult_link', 'status', 'retry_at'),
            'classes': ('card',),
        }),
        ('Command', {
            'fields': ('cmd', 'pwd', 'env', 'timeout'),
            'classes': ('card', 'wide'),
        }),
        ('Execution', {
            'fields': ('binary', 'iface', 'pid', 'exit_code', 'url'),
            'classes': ('card',),
        }),
        ('Timing', {
            'fields': ('started_at', 'ended_at'),
            'classes': ('card',),
        }),
        ('Output', {
            'fields': ('stdout', 'stderr'),
            'classes': ('card', 'wide', 'collapse'),
        }),
        ('Usage', {
            'fields': ('num_uses_succeeded', 'num_uses_failed'),
            'classes': ('card',),
        }),
        ('Timestamps', {
            'fields': ('created_at', 'modified_at'),
            'classes': ('card',),
        }),
    )

    list_filter = ('status', 'exit_code', 'machine_id')
    ordering = ['-created_at']
    list_per_page = 100
    actions = ["delete_selected"]

    @admin.display(description='Machine', ordering='machine__id')
    def machine_info(self, process):
        """Link to the machine's admin page, labelled with its short id and hostname."""
        return format_html(
            '<a href="/admin/machine/machine/{}/change"><b><code>[{}]</code></b> &nbsp; {}</a>',
            process.machine.id, str(process.machine.id)[:8], process.machine.hostname,
        )

    @admin.display(description='Binary', ordering='binary__name')
    def binary_info(self, process):
        """Link to the binary's admin page, or ``'-'`` when no binary is attached."""
        if not process.binary:
            return '-'
        return format_html(
            '<a href="/admin/machine/binary/{}/change"><code>{}</code> v{}</a>',
            process.binary.id, process.binary.name, process.binary.version,
        )

    @admin.display(description='ArchiveResult')
    def archiveresult_link(self, process):
        """Link to the related ArchiveResult, or ``'-'`` when the process has none."""
        if not hasattr(process, 'archiveresult'):
            return '-'
        ar = process.archiveresult
        return format_html(
            '<a href="/admin/core/archiveresult/{}/change"><code>{}</code> → {}</a>',
            ar.id, ar.plugin, ar.snapshot.url[:50],
        )

    @admin.display(description='Command')
    def cmd_str(self, process):
        """Render a short command preview: the first 3 arguments, at most 80 characters.

        Returns ``'-'`` for an empty command. The ``' ...'`` suffix is added
        only when an argument list really has more than 3 entries.
        """
        if not process.cmd:
            return '-'
        if isinstance(process.cmd, (list, tuple)):
            # str() each entry: the stored JSON array may contain non-string
            # values, which str.join() would reject with a TypeError.
            preview = ' '.join(str(arg) for arg in process.cmd[:3])
            if len(process.cmd) > 3:
                preview += ' ...'
        else:
            # Non-list value: show it as-is. Its character count says nothing
            # about dropped arguments, so no ellipsis is appended.
            preview = str(process.cmd)
        return format_html('<code style="font-size: 0.9em;">{}</code>', preview[:80])
def register_admin(admin_site):
    """Register each machine-app model on ``admin_site`` with its admin class."""
    registrations = (
        (Machine, MachineAdmin),
        (NetworkInterface, NetworkInterfaceAdmin),
        (Binary, BinaryAdmin),
        (Process, ProcessAdmin),
    )
    # Same registration order as before: Machine, NetworkInterface, Binary, Process.
    for model_cls, model_admin_cls in registrations:
        admin_site.register(model_cls, model_admin_cls)

View File

@@ -12,7 +12,11 @@ class MachineConfig(AppConfig):
def ready(self):
    """Import models to register state machines with the registry.

    The import is skipped entirely when ``makemigrations`` appears in
    ``sys.argv``, to avoid premature state machine access while migrations
    are being generated.
    """
    import sys

    # Skip during makemigrations to avoid premature state machine access.
    # The guard must come before any import of the models module, otherwise
    # it has no effect.
    if 'makemigrations' in sys.argv:
        return

    from archivebox.machine import models  # noqa: F401
def register_admin(admin_site):

View File

@@ -0,0 +1,143 @@
# Generated by hand on 2025-12-29
# Creates Machine, Binary, NetworkInterface, and Process tables using raw SQL
from django.db import migrations
class Migration(migrations.Migration):
"""Initial machine-app migration, written by hand as a single RunSQL operation.

Forward: creates machine_machine, machine_networkinterface, machine_binary and
machine_process (plus their indexes) using CREATE ... IF NOT EXISTS, so the
statements are no-ops for tables/indexes that already exist.
Reverse: drops the four tables, child tables first.
"""
initial = True
dependencies = [
]
operations = [
# NOTE(review): RunSQL is given no state_operations here, so this operation
# does not add these models to Django's migration state -- confirm the model
# state is declared somewhere else.
# NOTE(review): the machine_process DDL below appears to differ from the
# Process model class: the table has `network_interface_id` and `stdin`
# columns and no `url` column, while the model declares an `iface` FK and a
# `url` field and no `stdin`; pwd is VARCHAR(256) vs max_length=512, timeout
# defaults to 60 vs 120, status is VARCHAR(15) vs max_length=16. Verify before
# relying on the ORM against this table.
migrations.RunSQL(
# Forward SQL
sql="""
-- Create machine_machine table
CREATE TABLE IF NOT EXISTS machine_machine (
id TEXT PRIMARY KEY NOT NULL,
created_at DATETIME NOT NULL,
modified_at DATETIME NOT NULL,
num_uses_succeeded INTEGER NOT NULL DEFAULT 0,
num_uses_failed INTEGER NOT NULL DEFAULT 0,
guid VARCHAR(64) NOT NULL UNIQUE,
hostname VARCHAR(63) NOT NULL,
hw_in_docker BOOLEAN NOT NULL DEFAULT 0,
hw_in_vm BOOLEAN NOT NULL DEFAULT 0,
hw_manufacturer VARCHAR(63) NOT NULL,
hw_product VARCHAR(63) NOT NULL,
hw_uuid VARCHAR(255) NOT NULL,
os_arch VARCHAR(15) NOT NULL,
os_family VARCHAR(15) NOT NULL,
os_platform VARCHAR(63) NOT NULL,
os_release VARCHAR(63) NOT NULL,
os_kernel VARCHAR(255) NOT NULL,
stats TEXT,
config TEXT
);
CREATE INDEX IF NOT EXISTS machine_machine_guid_idx ON machine_machine(guid);
-- Create machine_networkinterface table
CREATE TABLE IF NOT EXISTS machine_networkinterface (
id TEXT PRIMARY KEY NOT NULL,
created_at DATETIME NOT NULL,
modified_at DATETIME NOT NULL,
num_uses_succeeded INTEGER NOT NULL DEFAULT 0,
num_uses_failed INTEGER NOT NULL DEFAULT 0,
machine_id TEXT NOT NULL,
iface VARCHAR(15) NOT NULL,
ip_public VARCHAR(39) NOT NULL,
ip_local VARCHAR(39) NOT NULL,
mac_address VARCHAR(17) NOT NULL,
dns_server VARCHAR(39) NOT NULL,
hostname VARCHAR(256) NOT NULL,
isp VARCHAR(256) NOT NULL,
city VARCHAR(100) NOT NULL,
region VARCHAR(100) NOT NULL,
country VARCHAR(100) NOT NULL,
FOREIGN KEY (machine_id) REFERENCES machine_machine(id) ON DELETE CASCADE
);
CREATE INDEX IF NOT EXISTS machine_networkinterface_machine_id_idx ON machine_networkinterface(machine_id);
-- Create machine_binary table
CREATE TABLE IF NOT EXISTS machine_binary (
id TEXT PRIMARY KEY NOT NULL,
created_at DATETIME NOT NULL,
modified_at DATETIME NOT NULL,
num_uses_succeeded INTEGER NOT NULL DEFAULT 0,
num_uses_failed INTEGER NOT NULL DEFAULT 0,
machine_id TEXT NOT NULL,
name VARCHAR(63) NOT NULL,
binproviders VARCHAR(127) NOT NULL DEFAULT 'env',
overrides TEXT NOT NULL DEFAULT '{}',
binprovider VARCHAR(31) NOT NULL DEFAULT '',
abspath VARCHAR(255) NOT NULL DEFAULT '',
version VARCHAR(32) NOT NULL DEFAULT '',
sha256 VARCHAR(64) NOT NULL DEFAULT '',
status VARCHAR(16) NOT NULL DEFAULT 'queued',
retry_at DATETIME,
output_dir VARCHAR(255) NOT NULL DEFAULT '',
FOREIGN KEY (machine_id) REFERENCES machine_machine(id) ON DELETE CASCADE,
UNIQUE(machine_id, name, abspath, version, sha256)
);
CREATE INDEX IF NOT EXISTS machine_binary_machine_id_idx ON machine_binary(machine_id);
CREATE INDEX IF NOT EXISTS machine_binary_name_idx ON machine_binary(name);
CREATE INDEX IF NOT EXISTS machine_binary_status_idx ON machine_binary(status);
CREATE INDEX IF NOT EXISTS machine_binary_retry_at_idx ON machine_binary(retry_at);
-- Create machine_process table
CREATE TABLE IF NOT EXISTS machine_process (
id TEXT PRIMARY KEY NOT NULL,
created_at DATETIME NOT NULL,
modified_at DATETIME NOT NULL,
num_uses_succeeded INTEGER NOT NULL DEFAULT 0,
num_uses_failed INTEGER NOT NULL DEFAULT 0,
machine_id TEXT NOT NULL,
binary_id TEXT,
network_interface_id TEXT,
cmd TEXT NOT NULL,
pwd VARCHAR(256),
env TEXT,
stdin TEXT,
timeout INTEGER NOT NULL DEFAULT 60,
pid INTEGER,
started_at DATETIME,
ended_at DATETIME,
exit_code INTEGER,
stdout TEXT NOT NULL DEFAULT '',
stderr TEXT NOT NULL DEFAULT '',
status VARCHAR(15) NOT NULL DEFAULT 'queued',
retry_at DATETIME,
FOREIGN KEY (machine_id) REFERENCES machine_machine(id) ON DELETE CASCADE,
FOREIGN KEY (binary_id) REFERENCES machine_binary(id) ON DELETE SET NULL,
FOREIGN KEY (network_interface_id) REFERENCES machine_networkinterface(id) ON DELETE SET NULL
);
CREATE INDEX IF NOT EXISTS machine_process_status_idx ON machine_process(status);
CREATE INDEX IF NOT EXISTS machine_process_retry_at_idx ON machine_process(retry_at);
CREATE INDEX IF NOT EXISTS machine_process_machine_id_idx ON machine_process(machine_id);
""",
# Reverse SQL
# Drops in child-to-parent order (process, binary, networkinterface, machine)
# so referencing tables are removed before the tables they reference.
reverse_sql="""
DROP TABLE IF EXISTS machine_process;
DROP TABLE IF EXISTS machine_binary;
DROP TABLE IF EXISTS machine_networkinterface;
DROP TABLE IF EXISTS machine_machine;
"""
),
]

View File

@@ -1,102 +0,0 @@
# Squashed migration: replaces 0001-0004
# For fresh installs: creates final schema
# For dev users with 0001-0004 applied: marked as applied (no-op)
from uuid import uuid4
from django.db import migrations, models
import django.db.models.deletion
import django.utils.timezone
class Migration(migrations.Migration):
"""Squashed initial migration declaring Machine, NetworkInterface and Binary.

`replaces` lists the four migrations this one stands in for; it has no
dependencies of its own.
"""
initial = True
# Migrations superseded by this squashed migration.
replaces = [
('machine', '0001_initial'),
('machine', '0002_alter_machine_stats_installedbinary'),
('machine', '0003_alter_installedbinary_options_and_more'),
('machine', '0004_alter_installedbinary_abspath_and_more'),
]
dependencies = []
operations = [
# Machine: host identity (guid/hostname), hardware and OS descriptors,
# plus JSON `stats` and `config` blobs.
migrations.CreateModel(
name='Machine',
fields=[
('num_uses_failed', models.PositiveIntegerField(default=0)),
('num_uses_succeeded', models.PositiveIntegerField(default=0)),
('id', models.UUIDField(default=uuid4, editable=False, primary_key=True, serialize=False, unique=True)),
('created_at', models.DateTimeField(db_index=True, default=django.utils.timezone.now)),
('modified_at', models.DateTimeField(auto_now=True)),
('guid', models.CharField(default=None, editable=False, max_length=64, unique=True)),
('hostname', models.CharField(default=None, max_length=63)),
('hw_in_docker', models.BooleanField(default=False)),
('hw_in_vm', models.BooleanField(default=False)),
('hw_manufacturer', models.CharField(default=None, max_length=63)),
('hw_product', models.CharField(default=None, max_length=63)),
('hw_uuid', models.CharField(default=None, max_length=255)),
('os_arch', models.CharField(default=None, max_length=15)),
('os_family', models.CharField(default=None, max_length=15)),
('os_platform', models.CharField(default=None, max_length=63)),
('os_release', models.CharField(default=None, max_length=63)),
('os_kernel', models.CharField(default=None, max_length=255)),
('stats', models.JSONField(default=dict)),
('config', models.JSONField(blank=True, default=dict)),
],
),
# NetworkInterface: belongs to a Machine (CASCADE); unique per
# (machine, ip_public, ip_local, mac_address, dns_server).
migrations.CreateModel(
name='NetworkInterface',
fields=[
('num_uses_failed', models.PositiveIntegerField(default=0)),
('num_uses_succeeded', models.PositiveIntegerField(default=0)),
('id', models.UUIDField(default=uuid4, editable=False, primary_key=True, serialize=False, unique=True)),
('created_at', models.DateTimeField(db_index=True, default=django.utils.timezone.now)),
('modified_at', models.DateTimeField(auto_now=True)),
('mac_address', models.CharField(default=None, editable=False, max_length=17)),
('ip_public', models.GenericIPAddressField(default=None, editable=False)),
('ip_local', models.GenericIPAddressField(default=None, editable=False)),
('dns_server', models.GenericIPAddressField(default=None, editable=False)),
('hostname', models.CharField(default=None, max_length=63)),
('iface', models.CharField(default=None, max_length=15)),
('isp', models.CharField(default=None, max_length=63)),
('city', models.CharField(default=None, max_length=63)),
('region', models.CharField(default=None, max_length=63)),
('country', models.CharField(default=None, max_length=63)),
('machine', models.ForeignKey(default=None, on_delete=django.db.models.deletion.CASCADE, to='machine.machine')),
],
options={
'unique_together': {('machine', 'ip_public', 'ip_local', 'mac_address', 'dns_server')},
},
),
# Dependency model removed - not needed anymore
# Binary: belongs to a Machine (CASCADE); unique per
# (machine, name, abspath, version, sha256).
migrations.CreateModel(
name='Binary',
fields=[
('num_uses_failed', models.PositiveIntegerField(default=0)),
('num_uses_succeeded', models.PositiveIntegerField(default=0)),
('id', models.UUIDField(default=uuid4, editable=False, primary_key=True, serialize=False, unique=True)),
('created_at', models.DateTimeField(db_index=True, default=django.utils.timezone.now)),
('modified_at', models.DateTimeField(auto_now=True)),
('name', models.CharField(blank=True, db_index=True, default=None, max_length=63)),
('binprovider', models.CharField(blank=True, default=None, max_length=31)),
('abspath', models.CharField(blank=True, default=None, max_length=255)),
('version', models.CharField(blank=True, default=None, max_length=32)),
('sha256', models.CharField(blank=True, default=None, max_length=64)),
('machine', models.ForeignKey(blank=True, default=None, on_delete=django.db.models.deletion.CASCADE, to='machine.machine')),
# Fields added in migration 0005 (included here for fresh installs)
('binproviders', models.CharField(blank=True, default='env', max_length=127)),
('output_dir', models.CharField(blank=True, default='', max_length=255)),
('overrides', models.JSONField(blank=True, default=dict)),
('retry_at', models.DateTimeField(blank=True, db_index=True, default=django.utils.timezone.now, null=True)),
('status', models.CharField(choices=[('queued', 'Queued'), ('started', 'Started'), ('succeeded', 'Succeeded'), ('failed', 'Failed')], db_index=True, default='queued', max_length=16)),
# dependency FK removed - Dependency model deleted
],
options={
'verbose_name': 'Binary',
'verbose_name_plural': 'Binaries',
'unique_together': {('machine', 'name', 'abspath', 'version', 'sha256')},
},
),
]

View File

@@ -1,16 +0,0 @@
# Generated manually on 2025-12-26
# NOTE: This migration is intentionally empty but kept for dependency chain
# The Dependency model was removed in 0004, so all operations have been stripped
from django.db import migrations
class Migration(migrations.Migration):
    """Empty placeholder migration kept only so the dependency chain stays intact."""

    # All Dependency operations removed - model deleted in 0004
    operations = []

    dependencies = [('machine', '0001_squashed')]

View File

@@ -1,17 +0,0 @@
# Generated by Django 6.0 on 2025-12-28 05:12
# NOTE: This migration is intentionally empty but kept for dependency chain
# The Dependency model was removed in 0004, all operations stripped
from django.db import migrations
class Migration(migrations.Migration):
    """Empty stub migration for users upgrading from old dev versions."""

    # All operations removed - Dependency model deleted in 0004
    operations = []

    dependencies = [('machine', '0002_rename_custom_cmds_to_overrides')]

View File

@@ -1,28 +0,0 @@
# Generated migration - removes Dependency model entirely
# NOTE: This is a cleanup migration for users upgrading from old dev versions
# that had the Dependency model. Fresh installs never create this table.
from django.db import migrations
def drop_dependency_table(apps, schema_editor):
    """
    Remove tables left behind by old dev versions, if they are present.

    Every statement uses DROP TABLE IF EXISTS, so running this repeatedly or
    against a database that never had these tables is harmless.
    machine_binary (the current Binary model table) is deliberately not listed.
    """
    legacy_drop_statements = (
        'DROP TABLE IF EXISTS machine_dependency',
        # Also drop old InstalledBinary table if it somehow still exists
        'DROP TABLE IF EXISTS machine_installedbinary',
    )
    for statement in legacy_drop_statements:
        schema_editor.execute(statement)
class Migration(migrations.Migration):
    """Run the legacy-table cleanup going forward; reversing does nothing."""

    operations = [
        migrations.RunPython(
            code=drop_dependency_table,
            reverse_code=migrations.RunPython.noop,
        ),
    ]

    dependencies = [
        ('machine', '0003_alter_dependency_id_alter_installedbinary_dependency_and_more'),
    ]

View File

@@ -1,104 +0,0 @@
# Generated by Django 6.0 on 2025-12-29 06:45
import django.db.models.deletion
import django.utils.timezone
from archivebox.uuid_compat import uuid7
from django.db import migrations, models
class Migration(migrations.Migration):
"""State-only migration: updates Django's model state for Binary, Machine and
NetworkInterface without issuing any database operations.

Everything is wrapped in SeparateDatabaseAndState with an empty
`database_operations` list.
"""
dependencies = [
('machine', '0004_drop_dependency_table'),
]
operations = [
# Update Django's state only - database already has correct schema
migrations.SeparateDatabaseAndState(
state_operations=[
# --- Binary: fields added to the model state ---
migrations.AddField(
model_name='binary',
name='binproviders',
field=models.CharField(blank=True, default='env', help_text='Comma-separated list of allowed providers: apt,brew,pip,npm,env', max_length=127),
),
migrations.AddField(
model_name='binary',
name='output_dir',
field=models.CharField(blank=True, default='', help_text='Directory where installation hook logs are stored', max_length=255),
),
migrations.AddField(
model_name='binary',
name='overrides',
field=models.JSONField(blank=True, default=dict, help_text="Provider-specific overrides: {'apt': {'packages': ['pkg']}, ...}"),
),
migrations.AddField(
model_name='binary',
name='retry_at',
field=models.DateTimeField(blank=True, db_index=True, default=django.utils.timezone.now, help_text='When to retry this binary installation', null=True),
),
migrations.AddField(
model_name='binary',
name='status',
field=models.CharField(choices=[('queued', 'Queued'), ('started', 'Started'), ('succeeded', 'Succeeded'), ('failed', 'Failed')], db_index=True, default='queued', max_length=16),
),
# --- Binary: existing fields altered (string defaults become '', id default becomes uuid7) ---
migrations.AlterField(
model_name='binary',
name='abspath',
field=models.CharField(blank=True, default='', max_length=255),
),
migrations.AlterField(
model_name='binary',
name='binprovider',
field=models.CharField(blank=True, default='', help_text='Provider that successfully installed this binary', max_length=31),
),
migrations.AlterField(
model_name='binary',
name='id',
field=models.UUIDField(default=uuid7, editable=False, primary_key=True, serialize=False, unique=True),
),
migrations.AlterField(
model_name='binary',
name='machine',
field=models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='machine.machine'),
),
migrations.AlterField(
model_name='binary',
name='name',
field=models.CharField(blank=True, db_index=True, default='', max_length=63),
),
migrations.AlterField(
model_name='binary',
name='sha256',
field=models.CharField(blank=True, default='', max_length=64),
),
migrations.AlterField(
model_name='binary',
name='version',
field=models.CharField(blank=True, default='', max_length=32),
),
# --- Machine: config/stats become nullable, id default becomes uuid7 ---
migrations.AlterField(
model_name='machine',
name='config',
field=models.JSONField(blank=True, default=dict, help_text='Machine-specific config overrides (e.g., resolved binary paths like WGET_BINARY)', null=True),
),
migrations.AlterField(
model_name='machine',
name='id',
field=models.UUIDField(default=uuid7, editable=False, primary_key=True, serialize=False, unique=True),
),
migrations.AlterField(
model_name='machine',
name='stats',
field=models.JSONField(blank=True, default=dict, null=True),
),
# --- NetworkInterface: id default becomes uuid7 ---
migrations.AlterField(
model_name='networkinterface',
name='id',
field=models.UUIDField(default=uuid7, editable=False, primary_key=True, serialize=False, unique=True),
),
],
database_operations=[
# No database changes - schema already correct from previous migrations
],
),
]

View File

@@ -433,6 +433,190 @@ class Binary(ModelWithHealthStats):
kill_process(pid_file)
# =============================================================================
# Process Model
# =============================================================================
class ProcessManager(models.Manager):
    """Manager for Process model."""

    def create_for_archiveresult(self, archiveresult, **kwargs):
        """
        Create and return a Process record for an ArchiveResult.

        Per the original author's note, this is meant to be called during
        migration and when creating new ArchiveResults.

        Args:
            archiveresult: the ArchiveResult the process belongs to. Its
                ``snapshot.output_dir`` and ``plugin`` are read only when no
                usable ``pwd`` is supplied.
            **kwargs: Process field values. Missing or empty values are
                filled in as follows:
                    machine -> ``Machine.current()`` (when missing or None)
                    pwd     -> ``<snapshot.output_dir>/<plugin>`` (when missing or falsy)
                    cmd     -> ``[]`` (when missing or falsy)
                    status  -> ``'queued'`` (when missing)
                    timeout -> ``120`` (when missing)
                    env     -> ``{}`` (when missing)

        Returns:
            The newly created Process instance.
        """
        fields = dict(kwargs)

        # Resolve the current machine only when the caller did not supply one.
        if fields.get('machine') is None:
            fields['machine'] = Machine.current()

        # An explicit empty/None pwd or cmd must not override the fallback:
        # both columns are declared NOT NULL on the model.
        if not fields.get('pwd'):
            fields['pwd'] = str(archiveresult.snapshot.output_dir / archiveresult.plugin)
        if not fields.get('cmd'):
            fields['cmd'] = []

        fields.setdefault('status', 'queued')
        fields.setdefault('timeout', 120)
        fields.setdefault('env', {})

        return self.create(**fields)
class Process(ModelWithHealthStats):
    """
    Tracks a single OS process execution.

    Process represents the actual subprocess spawned to execute a hook.
    One Process can optionally be associated with an ArchiveResult (via OneToOne),
    but Process can also exist standalone for internal operations.

    Follows the unified state machine pattern:
    - queued: Process ready to launch
    - running: Process actively executing
    - exited: Process completed (check exit_code for success/failure)

    The state machine named by ``state_machine_name`` (ProcessMachine) drives
    the transitions between these states.
    """

    class StatusChoices(models.TextChoices):
        QUEUED = 'queued', 'Queued'
        RUNNING = 'running', 'Running'
        EXITED = 'exited', 'Exited'

    # Primary fields
    id = models.UUIDField(primary_key=True, default=uuid7, editable=False, unique=True)
    created_at = models.DateTimeField(default=timezone.now, db_index=True)
    modified_at = models.DateTimeField(auto_now=True)

    # Machine FK - required (every process runs on a machine)
    machine = models.ForeignKey(
        Machine,
        on_delete=models.CASCADE,
        null=False,
        related_name='processes',
        help_text='Machine where this process executed'
    )

    # Execution metadata
    pwd = models.CharField(max_length=512, default='', null=False, blank=True,
                           help_text='Working directory for process execution')
    cmd = models.JSONField(default=list, null=False, blank=True,
                           help_text='Command as array of arguments')
    env = models.JSONField(default=dict, null=False, blank=True,
                           help_text='Environment variables for process')
    timeout = models.IntegerField(default=120, null=False,
                                  help_text='Timeout in seconds')

    # Process results
    pid = models.IntegerField(default=None, null=True, blank=True,
                              help_text='OS process ID')
    exit_code = models.IntegerField(default=None, null=True, blank=True,
                                    help_text='Process exit code (0 = success)')
    stdout = models.TextField(default='', null=False, blank=True,
                              help_text='Standard output from process')
    stderr = models.TextField(default='', null=False, blank=True,
                              help_text='Standard error from process')

    # Timing
    started_at = models.DateTimeField(default=None, null=True, blank=True,
                                      help_text='When process was launched')
    ended_at = models.DateTimeField(default=None, null=True, blank=True,
                                    help_text='When process completed/terminated')

    # Optional FKs
    binary = models.ForeignKey(
        Binary,
        on_delete=models.SET_NULL,
        null=True, blank=True,
        related_name='processes',
        help_text='Binary used by this process'
    )
    iface = models.ForeignKey(
        NetworkInterface,
        on_delete=models.SET_NULL,
        null=True, blank=True,
        related_name='processes',
        help_text='Network interface used by this process'
    )

    # Optional connection URL (for CDP, sonic, etc.)
    url = models.URLField(max_length=2048, default=None, null=True, blank=True,
                          help_text='Connection URL (CDP endpoint, sonic server, etc.)')

    # Reverse relation to ArchiveResult (OneToOne from AR side)
    # archiveresult: OneToOneField defined on ArchiveResult model

    # State machine fields
    status = models.CharField(
        max_length=16,
        choices=StatusChoices.choices,
        default=StatusChoices.QUEUED,
        db_index=True
    )
    retry_at = models.DateTimeField(
        default=timezone.now,
        null=True, blank=True,
        db_index=True,
        help_text='When to retry this process'
    )

    # Health stats
    num_uses_failed = models.PositiveIntegerField(default=0)
    num_uses_succeeded = models.PositiveIntegerField(default=0)

    state_machine_name: str = 'archivebox.machine.models.ProcessMachine'

    objects: ProcessManager = ProcessManager()

    class Meta:
        app_label = 'machine'
        verbose_name = 'Process'
        verbose_name_plural = 'Processes'
        indexes = [
            models.Index(fields=['machine', 'status', 'retry_at']),
            models.Index(fields=['binary', 'exit_code']),
        ]

    def __str__(self) -> str:
        """Return ``Process[<id>] <first 3 args> (<status>)`` for logs and the admin."""
        if not self.cmd:
            cmd_str = '(no cmd)'
        elif isinstance(self.cmd, (list, tuple)):
            # str() each entry so a JSON array containing non-string values
            # cannot make __str__ raise TypeError.
            cmd_str = ' '.join(str(arg) for arg in self.cmd[:3])
        else:
            # Non-list value stored in the JSON field: show it unmodified.
            cmd_str = str(self.cmd)
        return f'Process[{self.id}] {cmd_str} ({self.status})'

    # Properties that delegate to related objects
    @property
    def cmd_version(self) -> str:
        """Get version from associated binary ('' when no binary is set)."""
        return self.binary.version if self.binary else ''

    @property
    def bin_abspath(self) -> str:
        """Get absolute path from associated binary ('' when no binary is set)."""
        return self.binary.abspath if self.binary else ''

    @property
    def plugin(self) -> str:
        """Get plugin name from associated ArchiveResult ('' when there is none)."""
        if hasattr(self, 'archiveresult'):
            return self.archiveresult.plugin
        return ''

    @property
    def hook_name(self) -> str:
        """Get hook name from associated ArchiveResult ('' when there is none)."""
        if hasattr(self, 'archiveresult'):
            return self.archiveresult.hook_name
        return ''

    def update_and_requeue(self, **kwargs):
        """
        Update process fields and requeue for worker state machine.

        Each keyword argument is assigned as an attribute, ``modified_at`` is
        set to the current time, and the row is saved.
        Sets modified_at to ensure workers pick up changes.
        """
        for key, value in kwargs.items():
            setattr(self, key, value)
        self.modified_at = timezone.now()
        self.save()
# =============================================================================
# Binary State Machine
# =============================================================================
@@ -550,11 +734,119 @@ class BinaryMachine(BaseStateMachine, strict_states=True):
self.binary.increment_health_stats(success=False)
# =============================================================================
# Process State Machine
# =============================================================================
class ProcessMachine(BaseStateMachine, strict_states=True):
    """
    State machine for the lifecycle of a Process (OS subprocess) record.

    States and transitions:

        QUEUED  --tick() when can_start()-->  RUNNING  --tick() when is_exited()-->  EXITED

    - QUEUED:  the process is waiting to be launched; tick() loops here until
               it has both a cmd and a machine.
    - RUNNING: entering this state stamps started_at and pushes retry_at out
               by the process timeout. Spawning the subprocess is still a
               placeholder here: exit_code is set to 0 immediately.
    - EXITED:  final state; entering it stamps ended_at, clears retry_at and
               increments the health stats according to exit_code.

    The explicit `launch` and `kill` events force the queued->running and
    running->exited transitions without evaluating the tick conditions.

    Note: This is a simpler state machine than ArchiveResult.
    Process is just about execution lifecycle. ArchiveResult handles
    the archival-specific logic (status, output parsing, etc.).
    """

    model_attr_name = 'process'

    # States
    queued = State(value=Process.StatusChoices.QUEUED, initial=True)
    running = State(value=Process.StatusChoices.RUNNING)
    exited = State(value=Process.StatusChoices.EXITED, final=True)

    # Tick Event - transitions based on conditions
    tick = (
        queued.to.itself(unless='can_start')
        | queued.to(running, cond='can_start')
        | running.to.itself(unless='is_exited')
        | running.to(exited, cond='is_exited')
    )

    # Additional events (for explicit control)
    launch = queued.to(running)
    kill = running.to(exited)

    def can_start(self) -> bool:
        """Return True when the process has both a command and a machine."""
        proc = self.process
        return bool(proc.cmd and proc.machine)

    def is_exited(self) -> bool:
        """Return True once an exit code has been recorded on the process."""
        recorded_exit_code = self.process.exit_code
        return recorded_exit_code is not None

    @queued.enter
    def enter_queued(self):
        """Mark the process as queued and eligible for pickup right away."""
        queued_fields = {
            'retry_at': timezone.now(),
            'status': Process.StatusChoices.QUEUED,
        }
        self.process.update_and_requeue(**queued_fields)

    @running.enter
    def enter_running(self):
        """Mark the process as running and record its (placeholder) result."""
        proc = self.process

        # Lock the process while it runs: no retry until the timeout has elapsed.
        locked_until = timezone.now() + timedelta(seconds=proc.timeout)
        proc.update_and_requeue(
            retry_at=locked_until,
            status=Process.StatusChoices.RUNNING,
            started_at=timezone.now(),
        )

        # NOTE: placeholder. Process is currently only a data model that
        # tracks execution metadata; the real subprocess spawning is still
        # handled by run_hook(). Until run_hook is refactored, the process is
        # marked as exited immediately. In the future this is where the
        # subprocess would actually be spawned.
        proc.exit_code = 0  # Placeholder
        proc.save()

    @exited.enter
    def enter_exited(self):
        """Finalize the process record and update its health stats."""
        proc = self.process
        exited_cleanly = proc.exit_code == 0

        proc.update_and_requeue(
            retry_at=None,
            status=Process.StatusChoices.EXITED,
            ended_at=timezone.now(),
        )

        # Increment health stats based on exit code
        proc.increment_health_stats(success=exited_cleanly)
# =============================================================================
# State Machine Registration
# =============================================================================
# Manually register state machines with python-statemachine registry
# Runs at import time: each state machine class defined in this module is
# handed to the python-statemachine registry explicitly.
registry.register(BinaryMachine)
registry.register(ProcessMachine)