This commit is contained in:
Nick Sweeting
2026-03-23 03:58:32 -07:00
parent 268856bcfb
commit b749b26c5d
286 changed files with 21704 additions and 13480 deletions

View File

@@ -1 +1 @@
__package__ = 'archivebox.machine'
__package__ = "archivebox.machine"

View File

@@ -1,229 +1,543 @@
__package__ = 'archivebox.machine'
__package__ = "archivebox.machine"
from django.contrib import admin
import json
import shlex
from django.contrib import admin, messages
from django.db.models import DurationField, ExpressionWrapper, F
from django.db.models.functions import Coalesce, Now
from django.shortcuts import redirect
from django.utils import timezone
from django.utils.html import format_html
from django_object_actions import action
from archivebox.base_models.admin import BaseModelAdmin, ConfigEditorMixin
from archivebox.misc.logging_util import printable_filesize
from archivebox.machine.env_utils import env_to_dotenv_text
from archivebox.machine.models import Machine, NetworkInterface, Binary, Process
class MachineAdmin(ConfigEditorMixin, BaseModelAdmin):
list_display = ('id', 'created_at', 'hostname', 'ips', 'os_platform', 'hw_in_docker', 'hw_in_vm', 'hw_manufacturer', 'hw_product', 'os_arch', 'os_family', 'os_release', 'hw_uuid', 'health_display')
sort_fields = ('id', 'created_at', 'hostname', 'ips', 'os_platform', 'hw_in_docker', 'hw_in_vm', 'hw_manufacturer', 'hw_product', 'os_arch', 'os_family', 'os_release', 'hw_uuid')
readonly_fields = ('guid', 'created_at', 'modified_at', 'ips')
fieldsets = (
('Identity', {
'fields': ('hostname', 'guid', 'ips'),
'classes': ('card',),
}),
('Hardware', {
'fields': ('hw_manufacturer', 'hw_product', 'hw_uuid', 'hw_in_docker', 'hw_in_vm'),
'classes': ('card',),
}),
('Operating System', {
'fields': ('os_platform', 'os_family', 'os_arch', 'os_kernel', 'os_release'),
'classes': ('card',),
}),
('Statistics', {
'fields': ('stats', 'num_uses_succeeded', 'num_uses_failed'),
'classes': ('card',),
}),
('Configuration', {
'fields': ('config',),
'classes': ('card', 'wide'),
}),
('Timestamps', {
'fields': ('created_at', 'modified_at'),
'classes': ('card',),
}),
def _render_copy_block(text: str, *, multiline: bool = False):
    """Render *text* as an HTML snippet with a "Copy" button overlaid on it.

    The same ``text`` value is substituted three times into the template:
    into the button's ``data-command`` attribute (read by the click handler),
    into the ``title`` attribute of the display element, and into the visible
    body of the display element.

    The inline ``onclick`` handler reads ``btn.dataset.command`` and writes it
    to the clipboard with ``navigator.clipboard.writeText`` when that API is
    available; otherwise it falls back to a temporary ``<textarea>`` plus
    ``document.execCommand('copy')``.

    Args:
        text: The string to display and to make copyable.
        multiline: When true, the text is shown in a scrollable, wrapping
            ``<pre>`` (max-height 300px); otherwise in a single-line
            ``<code>`` element that truncates with an ellipsis.

    Returns:
        The value produced by ``format_html`` for the chosen template.
    """
    # NOTE(review): `text` is always passed as a format_html argument rather
    # than interpolated into the template; format_html is expected to
    # HTML-escape it (per Django docs) -- confirm before bypassing this helper.
    if multiline:
        # Multi-line variant: <pre> with `white-space: pre-wrap` and scrolling.
        return format_html(
            """
<div style="position: relative; width: 100%; max-width: 100%; overflow: hidden; box-sizing: border-box;">
<button type="button"
data-command="{}"
onclick="(function(btn){{var text=btn.dataset.command||''; if(navigator.clipboard&&navigator.clipboard.writeText){{navigator.clipboard.writeText(text);}} else {{var ta=document.createElement('textarea'); ta.value=text; document.body.appendChild(ta); ta.select(); document.execCommand('copy'); document.body.removeChild(ta);}}}})(this); return false;"
style="position: absolute; top: 6px; right: 6px; z-index: 1; padding: 2px 8px; border: 0; border-radius: 4px; background: #e2e8f0; color: #334155; font-size: 11px; cursor: pointer;">
Copy
</button>
<pre title="{}" style="display: block; width: 100%; max-width: 100%; overflow: auto; max-height: 300px; margin: 0; padding: 8px 56px 8px 8px; background: #f8fafc; border: 1px solid #e2e8f0; border-radius: 6px; font-size: 11px; line-height: 1.45; white-space: pre-wrap; word-break: break-word; box-sizing: border-box;">{}</pre>
</div>
""",
            text,
            text,
            text,
        )
    # Single-line variant: <code> with `white-space: nowrap` and ellipsis
    # truncation; the full value remains available via the title attribute.
    return format_html(
        """
<div style="position: relative; width: 100%; max-width: 100%; overflow: hidden; box-sizing: border-box;">
<button type="button"
data-command="{}"
onclick="(function(btn){{var text=btn.dataset.command||''; if(navigator.clipboard&&navigator.clipboard.writeText){{navigator.clipboard.writeText(text);}} else {{var ta=document.createElement('textarea'); ta.value=text; document.body.appendChild(ta); ta.select(); document.execCommand('copy'); document.body.removeChild(ta);}}}})(this); return false;"
style="position: absolute; top: 6px; right: 6px; z-index: 1; padding: 2px 8px; border: 0; border-radius: 4px; background: #e2e8f0; color: #334155; font-size: 11px; cursor: pointer;">
Copy
</button>
<code title="{}" style="display: block; width: 100%; max-width: 100%; overflow: hidden; text-overflow: ellipsis; white-space: nowrap; padding: 8px 56px 8px 8px; background: #f8fafc; border: 1px solid #e2e8f0; border-radius: 6px; font-size: 11px; box-sizing: border-box;">
{}
</code>
</div>
""",
        text,
        text,
        text,
    )
list_filter = ('hw_in_docker', 'hw_in_vm', 'os_arch', 'os_family', 'os_platform')
ordering = ['-created_at']
def _format_process_duration_seconds(started_at, ended_at) -> str:
if not started_at:
return "-"
end_time = ended_at or timezone.now()
seconds = max((end_time - started_at).total_seconds(), 0.0)
if seconds < 1:
return f"{seconds:.2f}s"
if seconds < 10 and seconds != int(seconds):
return f"{seconds:.1f}s"
return f"{int(seconds)}s"
class MachineAdmin(ConfigEditorMixin, BaseModelAdmin):
list_display = (
"id",
"created_at",
"hostname",
"ips",
"os_platform",
"hw_in_docker",
"hw_in_vm",
"hw_manufacturer",
"hw_product",
"os_arch",
"os_family",
"os_release",
"hw_uuid",
"health_display",
)
sort_fields = (
"id",
"created_at",
"hostname",
"ips",
"os_platform",
"hw_in_docker",
"hw_in_vm",
"hw_manufacturer",
"hw_product",
"os_arch",
"os_family",
"os_release",
"hw_uuid",
)
readonly_fields = ("guid", "created_at", "modified_at", "ips")
fieldsets = (
(
"Identity",
{
"fields": ("hostname", "guid", "ips"),
"classes": ("card",),
},
),
(
"Hardware",
{
"fields": ("hw_manufacturer", "hw_product", "hw_uuid", "hw_in_docker", "hw_in_vm"),
"classes": ("card",),
},
),
(
"Operating System",
{
"fields": ("os_platform", "os_family", "os_arch", "os_kernel", "os_release"),
"classes": ("card",),
},
),
(
"Statistics",
{
"fields": ("stats", "num_uses_succeeded", "num_uses_failed"),
"classes": ("card",),
},
),
(
"Configuration",
{
"fields": ("config",),
"classes": ("card", "wide"),
},
),
(
"Timestamps",
{
"fields": ("created_at", "modified_at"),
"classes": ("card",),
},
),
)
list_filter = ("hw_in_docker", "hw_in_vm", "os_arch", "os_family", "os_platform")
ordering = ["-created_at"]
list_per_page = 100
actions = ["delete_selected"]
@admin.display(description='Public IP', ordering='networkinterface__ip_public')
@admin.display(description="Public IP", ordering="networkinterface__ip_public")
def ips(self, machine):
return format_html(
'<a href="/admin/machine/networkinterface/?q={}"><b><code>{}</code></b></a>',
machine.id, ', '.join(machine.networkinterface_set.values_list('ip_public', flat=True)),
machine.id,
", ".join(machine.networkinterface_set.values_list("ip_public", flat=True)),
)
@admin.display(description='Health', ordering='health')
@admin.display(description="Health", ordering="health")
def health_display(self, obj):
h = obj.health
color = 'green' if h >= 80 else 'orange' if h >= 50 else 'red'
color = "green" if h >= 80 else "orange" if h >= 50 else "red"
return format_html('<span style="color: {};">{}</span>', color, h)
class NetworkInterfaceAdmin(BaseModelAdmin):
list_display = ('id', 'created_at', 'machine_info', 'ip_public', 'dns_server', 'isp', 'country', 'region', 'city', 'iface', 'ip_local', 'mac_address', 'health_display')
sort_fields = ('id', 'created_at', 'machine_info', 'ip_public', 'dns_server', 'isp', 'country', 'region', 'city', 'iface', 'ip_local', 'mac_address')
search_fields = ('id', 'machine__id', 'iface', 'ip_public', 'ip_local', 'mac_address', 'dns_server', 'hostname', 'isp', 'city', 'region', 'country')
readonly_fields = ('machine', 'created_at', 'modified_at', 'mac_address', 'ip_public', 'ip_local', 'dns_server')
fieldsets = (
('Machine', {
'fields': ('machine',),
'classes': ('card',),
}),
('Network', {
'fields': ('iface', 'ip_public', 'ip_local', 'mac_address', 'dns_server'),
'classes': ('card',),
}),
('Location', {
'fields': ('hostname', 'isp', 'city', 'region', 'country'),
'classes': ('card',),
}),
('Usage', {
'fields': ('num_uses_succeeded', 'num_uses_failed'),
'classes': ('card',),
}),
('Timestamps', {
'fields': ('created_at', 'modified_at'),
'classes': ('card',),
}),
list_display = (
"id",
"created_at",
"machine_info",
"ip_public",
"dns_server",
"isp",
"country",
"region",
"city",
"iface",
"ip_local",
"mac_address",
"health_display",
)
sort_fields = (
"id",
"created_at",
"machine_info",
"ip_public",
"dns_server",
"isp",
"country",
"region",
"city",
"iface",
"ip_local",
"mac_address",
)
search_fields = (
"id",
"machine__id",
"iface",
"ip_public",
"ip_local",
"mac_address",
"dns_server",
"hostname",
"isp",
"city",
"region",
"country",
)
list_filter = ('isp', 'country', 'region')
ordering = ['-created_at']
readonly_fields = ("machine", "created_at", "modified_at", "mac_address", "ip_public", "ip_local", "dns_server")
fieldsets = (
(
"Machine",
{
"fields": ("machine",),
"classes": ("card",),
},
),
(
"Network",
{
"fields": ("iface", "ip_public", "ip_local", "mac_address", "dns_server"),
"classes": ("card",),
},
),
(
"Location",
{
"fields": ("hostname", "isp", "city", "region", "country"),
"classes": ("card",),
},
),
(
"Usage",
{
"fields": ("num_uses_succeeded", "num_uses_failed"),
"classes": ("card",),
},
),
(
"Timestamps",
{
"fields": ("created_at", "modified_at"),
"classes": ("card",),
},
),
)
list_filter = ("isp", "country", "region")
ordering = ["-created_at"]
list_per_page = 100
actions = ["delete_selected"]
@admin.display(description='Machine', ordering='machine__id')
@admin.display(description="Machine", ordering="machine__id")
def machine_info(self, iface):
return format_html(
'<a href="/admin/machine/machine/{}/change"><b><code>[{}]</code></b> &nbsp; {}</a>',
iface.machine.id, str(iface.machine.id)[:8], iface.machine.hostname,
iface.machine.id,
str(iface.machine.id)[:8],
iface.machine.hostname,
)
@admin.display(description='Health', ordering='health')
@admin.display(description="Health", ordering="health")
def health_display(self, obj):
h = obj.health
color = 'green' if h >= 80 else 'orange' if h >= 50 else 'red'
color = "green" if h >= 80 else "orange" if h >= 50 else "red"
return format_html('<span style="color: {};">{}</span>', color, h)
class BinaryAdmin(BaseModelAdmin):
list_display = ('id', 'created_at', 'machine_info', 'name', 'binprovider', 'version', 'abspath', 'sha256', 'status', 'health_display')
sort_fields = ('id', 'created_at', 'machine_info', 'name', 'binprovider', 'version', 'abspath', 'sha256', 'status')
search_fields = ('id', 'machine__id', 'name', 'binprovider', 'version', 'abspath', 'sha256')
list_display = ("id", "created_at", "machine_info", "name", "binprovider", "version", "abspath", "sha256", "status", "health_display")
sort_fields = ("id", "created_at", "machine_info", "name", "binprovider", "version", "abspath", "sha256", "status")
search_fields = ("id", "machine__id", "name", "binprovider", "version", "abspath", "sha256")
readonly_fields = ('created_at', 'modified_at', 'output_dir')
readonly_fields = ("created_at", "modified_at", "output_dir")
fieldsets = (
('Binary Info', {
'fields': ('name', 'binproviders', 'binprovider', 'overrides'),
'classes': ('card',),
}),
('Location', {
'fields': ('machine', 'abspath'),
'classes': ('card',),
}),
('Version', {
'fields': ('version', 'sha256'),
'classes': ('card',),
}),
('State', {
'fields': ('status', 'retry_at', 'output_dir'),
'classes': ('card',),
}),
('Usage', {
'fields': ('num_uses_succeeded', 'num_uses_failed'),
'classes': ('card',),
}),
('Timestamps', {
'fields': ('created_at', 'modified_at'),
'classes': ('card',),
}),
(
"Binary Info",
{
"fields": ("name", "binproviders", "binprovider", "overrides"),
"classes": ("card",),
},
),
(
"Location",
{
"fields": ("machine", "abspath"),
"classes": ("card",),
},
),
(
"Version",
{
"fields": ("version", "sha256"),
"classes": ("card",),
},
),
(
"State",
{
"fields": ("status", "retry_at", "output_dir"),
"classes": ("card",),
},
),
(
"Usage",
{
"fields": ("num_uses_succeeded", "num_uses_failed"),
"classes": ("card",),
},
),
(
"Timestamps",
{
"fields": ("created_at", "modified_at"),
"classes": ("card",),
},
),
)
list_filter = ('name', 'binprovider', 'status', 'machine_id')
ordering = ['-created_at']
list_filter = ("name", "binprovider", "status", "machine_id")
ordering = ["-created_at"]
list_per_page = 100
actions = ["delete_selected"]
@admin.display(description='Machine', ordering='machine__id')
@admin.display(description="Machine", ordering="machine__id")
def machine_info(self, binary):
return format_html(
'<a href="/admin/machine/machine/{}/change"><b><code>[{}]</code></b> &nbsp; {}</a>',
binary.machine.id, str(binary.machine.id)[:8], binary.machine.hostname,
binary.machine.id,
str(binary.machine.id)[:8],
binary.machine.hostname,
)
@admin.display(description='Health', ordering='health')
@admin.display(description="Health", ordering="health")
def health_display(self, obj):
h = obj.health
color = 'green' if h >= 80 else 'orange' if h >= 50 else 'red'
color = "green" if h >= 80 else "orange" if h >= 50 else "red"
return format_html('<span style="color: {};">{}</span>', color, h)
class ProcessAdmin(BaseModelAdmin):
list_display = ('id', 'created_at', 'machine_info', 'archiveresult_link', 'cmd_str', 'status', 'exit_code', 'pid', 'binary_info')
sort_fields = ('id', 'created_at', 'status', 'exit_code', 'pid')
search_fields = ('id', 'machine__id', 'binary__name', 'cmd', 'pwd', 'stdout', 'stderr')
list_display = (
"id",
"created_at",
"machine_info",
"archiveresult_link",
"snapshot_link",
"crawl_link",
"cmd_str",
"status",
"duration_display",
"exit_code",
"pid",
"output_summary",
"binary_info",
)
sort_fields = (
"id",
"created_at",
"machine_info",
"archiveresult_link",
"snapshot_link",
"crawl_link",
"cmd_str",
"status",
"duration_display",
"exit_code",
"pid",
"output_summary",
"binary_info",
)
search_fields = ("id", "machine__id", "binary__name", "cmd", "pwd", "stdout", "stderr")
readonly_fields = ('created_at', 'modified_at', 'machine', 'binary_link', 'iface_link', 'archiveresult_link')
fieldsets = (
('Process Info', {
'fields': ('machine', 'archiveresult_link', 'status', 'retry_at'),
'classes': ('card',),
}),
('Command', {
'fields': ('cmd', 'pwd', 'env', 'timeout'),
'classes': ('card', 'wide'),
}),
('Execution', {
'fields': ('binary_link', 'iface_link', 'pid', 'exit_code', 'url'),
'classes': ('card',),
}),
('Timing', {
'fields': ('started_at', 'ended_at'),
'classes': ('card',),
}),
('Output', {
'fields': ('stdout', 'stderr'),
'classes': ('card', 'wide', 'collapse'),
}),
('Timestamps', {
'fields': ('created_at', 'modified_at'),
'classes': ('card',),
}),
readonly_fields = (
"created_at",
"modified_at",
"machine",
"binary_link",
"iface_link",
"archiveresult_link",
"snapshot_link",
"crawl_link",
"cmd_display",
"env_display",
"timeout",
"pid",
"exit_code",
"url",
"started_at",
"ended_at",
"duration_display",
)
list_filter = ('status', 'exit_code', 'machine_id')
ordering = ['-created_at']
list_per_page = 100
actions = ["delete_selected"]
fieldsets = (
(
"Process Info",
{
"fields": ("machine", "archiveresult_link", "snapshot_link", "crawl_link", "status", "retry_at"),
"classes": ("card",),
},
),
(
"Command",
{
"fields": ("cmd_display", "pwd", "env_display", "timeout"),
"classes": ("card", "wide"),
},
),
(
"Execution",
{
"fields": ("binary_link", "iface_link", "pid", "exit_code", "url"),
"classes": ("card",),
},
),
(
"Timing",
{
"fields": ("started_at", "ended_at", "duration_display"),
"classes": ("card",),
},
),
(
"Output",
{
"fields": ("stdout", "stderr"),
"classes": ("card", "wide", "collapse"),
},
),
(
"Timestamps",
{
"fields": ("created_at", "modified_at"),
"classes": ("card",),
},
),
)
@admin.display(description='Machine', ordering='machine__id')
list_filter = ("status", "exit_code", "machine_id")
ordering = ["-created_at"]
list_per_page = 100
actions = ["kill_processes", "delete_selected"]
change_actions = ["kill_process"]
def get_queryset(self, request):
    """Return the base queryset with related rows joined and a runtime annotation.

    The queryset from the parent class is extended with ``select_related``
    on the machine, binary, iface and archiveresult -> snapshot -> crawl
    relations, and annotated with ``runtime_sort``: ``ended_at`` (or the
    database's current time when ``ended_at`` is NULL) minus ``started_at``,
    typed as a duration. ``duration_display`` orders by this annotation.
    """
    base_qs = super().get_queryset(request)
    joined_qs = base_qs.select_related(
        "machine",
        "binary",
        "iface",
        "archiveresult__snapshot__crawl",
    )
    # Processes that have not ended yet are measured up to "now".
    elapsed = Coalesce(F("ended_at"), Now()) - F("started_at")
    runtime = ExpressionWrapper(elapsed, output_field=DurationField())
    return joined_qs.annotate(runtime_sort=runtime)
def _terminate_processes(self, request, processes):
    """Terminate each still-running process and report counts to the admin user.

    A process is attempted only when its status is not ``EXITED`` and its
    ``is_running`` attribute is truthy. It counts as terminated when
    ``terminate()`` returns a truthy value; every other process is counted
    as skipped.

    Args:
        request: The current admin request, used for ``message_user``.
        processes: Iterable of Process objects to act on.

    Returns:
        A ``(terminated, skipped)`` tuple of counts.
    """
    terminated = 0
    skipped = 0
    for proc in processes:
        still_alive = proc.status != Process.StatusChoices.EXITED and proc.is_running
        if still_alive and proc.terminate():
            terminated += 1
        else:
            skipped += 1

    # Only emit a message for a category that actually has entries.
    if terminated:
        self.message_user(
            request,
            f"Killed {terminated} running process{'es' if terminated != 1 else ''}.",
            level=messages.SUCCESS,
        )
    if skipped:
        self.message_user(
            request,
            f"Skipped {skipped} process{'es' if skipped != 1 else ''} that were already exited.",
            level=messages.INFO,
        )
    return terminated, skipped
@admin.action(description="Kill selected processes")
def kill_processes(self, request, queryset):
    """Bulk admin action: terminate every selected process that is still running."""
    self._terminate_processes(request=request, processes=queryset)
@action(
    label="Kill",
    description="Kill this process if it is still running",
    attrs={"class": "deletelink"},
)
def kill_process(self, request, obj):
    """Per-object admin action: terminate *obj*, then return to its change page."""
    self._terminate_processes(request=request, processes=[obj])
    change_view = "admin:machine_process_change"
    return redirect(change_view, obj.pk)
@admin.display(description="Machine", ordering="machine__id")
def machine_info(self, process):
return format_html(
'<a href="/admin/machine/machine/{}/change"><b><code>[{}]</code></b> &nbsp; {}</a>',
process.machine.id, str(process.machine.id)[:8], process.machine.hostname,
process.machine.id,
str(process.machine.id)[:8],
process.machine.hostname,
)
@admin.display(description='Binary', ordering='binary__name')
@admin.display(description="Binary", ordering="binary__name")
def binary_info(self, process):
if not process.binary:
return '-'
return "-"
return format_html(
'<a href="/admin/machine/binary/{}/change"><code>{}</code> v{}</a>',
process.binary.id, process.binary.name, process.binary.version,
process.binary.id,
process.binary.name,
process.binary.version,
)
@admin.display(description='Binary', ordering='binary__name')
@admin.display(description="Binary", ordering="binary__name")
def binary_link(self, process):
return self.binary_info(process)
@admin.display(description='Network Interface', ordering='iface__id')
@admin.display(description="Network Interface", ordering="iface__id")
def iface_link(self, process):
if not process.iface:
return '-'
return "-"
return format_html(
'<a href="/admin/machine/networkinterface/{}/change"><code>{}</code> {}</a>',
process.iface.id,
@@ -231,25 +545,112 @@ class ProcessAdmin(BaseModelAdmin):
process.iface.iface or process.iface.ip_public or process.iface.ip_local,
)
@admin.display(description='ArchiveResult')
@admin.display(description="ArchiveResult", ordering="archiveresult__plugin")
def archiveresult_link(self, process):
if not hasattr(process, 'archiveresult'):
return '-'
if not hasattr(process, "archiveresult"):
return "-"
ar = process.archiveresult
return format_html(
'<a href="/admin/core/archiveresult/{}/change"><code>{}</code>{}</a>',
ar.id, ar.plugin, ar.snapshot.url[:50],
'<a href="/admin/core/archiveresult/{}/change">{}<code>{}</code></a>',
ar.id,
ar.snapshot.url[:50],
ar.plugin,
)
@admin.display(description='Command')
@admin.display(description="Snapshot", ordering="archiveresult__snapshot__id")
def snapshot_link(self, process):
    """Link to the admin change page of the snapshot behind this process.

    Follows ``process.archiveresult.snapshot``, tolerating a missing
    attribute at either step, and returns ``"-"`` when no snapshot is found.
    The link text is the first 8 characters of the snapshot id.
    """
    result = getattr(process, "archiveresult", None)
    snap = getattr(result, "snapshot", None)
    if snap:
        return format_html(
            '<a href="/admin/core/snapshot/{}/change"><code>{}</code></a>',
            snap.id,
            str(snap.id)[:8],
        )
    return "-"
@admin.display(description="Crawl", ordering="archiveresult__snapshot__crawl__id")
def crawl_link(self, process):
    """Link to the admin change page of the crawl behind this process.

    Follows ``process.archiveresult.snapshot.crawl``, tolerating a missing
    attribute at any step, and returns ``"-"`` when no crawl is found.
    The link text is the first 8 characters of the crawl id.
    """
    found = process
    for attr_name in ("archiveresult", "snapshot", "crawl"):
        # getattr on None simply yields the default, so the chain stays None.
        found = getattr(found, attr_name, None)
    if found:
        return format_html(
            '<a href="/admin/crawls/crawl/{}/change"><code>{}</code></a>',
            found.id,
            str(found.id)[:8],
        )
    return "-"
@admin.display(description="Command", ordering="cmd")
def cmd_str(self, process):
if not process.cmd:
return '-'
cmd = ' '.join(process.cmd[:3]) if isinstance(process.cmd, list) else str(process.cmd)
return "-"
cmd = " ".join(process.cmd[:3]) if isinstance(process.cmd, list) else str(process.cmd)
if len(process.cmd) > 3:
cmd += ' ...'
cmd += " ..."
return format_html('<code style="font-size: 0.9em;">{}</code>', cmd[:80])
@admin.display(description="Duration", ordering="runtime_sort")
def duration_display(self, process):
    """Show the process runtime as a short seconds string; sorts by ``runtime_sort``."""
    began, finished = process.started_at, process.ended_at
    return _format_process_duration_seconds(began, finished)
@admin.display(description="Output", ordering="archiveresult__output_size")
def output_summary(self, process):
    """Summarize the output files of the process's archive result.

    Reads ``process.archiveresult.output_files`` (missing attributes are
    treated as "no files"). The value may be a JSON string, a dict, or a
    list/tuple/set; anything else counts as empty.

    Returns:
        An HTML ``<code>`` snippet of the form ``"<count> file(s), <size>"``,
        where the size is the sum of the integer-convertible ``size`` entries
        formatted by ``printable_filesize``.
    """
    archiveresult = getattr(process, "archiveresult", None)
    output_files = getattr(archiveresult, "output_files", {}) or {}

    if isinstance(output_files, str):
        try:
            output_files = json.loads(output_files)
        except (ValueError, RecursionError):
            # Malformed or pathologically nested JSON: show an empty summary
            # rather than breaking the whole admin page.
            output_files = {}

    # presumably a mapping of path -> metadata dict, or a sequence of
    # metadata dicts -- verify against the ArchiveResult model.
    if isinstance(output_files, dict):
        entries = output_files.values()
    elif isinstance(output_files, (list, tuple, set)):
        entries = output_files
    else:
        entries = ()
    file_count = len(entries)

    total_bytes = 0
    for metadata in entries:
        if not isinstance(metadata, dict):
            continue
        try:
            total_bytes += int(metadata.get("size", 0) or 0)
        except (TypeError, ValueError):
            # Ignore sizes that cannot be interpreted as an integer.
            continue

    file_label = "file" if file_count == 1 else "files"
    # Separate the label from the size so they do not run together
    # (previously the two were concatenated with nothing in between).
    return format_html(
        '<code style="font-size: 0.9em;">{} {}, {}</code>',
        file_count,
        file_label,
        printable_filesize(total_bytes),
    )
@admin.display(description="Command")
def cmd_display(self, process):
    """Render the full process command as a copyable single-line block.

    Returns ``"-"`` when the command is empty. A list command is joined with
    ``shlex.join`` after converting each argument to ``str``; any other value
    is shown via ``str()``.
    """
    raw_cmd = process.cmd
    if not raw_cmd:
        return "-"
    rendered = shlex.join(map(str, raw_cmd)) if isinstance(raw_cmd, list) else str(raw_cmd)
    return _render_copy_block(rendered)
@admin.display(description="Environment")
def env_display(self, process):
    """Render the process environment as a copyable multi-line block.

    The environment is converted with ``env_to_dotenv_text``; an empty
    result is shown as ``"-"``.
    """
    dotenv = env_to_dotenv_text(process.env)
    return _render_copy_block(dotenv, multiline=True) if dotenv else "-"
def register_admin(admin_site):
admin_site.register(Machine, MachineAdmin)

View File

@@ -1,24 +1,25 @@
__package__ = 'archivebox.machine'
__package__ = "archivebox.machine"
from django.apps import AppConfig
class MachineConfig(AppConfig):
default_auto_field = 'django.db.models.BigAutoField'
default_auto_field = "django.db.models.BigAutoField"
name = 'archivebox.machine'
label = 'machine' # Explicit label for migrations
verbose_name = 'Machine Info'
name = "archivebox.machine"
label = "machine" # Explicit label for migrations
verbose_name = "Machine Info"
def ready(self):
"""Import models to register state machines with the registry"""
import sys
# Skip during makemigrations to avoid premature state machine access
if 'makemigrations' not in sys.argv:
if "makemigrations" not in sys.argv:
from archivebox.machine import models # noqa: F401
def register_admin(admin_site):
    """Register this app's admin classes on *admin_site*.

    The admin module is imported at call time, inside the function, rather
    than at module import time.
    """
    from archivebox.machine import admin as machine_admin

    machine_admin.register_admin(admin_site)

View File

@@ -2,7 +2,7 @@ import os
import json
import socket
import urllib.request
from typing import Dict, Any
from typing import Any
from pathlib import Path
import subprocess
import platform
@@ -10,34 +10,35 @@ import tempfile
from datetime import datetime
import psutil
import machineid # https://github.com/keygen-sh/py-machineid
import machineid # https://github.com/keygen-sh/py-machineid
from rich import print
PACKAGE_DIR = Path(__file__).parent
DATA_DIR = Path(os.getcwd()).resolve()
def get_vm_info():
hw_in_docker = bool(os.getenv('IN_DOCKER', False) in ('1', 'true', 'True', 'TRUE'))
hw_in_docker = bool(os.getenv("IN_DOCKER", False) in ("1", "true", "True", "TRUE"))
hw_in_vm = False
try:
# check for traces of docker/containerd/podman in cgroup
with open('/proc/self/cgroup', 'r') as procfile:
with open("/proc/self/cgroup") as procfile:
for line in procfile:
cgroup = line.strip() # .split('/', 1)[-1].lower()
if 'docker' in cgroup or 'containerd' in cgroup or 'podman' in cgroup:
if "docker" in cgroup or "containerd" in cgroup or "podman" in cgroup:
hw_in_docker = True
except Exception:
pass
hw_manufacturer = 'Docker' if hw_in_docker else 'Unknown'
hw_product = 'Container' if hw_in_docker else 'Unknown'
hw_manufacturer = "Docker" if hw_in_docker else "Unknown"
hw_product = "Container" if hw_in_docker else "Unknown"
hw_uuid = machineid.id()
if platform.system().lower() == 'darwin':
if platform.system().lower() == "darwin":
# Get macOS machine info
hw_manufacturer = 'Apple'
hw_product = 'Mac'
hw_manufacturer = "Apple"
hw_product = "Mac"
try:
# Hardware:
# Hardware Overview:
@@ -48,14 +49,14 @@ def get_vm_info():
# Serial Number (system): M230YYTD77
# Hardware UUID: 39A12B50-1972-5910-8BEE-235AD20C8EE3
# ...
result = subprocess.run(['system_profiler', 'SPHardwareDataType'], capture_output=True, text=True, check=True)
for line in result.stdout.split('\n'):
if 'Model Name:' in line:
hw_product = line.split(':', 1)[-1].strip()
elif 'Model Identifier:' in line:
hw_product += ' ' + line.split(':', 1)[-1].strip()
elif 'Hardware UUID:' in line:
hw_uuid = line.split(':', 1)[-1].strip()
result = subprocess.run(["system_profiler", "SPHardwareDataType"], capture_output=True, text=True, check=True)
for line in result.stdout.split("\n"):
if "Model Name:" in line:
hw_product = line.split(":", 1)[-1].strip()
elif "Model Identifier:" in line:
hw_product += " " + line.split(":", 1)[-1].strip()
elif "Hardware UUID:" in line:
hw_uuid = line.split(":", 1)[-1].strip()
except Exception:
pass
else:
@@ -72,25 +73,25 @@ def get_vm_info():
# UUID: fb65f41c-ec24-4539-beaf-f941903bdb2c
# ...
# Family: DigitalOcean_Droplet
dmidecode = subprocess.run(['dmidecode', '-t', 'system'], capture_output=True, text=True, check=True)
for line in dmidecode.stdout.split('\n'):
if 'Manufacturer:' in line:
hw_manufacturer = line.split(':', 1)[-1].strip()
elif 'Product Name:' in line:
hw_product = line.split(':', 1)[-1].strip()
elif 'UUID:' in line:
hw_uuid = line.split(':', 1)[-1].strip()
dmidecode = subprocess.run(["dmidecode", "-t", "system"], capture_output=True, text=True, check=True)
for line in dmidecode.stdout.split("\n"):
if "Manufacturer:" in line:
hw_manufacturer = line.split(":", 1)[-1].strip()
elif "Product Name:" in line:
hw_product = line.split(":", 1)[-1].strip()
elif "UUID:" in line:
hw_uuid = line.split(":", 1)[-1].strip()
except Exception:
pass
# Check for VM fingerprint in manufacturer/product name
if 'qemu' in hw_product.lower() or 'vbox' in hw_product.lower() or 'lxc' in hw_product.lower() or 'vm' in hw_product.lower():
if "qemu" in hw_product.lower() or "vbox" in hw_product.lower() or "lxc" in hw_product.lower() or "vm" in hw_product.lower():
hw_in_vm = True
# Check for QEMU explicitly in pmap output
try:
result = subprocess.run(['pmap', '1'], capture_output=True, text=True, check=True)
if 'qemu' in result.stdout.lower():
result = subprocess.run(["pmap", "1"], capture_output=True, text=True, check=True)
if "qemu" in result.stdout.lower():
hw_in_vm = True
except Exception:
pass
@@ -103,17 +104,18 @@ def get_vm_info():
"hw_uuid": hw_uuid,
}
def get_public_ip() -> str:
def fetch_url(url: str) -> str:
with urllib.request.urlopen(url, timeout=5) as response:
return response.read().decode('utf-8').strip()
return response.read().decode("utf-8").strip()
def fetch_dns(pubip_lookup_host: str) -> str:
return socket.gethostbyname(pubip_lookup_host).strip()
methods = [
(lambda: fetch_url("https://ipinfo.io/ip"), lambda r: r),
(lambda: fetch_url("https://api.ipify.org?format=json"), lambda r: json.loads(r)['ip']),
(lambda: fetch_url("https://api.ipify.org?format=json"), lambda r: json.loads(r)["ip"]),
(lambda: fetch_dns("myip.opendns.com"), lambda r: r),
(lambda: fetch_url("http://whatismyip.akamai.com/"), lambda r: r), # try HTTP as final fallback in case of TLS/system time errors
]
@@ -128,68 +130,72 @@ def get_public_ip() -> str:
raise Exception("Could not determine public IP address")
def get_local_ip(remote_ip: str='1.1.1.1', remote_port: int=80) -> str:
def get_local_ip(remote_ip: str = "1.1.1.1", remote_port: int = 80) -> str:
try:
with socket.socket(socket.AF_INET, socket.SOCK_DGRAM) as s:
s.connect((remote_ip, remote_port))
return s.getsockname()[0]
except Exception:
pass
return '127.0.0.1'
return "127.0.0.1"
ip_addrs = lambda addrs: (a for a in addrs if a.family == socket.AF_INET)
mac_addrs = lambda addrs: (a for a in addrs if a.family == psutil.AF_LINK)
def get_isp_info(ip=None):
# Get public IP
try:
ip = ip or urllib.request.urlopen('https://api.ipify.org').read().decode('utf8')
ip = ip or urllib.request.urlopen("https://api.ipify.org").read().decode("utf8")
except Exception:
pass
# Get ISP name, city, and country
data = {}
try:
url = f'https://ipapi.co/{ip}/json/'
url = f"https://ipapi.co/{ip}/json/"
response = urllib.request.urlopen(url)
data = json.loads(response.read().decode())
except Exception:
pass
isp = data.get('org', 'Unknown')
city = data.get('city', 'Unknown')
region = data.get('region', 'Unknown')
country = data.get('country_name', 'Unknown')
isp = data.get("org", "Unknown")
city = data.get("city", "Unknown")
region = data.get("region", "Unknown")
country = data.get("country_name", "Unknown")
# Get system DNS resolver servers
dns_server = None
try:
result = subprocess.run(['dig', 'example.com', 'A'], capture_output=True, text=True, check=True).stdout
dns_server = result.split(';; SERVER: ', 1)[-1].split('\n')[0].split('#')[0].strip()
result = subprocess.run(["dig", "example.com", "A"], capture_output=True, text=True, check=True).stdout
dns_server = result.split(";; SERVER: ", 1)[-1].split("\n")[0].split("#")[0].strip()
except Exception:
try:
dns_server = Path('/etc/resolv.conf').read_text().split('nameserver ', 1)[-1].split('\n')[0].strip()
dns_server = Path("/etc/resolv.conf").read_text().split("nameserver ", 1)[-1].split("\n")[0].strip()
except Exception:
dns_server = '127.0.0.1'
print(f'[red]:warning: WARNING: Could not determine DNS server, using {dns_server}[/red]')
dns_server = "127.0.0.1"
print(f"[red]:warning: WARNING: Could not determine DNS server, using {dns_server}[/red]")
# Get DNS resolver's ISP name
# url = f'https://ipapi.co/{dns_server}/json/'
# dns_isp = json.loads(urllib.request.urlopen(url).read().decode()).get('org', 'Unknown')
return {
'isp': isp,
'city': city,
'region': region,
'country': country,
'dns_server': dns_server,
"isp": isp,
"city": city,
"region": region,
"country": country,
"dns_server": dns_server,
# 'net_dns_isp': dns_isp,
}
def get_host_network() -> Dict[str, Any]:
def get_host_network() -> dict[str, Any]:
default_gateway_local_ip = get_local_ip()
gateways = psutil.net_if_addrs()
for interface, ips in gateways.items():
for local_ip in ip_addrs(ips):
if default_gateway_local_ip == local_ip.address:
@@ -204,20 +210,20 @@ def get_host_network() -> Dict[str, Any]:
# "is_behind_nat": local_ip.address != public_ip,
**get_isp_info(public_ip),
}
raise Exception("Could not determine host network info")
def get_os_info() -> Dict[str, Any]:
def get_os_info() -> dict[str, Any]:
os_release = platform.release()
if platform.system().lower() == 'darwin':
os_release = 'macOS ' + platform.mac_ver()[0]
if platform.system().lower() == "darwin":
os_release = "macOS " + platform.mac_ver()[0]
else:
try:
os_release = subprocess.run(['lsb_release', '-ds'], capture_output=True, text=True, check=True).stdout.strip()
os_release = subprocess.run(["lsb_release", "-ds"], capture_output=True, text=True, check=True).stdout.strip()
except Exception:
pass
return {
"os_arch": platform.machine(),
"os_family": platform.system().lower(),
@@ -226,7 +232,8 @@ def get_os_info() -> Dict[str, Any]:
"os_release": os_release,
}
def get_host_stats() -> Dict[str, Any]:
def get_host_stats() -> dict[str, Any]:
try:
with tempfile.TemporaryDirectory() as tmp_dir:
tmp_usage = psutil.disk_usage(str(tmp_dir))
@@ -267,24 +274,23 @@ def get_host_stats() -> Dict[str, Any]:
except Exception:
return {}
def get_host_immutable_info(host_info: Dict[str, Any]) -> Dict[str, Any]:
return {
key: value
for key, value in host_info.items()
if key in ['guid', 'net_mac', 'os_family', 'cpu_arch']
}
def get_host_immutable_info(host_info: dict[str, Any]) -> dict[str, Any]:
    """Return the subset of *host_info* limited to a fixed list of keys.

    Only the ``guid``, ``net_mac``, ``os_family`` and ``cpu_arch`` entries are
    kept, in the order they appear in *host_info*; absent keys are omitted.
    """
    kept_keys = ["guid", "net_mac", "os_family", "cpu_arch"]
    subset: dict[str, Any] = {}
    for name, val in host_info.items():
        if name in kept_keys:
            subset[name] = val
    return subset
def get_host_guid() -> str:
return machineid.hashed_id('archivebox')
return machineid.hashed_id("archivebox")
# Example usage
if __name__ == "__main__":
host_info = {
'guid': get_host_guid(),
'os': get_os_info(),
'vm': get_vm_info(),
'net': get_host_network(),
'stats': get_host_stats(),
"guid": get_host_guid(),
"os": get_os_info(),
"vm": get_vm_info(),
"net": get_host_network(),
"stats": get_host_stats(),
}
print(host_info)

View File

@@ -0,0 +1,51 @@
__package__ = "archivebox.machine"
import json
import shlex
from typing import Any
# Substrings that mark an env var name as sensitive. Matching is done against the
# upper-cased key and is a plain substring test, so e.g. "API_KEY" and "KEYBOARD"
# both match "KEY".
SENSITIVE_ENV_KEY_PARTS = ("KEY", "TOKEN", "SECRET")
def stringify_env_value(value: Any) -> str:
    """Convert a single env value into the plain string written after ``KEY=``.

    Args:
        value: Any env value; typically a str, bool, number, list or dict.

    Returns:
        ``""`` for ``None``, the string itself for ``str``, ``"True"``/``"False"``
        for ``bool``, and compact JSON (no spaces after separators) for
        everything else.

    Raises:
        TypeError: If ``value`` is not JSON-serializable (propagated from
            ``json.dumps``).
    """
    if value is None:
        return ""
    if isinstance(value, str):
        return value
    # Handle bool before the JSON fallback, which would emit lowercase
    # "true"/"false" instead of the Python-style spelling used here.
    if isinstance(value, bool):
        return "True" if value else "False"
    return json.dumps(value, separators=(",", ":"))
def is_redacted_env_key(key: str) -> bool:
    """Return True if ``key`` names an env var that should be redacted.

    The key is coerced to ``str`` (falsy keys become ``""``), upper-cased, and
    then checked for any of the ``SENSITIVE_ENV_KEY_PARTS`` substrings, so the
    match is case-insensitive and not limited to whole words.
    """
    upper_key = str(key or "").upper()
    return any(part in upper_key for part in SENSITIVE_ENV_KEY_PARTS)
def redact_env(env: dict[str, Any] | None) -> dict[str, Any]:
    """Return a copy of ``env`` with sensitive entries removed.

    Args:
        env: Mapping of env var names to values, or ``None``.

    Returns:
        A new dict whose keys are coerced to ``str``. Entries with a ``None``
        key, or with a key flagged by ``is_redacted_env_key``, are dropped.
        Values are passed through unchanged. Anything that is not a dict
        (including ``None``) yields ``{}``.
    """
    if not isinstance(env, dict):
        return {}
    return {
        str(key): value
        for key, value in env.items()
        if key is not None and not is_redacted_env_key(str(key))
    }
def env_to_dotenv_text(env: dict[str, Any] | None) -> str:
    """Render ``env`` as newline-separated ``KEY=value`` lines.

    Sensitive keys are removed via ``redact_env`` first. The remaining entries
    are sorted by key, entries whose value is ``None`` are skipped, and each
    value is stringified and shell-quoted with ``shlex.quote``.

    Args:
        env: Mapping of env var names to values, or ``None``.

    Returns:
        One ``KEY=value`` line per entry joined with ``"\\n"`` (no trailing
        newline); an empty string when nothing remains.
    """
    redacted_env = redact_env(env)
    return "\n".join(
        f"{key}={shlex.quote(stringify_env_value(value))}"
        for key, value in sorted(redacted_env.items())
        if value is not None
    )
def env_to_shell_exports(env: dict[str, Any] | None) -> str:
    """Render ``env`` as a single line of space-separated ``KEY=value`` pairs.

    Sensitive keys are removed via ``redact_env`` first. The remaining entries
    are sorted by key, entries whose value is ``None`` are skipped, and each
    value is stringified and shell-quoted with ``shlex.quote``.

    Note that, despite the function name, no ``export`` keyword is emitted;
    the output is just the assignments themselves.

    Args:
        env: Mapping of env var names to values, or ``None``.

    Returns:
        The ``KEY=value`` pairs joined with single spaces; an empty string when
        nothing remains.
    """
    redacted_env = redact_env(env)
    return " ".join(
        f"{key}={shlex.quote(stringify_env_value(value))}"
        for key, value in sorted(redacted_env.items())
        if value is not None
    )

View File

@@ -8,11 +8,9 @@ from archivebox.uuid_compat import uuid7
class Migration(migrations.Migration):
initial = True
dependencies = [
]
dependencies = []
operations = [
migrations.SeparateDatabaseAndState(
@@ -105,87 +103,143 @@ class Migration(migrations.Migration):
DROP TABLE IF EXISTS machine_binary;
DROP TABLE IF EXISTS machine_networkinterface;
DROP TABLE IF EXISTS machine_machine;
"""
""",
),
],
state_operations=[
migrations.CreateModel(
name='Machine',
name="Machine",
fields=[
('id', models.UUIDField(default=uuid7, editable=False, primary_key=True, serialize=False, unique=True)),
('created_at', models.DateTimeField(db_index=True, default=django.utils.timezone.now)),
('modified_at', models.DateTimeField(auto_now=True)),
('guid', models.CharField(default=None, editable=False, max_length=64, unique=True)),
('hostname', models.CharField(default=None, max_length=63)),
('hw_in_docker', models.BooleanField(default=False)),
('hw_in_vm', models.BooleanField(default=False)),
('hw_manufacturer', models.CharField(default=None, max_length=63)),
('hw_product', models.CharField(default=None, max_length=63)),
('hw_uuid', models.CharField(default=None, max_length=255)),
('os_arch', models.CharField(default=None, max_length=15)),
('os_family', models.CharField(default=None, max_length=15)),
('os_platform', models.CharField(default=None, max_length=63)),
('os_release', models.CharField(default=None, max_length=63)),
('os_kernel', models.CharField(default=None, max_length=255)),
('stats', models.JSONField(blank=True, default=dict, null=True)),
('config', models.JSONField(blank=True, default=dict, help_text='Machine-specific config overrides (e.g., resolved binary paths like WGET_BINARY)', null=True)),
('num_uses_succeeded', models.PositiveIntegerField(default=0)),
('num_uses_failed', models.PositiveIntegerField(default=0)),
("id", models.UUIDField(default=uuid7, editable=False, primary_key=True, serialize=False, unique=True)),
("created_at", models.DateTimeField(db_index=True, default=django.utils.timezone.now)),
("modified_at", models.DateTimeField(auto_now=True)),
("guid", models.CharField(default=None, editable=False, max_length=64, unique=True)),
("hostname", models.CharField(default=None, max_length=63)),
("hw_in_docker", models.BooleanField(default=False)),
("hw_in_vm", models.BooleanField(default=False)),
("hw_manufacturer", models.CharField(default=None, max_length=63)),
("hw_product", models.CharField(default=None, max_length=63)),
("hw_uuid", models.CharField(default=None, max_length=255)),
("os_arch", models.CharField(default=None, max_length=15)),
("os_family", models.CharField(default=None, max_length=15)),
("os_platform", models.CharField(default=None, max_length=63)),
("os_release", models.CharField(default=None, max_length=63)),
("os_kernel", models.CharField(default=None, max_length=255)),
("stats", models.JSONField(blank=True, default=dict, null=True)),
(
"config",
models.JSONField(
blank=True,
default=dict,
help_text="Machine-specific config overrides (e.g., resolved binary paths like WGET_BINARY)",
null=True,
),
),
("num_uses_succeeded", models.PositiveIntegerField(default=0)),
("num_uses_failed", models.PositiveIntegerField(default=0)),
],
options={
'app_label': 'machine',
"app_label": "machine",
},
),
migrations.CreateModel(
name='NetworkInterface',
name="NetworkInterface",
fields=[
('id', models.UUIDField(default=uuid7, editable=False, primary_key=True, serialize=False, unique=True)),
('created_at', models.DateTimeField(db_index=True, default=django.utils.timezone.now)),
('modified_at', models.DateTimeField(auto_now=True)),
('mac_address', models.CharField(default=None, editable=False, max_length=17)),
('ip_public', models.GenericIPAddressField(default=None, editable=False)),
('ip_local', models.GenericIPAddressField(default=None, editable=False)),
('dns_server', models.GenericIPAddressField(default=None, editable=False)),
('hostname', models.CharField(default=None, max_length=63)),
('iface', models.CharField(default=None, max_length=15)),
('isp', models.CharField(default=None, max_length=63)),
('city', models.CharField(default=None, max_length=63)),
('region', models.CharField(default=None, max_length=63)),
('country', models.CharField(default=None, max_length=63)),
('machine', models.ForeignKey(default=None, on_delete=django.db.models.deletion.CASCADE, to='machine.machine')),
('num_uses_succeeded', models.PositiveIntegerField(default=0)),
('num_uses_failed', models.PositiveIntegerField(default=0)),
("id", models.UUIDField(default=uuid7, editable=False, primary_key=True, serialize=False, unique=True)),
("created_at", models.DateTimeField(db_index=True, default=django.utils.timezone.now)),
("modified_at", models.DateTimeField(auto_now=True)),
("mac_address", models.CharField(default=None, editable=False, max_length=17)),
("ip_public", models.GenericIPAddressField(default=None, editable=False)),
("ip_local", models.GenericIPAddressField(default=None, editable=False)),
("dns_server", models.GenericIPAddressField(default=None, editable=False)),
("hostname", models.CharField(default=None, max_length=63)),
("iface", models.CharField(default=None, max_length=15)),
("isp", models.CharField(default=None, max_length=63)),
("city", models.CharField(default=None, max_length=63)),
("region", models.CharField(default=None, max_length=63)),
("country", models.CharField(default=None, max_length=63)),
("machine", models.ForeignKey(default=None, on_delete=django.db.models.deletion.CASCADE, to="machine.machine")),
("num_uses_succeeded", models.PositiveIntegerField(default=0)),
("num_uses_failed", models.PositiveIntegerField(default=0)),
],
options={
'unique_together': {('machine', 'ip_public', 'ip_local', 'mac_address', 'dns_server')},
'app_label': 'machine',
"unique_together": {("machine", "ip_public", "ip_local", "mac_address", "dns_server")},
"app_label": "machine",
},
),
migrations.CreateModel(
name='Binary',
name="Binary",
fields=[
('id', models.UUIDField(default=uuid7, editable=False, primary_key=True, serialize=False, unique=True)),
('created_at', models.DateTimeField(db_index=True, default=django.utils.timezone.now)),
('modified_at', models.DateTimeField(auto_now=True)),
('name', models.CharField(blank=True, db_index=True, default='', max_length=63)),
('binproviders', models.CharField(blank=True, default='env', help_text='Comma-separated list of allowed providers: apt,brew,pip,npm,env', max_length=127)),
('overrides', models.JSONField(blank=True, default=dict, help_text="Provider-specific overrides: {'apt': {'install_args': ['pkg']}, ...}")),
('binprovider', models.CharField(blank=True, default='', help_text='Provider that successfully installed this binary', max_length=31)),
('abspath', models.CharField(blank=True, default='', max_length=255)),
('version', models.CharField(blank=True, default='', max_length=32)),
('sha256', models.CharField(blank=True, default='', max_length=64)),
('status', models.CharField(choices=[('queued', 'Queued'), ('started', 'Started'), ('succeeded', 'Succeeded'), ('failed', 'Failed')], db_index=True, default='queued', max_length=16)),
('retry_at', models.DateTimeField(blank=True, db_index=True, default=django.utils.timezone.now, help_text='When to retry this binary installation', null=True)),
('output_dir', models.CharField(blank=True, default='', help_text='Directory where installation hook logs are stored', max_length=255)),
('machine', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='machine.machine')),
('num_uses_succeeded', models.PositiveIntegerField(default=0)),
('num_uses_failed', models.PositiveIntegerField(default=0)),
("id", models.UUIDField(default=uuid7, editable=False, primary_key=True, serialize=False, unique=True)),
("created_at", models.DateTimeField(db_index=True, default=django.utils.timezone.now)),
("modified_at", models.DateTimeField(auto_now=True)),
("name", models.CharField(blank=True, db_index=True, default="", max_length=63)),
(
"binproviders",
models.CharField(
blank=True,
default="env",
help_text="Comma-separated list of allowed providers: apt,brew,pip,npm,env",
max_length=127,
),
),
(
"overrides",
models.JSONField(
blank=True,
default=dict,
help_text="Provider-specific overrides: {'apt': {'install_args': ['pkg']}, ...}",
),
),
(
"binprovider",
models.CharField(
blank=True,
default="",
help_text="Provider that successfully installed this binary",
max_length=31,
),
),
("abspath", models.CharField(blank=True, default="", max_length=255)),
("version", models.CharField(blank=True, default="", max_length=32)),
("sha256", models.CharField(blank=True, default="", max_length=64)),
(
"status",
models.CharField(
choices=[("queued", "Queued"), ("started", "Started"), ("succeeded", "Succeeded"), ("failed", "Failed")],
db_index=True,
default="queued",
max_length=16,
),
),
(
"retry_at",
models.DateTimeField(
blank=True,
db_index=True,
default=django.utils.timezone.now,
help_text="When to retry this binary installation",
null=True,
),
),
(
"output_dir",
models.CharField(
blank=True,
default="",
help_text="Directory where installation hook logs are stored",
max_length=255,
),
),
("machine", models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to="machine.machine")),
("num_uses_succeeded", models.PositiveIntegerField(default=0)),
("num_uses_failed", models.PositiveIntegerField(default=0)),
],
options={
'verbose_name': 'Binary',
'verbose_name_plural': 'Binaries',
'unique_together': {('machine', 'name', 'abspath', 'version', 'sha256')},
'app_label': 'machine',
"verbose_name": "Binary",
"verbose_name_plural": "Binaries",
"unique_together": {("machine", "name", "abspath", "version", "sha256")},
"app_label": "machine",
},
),
],

View File

@@ -16,17 +16,17 @@ def converge_binary_table(apps, schema_editor):
cursor.execute("SELECT name FROM sqlite_master WHERE type='table' AND name IN ('machine_installedbinary', 'machine_binary')")
existing_tables = {row[0] for row in cursor.fetchall()}
print(f'DEBUG 0005: Existing tables: {existing_tables}')
print(f"DEBUG 0005: Existing tables: {existing_tables}")
# Drop old InstalledBinary table if it exists (0.8.6rc0 path)
if 'machine_installedbinary' in existing_tables:
print('✓ Dropping machine_installedbinary table (0.8.6rc0 divergence)')
if "machine_installedbinary" in existing_tables:
print("✓ Dropping machine_installedbinary table (0.8.6rc0 divergence)")
cursor.execute("DROP TABLE IF EXISTS machine_installedbinary")
# Create Binary table if it doesn't exist
# This handles the case where 0.8.6rc0's 0001_initial didn't create it
if 'machine_binary' not in existing_tables:
print('✓ Creating machine_binary table with correct schema')
if "machine_binary" not in existing_tables:
print("✓ Creating machine_binary table with correct schema")
cursor.execute("""
CREATE TABLE machine_binary (
id TEXT PRIMARY KEY NOT NULL,
@@ -53,15 +53,14 @@ def converge_binary_table(apps, schema_editor):
cursor.execute("CREATE INDEX machine_binary_name_idx ON machine_binary(name)")
cursor.execute("CREATE INDEX machine_binary_abspath_idx ON machine_binary(abspath)")
print('✓ machine_binary table created')
print("✓ machine_binary table created")
else:
print('✓ machine_binary table already exists')
print("✓ machine_binary table already exists")
class Migration(migrations.Migration):
dependencies = [
('machine', '0001_initial'),
("machine", "0001_initial"),
]
operations = [

View File

@@ -8,39 +8,95 @@ from archivebox.uuid_compat import uuid7
class Migration(migrations.Migration):
dependencies = [
('machine', '0005_converge_binary_model'),
("machine", "0005_converge_binary_model"),
]
operations = [
migrations.CreateModel(
name='Process',
name="Process",
fields=[
('id', models.UUIDField(default=uuid7, editable=False, primary_key=True, serialize=False, unique=True)),
('created_at', models.DateTimeField(db_index=True, default=django.utils.timezone.now)),
('modified_at', models.DateTimeField(auto_now=True)),
('pwd', models.CharField(blank=True, default='', help_text='Working directory for process execution', max_length=512)),
('cmd', models.JSONField(blank=True, default=list, help_text='Command as array of arguments')),
('env', models.JSONField(blank=True, default=dict, help_text='Environment variables for process')),
('timeout', models.IntegerField(default=120, help_text='Timeout in seconds')),
('pid', models.IntegerField(blank=True, default=None, help_text='OS process ID', null=True)),
('exit_code', models.IntegerField(blank=True, default=None, help_text='Process exit code (0 = success)', null=True)),
('stdout', models.TextField(blank=True, default='', help_text='Standard output from process')),
('stderr', models.TextField(blank=True, default='', help_text='Standard error from process')),
('started_at', models.DateTimeField(blank=True, default=None, help_text='When process was launched', null=True)),
('ended_at', models.DateTimeField(blank=True, default=None, help_text='When process completed/terminated', null=True)),
('url', models.URLField(blank=True, default=None, help_text='Connection URL (CDP endpoint, sonic server, etc.)', max_length=2048, null=True)),
('status', models.CharField(choices=[('queued', 'Queued'), ('running', 'Running'), ('exited', 'Exited')], db_index=True, default='queued', max_length=16)),
('retry_at', models.DateTimeField(blank=True, db_index=True, default=django.utils.timezone.now, help_text='When to retry this process', null=True)),
('binary', models.ForeignKey(blank=True, help_text='Binary used by this process', null=True, on_delete=django.db.models.deletion.SET_NULL, related_name='process_set', to='machine.binary')),
('iface', models.ForeignKey(blank=True, help_text='Network interface used by this process', null=True, on_delete=django.db.models.deletion.SET_NULL, related_name='process_set', to='machine.networkinterface')),
('machine', models.ForeignKey(help_text='Machine where this process executed', on_delete=django.db.models.deletion.CASCADE, related_name='process_set', to='machine.machine')),
("id", models.UUIDField(default=uuid7, editable=False, primary_key=True, serialize=False, unique=True)),
("created_at", models.DateTimeField(db_index=True, default=django.utils.timezone.now)),
("modified_at", models.DateTimeField(auto_now=True)),
("pwd", models.CharField(blank=True, default="", help_text="Working directory for process execution", max_length=512)),
("cmd", models.JSONField(blank=True, default=list, help_text="Command as array of arguments")),
("env", models.JSONField(blank=True, default=dict, help_text="Environment variables for process")),
("timeout", models.IntegerField(default=120, help_text="Timeout in seconds")),
("pid", models.IntegerField(blank=True, default=None, help_text="OS process ID", null=True)),
("exit_code", models.IntegerField(blank=True, default=None, help_text="Process exit code (0 = success)", null=True)),
("stdout", models.TextField(blank=True, default="", help_text="Standard output from process")),
("stderr", models.TextField(blank=True, default="", help_text="Standard error from process")),
("started_at", models.DateTimeField(blank=True, default=None, help_text="When process was launched", null=True)),
("ended_at", models.DateTimeField(blank=True, default=None, help_text="When process completed/terminated", null=True)),
(
"url",
models.URLField(
blank=True,
default=None,
help_text="Connection URL (CDP endpoint, sonic server, etc.)",
max_length=2048,
null=True,
),
),
(
"status",
models.CharField(
choices=[("queued", "Queued"), ("running", "Running"), ("exited", "Exited")],
db_index=True,
default="queued",
max_length=16,
),
),
(
"retry_at",
models.DateTimeField(
blank=True,
db_index=True,
default=django.utils.timezone.now,
help_text="When to retry this process",
null=True,
),
),
(
"binary",
models.ForeignKey(
blank=True,
help_text="Binary used by this process",
null=True,
on_delete=django.db.models.deletion.SET_NULL,
related_name="process_set",
to="machine.binary",
),
),
(
"iface",
models.ForeignKey(
blank=True,
help_text="Network interface used by this process",
null=True,
on_delete=django.db.models.deletion.SET_NULL,
related_name="process_set",
to="machine.networkinterface",
),
),
(
"machine",
models.ForeignKey(
help_text="Machine where this process executed",
on_delete=django.db.models.deletion.CASCADE,
related_name="process_set",
to="machine.machine",
),
),
],
options={
'verbose_name': 'Process',
'verbose_name_plural': 'Processes',
'indexes': [models.Index(fields=['machine', 'status', 'retry_at'], name='machine_pro_machine_5e3a87_idx'), models.Index(fields=['binary', 'exit_code'], name='machine_pro_binary__7bd19c_idx')],
"verbose_name": "Process",
"verbose_name_plural": "Processes",
"indexes": [
models.Index(fields=["machine", "status", "retry_at"], name="machine_pro_machine_5e3a87_idx"),
models.Index(fields=["binary", "exit_code"], name="machine_pro_binary__7bd19c_idx"),
],
},
),
]

View File

@@ -5,20 +5,38 @@ from django.db import migrations, models
class Migration(migrations.Migration):
dependencies = [
('machine', '0006_process'),
("machine", "0006_process"),
]
operations = [
migrations.AddField(
model_name='process',
name='parent',
field=models.ForeignKey(blank=True, help_text='Parent process that spawned this process', null=True, on_delete=django.db.models.deletion.SET_NULL, related_name='children', to='machine.process'),
model_name="process",
name="parent",
field=models.ForeignKey(
blank=True,
help_text="Parent process that spawned this process",
null=True,
on_delete=django.db.models.deletion.SET_NULL,
related_name="children",
to="machine.process",
),
),
migrations.AddField(
model_name='process',
name='process_type',
field=models.CharField(choices=[('supervisord', 'Supervisord'), ('orchestrator', 'Orchestrator'), ('worker', 'Worker'), ('cli', 'CLI'), ('binary', 'Binary')], db_index=True, default='cli', help_text='Type of process (cli, worker, orchestrator, binary, supervisord)', max_length=16),
model_name="process",
name="process_type",
field=models.CharField(
choices=[
("supervisord", "Supervisord"),
("orchestrator", "Orchestrator"),
("worker", "Worker"),
("cli", "CLI"),
("binary", "Binary"),
],
db_index=True,
default="cli",
help_text="Type of process (cli, worker, orchestrator, binary, supervisord)",
max_length=16,
),
),
]

View File

@@ -4,15 +4,20 @@ from django.db import migrations, models
class Migration(migrations.Migration):
dependencies = [
('machine', '0007_add_process_type_and_parent'),
("machine", "0007_add_process_type_and_parent"),
]
operations = [
migrations.AddField(
model_name='process',
name='worker_type',
field=models.CharField(blank=True, db_index=True, default='', help_text='Worker type name for WORKER processes (crawl, snapshot, archiveresult)', max_length=32),
model_name="process",
name="worker_type",
field=models.CharField(
blank=True,
db_index=True,
default="",
help_text="Worker type name for WORKER processes (crawl, snapshot, archiveresult)",
max_length=32,
),
),
]

View File

@@ -4,15 +4,19 @@ from django.db import migrations, models
class Migration(migrations.Migration):
dependencies = [
('machine', '0008_add_worker_type_field'),
("machine", "0008_add_worker_type_field"),
]
operations = [
migrations.AlterField(
model_name='binary',
name='status',
field=models.CharField(choices=[('queued', 'Queued'), ('installed', 'Installed')], db_index=True, default='queued', max_length=16),
model_name="binary",
name="status",
field=models.CharField(
choices=[("queued", "Queued"), ("installed", "Installed")],
db_index=True,
default="queued",
max_length=16,
),
),
]

View File

@@ -4,15 +4,27 @@ from django.db import migrations, models
class Migration(migrations.Migration):
dependencies = [
('machine', '0009_alter_binary_status'),
("machine", "0009_alter_binary_status"),
]
operations = [
migrations.AlterField(
model_name='process',
name='process_type',
field=models.CharField(choices=[('supervisord', 'Supervisord'), ('orchestrator', 'Orchestrator'), ('worker', 'Worker'), ('cli', 'CLI'), ('hook', 'Hook'), ('binary', 'Binary')], db_index=True, default='cli', help_text='Type of process (cli, worker, orchestrator, binary, supervisord)', max_length=16),
model_name="process",
name="process_type",
field=models.CharField(
choices=[
("supervisord", "Supervisord"),
("orchestrator", "Orchestrator"),
("worker", "Worker"),
("cli", "CLI"),
("hook", "Hook"),
("binary", "Binary"),
],
db_index=True,
default="cli",
help_text="Type of process (cli, worker, orchestrator, binary, supervisord)",
max_length=16,
),
),
]

View File

@@ -6,17 +6,16 @@ def remove_output_dir_if_exists(apps, schema_editor):
cursor.execute("PRAGMA table_info(machine_binary)")
columns = {row[1] for row in cursor.fetchall()}
if 'output_dir' not in columns:
if "output_dir" not in columns:
return
Binary = apps.get_model('machine', 'Binary')
schema_editor.remove_field(Binary, Binary._meta.get_field('output_dir'))
Binary = apps.get_model("machine", "Binary")
schema_editor.remove_field(Binary, Binary._meta.get_field("output_dir"))
class Migration(migrations.Migration):
dependencies = [
('machine', '0010_alter_process_process_type'),
("machine", "0010_alter_process_process_type"),
]
operations = [
@@ -26,8 +25,8 @@ class Migration(migrations.Migration):
],
state_operations=[
migrations.RemoveField(
model_name='binary',
name='output_dir',
model_name="binary",
name="output_dir",
),
],
),

File diff suppressed because it is too large Load Diff