WIP: checkpoint working tree before rebasing onto dev

This commit is contained in:
Nick Sweeting
2026-03-22 20:23:45 -07:00
parent a6548df8d0
commit f400a2cd67
87 changed files with 12607 additions and 1808 deletions

View File

@@ -19,12 +19,19 @@ class Command(BaseCommand):
def handle(self, *args, **kwargs):
import os
import subprocess
import sys
import time
import psutil
from archivebox.config.common import STORAGE_CONFIG
from archivebox.machine.models import Machine, Process
from archivebox.workers.supervisord_util import (
RUNNER_WORKER,
get_existing_supervisord_process,
get_worker,
start_worker,
stop_worker,
)
pidfile = kwargs.get("pidfile") or os.environ.get("ARCHIVEBOX_RUNSERVER_PIDFILE")
if not pidfile:
@@ -32,11 +39,38 @@ class Command(BaseCommand):
interval = max(0.2, float(kwargs.get("interval", 1.0)))
last_pid = None
runner_proc: subprocess.Popen[bytes] | None = None
def stop_duplicate_watchers() -> None:
    """Terminate any other runner_watch processes bound to the same pidfile.

    Scans every process on the machine (skipping our own pid) and stops each
    one whose command line mentions ``runner_watch`` together with this
    watcher's pidfile argument. Escalates SIGTERM -> SIGKILL when a process
    ignores termination or cannot be waited on.
    """
    my_pid = os.getpid()
    for candidate in psutil.process_iter(["pid", "cmdline"]):
        if candidate.info["pid"] == my_pid:
            continue
        argv = candidate.info.get("cmdline") or []
        if not argv:
            continue
        if "runner_watch" not in " ".join(argv):
            continue
        # Only kill watchers monitoring the SAME pidfile (either arg form).
        matches_pidfile = any(
            str(token) in (f"--pidfile={pidfile}", pidfile) for token in argv
        )
        if not matches_pidfile:
            continue
        try:
            candidate.terminate()
            candidate.wait(timeout=2.0)
        except (psutil.NoSuchProcess, psutil.AccessDenied, psutil.TimeoutExpired):
            try:
                candidate.kill()
            except (psutil.NoSuchProcess, psutil.AccessDenied):
                pass
def get_supervisor():
    """Return the running supervisord handle, refusing to proceed without one.

    Raises:
        RuntimeError: when no existing supervisord process can be found.
    """
    proc = get_existing_supervisord_process()
    if proc is None:
        raise RuntimeError("runner_watch requires a running supervisord process")
    return proc
stop_duplicate_watchers()
start_worker(get_supervisor(), RUNNER_WORKER, lazy=True)
def restart_runner() -> None:
nonlocal runner_proc
Process.cleanup_stale_running()
machine = Machine.current()
@@ -55,29 +89,18 @@ class Command(BaseCommand):
except Exception:
continue
if runner_proc and runner_proc.poll() is None:
try:
runner_proc.terminate()
runner_proc.wait(timeout=2.0)
except Exception:
try:
runner_proc.kill()
except Exception:
pass
supervisor = get_supervisor()
runner_proc = subprocess.Popen(
[sys.executable, '-m', 'archivebox', 'run', '--daemon'],
stdout=subprocess.DEVNULL,
stderr=subprocess.DEVNULL,
start_new_session=True,
)
try:
stop_worker(supervisor, RUNNER_WORKER["name"])
except Exception:
pass
start_worker(supervisor, RUNNER_WORKER)
def runner_running() -> bool:
return Process.objects.filter(
machine=Machine.current(),
status=Process.StatusChoices.RUNNING,
process_type=Process.TypeChoices.ORCHESTRATOR,
).exists()
proc = get_worker(get_supervisor(), RUNNER_WORKER["name"])
return bool(proc and proc.get("statename") == "RUNNING")
while True:
try:

View File

@@ -6,6 +6,7 @@ import socket
import psutil
import shutil
import subprocess
import shlex
from typing import Dict, cast, Iterator
from pathlib import Path
@@ -29,24 +30,63 @@ WORKERS_DIR_NAME = "workers"
# Global reference to supervisord process for cleanup
_supervisord_proc = None
def _shell_join(args: list[str]) -> str:
return shlex.join(args)
RUNNER_WORKER = {
"name": "worker_runner",
"command": "archivebox run --daemon",
"autostart": "true",
"command": _shell_join([sys.executable, "-m", "archivebox", "run", "--daemon"]),
"autostart": "false",
"autorestart": "true",
"stdout_logfile": "logs/worker_runner.log",
"redirect_stderr": "true",
}
# NOTE: a def (not a lambda assignment, PEP 8 E731) for consistency with the
# RUNSERVER_WORKER factory below; callers still invoke RUNNER_WATCH_WORKER(pidfile).
def RUNNER_WATCH_WORKER(pidfile):
    """Build the supervisord program config for the runner_watch worker.

    Args:
        pidfile: path to the runserver pidfile the watcher should monitor.

    Returns:
        dict of supervisord program settings (command, logging, autostart).
    """
    return {
        "name": "worker_runner_watch",
        "command": _shell_join([sys.executable, "-m", "archivebox", "manage", "runner_watch", f"--pidfile={pidfile}"]),
        "autostart": "false",
        "autorestart": "true",
        "stdout_logfile": "logs/worker_runner_watch.log",
        "redirect_stderr": "true",
    }
SERVER_WORKER = lambda host, port: {
"name": "worker_daphne",
"command": f"{sys.executable} -m daphne --bind={host} --port={port} --application-close-timeout=600 archivebox.core.asgi:application",
"command": _shell_join([sys.executable, "-m", "daphne", f"--bind={host}", f"--port={port}", "--application-close-timeout=600", "archivebox.core.asgi:application"]),
"autostart": "false",
"autorestart": "true",
"stdout_logfile": "logs/worker_daphne.log",
"redirect_stderr": "true",
}
def RUNSERVER_WORKER(host: str, port: str, *, reload: bool, pidfile: str | None = None, nothreading: bool = False):
    """Build the supervisord program config for the Django runserver worker.

    Args:
        host: interface for the dev server to bind.
        port: port for the dev server to bind.
        reload: enable Django autoreload (omits ``--noreload``); requires ``pidfile``.
        pidfile: path where runserver records its pid; required when ``reload`` is true.
        nothreading: pass ``--nothreading`` to runserver.

    Returns:
        dict of supervisord program settings (command, environment, logging).

    Raises:
        ValueError: if ``reload`` is true but no ``pidfile`` was given.
    """
    command = [sys.executable, "-m", "archivebox", "manage", "runserver", f"{host}:{port}"]
    if not reload:
        command.append("--noreload")
    if nothreading:
        command.append("--nothreading")
    # Values carry literal double quotes per supervisord's environment= syntax.
    environment = ['ARCHIVEBOX_RUNSERVER="1"']
    if reload:
        # Explicit raise instead of `assert`, which is stripped under `python -O`.
        if not pidfile:
            raise ValueError("RUNSERVER_WORKER requires a pidfile when reload=True")
        environment.extend([
            'ARCHIVEBOX_AUTORELOAD="1"',
            f'ARCHIVEBOX_RUNSERVER_PIDFILE="{pidfile}"',
        ])
    return {
        "name": "worker_runserver",
        "command": _shell_join(command),
        "environment": ",".join(environment),
        "autostart": "false",
        "autorestart": "true",
        "stdout_logfile": "logs/worker_runserver.log",
        "redirect_stderr": "true",
    }
def is_port_in_use(host: str, port: int) -> bool:
"""Check if a port is already in use."""
try:
@@ -511,16 +551,30 @@ def watch_worker(supervisor, daemon_name, interval=5):
def start_server_workers(host='0.0.0.0', port='8000', daemonize=False):
def start_server_workers(host='0.0.0.0', port='8000', daemonize=False, debug=False, reload=False, nothreading=False):
from archivebox.config.common import STORAGE_CONFIG
supervisor = get_or_create_supervisord_process(daemonize=daemonize)
bg_workers = [RUNNER_WORKER]
if debug:
pidfile = str(STORAGE_CONFIG.TMP_DIR / 'runserver.pid') if reload else None
server_worker = RUNSERVER_WORKER(host=host, port=port, reload=reload, pidfile=pidfile, nothreading=nothreading)
bg_workers: list[tuple[dict[str, str], bool]] = (
[(RUNNER_WORKER, True), (RUNNER_WATCH_WORKER(pidfile), False)] if reload else [(RUNNER_WORKER, False)]
)
log_files = ['logs/worker_runserver.log', 'logs/worker_runner.log']
if reload:
log_files.insert(1, 'logs/worker_runner_watch.log')
else:
server_worker = SERVER_WORKER(host=host, port=port)
bg_workers = [(RUNNER_WORKER, False)]
log_files = ['logs/worker_daphne.log', 'logs/worker_runner.log']
print()
start_worker(supervisor, SERVER_WORKER(host=host, port=port))
start_worker(supervisor, server_worker)
print()
for worker in bg_workers:
start_worker(supervisor, worker)
for worker, lazy in bg_workers:
start_worker(supervisor, worker, lazy=lazy)
print()
if not daemonize:
@@ -529,7 +583,7 @@ def start_server_workers(host='0.0.0.0', port='8000', daemonize=False):
sys.stdout.write('Tailing worker logs (Ctrl+C to stop)...\n\n')
sys.stdout.flush()
tail_multiple_worker_logs(
log_files=['logs/worker_daphne.log', 'logs/worker_runner.log'],
log_files=log_files,
follow=True,
proc=_supervisord_proc, # Stop tailing when supervisord exits
)

View File

@@ -50,10 +50,11 @@ def bg_archive_snapshots(snapshots, kwargs: dict | None = None) -> int:
Snapshot.objects.filter(id=snapshot.id).update(
status=Snapshot.StatusChoices.QUEUED,
retry_at=timezone.now(),
downloaded_at=None,
)
crawl_id = getattr(snapshot, 'crawl_id', None)
if crawl_id:
Crawl.objects.filter(id=crawl_id).exclude(status=Crawl.StatusChoices.SEALED).update(
Crawl.objects.filter(id=crawl_id).update(
status=Crawl.StatusChoices.QUEUED,
retry_at=timezone.now(),
)
@@ -75,10 +76,11 @@ def bg_archive_snapshot(snapshot, overwrite: bool = False, methods: list | None
Snapshot.objects.filter(id=snapshot.id).update(
status=Snapshot.StatusChoices.QUEUED,
retry_at=timezone.now(),
downloaded_at=None,
)
crawl_id = getattr(snapshot, 'crawl_id', None)
if crawl_id:
Crawl.objects.filter(id=crawl_id).exclude(status=Crawl.StatusChoices.SEALED).update(
Crawl.objects.filter(id=crawl_id).update(
status=Crawl.StatusChoices.QUEUED,
retry_at=timezone.now(),
)