mirror of
https://github.com/ArchiveBox/ArchiveBox.git
synced 2026-04-06 07:47:53 +10:00
WIP: checkpoint working tree before rebasing onto dev
This commit is contained in:
@@ -19,12 +19,19 @@ class Command(BaseCommand):
|
||||
|
||||
def handle(self, *args, **kwargs):
|
||||
import os
|
||||
import subprocess
|
||||
import sys
|
||||
import time
|
||||
|
||||
import psutil
|
||||
|
||||
from archivebox.config.common import STORAGE_CONFIG
|
||||
from archivebox.machine.models import Machine, Process
|
||||
from archivebox.workers.supervisord_util import (
|
||||
RUNNER_WORKER,
|
||||
get_existing_supervisord_process,
|
||||
get_worker,
|
||||
start_worker,
|
||||
stop_worker,
|
||||
)
|
||||
|
||||
pidfile = kwargs.get("pidfile") or os.environ.get("ARCHIVEBOX_RUNSERVER_PIDFILE")
|
||||
if not pidfile:
|
||||
@@ -32,11 +39,38 @@ class Command(BaseCommand):
|
||||
|
||||
interval = max(0.2, float(kwargs.get("interval", 1.0)))
|
||||
last_pid = None
|
||||
runner_proc: subprocess.Popen[bytes] | None = None
|
||||
|
||||
def stop_duplicate_watchers() -> None:
    """Terminate any other runner_watch processes bound to the same pidfile.

    Scans all visible processes, skipping the current one, and matches
    candidates by the 'runner_watch' marker in their command line AND an
    exact match on this watcher's pidfile argument. Matches are asked to
    terminate gracefully, then killed if they do not exit in time.
    """
    my_pid = os.getpid()
    for candidate in psutil.process_iter(["pid", "cmdline"]):
        if candidate.info["pid"] == my_pid:
            continue  # never touch ourselves
        args = candidate.info.get("cmdline") or []
        if not args:
            continue
        if "runner_watch" not in " ".join(args):
            continue
        # Only stop watchers tracking *this* pidfile, not unrelated instances.
        tracks_same_pidfile = any(
            str(a) == f"--pidfile={pidfile}" or str(a) == pidfile for a in args
        )
        if not tracks_same_pidfile:
            continue
        try:
            candidate.terminate()
            candidate.wait(timeout=2.0)
        except (psutil.NoSuchProcess, psutil.AccessDenied, psutil.TimeoutExpired):
            # Graceful stop failed or was not permitted — escalate to SIGKILL,
            # tolerating the process having already exited.
            try:
                candidate.kill()
            except (psutil.NoSuchProcess, psutil.AccessDenied):
                pass
|
||||
|
||||
def get_supervisor():
    """Return the live supervisord process handle, failing loudly when absent.

    Raises:
        RuntimeError: when no supervisord process is running, since
            runner_watch cannot manage workers without one.
    """
    existing = get_existing_supervisord_process()
    if existing is None:
        raise RuntimeError("runner_watch requires a running supervisord process")
    return existing
|
||||
|
||||
stop_duplicate_watchers()
|
||||
start_worker(get_supervisor(), RUNNER_WORKER, lazy=True)
|
||||
|
||||
def restart_runner() -> None:
|
||||
nonlocal runner_proc
|
||||
|
||||
Process.cleanup_stale_running()
|
||||
machine = Machine.current()
|
||||
|
||||
@@ -55,29 +89,18 @@ class Command(BaseCommand):
|
||||
except Exception:
|
||||
continue
|
||||
|
||||
if runner_proc and runner_proc.poll() is None:
|
||||
try:
|
||||
runner_proc.terminate()
|
||||
runner_proc.wait(timeout=2.0)
|
||||
except Exception:
|
||||
try:
|
||||
runner_proc.kill()
|
||||
except Exception:
|
||||
pass
|
||||
supervisor = get_supervisor()
|
||||
|
||||
runner_proc = subprocess.Popen(
|
||||
[sys.executable, '-m', 'archivebox', 'run', '--daemon'],
|
||||
stdout=subprocess.DEVNULL,
|
||||
stderr=subprocess.DEVNULL,
|
||||
start_new_session=True,
|
||||
)
|
||||
try:
|
||||
stop_worker(supervisor, RUNNER_WORKER["name"])
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
start_worker(supervisor, RUNNER_WORKER)
|
||||
|
||||
def runner_running() -> bool:
    """Report whether an orchestrator Process is RUNNING on this machine.

    Queries the Process table for a RUNNING orchestrator entry tied to the
    current Machine record.
    """
    # NOTE(review): the original body contained a second, unreachable
    # implementation stacked after this return (a supervisord get_worker()
    # statename check — apparently a diff/merge artifact). Only the
    # reachable implementation is kept; confirm which one was intended.
    return Process.objects.filter(
        machine=Machine.current(),
        status=Process.StatusChoices.RUNNING,
        process_type=Process.TypeChoices.ORCHESTRATOR,
    ).exists()
|
||||
|
||||
while True:
|
||||
try:
|
||||
|
||||
@@ -6,6 +6,7 @@ import socket
|
||||
import psutil
|
||||
import shutil
|
||||
import subprocess
|
||||
import shlex
|
||||
|
||||
from typing import Dict, cast, Iterator
|
||||
from pathlib import Path
|
||||
@@ -29,24 +30,63 @@ WORKERS_DIR_NAME = "workers"
|
||||
# Global reference to supervisord process for cleanup
|
||||
_supervisord_proc = None
|
||||
|
||||
|
||||
def _shell_join(args: list[str]) -> str:
|
||||
return shlex.join(args)
|
||||
|
||||
# Supervisord program definition for the background runner worker.
# The command is built via _shell_join so the exact current interpreter
# (sys.executable) is used and each argument is safely quoted.
# NOTE(review): the original literal carried duplicate "command" and
# "autostart" keys (a diff artifact); in a dict literal the later value
# wins, so only the effective values are kept here.
RUNNER_WORKER = {
    "name": "worker_runner",
    "command": _shell_join([sys.executable, "-m", "archivebox", "run", "--daemon"]),
    "autostart": "false",  # started explicitly by the server startup code
    "autorestart": "true",
    "stdout_logfile": "logs/worker_runner.log",
    "redirect_stderr": "true",
}
|
||||
|
||||
def RUNNER_WATCH_WORKER(pidfile):
    """Build the supervisord program config for the runner_watch sidecar.

    Converted from a lambda assignment (PEP 8 E731) to a def; callers still
    invoke it with a single positional argument.

    Args:
        pidfile: path to the runserver pidfile the watcher should track,
            forwarded to the management command as --pidfile=<path>.

    Returns:
        A dict of supervisord [program:x] settings.
    """
    return {
        "name": "worker_runner_watch",
        "command": _shell_join([sys.executable, "-m", "archivebox", "manage", "runner_watch", f"--pidfile={pidfile}"]),
        "autostart": "false",
        "autorestart": "true",
        "stdout_logfile": "logs/worker_runner_watch.log",
        "redirect_stderr": "true",
    }
|
||||
|
||||
def SERVER_WORKER(host, port):
    """Build the supervisord program config for the daphne ASGI server.

    Converted from a lambda assignment (PEP 8 E731) to a def; callers still
    invoke it as SERVER_WORKER(host=..., port=...).
    NOTE(review): the original literal had two "command" keys stacked
    (a diff artifact); only the later, effective one is kept.

    Args:
        host: interface daphne should bind to.
        port: TCP port daphne should listen on.

    Returns:
        A dict of supervisord [program:x] settings.
    """
    return {
        "name": "worker_daphne",
        "command": _shell_join([sys.executable, "-m", "daphne", f"--bind={host}", f"--port={port}", "--application-close-timeout=600", "archivebox.core.asgi:application"]),
        "autostart": "false",
        "autorestart": "true",
        "stdout_logfile": "logs/worker_daphne.log",
        "redirect_stderr": "true",
    }
|
||||
|
||||
|
||||
def RUNSERVER_WORKER(host: str, port: str, *, reload: bool, pidfile: str | None = None, nothreading: bool = False):
|
||||
command = [sys.executable, "-m", "archivebox", "manage", "runserver", f"{host}:{port}"]
|
||||
if not reload:
|
||||
command.append("--noreload")
|
||||
if nothreading:
|
||||
command.append("--nothreading")
|
||||
|
||||
environment = ['ARCHIVEBOX_RUNSERVER="1"']
|
||||
if reload:
|
||||
assert pidfile, "RUNSERVER_WORKER requires a pidfile when reload=True"
|
||||
environment.extend([
|
||||
'ARCHIVEBOX_AUTORELOAD="1"',
|
||||
f'ARCHIVEBOX_RUNSERVER_PIDFILE="{pidfile}"',
|
||||
])
|
||||
|
||||
return {
|
||||
"name": "worker_runserver",
|
||||
"command": _shell_join(command),
|
||||
"environment": ",".join(environment),
|
||||
"autostart": "false",
|
||||
"autorestart": "true",
|
||||
"stdout_logfile": "logs/worker_runserver.log",
|
||||
"redirect_stderr": "true",
|
||||
}
|
||||
|
||||
def is_port_in_use(host: str, port: int) -> bool:
|
||||
"""Check if a port is already in use."""
|
||||
try:
|
||||
@@ -511,16 +551,30 @@ def watch_worker(supervisor, daemon_name, interval=5):
|
||||
|
||||
|
||||
|
||||
def start_server_workers(host='0.0.0.0', port='8000', daemonize=False):
|
||||
def start_server_workers(host='0.0.0.0', port='8000', daemonize=False, debug=False, reload=False, nothreading=False):
|
||||
from archivebox.config.common import STORAGE_CONFIG
|
||||
|
||||
supervisor = get_or_create_supervisord_process(daemonize=daemonize)
|
||||
|
||||
bg_workers = [RUNNER_WORKER]
|
||||
if debug:
|
||||
pidfile = str(STORAGE_CONFIG.TMP_DIR / 'runserver.pid') if reload else None
|
||||
server_worker = RUNSERVER_WORKER(host=host, port=port, reload=reload, pidfile=pidfile, nothreading=nothreading)
|
||||
bg_workers: list[tuple[dict[str, str], bool]] = (
|
||||
[(RUNNER_WORKER, True), (RUNNER_WATCH_WORKER(pidfile), False)] if reload else [(RUNNER_WORKER, False)]
|
||||
)
|
||||
log_files = ['logs/worker_runserver.log', 'logs/worker_runner.log']
|
||||
if reload:
|
||||
log_files.insert(1, 'logs/worker_runner_watch.log')
|
||||
else:
|
||||
server_worker = SERVER_WORKER(host=host, port=port)
|
||||
bg_workers = [(RUNNER_WORKER, False)]
|
||||
log_files = ['logs/worker_daphne.log', 'logs/worker_runner.log']
|
||||
|
||||
print()
|
||||
start_worker(supervisor, SERVER_WORKER(host=host, port=port))
|
||||
start_worker(supervisor, server_worker)
|
||||
print()
|
||||
for worker in bg_workers:
|
||||
start_worker(supervisor, worker)
|
||||
for worker, lazy in bg_workers:
|
||||
start_worker(supervisor, worker, lazy=lazy)
|
||||
print()
|
||||
|
||||
if not daemonize:
|
||||
@@ -529,7 +583,7 @@ def start_server_workers(host='0.0.0.0', port='8000', daemonize=False):
|
||||
sys.stdout.write('Tailing worker logs (Ctrl+C to stop)...\n\n')
|
||||
sys.stdout.flush()
|
||||
tail_multiple_worker_logs(
|
||||
log_files=['logs/worker_daphne.log', 'logs/worker_runner.log'],
|
||||
log_files=log_files,
|
||||
follow=True,
|
||||
proc=_supervisord_proc, # Stop tailing when supervisord exits
|
||||
)
|
||||
|
||||
@@ -50,10 +50,11 @@ def bg_archive_snapshots(snapshots, kwargs: dict | None = None) -> int:
|
||||
Snapshot.objects.filter(id=snapshot.id).update(
|
||||
status=Snapshot.StatusChoices.QUEUED,
|
||||
retry_at=timezone.now(),
|
||||
downloaded_at=None,
|
||||
)
|
||||
crawl_id = getattr(snapshot, 'crawl_id', None)
|
||||
if crawl_id:
|
||||
Crawl.objects.filter(id=crawl_id).exclude(status=Crawl.StatusChoices.SEALED).update(
|
||||
Crawl.objects.filter(id=crawl_id).update(
|
||||
status=Crawl.StatusChoices.QUEUED,
|
||||
retry_at=timezone.now(),
|
||||
)
|
||||
@@ -75,10 +76,11 @@ def bg_archive_snapshot(snapshot, overwrite: bool = False, methods: list | None
|
||||
Snapshot.objects.filter(id=snapshot.id).update(
|
||||
status=Snapshot.StatusChoices.QUEUED,
|
||||
retry_at=timezone.now(),
|
||||
downloaded_at=None,
|
||||
)
|
||||
crawl_id = getattr(snapshot, 'crawl_id', None)
|
||||
if crawl_id:
|
||||
Crawl.objects.filter(id=crawl_id).exclude(status=Crawl.StatusChoices.SEALED).update(
|
||||
Crawl.objects.filter(id=crawl_id).update(
|
||||
status=Crawl.StatusChoices.QUEUED,
|
||||
retry_at=timezone.now(),
|
||||
)
|
||||
|
||||
Reference in New Issue
Block a user