Mirror of https://github.com/ArchiveBox/ArchiveBox.git
Synced 2026-04-04 23:07:56 +10:00
logging and admin ui improvements
@@ -115,12 +115,10 @@ def add(urls: str | list[str],
     # - Repeat until max_depth reached
 
     if bg:
-        # Background mode: start orchestrator and return immediately
-        print('[yellow]\\[*] Running in background mode - starting orchestrator...[/yellow]')
-        orchestrator = Orchestrator(exit_on_idle=True)
-        orchestrator.start()  # Fork to background
+        # Background mode: just queue work and return (orchestrator via server will pick it up)
+        print('[yellow]\\[*] URLs queued. Orchestrator will process them (run `archivebox server` if not already running).[/yellow]')
     else:
-        # Foreground mode: run orchestrator until all work is done
+        # Foreground mode: run orchestrator inline until all work is done
         print(f'[green]\\[*] Starting orchestrator to process crawl...[/green]')
         orchestrator = Orchestrator(exit_on_idle=True)
         orchestrator.runloop()  # Block until complete
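
Note on this hunk: in background mode the CLI no longer forks its own Orchestrator; it just queues work and defers to the orchestrator already running under `archivebox server`. The foreground path still blocks on `runloop()` with `exit_on_idle=True`. The diff doesn't show the runloop internals; a minimal sketch of an exit-on-idle loop under assumed names (`get_next_task` and `process` are hypothetical stand-ins) might look like:

# Hypothetical sketch of a run-until-idle worker loop, not ArchiveBox's actual code.
import time

class IdleExitLoop:
    def __init__(self, exit_on_idle: bool = True, poll_interval: float = 0.5):
        self.exit_on_idle = exit_on_idle
        self.poll_interval = poll_interval

    def runloop(self, get_next_task, process):
        while True:
            task = get_next_task()       # e.g. pop a queued Crawl/Snapshot from the DB
            if task is None:
                if self.exit_on_idle:
                    return               # queue drained: "block until complete" semantics
                time.sleep(self.poll_interval)
                continue
            process(task)

With `exit_on_idle=True` the loop returns as soon as the queue is empty, which is what lets the foreground `add` command terminate once the crawl finishes.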
@@ -117,11 +117,11 @@ def run_plugins(
        if snapshot_id:
            snapshot_ids.add(snapshot_id)
        elif record.get('url'):
-           # Look up by URL
-           try:
-               snap = Snapshot.objects.get(url=record['url'])
+           # Look up by URL (get most recent if multiple exist)
+           snap = Snapshot.objects.filter(url=record['url']).order_by('-created_at').first()
+           if snap:
                snapshot_ids.add(str(snap.id))
-           except Snapshot.DoesNotExist:
+           else:
                rprint(f'[yellow]Snapshot not found for URL: {record["url"]}[/yellow]', file=sys.stderr)

    elif record_type == TYPE_ARCHIVERESULT:
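
The switch from `.get()` to `.filter().order_by('-created_at').first()` trades exception handling for a None check: `.get()` raises `Snapshot.DoesNotExist` when no row matches and `MultipleObjectsReturned` when several do, while `.first()` returns the newest match or None and never raises. A standalone sketch contrasting the two (the `core.models` import path is assumed from ArchiveBox's layout):

# Sketch contrasting .get() with .filter().first() on a Django model.
from django.core.exceptions import MultipleObjectsReturned, ObjectDoesNotExist
from core.models import Snapshot  # import path assumed

def lookup_with_get(url: str):
    try:
        return Snapshot.objects.get(url=url)  # raises if 0 or >1 rows match
    except (ObjectDoesNotExist, MultipleObjectsReturned):
        return None

def lookup_with_first(url: str):
    # Never raises: returns the newest matching row, or None if nothing matches.
    return Snapshot.objects.filter(url=url).order_by('-created_at').first()

The `order_by('-created_at')` is what makes duplicate URLs deterministic: the most recently created Snapshot wins.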
@@ -49,20 +49,45 @@ def install(dry_run: bool=False) -> None:
     # Using a minimal crawl that will trigger on_Crawl hooks
     created_by_id = get_or_create_system_user_pk()
 
-    seed = Seed.objects.create(
+    seed, _created = Seed.objects.get_or_create(
         uri='archivebox://install',
         label='Dependency detection',
         created_by_id=created_by_id,
+        defaults={
+            'extractor': 'auto',
+        }
     )
 
-    crawl = Crawl.objects.create(
+    crawl, created = Crawl.objects.get_or_create(
         seed=seed,
         max_depth=0,
         created_by_id=created_by_id,
-        status='queued',
+        defaults={
+            'status': 'queued',
+        }
     )
 
+    # If crawl already existed, reset it to queued state so it can be processed again
+    if not created:
+        crawl.status = 'queued'
+        crawl.retry_at = timezone.now()
+        crawl.save()
+
     print(f'[+] Created dependency detection crawl: {crawl.id}')
+    print(f'[+] Crawl status: {crawl.status}, retry_at: {crawl.retry_at}')
+
+    # Verify the crawl is in the queue
+    from crawls.models import Crawl as CrawlModel
+    queued_crawls = CrawlModel.objects.filter(
+        retry_at__lte=timezone.now()
+    ).exclude(
+        status__in=CrawlModel.FINAL_STATES
+    )
+    print(f'[+] Crawls in queue: {queued_crawls.count()}')
+    if queued_crawls.exists():
+        for c in queued_crawls:
+            print(f'    - Crawl {c.id}: status={c.status}, retry_at={c.retry_at}')
 
     print('[+] Running crawl to detect binaries via on_Crawl hooks...')
     print()
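
The move to `get_or_create()` makes `install` idempotent: the keyword arguments outside `defaults` identify the row, while `defaults` is applied only when a new row is inserted. That is exactly why the existing-crawl branch needs the explicit reset: a pre-existing row keeps whatever stale `status` it had. A compact, annotated restatement of that split (field names taken from the diff):

# get_or_create semantics: lookup kwargs vs. defaults.
crawl, created = Crawl.objects.get_or_create(
    seed=seed,                      # lookup field
    max_depth=0,                    # lookup field
    created_by_id=created_by_id,    # lookup field
    defaults={'status': 'queued'},  # only applied if a new row is created
)
if not created:
    # Re-running install: reset the old row so the orchestrator picks it up again.
    crawl.status = 'queued'
    crawl.retry_at = timezone.now()
    crawl.save()

The verification block then queries for any non-final crawl whose `retry_at` is due, which is presumably the same predicate the orchestrator uses to claim work, so a zero count here would flag a queueing bug immediately.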
@@ -56,20 +56,53 @@ def server(runserver_args: Iterable[str]=(SERVER_CONFIG.BIND_ADDR,),
     except IndexError:
         pass
 
-    print('[green][+] Starting ArchiveBox webserver...[/green]')
-    print(f'    [blink][green]>[/green][/blink] Starting ArchiveBox webserver on [deep_sky_blue4][link=http://{host}:{port}]http://{host}:{port}[/link][/deep_sky_blue4]')
-    print(f'    [green]>[/green] Log in to ArchiveBox Admin UI on [deep_sky_blue3][link=http://{host}:{port}/admin]http://{host}:{port}/admin[/link][/deep_sky_blue3]')
-    print('    > Writing ArchiveBox error log to ./logs/errors.log')
-
     if SHELL_CONFIG.DEBUG:
+        print('[green][+] Starting ArchiveBox webserver in DEBUG mode...[/green]')
+        print(f'    [blink][green]>[/green][/blink] Starting ArchiveBox webserver on [deep_sky_blue4][link=http://{host}:{port}]http://{host}:{port}[/link][/deep_sky_blue4]')
+        print(f'    [green]>[/green] Log in to ArchiveBox Admin UI on [deep_sky_blue3][link=http://{host}:{port}/admin]http://{host}:{port}/admin[/link][/deep_sky_blue3]')
+        print('    > Writing ArchiveBox error log to ./logs/errors.log')
         if not reload:
             runserver_args.append('--noreload')  # '--insecure'
         if nothreading:
             runserver_args.append('--nothreading')
         call_command("runserver", *runserver_args)
     else:
-        from workers.supervisord_util import start_server_workers
+        from workers.supervisord_util import (
+            get_existing_supervisord_process,
+            get_worker,
+            start_server_workers,
+            tail_multiple_worker_logs,
+        )
+
+        # Check if supervisord is already running
+        supervisor = get_existing_supervisord_process()
+        if supervisor:
+            daphne_proc = get_worker(supervisor, 'worker_daphne')
+
+            # If daphne is already running, just tail logs
+            if daphne_proc and daphne_proc.get('statename') == 'RUNNING':
+                orchestrator_proc = get_worker(supervisor, 'worker_orchestrator')
+                print('[yellow][!] ArchiveBox server is already running[/yellow]')
+                print(f'    [green]√[/green] Web server (worker_daphne) is RUNNING on [deep_sky_blue4][link=http://{host}:{port}]http://{host}:{port}[/link][/deep_sky_blue4]')
+                if orchestrator_proc and orchestrator_proc.get('statename') == 'RUNNING':
+                    print(f'    [green]√[/green] Background worker (worker_orchestrator) is RUNNING')
+                print()
+                print('[blue][i] Tailing worker logs (Ctrl+C to stop watching)...[/i][/blue]')
+                print()
+
+                # Tail logs for both workers
+                tail_multiple_worker_logs(
+                    log_files=['logs/worker_daphne.log', 'logs/worker_orchestrator.log'],
+                    follow=True,
+                )
+                return
+            # Otherwise, daphne is not running - fall through to start it
+
+        # No existing workers found - start new ones
+        print('[green][+] Starting ArchiveBox webserver...[/green]')
+        print(f'    [blink][green]>[/green][/blink] Starting ArchiveBox webserver on [deep_sky_blue4][link=http://{host}:{port}]http://{host}:{port}[/link][/deep_sky_blue4]')
+        print(f'    [green]>[/green] Log in to ArchiveBox Admin UI on [deep_sky_blue3][link=http://{host}:{port}/admin]http://{host}:{port}/admin[/link][/deep_sky_blue3]')
+        print('    > Writing ArchiveBox error log to ./logs/errors.log')
+        print()
         start_server_workers(host=host, port=port, daemonize=daemonize)
         print("\n[i][green][🟩] ArchiveBox server shut down gracefully.[/green][/i]")
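
The new already-running branch avoids spawning duplicate workers: if supervisord reports `worker_daphne` in the RUNNING state, the command only attaches to the logs and returns. The diff doesn't include the body of `tail_multiple_worker_logs`; a rough sketch of interleaved multi-file tailing under assumed names (everything here except the log file names is hypothetical) could be:

# Hypothetical sketch of following several log files at once,
# similar in spirit to tail_multiple_worker_logs(log_files=[...], follow=True).
import threading, time

def _tail(path: str, stop: threading.Event) -> None:
    with open(path, 'r') as f:
        f.seek(0, 2)                  # start at end of file, like `tail -f`
        while not stop.is_set():
            line = f.readline()
            if line:
                print(f'[{path}] {line}', end='')
            else:
                time.sleep(0.2)       # no new data yet; poll again shortly

def tail_logs(paths: list[str]) -> None:
    stop = threading.Event()
    threads = [threading.Thread(target=_tail, args=(p, stop), daemon=True) for p in paths]
    for t in threads:
        t.start()
    try:
        while True:
            time.sleep(1)             # block until Ctrl+C
    except KeyboardInterrupt:
        stop.set()                    # signal tail threads to exit

Prefixing each line with its source file is one way to keep the daphne and orchestrator streams distinguishable when they interleave.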
@@ -119,12 +119,13 @@ def version(quiet: bool=False,
    else:
        for key in sorted(set(binary_config_keys)):
            # Get the actual binary name/path from config value
-           bin_value = config.get(key, '').strip()
+           # Prioritize Machine.config overrides over base config
+           bin_value = machine.config.get(key) or config.get(key, '').strip()
            if not bin_value:
                continue

            # Check if it's a path (has slashes) or just a name
-           is_path = '/' in bin_value
+           is_path = '/' in str(bin_value)

            if is_path:
                # It's a full path - match against abspath
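
The `machine.config.get(key) or config.get(key, '').strip()` pattern gives per-machine overrides precedence, falling back to base config whenever the override is missing, None, or empty; wrapping the value in `str()` before the `'/'` check guards against non-string values (e.g. a Path object) coming out of machine config. A small self-contained illustration (the dicts are stand-ins for the real config objects):

# Stand-in dicts for machine.config and the base config from the diff.
machine_config = {'WGET_BINARY': '/opt/homebrew/bin/wget'}
base_config = {'WGET_BINARY': 'wget', 'CURL_BINARY': 'curl'}

for key in ('WGET_BINARY', 'CURL_BINARY'):
    # Machine-level override wins; missing/empty falls through to base config.
    bin_value = machine_config.get(key) or base_config.get(key, '').strip()
    is_path = '/' in str(bin_value)   # str() guards against Path/None values
    print(key, '->', bin_value, '(path)' if is_path else '(name)')

Because `or` treats empty strings as falsy, an override explicitly set to `''` also falls back to the base config rather than disabling the binary.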