#!/usr/bin/env python3

__package__ = 'archivebox.cli'

import os
import sys
import shutil

import rich_click as click
from rich import print

from archivebox.misc.util import docstring, enforce_types


@enforce_types
def install(dry_run: bool=False) -> None:
    """Detect and install ArchiveBox dependencies by running a dependency-check crawl"""

    from archivebox.config.permissions import IS_ROOT, ARCHIVEBOX_USER, ARCHIVEBOX_GROUP
    from archivebox.config.paths import ARCHIVE_DIR
    from archivebox.misc.logging import stderr
    from archivebox.cli.archivebox_init import init

    if not (os.access(ARCHIVE_DIR, os.R_OK) and ARCHIVE_DIR.is_dir()):
        init()  # must init full index because we need a db to store Binary entries in

    print('\n[green][+] Detecting ArchiveBox dependencies...[/green]')

    if IS_ROOT:
        EUID = os.geteuid()
        print()
        print(f'[yellow]:warning: Running as UID=[blue]{EUID}[/blue].[/yellow]')
        print(f'    DATA_DIR will be owned by [blue]{ARCHIVEBOX_USER}:{ARCHIVEBOX_GROUP}[/blue].')
        print()

    if dry_run:
        print('[dim]Dry run - would create a crawl to detect dependencies[/dim]')
        return

    # Set up Django so the ORM and worker machinery below are available
    from archivebox.config.django import setup_django
    setup_django()
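
    # NOTE: Django setup happens only after the --dry-run check above,
    # presumably so a dry run stays fast and never touches the database.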

    from django.utils import timezone
    from archivebox.crawls.models import Crawl
    from archivebox.base_models.models import get_or_create_system_user_pk

    # Create a crawl for dependency detection,
    # using a minimal crawl that will trigger the on_Crawl hooks
    created_by_id = get_or_create_system_user_pk()
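
    # get_or_create() is keyed on the pseudo-URL below, so re-running
    # `archivebox install` reuses the same Crawl row instead of piling up
    # duplicates; the 'archivebox://' scheme suggests the URL is never fetched
    # as a real page and exists only to give the on_Crawl hooks something to run on.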

    crawl, created = Crawl.objects.get_or_create(
        urls='archivebox://install',
        defaults={
            'label': 'Dependency detection',
            'created_by_id': created_by_id,
            'max_depth': 0,
            'status': 'queued',
        },
    )

    # If the crawl already existed, reset it to queued state so it can be processed again
    if not created:
        crawl.status = 'queued'
        crawl.retry_at = timezone.now()
        crawl.save()
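
    # A crawl with retry_at <= now is considered due: the queue check below and
    # (presumably) the Orchestrator both select work based on this timestamp.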

    print(f'[+] {"Created" if created else "Re-queued"} dependency detection crawl: {crawl.id}')
    print(f'[+] Crawl status: {crawl.status}, retry_at: {crawl.retry_at}')

    # Verify the crawl is in the queue
    queued_crawls = Crawl.objects.filter(
        retry_at__lte=timezone.now(),
    ).exclude(
        status__in=Crawl.FINAL_STATES,
    )
    print(f'[+] Crawls in queue: {queued_crawls.count()}')
    if queued_crawls.exists():
        for c in queued_crawls:
            print(f'    - Crawl {c.id}: status={c.status}, retry_at={c.retry_at}')

    print('[+] Running crawl to detect binaries via on_Crawl hooks...')
    print()

    # Run the crawl synchronously (this triggers the on_Crawl hooks)
    from archivebox.workers.orchestrator import Orchestrator
    orchestrator = Orchestrator(exit_on_idle=True)
    orchestrator.runloop()
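
    # exit_on_idle=True should make the runloop return once no queued work
    # remains, so `archivebox install` acts as a one-shot command rather than
    # staying resident like a daemon.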

    print()

    # Check for a human superuser (the built-in 'system' user doesn't count)
    from django.contrib.auth import get_user_model
    User = get_user_model()

    if not User.objects.filter(is_superuser=True).exclude(username='system').exists():
        stderr('\n[+] Don\'t forget to create a new admin user for the Web UI...', color='green')
        stderr('    archivebox manage createsuperuser')

    print()

    # Run `archivebox version` to show the full dependency status
    archivebox_path = shutil.which('archivebox') or sys.executable
    if 'python' in archivebox_path:
        os.system(f'{sys.executable} -m archivebox version')
    else:
        os.system(f'{archivebox_path} version')
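
    # NOTE: if no `archivebox` entrypoint is on PATH, shutil.which() returns None
    # and archivebox_path falls back to sys.executable (a python binary), in which
    # case the version command is invoked via `python -m archivebox` instead.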


@click.command()
@click.option('--dry-run', '-d', is_flag=True, help='Show what would happen without actually running', default=False)
@docstring(install.__doc__)
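# ^ reuses install()'s docstring as the CLI help text for this command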
def main(**kwargs) -> None:
    install(**kwargs)


if __name__ == '__main__':
    main()