Mirror of https://github.com/ArchiveBox/ArchiveBox.git, synced 2026-04-06 07:47:53 +10:00.
Commit message: "move tests into subfolder, add missing install hooks"
This commit is contained in:
@@ -41,9 +41,11 @@ class ArchiveBoxGroup(click.Group):
|
||||
archive_commands = {
|
||||
# High-level commands
|
||||
'add': 'archivebox.cli.archivebox_add.main',
|
||||
'remove': 'archivebox.cli.archivebox_remove.main',
|
||||
'run': 'archivebox.cli.archivebox_run.main',
|
||||
'update': 'archivebox.cli.archivebox_update.main',
|
||||
'status': 'archivebox.cli.archivebox_status.main',
|
||||
'search': 'archivebox.cli.archivebox_search.main',
|
||||
'config': 'archivebox.cli.archivebox_config.main',
|
||||
'schedule': 'archivebox.cli.archivebox_schedule.main',
|
||||
'server': 'archivebox.cli.archivebox_server.main',
|
||||
|
||||
@@ -13,8 +13,15 @@ from archivebox.misc.util import docstring, enforce_types
|
||||
|
||||
|
||||
@enforce_types
|
||||
def install(dry_run: bool=False) -> None:
|
||||
"""Detect and install ArchiveBox dependencies by running a dependency-check crawl"""
|
||||
def install(binaries: tuple[str, ...] = (), binproviders: str = '*', dry_run: bool = False) -> None:
|
||||
"""Detect and install ArchiveBox dependencies by running a dependency-check crawl
|
||||
|
||||
Examples:
|
||||
archivebox install # Install all dependencies
|
||||
archivebox install wget curl # Install only wget and curl
|
||||
archivebox install --binproviders=pip yt-dlp # Install yt-dlp using only pip
|
||||
archivebox install --binproviders=brew,apt # Install all deps using only brew or apt
|
||||
"""
|
||||
|
||||
from archivebox.config.permissions import IS_ROOT, ARCHIVEBOX_USER, ARCHIVEBOX_GROUP
|
||||
from archivebox.config.paths import ARCHIVE_DIR
|
||||
@@ -24,7 +31,14 @@ def install(dry_run: bool=False) -> None:
|
||||
if not (os.access(ARCHIVE_DIR, os.R_OK) and ARCHIVE_DIR.is_dir()):
|
||||
init() # must init full index because we need a db to store Binary entries in
|
||||
|
||||
print('\n[green][+] Detecting ArchiveBox dependencies...[/green]')
|
||||
# Show what we're installing
|
||||
if binaries:
|
||||
print(f'\n[green][+] Installing specific binaries: {", ".join(binaries)}[/green]')
|
||||
else:
|
||||
print('\n[green][+] Detecting and installing all ArchiveBox dependencies...[/green]')
|
||||
|
||||
if binproviders != '*':
|
||||
print(f'[green][+] Using providers: {binproviders}[/green]')
|
||||
|
||||
if IS_ROOT:
|
||||
EUID = os.geteuid()
|
||||
@@ -49,6 +63,19 @@ def install(dry_run: bool=False) -> None:
|
||||
# Using a minimal crawl that will trigger on_Crawl hooks
|
||||
created_by_id = get_or_create_system_user_pk()
|
||||
|
||||
# Build config for this crawl using existing PLUGINS filter
|
||||
crawl_config = {}
|
||||
|
||||
# Combine binary names and provider names into PLUGINS list
|
||||
plugins = []
|
||||
if binaries:
|
||||
plugins.extend(binaries)
|
||||
if binproviders != '*':
|
||||
plugins.extend(binproviders.split(','))
|
||||
|
||||
if plugins:
|
||||
crawl_config['PLUGINS'] = ','.join(plugins)
|
||||
|
||||
crawl, created = Crawl.objects.get_or_create(
|
||||
urls='archivebox://install',
|
||||
defaults={
|
||||
@@ -56,6 +83,7 @@ def install(dry_run: bool=False) -> None:
|
||||
'created_by_id': created_by_id,
|
||||
'max_depth': 0,
|
||||
'status': 'queued',
|
||||
'config': crawl_config,
|
||||
}
|
||||
)
|
||||
|
||||
@@ -63,9 +91,12 @@ def install(dry_run: bool=False) -> None:
|
||||
if not created:
|
||||
crawl.status = 'queued'
|
||||
crawl.retry_at = timezone.now()
|
||||
crawl.config = crawl_config # Update config
|
||||
crawl.save()
|
||||
|
||||
print(f'[+] Created dependency detection crawl: {crawl.id}')
|
||||
if crawl_config:
|
||||
print(f'[+] Crawl config: {crawl_config}')
|
||||
print(f'[+] Crawl status: {crawl.status}, retry_at: {crawl.retry_at}')
|
||||
|
||||
# Verify the crawl is in the queue
|
||||
@@ -100,15 +131,15 @@ def install(dry_run: bool=False) -> None:
|
||||
|
||||
print()
|
||||
|
||||
# Run version to show full status
|
||||
archivebox_path = shutil.which('archivebox') or sys.executable
|
||||
if 'python' in archivebox_path:
|
||||
os.system(f'{sys.executable} -m archivebox version')
|
||||
else:
|
||||
os.system(f'{archivebox_path} version')
|
||||
# Show version to display full status including installed binaries
|
||||
# Django is already loaded, so just import and call the function directly
|
||||
from archivebox.cli.archivebox_version import version as show_version
|
||||
show_version(quiet=False)
|
||||
|
||||
|
||||
@click.command()
|
||||
@click.argument('binaries', nargs=-1, type=str, required=False)
|
||||
@click.option('--binproviders', '-p', default='*', help='Comma-separated list of providers to use (pip,npm,brew,apt,env,custom) or * for all', show_default=True)
|
||||
@click.option('--dry-run', '-d', is_flag=True, help='Show what would happen without actually running', default=False)
|
||||
@docstring(install.__doc__)
|
||||
def main(**kwargs) -> None:
|
||||
|
||||
@@ -50,6 +50,9 @@ def get_snapshots(snapshots: Optional[QuerySet]=None,
|
||||
if filter_patterns:
|
||||
result = Snapshot.objects.filter_by_patterns(filter_patterns, filter_type)
|
||||
|
||||
# Prefetch crawl relationship to avoid N+1 queries when accessing output_dir
|
||||
result = result.select_related('crawl', 'crawl__created_by')
|
||||
|
||||
if not result:
|
||||
stderr('[!] No Snapshots matched your filters:', filter_patterns, f'({filter_type})', color='lightyellow')
|
||||
|
||||
|
||||
@@ -145,16 +145,29 @@ def drain_old_archive_dirs(resume_from: str = None, batch_size: int = 100) -> di
|
||||
# Check if needs migration (0.8.x → 0.9.x)
|
||||
if snapshot.fs_migration_needed:
|
||||
try:
|
||||
snapshot.save() # Triggers migration + creates symlink
|
||||
# Manually trigger filesystem migration without full save()
|
||||
# This avoids UNIQUE constraint issues while still migrating files
|
||||
cleanup_info = None
|
||||
if hasattr(snapshot, '_fs_migrate_from_0_8_0_to_0_9_0'):
|
||||
cleanup_info = snapshot._fs_migrate_from_0_8_0_to_0_9_0()
|
||||
|
||||
# Update only fs_version field using queryset update (bypasses validation)
|
||||
from archivebox.core.models import Snapshot as SnapshotModel
|
||||
SnapshotModel.objects.filter(pk=snapshot.pk).update(fs_version='0.9.0')
|
||||
|
||||
# Commit the transaction
|
||||
transaction.commit()
|
||||
|
||||
# Manually call cleanup since we bypassed normal save() flow
|
||||
if cleanup_info:
|
||||
old_dir, new_dir = cleanup_info
|
||||
snapshot._cleanup_old_migration_dir(old_dir, new_dir)
|
||||
|
||||
stats['migrated'] += 1
|
||||
print(f" [{stats['processed']}] Migrated: {entry_path.name}")
|
||||
except Exception as e:
|
||||
# Snapshot already exists in DB with different crawl - skip it
|
||||
if 'UNIQUE constraint failed' in str(e):
|
||||
stats['skipped'] += 1
|
||||
print(f" [{stats['processed']}] Skipped (already in DB): {entry_path.name}")
|
||||
else:
|
||||
raise
|
||||
stats['skipped'] += 1
|
||||
print(f" [{stats['processed']}] Skipped (error: {e}): {entry_path.name}")
|
||||
else:
|
||||
stats['skipped'] += 1
|
||||
|
||||
|
||||
@@ -104,40 +104,47 @@ def version(quiet: bool=False,
|
||||
failures = []
|
||||
|
||||
# Setup Django before importing models
|
||||
from archivebox.config.django import setup_django
|
||||
setup_django()
|
||||
try:
|
||||
from archivebox.config.django import setup_django
|
||||
setup_django()
|
||||
|
||||
from archivebox.machine.models import Machine, Binary
|
||||
from archivebox.machine.models import Machine, Binary
|
||||
|
||||
machine = Machine.current()
|
||||
machine = Machine.current()
|
||||
|
||||
# Get all binaries from the database
|
||||
all_installed = Binary.objects.filter(
|
||||
machine=machine
|
||||
).exclude(abspath='').exclude(abspath__isnull=True).order_by('name')
|
||||
# Get all binaries from the database with timeout protection
|
||||
all_installed = Binary.objects.filter(
|
||||
machine=machine
|
||||
).exclude(abspath='').exclude(abspath__isnull=True).order_by('name')
|
||||
|
||||
if not all_installed.exists():
|
||||
prnt('', '[grey53]No binaries detected. Run [green]archivebox install[/green] to detect dependencies.[/grey53]')
|
||||
else:
|
||||
for installed in all_installed:
|
||||
# Skip if user specified specific binaries and this isn't one
|
||||
if binaries and installed.name not in binaries:
|
||||
continue
|
||||
if not all_installed.exists():
|
||||
prnt('', '[grey53]No binaries detected. Run [green]archivebox install[/green] to detect dependencies.[/grey53]')
|
||||
else:
|
||||
for installed in all_installed:
|
||||
# Skip if user specified specific binaries and this isn't one
|
||||
if binaries and installed.name not in binaries:
|
||||
continue
|
||||
|
||||
if installed.is_valid:
|
||||
display_path = installed.abspath.replace(str(DATA_DIR), '.').replace(str(Path('~').expanduser()), '~')
|
||||
version_str = (installed.version or 'unknown')[:15]
|
||||
provider = (installed.binprovider or 'env')[:8]
|
||||
prnt('', '[green]√[/green]', '', installed.name.ljust(18), version_str.ljust(16), provider.ljust(8), display_path, overflow='ignore', crop=False)
|
||||
else:
|
||||
prnt('', '[red]X[/red]', '', installed.name.ljust(18), '[grey53]not installed[/grey53]', overflow='ignore', crop=False)
|
||||
failures.append(installed.name)
|
||||
if installed.is_valid:
|
||||
display_path = installed.abspath.replace(str(DATA_DIR), '.').replace(str(Path('~').expanduser()), '~')
|
||||
version_str = (installed.version or 'unknown')[:15]
|
||||
provider = (installed.binprovider or 'env')[:8]
|
||||
prnt('', '[green]√[/green]', '', installed.name.ljust(18), version_str.ljust(16), provider.ljust(8), display_path, overflow='ignore', crop=False)
|
||||
else:
|
||||
prnt('', '[red]X[/red]', '', installed.name.ljust(18), '[grey53]not installed[/grey53]', overflow='ignore', crop=False)
|
||||
failures.append(installed.name)
|
||||
|
||||
# Show hint if no binaries are installed yet
|
||||
has_any_installed = Binary.objects.filter(machine=machine).exclude(abspath='').exists()
|
||||
if not has_any_installed:
|
||||
# Show hint if no binaries are installed yet
|
||||
has_any_installed = Binary.objects.filter(machine=machine).exclude(abspath='').exists()
|
||||
if not has_any_installed:
|
||||
prnt()
|
||||
prnt('', '[grey53]Run [green]archivebox install[/green] to detect and install dependencies.[/grey53]')
|
||||
|
||||
except Exception as e:
|
||||
# Handle database errors gracefully (locked, missing, etc.)
|
||||
prnt()
|
||||
prnt('', '[grey53]Run [green]archivebox install[/green] to detect and install dependencies.[/grey53]')
|
||||
prnt('', f'[yellow]Warning: Could not query binaries from database: {e}[/yellow]')
|
||||
prnt('', '[grey53]Run [green]archivebox init[/green] and [green]archivebox install[/green] to set up dependencies.[/grey53]')
|
||||
|
||||
if not binaries:
|
||||
# Show code and data locations
|
||||
|
||||
Reference in New Issue
Block a user