Restore CLI compat and plugin dependency handling

This commit is contained in:
Nick Sweeting
2026-03-15 06:06:18 -07:00
parent 6b482c62df
commit 1f792d7199
19 changed files with 302 additions and 92 deletions

View File

@@ -41,6 +41,7 @@ class ArchiveBoxGroup(click.Group):
archive_commands = {
# High-level commands
'add': 'archivebox.cli.archivebox_add.main',
'extract': 'archivebox.cli.archivebox_extract.main',
'list': 'archivebox.cli.archivebox_list.main',
'remove': 'archivebox.cli.archivebox_remove.main',
'run': 'archivebox.cli.archivebox_run.main',
@@ -55,6 +56,10 @@ class ArchiveBoxGroup(click.Group):
# Introspection commands
'pluginmap': 'archivebox.cli.archivebox_pluginmap.main',
}
# Maps a command name to the import path of its backwards-compatible entrypoint.
# get_command() lazy-loads the path found here (instead of the regular command)
# whenever _should_use_legacy_model_command() returns True for that name.
legacy_model_commands = {
'crawl': 'archivebox.cli.archivebox_crawl_compat.main',
'snapshot': 'archivebox.cli.archivebox_snapshot_compat.main',
}
all_subcommands = {
**meta_commands,
**setup_commands,
@@ -67,12 +72,35 @@ class ArchiveBoxGroup(click.Group):
'archive': 'add',
# Old commands replaced by new model commands
'orchestrator': 'run',
'extract': 'archiveresult',
}
# Per-command set of subcommand names consulted by _should_use_legacy_model_command():
# the legacy entrypoint is only selected when the first argument following the
# command on the command line is NOT one of these names (and is not -h/--help).
legacy_model_subcommands = {
'crawl': {'create', 'list', 'update', 'delete'},
'snapshot': {'create', 'list', 'update', 'delete'},
}
@classmethod
def get_canonical_name(cls, cmd_name):
    """Translate an old command name to its current name.

    Looks ``cmd_name`` up in ``cls.renamed_commands``; names that have no
    entry there are returned unchanged.
    """
    if cmd_name in cls.renamed_commands:
        return cls.renamed_commands[cmd_name]
    return cmd_name
@classmethod
def _should_use_legacy_model_command(cls, cmd_name: str) -> bool:
    """Decide whether ``cmd_name`` should be routed to its legacy entrypoint.

    Inspects ``sys.argv`` directly: the answer is True only when ``cmd_name``
    has a legacy entrypoint, appears on the command line, and is followed by
    a first argument that is neither a help flag nor one of the subcommand
    names listed for it in ``cls.legacy_model_subcommands``.

    Args:
        cmd_name: The command name being resolved.

    Returns:
        True if the legacy entrypoint should handle this invocation.
    """
    # No legacy entrypoint registered, or the name is not on the command line.
    if cmd_name not in cls.legacy_model_commands or cmd_name not in sys.argv:
        return False

    # Everything after the first occurrence of the command name.
    trailing = sys.argv[sys.argv.index(cmd_name) + 1:]

    # A bare command or an explicit help request stays on the regular path.
    if not trailing or trailing[0] in ('-h', '--help'):
        return False

    # A recognised subcommand means the regular command; anything else
    # (e.g. a URL or an option) is treated as the old calling convention.
    return trailing[0] not in cls.legacy_model_subcommands[cmd_name]
def get_command(self, ctx, cmd_name):
@@ -82,6 +110,9 @@ class ArchiveBoxGroup(click.Group):
print(f' [violet]Hint:[/violet] `archivebox {cmd_name}` has been renamed to `archivebox {new_name}`')
cmd_name = new_name
ctx.invoked_subcommand = cmd_name
if self._should_use_legacy_model_command(cmd_name):
return self._lazy_load(self.legacy_model_commands[cmd_name])
# handle lazy loading of commands
if cmd_name in self.all_subcommands:
@@ -91,8 +122,8 @@ class ArchiveBoxGroup(click.Group):
return super().get_command(ctx, cmd_name)
@classmethod
def _lazy_load(cls, cmd_name):
import_path = cls.all_subcommands[cmd_name]
def _lazy_load(cls, cmd_name_or_path):
import_path = cls.all_subcommands.get(cmd_name_or_path, cmd_name_or_path)
modname, funcname = import_path.rsplit('.', 1)
# print(f'LAZY LOADING {import_path}')

View File

@@ -0,0 +1,27 @@
#!/usr/bin/env python3
__package__ = 'archivebox.cli'
__command__ = 'archivebox crawl'
import sys
import rich_click as click
from archivebox.cli.archivebox_add import add
@click.command(
    context_settings={'ignore_unknown_options': True},
)
@click.option(
    '--depth', '-d',
    type=int,
    default=0,
    help='Max crawl depth (default: 0)',
)
@click.option(
    '--tag', '-t',
    default='',
    help='Comma-separated tags to add',
)
@click.option(
    '--status', '-s',
    default='queued',
    help='Initial status (default: queued)',
)
@click.option(
    '--wait/--no-wait', 'wait',
    default=True,
    help='Accepted for backwards compatibility',
)
@click.argument('urls', nargs=-1)
def main(depth: int, tag: str, status: str, wait: bool, urls: tuple[str, ...]):
    """Backwards-compatible `archivebox crawl URL...` entrypoint."""
    # NOTE: the docstring above doubles as the command's --help text, so it is
    # kept short; implementation notes live in comments instead.

    # --status and --wait are still parsed so old invocations do not fail,
    # but their values are intentionally discarded.
    del status, wait

    # Forward the URLs to `add`, then always exit with status 0.
    url_list = list(urls)
    add(url_list, depth=depth, tag=tag, index_only=True, bg=True)
    sys.exit(0)
if __name__ == '__main__':
main()

View File

@@ -226,7 +226,7 @@ def is_archiveresult_id(value: str) -> bool:
@click.command()
@click.option('--plugins', '-p', default='', help='Comma-separated list of plugins to run (e.g., screenshot,singlefile)')
@click.option('--plugins', '--plugin', '-p', default='', help='Comma-separated list of plugins to run (e.g., screenshot,singlefile)')
@click.option('--wait/--no-wait', default=True, help='Wait for plugins to complete (default: wait)')
@click.argument('args', nargs=-1)
def main(plugins: str, wait: bool, args: tuple):

View File

@@ -12,6 +12,7 @@ import rich_click as click
from django.db.models import QuerySet
from archivebox.config import DATA_DIR
from archivebox.config.constants import CONSTANTS
from archivebox.config.django import setup_django
from archivebox.misc.util import enforce_types, docstring
from archivebox.misc.checks import check_data_folder
@@ -65,6 +66,9 @@ def remove(filter_patterns: Iterable[str]=(),
for snapshot in snapshots:
if delete:
shutil.rmtree(snapshot.output_dir, ignore_errors=True)
legacy_path = CONSTANTS.ARCHIVE_DIR / snapshot.timestamp
if legacy_path.is_symlink():
legacy_path.unlink(missing_ok=True)
finally:
timer.end()

View File

@@ -0,0 +1,24 @@
#!/usr/bin/env python3
__package__ = 'archivebox.cli'
__command__ = 'archivebox snapshot'
import sys
import rich_click as click
from archivebox.cli.archivebox_snapshot import create_snapshots
@click.command(
    context_settings={'ignore_unknown_options': True},
)
@click.option(
    '--tag', '-t',
    default='',
    help='Comma-separated tags to add',
)
@click.option(
    '--status', '-s',
    default='queued',
    help='Initial status (default: queued)',
)
@click.option(
    '--depth', '-d',
    type=int,
    default=0,
    help='Crawl depth (default: 0)',
)
@click.argument('urls', nargs=-1)
def main(tag: str, status: str, depth: int, urls: tuple[str, ...]):
    """Backwards-compatible `archivebox snapshot URL...` entrypoint."""
    # NOTE: the docstring above doubles as the command's --help text, so it is
    # kept short; implementation notes live in comments instead.

    # Delegate to create_snapshots and use whatever it returns as the
    # process exit status.
    exit_code = create_snapshots(urls, tag=tag, status=status, depth=depth)
    sys.exit(exit_code)
if __name__ == '__main__':
main()