Files
ArchiveBox/archivebox/cli/__init__.py
Nick Sweeting b749b26c5d wip
2026-03-23 03:58:32 -07:00

200 lines
7.5 KiB
Python

__package__ = "archivebox.cli"
__command__ = "archivebox"
import os
import sys
from importlib import import_module
import rich_click as click
from rich import print
from archivebox.config.version import VERSION
# `--debug` is handled at import time: the flag is stripped from sys.argv and
# exposed through the DEBUG environment variable instead.
try:
    sys.argv.remove("--debug")
except ValueError:
    # flag was not passed; leave the environment untouched
    pass
else:
    os.environ["DEBUG"] = "True"
class ArchiveBoxGroup(click.Group):
    """lazy loading click group for archivebox commands"""

    # Every table below maps a subcommand name to the dotted import path
    # ("package.module.attribute") of the object implementing it. Nothing is
    # imported until get_command() is asked for that specific name.

    meta_commands = {
        "help": "archivebox.cli.archivebox_help.main",
        "version": "archivebox.cli.archivebox_version.main",
        "mcp": "archivebox.cli.archivebox_mcp.main",
    }

    setup_commands = {
        "init": "archivebox.cli.archivebox_init.main",
        "install": "archivebox.cli.archivebox_install.main",
    }

    # Model commands (CRUD operations via subcommands)
    model_commands = {
        "crawl": "archivebox.cli.archivebox_crawl.main",
        "snapshot": "archivebox.cli.archivebox_snapshot.main",
        "archiveresult": "archivebox.cli.archivebox_archiveresult.main",
        "tag": "archivebox.cli.archivebox_tag.main",
        "binary": "archivebox.cli.archivebox_binary.main",
        "process": "archivebox.cli.archivebox_process.main",
        "machine": "archivebox.cli.archivebox_machine.main",
        "persona": "archivebox.cli.archivebox_persona.main",
    }

    archive_commands = {
        # High-level commands
        "add": "archivebox.cli.archivebox_add.main",
        "extract": "archivebox.cli.archivebox_extract.main",
        "list": "archivebox.cli.archivebox_list.main",
        "remove": "archivebox.cli.archivebox_remove.main",
        "run": "archivebox.cli.archivebox_run.main",
        "update": "archivebox.cli.archivebox_update.main",
        "status": "archivebox.cli.archivebox_status.main",
        "search": "archivebox.cli.archivebox_search.main",
        "config": "archivebox.cli.archivebox_config.main",
        "schedule": "archivebox.cli.archivebox_schedule.main",
        "server": "archivebox.cli.archivebox_server.main",
        "shell": "archivebox.cli.archivebox_shell.main",
        "manage": "archivebox.cli.archivebox_manage.main",
        # Introspection commands
        "pluginmap": "archivebox.cli.archivebox_pluginmap.main",
    }

    # Alternate implementations for some model commands; deliberately NOT part
    # of all_subcommands, they are only reached through
    # _should_use_legacy_model_command().
    legacy_model_commands = {
        "crawl": "archivebox.cli.archivebox_crawl_compat.main",
        "snapshot": "archivebox.cli.archivebox_snapshot_compat.main",
    }

    # Union of every lazily loadable command (later tables win on key clashes).
    all_subcommands = {
        **meta_commands,
        **setup_commands,
        **model_commands,
        **archive_commands,
    }

    # Old command name -> current command name.
    renamed_commands = {
        "setup": "install",
        "import": "add",
        "archive": "add",
    }

    # First-argument values that keep a model command on its regular
    # implementation; any other first argument selects the legacy one.
    legacy_model_subcommands = {
        "crawl": {"create", "list", "update", "delete"},
        "snapshot": {"create", "list", "update", "delete"},
    }

    @classmethod
    def get_canonical_name(cls, cmd_name):
        """Translate a renamed alias to its current name; other names pass through unchanged."""
        if cmd_name in cls.renamed_commands:
            return cls.renamed_commands[cmd_name]
        return cmd_name

    @classmethod
    def _should_use_legacy_model_command(cls, cmd_name: str) -> bool:
        """Decide whether `cmd_name` must be served by its legacy implementation.

        True only when all of the following hold:
          * `cmd_name` has an entry in `legacy_model_commands`,
          * `cmd_name` occurs in `sys.argv` and at least one argument follows it,
          * that next argument is neither `-h`/`--help` nor one of the names in
            `legacy_model_subcommands[cmd_name]`.
        """
        if cmd_name not in cls.legacy_model_commands:
            return False
        if cmd_name not in sys.argv:
            return False

        # Only the first occurrence of the command name in argv is considered.
        trailing = sys.argv[sys.argv.index(cmd_name) + 1 :]
        if not trailing:
            return False

        next_token = trailing[0]
        if next_token in ("-h", "--help"):
            return False
        return next_token not in cls.legacy_model_subcommands[cmd_name]

    def get_command(self, ctx, cmd_name):
        """Resolve `cmd_name` to a command object, importing it on demand.

        Renamed aliases are rewritten to their new name (a hint is printed to
        stderr and `ctx.invoked_subcommand` is updated). Names that are not
        lazily loadable are delegated to click's default lookup.
        """
        new_name = self.renamed_commands.get(cmd_name)
        if new_name is not None:
            print(
                f" [violet]Hint:[/violet] `archivebox {cmd_name}` has been renamed to `archivebox {new_name}`",
                file=sys.stderr,
            )
            cmd_name = new_name
            ctx.invoked_subcommand = cmd_name

        if self._should_use_legacy_model_command(cmd_name):
            target = self.legacy_model_commands[cmd_name]
        elif cmd_name in self.all_subcommands:
            target = cmd_name
        else:
            # not one of ours: fall back to click's regular command lookup
            return super().get_command(ctx, cmd_name)
        return self._lazy_load(target)

    @classmethod
    def _lazy_load(cls, cmd_name_or_path):
        """Import and return the object behind a command name or dotted import path.

        A key of `all_subcommands` is first translated to its import path; any
        other value is treated as the import path itself. The path is split on
        its last dot into module and attribute.
        """
        import_path = cls.all_subcommands.get(cmd_name_or_path, cmd_name_or_path)
        modname, funcname = import_path.rsplit(".", 1)
        func = getattr(import_module(modname), funcname)

        # NOTE(review): hasattr() is true even when the docstring is None, so
        # this guard cannot actually detect a missing docstring. Kept as-is to
        # preserve behavior.
        if not hasattr(func, "__doc__"):
            raise ValueError(f"lazy loading of {import_path} failed - no docstring found on method")
        return func
@click.group(cls=ArchiveBoxGroup, invoke_without_command=True)
@click.option("--help", "-h", is_flag=True, help="Show help")
@click.version_option(VERSION, "-v", "--version", package_name="archivebox", message="%(version)s")
@click.pass_context
def cli(ctx, help=False):
    """ArchiveBox: The self-hosted internet archive"""
    # NOTE: click uses the docstring above as the group's help text, so it is
    # kept verbatim; implementation notes live in comments instead.
    invoked = ctx.invoked_subcommand
    subcommand = ArchiveBoxGroup.get_canonical_name(invoked)

    # --help, or no subcommand at all: show the custom help command
    if help or invoked is None:
        ctx.invoke(ctx.command.get_command(ctx, "help"))

    # Only archive/model commands need Django set up and a valid data folder.
    needs_django = (
        subcommand in ArchiveBoxGroup.archive_commands
        or subcommand in ArchiveBoxGroup.model_commands
    )
    if not needs_django:
        return

    truthy_debug_values = ("1", "true", "True", "TRUE", "yes")
    try:
        if subcommand == "server":
            wants_reload = "--reload" in sys.argv
            if wants_reload or os.environ.get("DEBUG") in truthy_debug_values:
                # these variables are set before setup_django() is called below
                os.environ["ARCHIVEBOX_RUNSERVER"] = "1"
                if wants_reload:
                    os.environ["ARCHIVEBOX_AUTORELOAD"] = "1"
                from archivebox.config.common import STORAGE_CONFIG

                pidfile = STORAGE_CONFIG.TMP_DIR / "runserver.pid"
                os.environ["ARCHIVEBOX_RUNSERVER_PIDFILE"] = str(pidfile)

        from archivebox.config.django import setup_django
        from archivebox.misc.checks import check_data_folder

        setup_django()
        check_data_folder()
    except Exception as e:
        print(f"[red][X] Error setting up Django or checking data folder: {e}[/red]", file=sys.stderr)
        # not all management commands need django to be setup beforehand
        if subcommand in ("manage", "shell"):
            return
        raise
def main(args=None, prog_name=None, stdin=None):
    """Entry point that runs the `cli` click group.

    Args:
        args: Argument list forwarded unchanged to `cli(...)`.
        prog_name: Program name shown in help output. When omitted it is
            `archivebox`, or a `docker compose run ... archivebox` form when
            the IN_DOCKER environment variable is truthy.
        stdin: Accepted for backwards compatibility (used by __main__.py);
            currently not used by the click-based CLI.
    """
    # show `docker run archivebox xyz` in help messages if running in docker
    in_docker = os.environ.get("IN_DOCKER", False) in ("1", "true", "True", "TRUE", "yes")

    # sys.stdin can be None (interpreter started without a stdin stream) or
    # already closed; in both cases treat the session as non-interactive
    # instead of crashing before the CLI even starts.
    try:
        is_tty = sys.stdin.isatty()
    except (AttributeError, ValueError):
        is_tty = False

    if not prog_name:
        if in_docker:
            prog_name = f"docker compose run{'' if is_tty else ' -T'} archivebox"
        else:
            prog_name = "archivebox"

    try:
        cli(args=args, prog_name=prog_name)
    except KeyboardInterrupt:
        print("\n\n[red][X] Got CTRL+C. Exiting...[/red]")


if __name__ == "__main__":
    main()