__package__ = "archivebox.cli"
__command__ = "archivebox"

import os
import sys
from importlib import import_module

import rich_click as click
from rich import print

from archivebox.config.version import VERSION

# `--debug` is consumed here, before click ever sees the arguments: it is
# translated into the DEBUG environment variable and stripped from argv.
if "--debug" in sys.argv:
    os.environ["DEBUG"] = "True"
    sys.argv.remove("--debug")


class ArchiveBoxGroup(click.Group):
    """lazy loading click group for archivebox commands

    Subcommands are registered as dotted import paths (``module.attr``) and
    are only imported when click asks for them via ``get_command``.
    """

    meta_commands = {
        "help": "archivebox.cli.archivebox_help.main",
        "version": "archivebox.cli.archivebox_version.main",
        "mcp": "archivebox.cli.archivebox_mcp.main",
    }
    setup_commands = {
        "init": "archivebox.cli.archivebox_init.main",
        "install": "archivebox.cli.archivebox_install.main",
    }
    # Model commands (CRUD operations via subcommands)
    model_commands = {
        "crawl": "archivebox.cli.archivebox_crawl.main",
        "snapshot": "archivebox.cli.archivebox_snapshot.main",
        "archiveresult": "archivebox.cli.archivebox_archiveresult.main",
        "tag": "archivebox.cli.archivebox_tag.main",
        "binary": "archivebox.cli.archivebox_binary.main",
        "process": "archivebox.cli.archivebox_process.main",
        "machine": "archivebox.cli.archivebox_machine.main",
        "persona": "archivebox.cli.archivebox_persona.main",
    }
    archive_commands = {
        # High-level commands
        "add": "archivebox.cli.archivebox_add.main",
        "extract": "archivebox.cli.archivebox_extract.main",
        "list": "archivebox.cli.archivebox_list.main",
        "remove": "archivebox.cli.archivebox_remove.main",
        "run": "archivebox.cli.archivebox_run.main",
        "update": "archivebox.cli.archivebox_update.main",
        "status": "archivebox.cli.archivebox_status.main",
        "search": "archivebox.cli.archivebox_search.main",
        "config": "archivebox.cli.archivebox_config.main",
        "schedule": "archivebox.cli.archivebox_schedule.main",
        "server": "archivebox.cli.archivebox_server.main",
        "shell": "archivebox.cli.archivebox_shell.main",
        "manage": "archivebox.cli.archivebox_manage.main",
        # Introspection commands
        "pluginmap": "archivebox.cli.archivebox_pluginmap.main",
    }
    # Alternate implementations for model commands, selected by
    # _should_use_legacy_model_command(). Not part of all_subcommands.
    legacy_model_commands = {
        "crawl": "archivebox.cli.archivebox_crawl_compat.main",
        "snapshot": "archivebox.cli.archivebox_snapshot_compat.main",
    }
    all_subcommands = {
        **meta_commands,
        **setup_commands,
        **model_commands,
        **archive_commands,
    }
    # old command name -> current command name
    renamed_commands = {
        "setup": "install",
        "import": "add",
        "archive": "add",
    }
    # First-argument values that keep a model command on its current
    # (non-legacy) implementation.
    legacy_model_subcommands = {
        "crawl": {"create", "list", "update", "delete"},
        "snapshot": {"create", "list", "update", "delete"},
    }

    @classmethod
    def get_canonical_name(cls, cmd_name):
        """Return the current name for ``cmd_name``, resolving renamed commands.

        Names that are not in ``renamed_commands`` (including ``None``) are
        returned unchanged.
        """
        return cls.renamed_commands.get(cmd_name, cmd_name)

    @classmethod
    def _should_use_legacy_model_command(cls, cmd_name: str) -> bool:
        """Decide whether ``cmd_name`` should dispatch to its legacy implementation.

        Inspects ``sys.argv`` directly. Returns True only when ``cmd_name`` has
        an entry in ``legacy_model_commands``, appears in argv, is followed by
        at least one argument, and that argument is neither a help flag nor one
        of the subcommands listed in ``legacy_model_subcommands``.
        """
        if cmd_name not in cls.legacy_model_commands:
            return False

        try:
            arg_idx = sys.argv.index(cmd_name)
        except ValueError:
            # the command name is not literally present in argv
            return False

        remaining_args = sys.argv[arg_idx + 1 :]
        if not remaining_args:
            return False

        first_arg = remaining_args[0]
        if first_arg in ("-h", "--help"):
            return False

        return first_arg not in cls.legacy_model_subcommands[cmd_name]

    def get_command(self, ctx, cmd_name):
        """Resolve ``cmd_name`` to a command object, importing it on demand.

        Renamed commands print a hint to stderr and are mapped to their new
        name. Names unknown to this group fall through to click's own lookup.
        """
        # handle renamed commands
        if cmd_name in self.renamed_commands:
            new_name = self.renamed_commands[cmd_name]
            print(
                f" [violet]Hint:[/violet] `archivebox {cmd_name}` has been renamed to `archivebox {new_name}`",
                file=sys.stderr,
            )
            cmd_name = new_name
            # NOTE(review): this assignment is placed inside the rename branch;
            # the collapsed source does not show the nesting, so confirm.
            ctx.invoked_subcommand = cmd_name

        if self._should_use_legacy_model_command(cmd_name):
            return self._lazy_load(self.legacy_model_commands[cmd_name])

        # handle lazy loading of commands
        if cmd_name in self.all_subcommands:
            return self._lazy_load(cmd_name)

        # fall-back to using click's default command lookup
        return super().get_command(ctx, cmd_name)

    @classmethod
    def _lazy_load(cls, cmd_name_or_path):
        """Import and return the object behind a command name or dotted path.

        ``cmd_name_or_path`` is first looked up in ``all_subcommands``; when it
        is not a known command name it is treated as a dotted import path of
        the form ``package.module.attr``.
        """
        import_path = cls.all_subcommands.get(cmd_name_or_path)
        if import_path is None:
            import_path = cmd_name_or_path

        modname, funcname = import_path.rsplit(".", 1)

        # print(f'LAZY LOADING {import_path}')
        mod = import_module(modname)
        func = getattr(mod, funcname)

        # NOTE(review): every Python object exposes a `__doc__` attribute, so
        # this guard never fires as written. Left unchanged to avoid rejecting
        # commands that currently load.
        if not hasattr(func, "__doc__"):
            raise ValueError(f"lazy loading of {import_path} failed - no docstring found on method")

        # if not isinstance(cmd, click.BaseCommand):
        #     raise ValueError(f'lazy loading of {import_path} failed - not a click command')

        return func


@click.group(cls=ArchiveBoxGroup, invoke_without_command=True)
@click.option("--help", "-h", is_flag=True, help="Show help")
@click.version_option(VERSION, "-v", "--version", package_name="archivebox", message="%(version)s")
@click.pass_context
def cli(ctx, help=False):
    """ArchiveBox: The self-hosted internet archive"""
    # NOTE: the docstring above is kept verbatim because click reads it at runtime.
    subcommand = ArchiveBoxGroup.get_canonical_name(ctx.invoked_subcommand)

    # if --help is passed or no subcommand is given, show custom help message
    if help or ctx.invoked_subcommand is None:
        ctx.invoke(ctx.command.get_command(ctx, "help"))

    # if the subcommand is in archive_commands or model_commands,
    # then we need to set up the django environment and check that we're in a valid data folder
    if subcommand in ArchiveBoxGroup.archive_commands or subcommand in ArchiveBoxGroup.model_commands:
        # print('SETUP DJANGO AND CHECK DATA FOLDER')
        try:
            if subcommand == "server":
                # These env vars are exported before setup_django() is called below.
                run_in_debug = "--reload" in sys.argv or os.environ.get("DEBUG") in ("1", "true", "True", "TRUE", "yes")
                if run_in_debug:
                    os.environ["ARCHIVEBOX_RUNSERVER"] = "1"
                    if "--reload" in sys.argv:
                        os.environ["ARCHIVEBOX_AUTORELOAD"] = "1"
                        # NOTE(review): the pidfile export is placed under the
                        # --reload branch; the collapsed source does not show
                        # the nesting, so confirm.
                        from archivebox.config.common import STORAGE_CONFIG

                        os.environ["ARCHIVEBOX_RUNSERVER_PIDFILE"] = str(STORAGE_CONFIG.TMP_DIR / "runserver.pid")

            # imported here rather than at module top level
            from archivebox.config.django import setup_django
            from archivebox.misc.checks import check_data_folder

            setup_django()
            check_data_folder()
        except Exception as e:
            print(f"[red][X] Error setting up Django or checking data folder: {e}[/red]", file=sys.stderr)
            if subcommand not in ("manage", "shell"):
                # not all management commands need django to be setup beforehand
                raise


def main(args=None, prog_name=None, stdin=None):
    """Run the click CLI, turning CTRL+C into a short message.

    Args:
        args: Argument list forwarded to click; ``None`` is passed through as-is.
        prog_name: Program name shown in help output. When omitted it is
            derived from the ``IN_DOCKER`` environment variable and whether
            stdin is a TTY.
        stdin: Accepted for backwards compatibility; not used here.
    """
    # show `docker run archivebox xyz` in help messages if running in docker
    IN_DOCKER = os.environ.get("IN_DOCKER", False) in ("1", "true", "True", "TRUE", "yes")
    # sys.stdin is None when the process is started with stdin closed, so treat
    # that case as "not a TTY" rather than crashing on None.isatty().
    IS_TTY = sys.stdin is not None and sys.stdin.isatty()
    prog_name = prog_name or (f"docker compose run{'' if IS_TTY else ' -T'} archivebox" if IN_DOCKER else "archivebox")

    # stdin param allows passing input data from caller (used by __main__.py)
    # currently not used by click-based CLI, but kept for backwards compatibility

    try:
        cli(args=args, prog_name=prog_name)
    except KeyboardInterrupt:
        print("\n\n[red][X] Got CTRL+C. Exiting...[/red]")


if __name__ == "__main__":
    main()