mirror of
https://github.com/ArchiveBox/ArchiveBox.git
synced 2026-04-06 07:47:53 +10:00
202 lines
7.7 KiB
Python
202 lines
7.7 KiB
Python
# Explicit package path for this module, followed by the name of the top-level
# CLI command it defines (`__command__` is not read anywhere in this view).
__package__ = 'archivebox.cli'
__command__ = 'archivebox'
|
|
import os
|
|
import sys
|
|
from importlib import import_module
|
|
|
|
import rich_click as click
|
|
from rich import print
|
|
|
|
from archivebox.config.version import VERSION
|
|
|
|
|
|
|
|
# `--debug` is a global switch handled at import time: the first occurrence is
# stripped from sys.argv and recorded in the DEBUG environment variable instead.
try:
    sys.argv.remove('--debug')
except ValueError:
    # flag was not passed; leave the environment untouched
    pass
else:
    os.environ['DEBUG'] = 'True'
|
|
|
|
|
|
class ArchiveBoxGroup(click.Group):
    """Click group that resolves ArchiveBox subcommands lazily.

    Subcommands are registered as dotted import paths
    (``package.module.function``) and the target module is only imported when
    the command is actually looked up via :meth:`get_command`.
    """

    # Commands about the tool itself.
    meta_commands = {
        'help': 'archivebox.cli.archivebox_help.main',
        'version': 'archivebox.cli.archivebox_version.main',
        'mcp': 'archivebox.cli.archivebox_mcp.main',
    }

    # Commands used to create / provision an installation.
    setup_commands = {
        'init': 'archivebox.cli.archivebox_init.main',
        'install': 'archivebox.cli.archivebox_install.main',
    }

    # Model commands (CRUD operations via subcommands)
    model_commands = {
        'crawl': 'archivebox.cli.archivebox_crawl.main',
        'snapshot': 'archivebox.cli.archivebox_snapshot.main',
        'archiveresult': 'archivebox.cli.archivebox_archiveresult.main',
        'tag': 'archivebox.cli.archivebox_tag.main',
        'binary': 'archivebox.cli.archivebox_binary.main',
        'process': 'archivebox.cli.archivebox_process.main',
        'machine': 'archivebox.cli.archivebox_machine.main',
        'persona': 'archivebox.cli.archivebox_persona.main',
    }

    archive_commands = {
        # High-level commands
        'add': 'archivebox.cli.archivebox_add.main',
        'extract': 'archivebox.cli.archivebox_extract.main',
        'list': 'archivebox.cli.archivebox_list.main',
        'remove': 'archivebox.cli.archivebox_remove.main',
        'run': 'archivebox.cli.archivebox_run.main',
        'update': 'archivebox.cli.archivebox_update.main',
        'status': 'archivebox.cli.archivebox_status.main',
        'search': 'archivebox.cli.archivebox_search.main',
        'config': 'archivebox.cli.archivebox_config.main',
        'schedule': 'archivebox.cli.archivebox_schedule.main',
        'server': 'archivebox.cli.archivebox_server.main',
        'shell': 'archivebox.cli.archivebox_shell.main',
        'manage': 'archivebox.cli.archivebox_manage.main',
        # Introspection commands
        'pluginmap': 'archivebox.cli.archivebox_pluginmap.main',
    }

    # Alternative entry points for model commands, selected by
    # _should_use_legacy_model_command() based on the arguments in sys.argv.
    legacy_model_commands = {
        'crawl': 'archivebox.cli.archivebox_crawl_compat.main',
        'snapshot': 'archivebox.cli.archivebox_snapshot_compat.main',
    }

    # Every lazily loadable command name -> dotted import path.
    # (legacy_model_commands is deliberately not merged in here.)
    all_subcommands = {
        **meta_commands,
        **setup_commands,
        **model_commands,
        **archive_commands,
    }

    # Old command name -> current command name.
    renamed_commands = {
        'setup': 'install',
        'import': 'add',
        'archive': 'add',
        # Old commands replaced by new model commands
        'orchestrator': 'run',
    }

    # Subcommand names that keep a model command on its regular implementation;
    # any other first argument routes to the legacy entry point.
    legacy_model_subcommands = {
        'crawl': {'create', 'list', 'update', 'delete'},
        'snapshot': {'create', 'list', 'update', 'delete'},
    }

    @classmethod
    def get_canonical_name(cls, cmd_name):
        """Return the current name for ``cmd_name``, translating renamed commands.

        Names that were never renamed (including ``None``) are returned unchanged.
        """
        try:
            return cls.renamed_commands[cmd_name]
        except KeyError:
            return cmd_name

    @classmethod
    def _should_use_legacy_model_command(cls, cmd_name: str) -> bool:
        """Decide whether ``cmd_name`` should be served by its legacy entry point.

        The decision is made by inspecting ``sys.argv``: the legacy command is
        used only when ``cmd_name`` has a legacy variant, appears in
        ``sys.argv``, and is followed by a first argument that is neither a
        help flag nor one of the known model subcommands.
        """
        if cmd_name not in cls.legacy_model_commands or cmd_name not in sys.argv:
            return False

        # everything after the first occurrence of the command name
        trailing_args = sys.argv[sys.argv.index(cmd_name) + 1:]
        if not trailing_args:
            return False

        leading_arg = trailing_args[0]
        if leading_arg in ('-h', '--help'):
            return False

        known_subcommands = cls.legacy_model_subcommands[cmd_name]
        return leading_arg not in known_subcommands

    def get_command(self, ctx, cmd_name):
        """Look up the command object for ``cmd_name``.

        Renamed commands are translated to their new name (a hint is printed to
        stderr and ``ctx.invoked_subcommand`` is updated). Legacy model
        commands and registered subcommands are then imported lazily; anything
        else is delegated to click's default lookup.
        """
        # handle renamed commands
        replacement = self.renamed_commands.get(cmd_name)
        if replacement is not None:
            print(
                f' [violet]Hint:[/violet] `archivebox {cmd_name}` has been renamed to `archivebox {replacement}`',
                file=sys.stderr,
            )
            cmd_name = replacement
            ctx.invoked_subcommand = cmd_name

        if self._should_use_legacy_model_command(cmd_name):
            compat_path = self.legacy_model_commands[cmd_name]
            return self._lazy_load(compat_path)

        # fall-back to using click's default command lookup
        if cmd_name not in self.all_subcommands:
            return super().get_command(ctx, cmd_name)

        # handle lazy loading of commands
        return self._lazy_load(cmd_name)

    @classmethod
    def _lazy_load(cls, cmd_name_or_path):
        """Import and return the callable behind a command name or dotted path.

        ``cmd_name_or_path`` is first looked up in ``all_subcommands``; when it
        is not a registered name it is treated as a dotted import path itself.

        Raises:
            ValueError: if the path contains no ``.`` separator, or if the
                loaded object has no ``__doc__`` attribute.
            ImportError / AttributeError: if the module or attribute is missing.
        """
        registered_path = cls.all_subcommands.get(cmd_name_or_path)
        import_path = cmd_name_or_path if registered_path is None else registered_path
        modname, funcname = import_path.rsplit('.', 1)

        # print(f'LAZY LOADING {import_path}')
        func = getattr(import_module(modname), funcname)

        # NOTE: ordinary Python objects always expose a `__doc__` attribute
        # (possibly None), so in practice this guard does not fire.
        if hasattr(func, '__doc__'):
            # a stricter isinstance(func, click.BaseCommand) check is intentionally not enforced
            return func

        raise ValueError(f'lazy loading of {import_path} failed - no docstring found on method')
|
|
|
|
|
|
# Root command group. Subcommands are resolved by ArchiveBoxGroup, and
# invoke_without_command=True lets this callback run when no subcommand is given.
@click.group(cls=ArchiveBoxGroup, invoke_without_command=True)
# --help/-h is declared as a plain flag so the callback can show the custom help command.
@click.option('--help', '-h', is_flag=True, help='Show help')
@click.version_option(VERSION, '-v', '--version', package_name='archivebox', message='%(version)s')
@click.pass_context
def cli(ctx, help=False):
    """ArchiveBox: The self-hosted internet archive"""

    # Translate old command aliases (e.g. `setup`) to their current name; stays None without a subcommand.
    subcommand = ArchiveBoxGroup.get_canonical_name(ctx.invoked_subcommand)

    # if --help is passed or no subcommand is given, show custom help message
    if help or ctx.invoked_subcommand is None:
        ctx.invoke(ctx.command.get_command(ctx, 'help'))

    # if the subcommand is in archive_commands or model_commands,
    # then we need to set up the django environment and check that we're in a valid data folder
    if subcommand in ArchiveBoxGroup.archive_commands or subcommand in ArchiveBoxGroup.model_commands:
        # print('SETUP DJANGO AND CHECK DATA FOLDER')
        try:
            if subcommand == 'server':
                # Debug mode is requested either by `--reload` on the command line or a truthy DEBUG env var.
                run_in_debug = '--reload' in sys.argv or os.environ.get('DEBUG') in ('1', 'true', 'True', 'TRUE', 'yes')
                # NOTE(review): the nesting of the statements below was reconstructed from a
                # de-indented copy of this file - confirm which `if` each assignment belongs to.
                if run_in_debug:
                    os.environ['ARCHIVEBOX_RUNSERVER'] = '1'
                    if '--reload' in sys.argv:
                        os.environ['ARCHIVEBOX_AUTORELOAD'] = '1'
                        os.environ['ARCHIVEBOX_ORCHESTRATOR_MANAGED_BY_WATCHER'] = '1'
                    # Imported here rather than at module level, so it is only loaded on this code path.
                    from archivebox.config.common import STORAGE_CONFIG
                    os.environ['ARCHIVEBOX_RUNSERVER_PIDFILE'] = str(STORAGE_CONFIG.TMP_DIR / 'runserver.pid')

            # Deferred imports: only commands that need Django import these modules.
            from archivebox.config.django import setup_django
            from archivebox.misc.checks import check_data_folder
            setup_django()
            check_data_folder()
        except Exception as e:
            # Report the failure on stderr, then re-raise for every command except `manage` and `shell`.
            print(f'[red][X] Error setting up Django or checking data folder: {e}[/red]', file=sys.stderr)
            if subcommand not in ('manage', 'shell'):  # not all management commands need django to be setup beforehand
                raise
|
|
|
|
|
|
def main(args=None, prog_name=None, stdin=None):
    """Run the ArchiveBox command line interface.

    Args:
        args: Argument list forwarded to the click group (``None`` leaves the
            choice of arguments to click).
        prog_name: Program name shown in help/usage output. When omitted it is
            ``archivebox``, or a ``docker compose run ... archivebox`` form when
            the ``IN_DOCKER`` environment variable is truthy.
        stdin: Accepted for backwards compatibility with callers such as
            ``__main__.py``; not used by the click-based CLI.

    A ``KeyboardInterrupt`` escaping the CLI is reported with a short message
    instead of a traceback.
    """
    # show `docker run archivebox xyz` in help messages if running in docker
    IN_DOCKER = os.environ.get('IN_DOCKER', False) in ('1', 'true', 'True', 'TRUE', 'yes')

    # sys.stdin can be None (e.g. no console attached / fd 0 closed at startup)
    # or already closed; treat both as "not a TTY" rather than crashing while
    # merely computing a display name.
    try:
        IS_TTY = sys.stdin.isatty()
    except (AttributeError, ValueError):
        IS_TTY = False

    prog_name = prog_name or (f'docker compose run{"" if IS_TTY else " -T"} archivebox' if IN_DOCKER else 'archivebox')

    # stdin param allows passing input data from caller (used by __main__.py)
    # currently not used by click-based CLI, but kept for backwards compatibility

    try:
        cli(args=args, prog_name=prog_name)
    except KeyboardInterrupt:
        print('\n\n[red][X] Got CTRL+C. Exiting...[/red]')
|
|
|
|
|
|
# Run the CLI when this module is executed as a script.
if __name__ == '__main__':
    main()
|