-?\d+(?:\.\d+)?(?:[eE][+-]?\d+)?)'
)
def render_code_block(text: str, *, highlighted: bool = False) -> str:
    """Return ``text`` HTML-escaped and embedded in the wrapper string below.

    Args:
        text: Raw text to display. It is passed through ``html.escape`` with
            ``quote=False`` before any other processing.
        highlighted: When true, every ``JSON_TOKEN_RE`` match in the escaped
            text is routed through the local ``_wrap_token`` callback.

    Returns:
        The escaped (and optionally token-processed) text, concatenated with
        the surrounding string literals.
    """
    # Escape first so the token pass below only ever sees escaped text.
    code = html.escape(text, quote=False)
    if highlighted:
        def _wrap_token(match: re.Match[str]) -> str:
            # Inline CSS declarations keyed by token kind.
            styles = {
                'key': 'color: #0550ae;',
                'string': 'color: #0a7f45;',
                'boolean': 'color: #8250df; font-weight: 600;',
                'null': 'color: #6e7781; font-style: italic;',
                'number': 'color: #b35900;',
            }
            # Name of the first named group that took part in this match.
            token_type = next(name for name, value in match.groupdict().items() if value is not None)
            # NOTE(review): neither `styles` nor `token_type` is referenced by the
            # string returned here, so the matched text comes back unchanged.
            # Presumably wrapping markup belongs in this literal -- confirm.
            return f'{match.group(0)}'
        code = JSON_TOKEN_RE.sub(_wrap_token, code)
    return (
        ''
        ''
        f'{code}'
        ''
    )
def render_highlighted_json_block(value: Any) -> str:
    """Serialize ``value`` as 2-space-indented JSON and render it highlighted.

    Non-ASCII characters are kept as-is (``ensure_ascii=False``); the rendering
    itself is delegated to ``render_code_block`` with ``highlighted=True``.
    """
    pretty_json = json.dumps(value, indent=2, ensure_ascii=False)
    return render_code_block(pretty_json, highlighted=True)
def get_plugin_docs_url(plugin_name: str) -> str:
    """Return the plugin docs base URL with ``plugin_name`` as its ``#`` fragment."""
    fragment = f'#{plugin_name}'
    return f'{ABX_PLUGINS_DOCS_BASE_URL}{fragment}'
def get_plugin_hook_source_url(plugin_name: str, hook_name: str) -> str:
    """Return the source URL for ``hook_name`` inside ``plugin_name``.

    Both names are passed through ``quote`` and joined with ``/`` after
    ``ABX_PLUGINS_GITHUB_BASE_URL``.
    """
    plugin_segment = quote(plugin_name)
    hook_segment = quote(hook_name)
    return f'{ABX_PLUGINS_GITHUB_BASE_URL}{plugin_segment}/{hook_segment}'
def get_live_config_url(key: str) -> str:
    """Return the live-config URL for ``key`` (quoted, with a trailing slash)."""
    quoted_key = quote(key)
    return f'{LIVE_CONFIG_BASE_URL}{quoted_key}/'
def get_environment_binary_url(name: str) -> str:
    """Return the environment-binaries URL for ``name`` (quoted, with a trailing slash)."""
    quoted_name = quote(name)
    return f'{ENVIRONMENT_BINARIES_BASE_URL}{quoted_name}/'
def get_installed_binary_change_url(name: str, binary: Any) -> str | None:
binary_id = getattr(binary, 'id', None)
if not binary_id:
return None
base_url = getattr(binary, 'admin_change_url', None) or f'{INSTALLED_BINARIES_BASE_URL}{binary_id}/change/'
changelist_filters = urlencode({'q': canonical_binary_name(name)})
return f'{base_url}?{urlencode({"_changelist_filters": changelist_filters})}'
def get_machine_admin_url() -> str | None:
try:
from archivebox.machine.models import Machine
return Machine.current().admin_change_url
except Exception:
return None
def render_code_tag_list(values: list[str]) -> str:
    """Render ``values`` as one concatenated run of ``format_html`` fragments.

    Args:
        values: Strings to render, in order.

    Returns:
        The literal ``'(none)'`` when ``values`` is empty; otherwise each value
        is formatted individually with ``format_html``, the fragments are
        joined without a separator, and the result is embedded in the final
        wrapper string.
    """
    if not values:
        return '(none)'
    # One fragment per value; str() turns each format_html result into plain str.
    tags = ''.join(
        str(format_html(
            '{}',
            value,
        ))
        for value in values
    )
    return f'{tags}
'
def render_plugin_metadata_html(config: dict[str, Any]) -> str:
    """Render selected metadata fields of a plugin ``config`` mapping.

    Args:
        config: Plugin configuration mapping. The keys read are ``title``,
            ``description``, ``required_plugins``, ``required_binaries`` and
            ``output_mimetypes``; missing or falsy values fall back to
            ``'(none)'`` or an empty list.

    Returns:
        The per-row ``format_html`` fragments joined together and embedded in
        the final wrapper string.
    """
    # (label, value) pairs. List-valued entries are pre-rendered by the tag-list
    # helpers and wrapped in mark_safe before being handed to format_html.
    rows = (
        ('Title', config.get('title') or '(none)'),
        ('Description', config.get('description') or '(none)'),
        ('Required Plugins', mark_safe(render_link_tag_list(config.get('required_plugins') or [], get_plugin_docs_url))),
        ('Required Binaries', mark_safe(render_link_tag_list(config.get('required_binaries') or [], get_environment_binary_url))),
        ('Output MIME Types', mark_safe(render_code_tag_list(config.get('output_mimetypes') or []))),
    )
    # NOTE(review): the template passed to format_html below is an empty string,
    # so `label` and `value` are not interpolated as written -- confirm.
    rendered_rows = ''.join(
        str(format_html(
            '',
            label,
            value,
        ))
        for label, value in rows
    )
    return f'{rendered_rows}
'
def render_link_tag_list(values: list[str], url_resolver: Callable[[str], str] | None = None) -> str:
    """Render ``values`` as a run of fragments, optionally resolving a URL per value.

    Args:
        values: Strings to render, in order.
        url_resolver: Optional callable mapping a value to a URL. When given,
            the resolved URL is passed to ``format_html`` ahead of the value.

    Returns:
        The literal ``'(none)'`` when ``values`` is empty; otherwise the joined
        fragments embedded in the final wrapper string.
    """
    if not values:
        return '(none)'
    tags = []
    for value in values:
        if url_resolver is None:
            # No resolver: format the bare value only.
            tags.append(str(format_html(
                '{}',
                value,
            )))
        else:
            # Resolver given: URL first, value second.
            # NOTE(review): the visible template has a single `{}` placeholder,
            # which would consume the URL rather than the value -- confirm.
            tags.append(str(format_html(
                ''
                '{}'
                '',
                url_resolver(value),
                value,
            )))
    return f'{"".join(tags)}
'
def render_property_links(prop_name: str, prop_info: dict[str, Any], machine_admin_url: str | None) -> str:
    """Build the space-separated run of link fragments for one config property.

    Args:
        prop_name: Name of the config property.
        prop_info: Schema mapping for the property. The keys read are
            ``x-fallback``, ``x-aliases`` and ``default``.
        machine_admin_url: When truthy, an "Edit override" fragment is added.

    Returns:
        The fragments joined with a single space. A "Computed value" fragment
        is always first; the fallback, alias and binary fragments are added
        only when the corresponding value is a non-empty string.
    """
    def _fragment(template: str, *args: Any) -> str:
        # format_html returns a safe-string type; callers here want plain str.
        return str(format_html(template, *args))

    rendered = [_fragment('Computed value', get_live_config_url(prop_name))]

    if machine_admin_url:
        rendered.append(_fragment('Edit override', machine_admin_url))

    fallback_key = prop_info.get('x-fallback')
    if isinstance(fallback_key, str) and fallback_key:
        rendered.append(_fragment('Fallback: {}', get_live_config_url(fallback_key), fallback_key))

    alias_keys = prop_info.get('x-aliases') or []
    if isinstance(alias_keys, list):
        rendered.extend(
            _fragment('Alias: {}', get_live_config_url(alias_key), alias_key)
            for alias_key in alias_keys
            if isinstance(alias_key, str) and alias_key
        )

    # Only *_BINARY properties with a string default point at a binary page.
    default_value = prop_info.get('default')
    if prop_name.endswith('_BINARY') and isinstance(default_value, str) and default_value:
        rendered.append(_fragment('Binary: {}', get_environment_binary_url(default_value), default_value))

    return ' '.join(rendered)
def render_config_properties_html(properties: dict[str, Any], machine_admin_url: str | None) -> str:
    """Render a header fragment followed by one fragment per config property.

    Args:
        properties: Mapping of property name to its schema mapping. The keys
            read from each schema are ``type``, ``description`` and ``default``.
        machine_admin_url: When truthy, a "Machine Config Editor" entry is
            placed first in the header.

    Returns:
        All fragments concatenated without a separator.
    """
    header_links = [
        str(format_html('Dependencies', ENVIRONMENT_BINARIES_BASE_URL)),
        str(format_html('Installed Binaries', INSTALLED_BINARIES_BASE_URL)),
    ]
    if machine_admin_url:
        # insert(0, ...) so the editor entry comes before the two fixed ones.
        header_links.insert(0, str(format_html('Machine Config Editor', machine_admin_url)))
    # The first "card" is the header: its entries joined with " | ".
    cards = [
        f'{" | ".join(header_links)}
'
    ]
    for prop_name, prop_info in properties.items():
        prop_type = prop_info.get('type', 'unknown')
        if isinstance(prop_type, list):
            # A list of types is shown as "a | b | c".
            prop_type = ' | '.join(str(type_name) for type_name in prop_type)
        prop_desc = prop_info.get('description', '')
        default_html = ''
        # Key-presence test, so falsy defaults (0, False, '') are still rendered.
        if 'default' in prop_info:
            default_html = str(format_html(
                'Default: {}
',
                prop_info['default'],
            ))
        description_html = prop_desc or mark_safe('(no description)')
        # NOTE(review): the template below is an empty string, so none of the
        # six arguments are interpolated as written -- confirm.
        cards.append(str(format_html(
            '',
            get_live_config_url(prop_name),
            prop_name,
            prop_type,
            description_html,
            mark_safe(render_property_links(prop_name, prop_info, machine_admin_url)),
            mark_safe(default_html),
        )))
    return ''.join(cards)
def render_hook_links_html(plugin_name: str, hooks: list[str], source: str) -> str:
    """Render one fragment per hook name, resolving a source URL for builtin plugins.

    Args:
        plugin_name: Plugin the hooks belong to.
        hooks: Hook names, in display order.
        source: Plugin origin. Only the exact value ``'builtin'`` triggers the
            source-URL lookup; any other value formats the bare hook name.

    Returns:
        The literal ``'(none)'`` when ``hooks`` is empty; otherwise the
        fragments concatenated without a separator.
    """
    if not hooks:
        return '(none)'
    items = []
    for hook_name in hooks:
        if source == 'builtin':
            # NOTE(review): the template below is an empty string, so the URL
            # and hook name are not interpolated as written -- confirm.
            items.append(str(format_html(
                '',
                get_plugin_hook_source_url(plugin_name, hook_name),
                hook_name,
            )))
        else:
            items.append(str(format_html(
                '{}
',
                hook_name,
            )))
    return ''.join(items)
def render_binary_detail_description(name: str, merged: dict[str, Any], db_binary: Any) -> str:
    """Render the description for a binary detail page.

    Args:
        name: Binary name, forwarded to ``get_installed_binary_change_url``.
        merged: Serialized binary record; only ``merged['abspath']`` is read.
        db_binary: Object forwarded to ``get_installed_binary_change_url``.

    Returns:
        The formatted ``abspath``, followed by a "View Installed Binary Record"
        fragment when a change URL could be resolved for ``db_binary``.
    """
    installed_binary_url = get_installed_binary_change_url(name, db_binary)
    if installed_binary_url:
        return str(format_html(
            '{}
'
            'View Installed Binary Record',
            merged['abspath'],
            installed_binary_url,
        ))
    # No change URL available: show the path on its own.
    return str(format_html('{}', merged['abspath']))
def obj_to_yaml(obj: Any, indent: int = 0) -> str:
    """Render ``obj`` as a loose, human-readable YAML-like string.

    This is a display helper, not a YAML serializer: scalars are not quoted
    and the output is not guaranteed to round-trip.

    Args:
        obj: Value to render. Dicts and lists recurse; strings, numbers and
            bools are rendered inline; callables are rendered from their
            source code when it is available; anything else uses ``str()``.
        indent: Nesting depth. ``0`` means top level, where entries are
            separated by a blank line instead of being indented.

    Returns:
        The rendered text. Scalar and nested values start with a single space
        so they can be appended directly after ``"key:"``.
    """
    indent_str = " " * indent
    if indent == 0:
        indent_str = '\n'  # put extra newline between top-level entries

    if isinstance(obj, dict):
        if not obj:
            # Nested values are glued straight after "key:", so they need their
            # own separating space; a top-level empty dict stays a bare "{}".
            return " {}" if indent else "{}"
        entries = [
            f"{indent_str}{key}:{obj_to_yaml(value, indent + 1)}\n"
            for key, value in obj.items()
        ]
        return "\n" + "".join(entries)

    if isinstance(obj, list):
        if not obj:
            return " []" if indent else "[]"
        # lstrip() drops the leading space/newline of the item so it sits
        # directly after the "- " marker.
        items = [
            f"{indent_str}- {obj_to_yaml(item, indent + 1).lstrip()}\n"
            for item in obj
        ]
        return ("\n" + "".join(items)).rstrip()

    if isinstance(obj, str):
        if "\n" in obj:
            # Multi-line strings become a "|" block with every line indented.
            return f" |\n{indent_str} " + obj.replace("\n", f"\n{indent_str} ")
        return f" {obj}"

    if isinstance(obj, (int, float, bool)):
        return f" {str(obj)}"

    if callable(obj):
        try:
            raw_source = inspect.getsource(obj)
        except (OSError, TypeError):
            # Builtins, C-implemented callables and objects without retrievable
            # source cannot be inspected; fall back to their str() form rather
            # than failing the whole render.
            return f" {str(obj)}"
        # Blank out "def" lines, drop empty lines, and keep only the body of a
        # lambda (text after the last "lambda: "), minus any trailing comma.
        source = '\n'.join(
            '' if 'def ' in line else line
            for line in raw_source.split('\n')
            if line.strip()
        ).split('lambda: ')[-1].rstrip(',')
        return f" {indent_str} " + source.replace("\n", f"\n{indent_str} ")

    return f" {str(obj)}"
def canonical_binary_name(name: str) -> str:
    """Map ``name`` through ``CANONICAL_BINARY_ALIASES``; unknown names pass through unchanged."""
    canonical = CANONICAL_BINARY_ALIASES.get(name, name)
    return canonical
def _binary_sort_key(binary: Binary) -> tuple[int, int, int, Any]:
    """Return a sort key that ranks the most useful ``Binary`` record highest.

    Priority, most significant first: status is INSTALLED, has a version,
    has an abspath, then ``modified_at``.
    """
    installed_rank = int(binary.status == Binary.StatusChoices.INSTALLED)
    version_rank = int(bool(binary.version))
    abspath_rank = int(bool(binary.abspath))
    return installed_rank, version_rank, abspath_rank, binary.modified_at
def get_db_binaries_by_name() -> Dict[str, Binary]:
    """Return the best ``Binary`` record per canonical binary name.

    All records are grouped by ``canonical_binary_name(binary.name)``; within
    each group the record with the highest ``_binary_sort_key`` is kept.
    """
    records_by_name: Dict[str, list[Binary]] = {}
    for record in Binary.objects.all():
        canonical = canonical_binary_name(record.name)
        records_by_name.setdefault(canonical, []).append(record)

    best_by_name: Dict[str, Binary] = {}
    for canonical, candidates in records_by_name.items():
        best_by_name[canonical] = max(candidates, key=_binary_sort_key)
    return best_by_name
def serialize_binary_record(name: str, binary: Binary | None) -> Dict[str, Any]:
    """Flatten ``binary`` into a plain dict of strings plus an availability flag.

    Args:
        name: Binary name; stored in its canonical form.
        binary: Record to serialize, or ``None`` when nothing is persisted.

    Returns:
        A dict with ``name``, ``version``, ``binprovider``, ``abspath``,
        ``sha256`` and ``status`` (each ``''`` when missing or falsy) and
        ``is_available``, which is true only for an INSTALLED record with a
        non-empty ``abspath``.
    """
    def _text(attr: str) -> str:
        # Missing attributes and falsy values (including None) collapse to ''.
        return str(getattr(binary, attr, '') or '')

    is_installed = bool(binary and binary.status == Binary.StatusChoices.INSTALLED)

    record: Dict[str, Any] = {'name': canonical_binary_name(name)}
    for attr in ('version', 'binprovider', 'abspath', 'sha256', 'status'):
        record[attr] = _text(attr)
    record['is_available'] = is_installed and bool(getattr(binary, 'abspath', '') or '')
    return record
def get_filesystem_plugins() -> Dict[str, Dict[str, Any]]:
    """Discover plugins from filesystem directories.

    Scans ``BUILTIN_PLUGINS_DIR`` and ``USER_PLUGINS_DIR`` (skipping either if
    it does not exist). Every sub-directory whose name does not start with
    ``_`` is treated as a plugin.

    Returns:
        A dict keyed by ``"<source>.<dirname>"`` where each value holds the
        plugin's ``id``, ``name``, ``path``, ``source`` (``'builtin'`` or
        ``'user'``), ``hooks`` (file names matching ``on_*__*.{sh,py,js}``) and
        ``config`` (the parsed ``config.json`` object, or ``None`` when the
        file is missing, unreadable, not valid JSON, or not a JSON object).
    """
    import json
    from archivebox.hooks import BUILTIN_PLUGINS_DIR, USER_PLUGINS_DIR

    plugins: Dict[str, Dict[str, Any]] = {}
    for base_dir, source in ((BUILTIN_PLUGINS_DIR, 'builtin'), (USER_PLUGINS_DIR, 'user')):
        if not base_dir.exists():
            continue
        for plugin_dir in base_dir.iterdir():
            if not plugin_dir.is_dir() or plugin_dir.name.startswith('_'):
                continue
            plugin_id = f'{source}.{plugin_dir.name}'

            # Find hook scripts
            hooks = [
                hook
                for ext in ('sh', 'py', 'js')
                for hook in plugin_dir.glob(f'on_*__*.{ext}')
            ]

            # Load config.json if it exists. Opening directly (instead of an
            # exists() check first) avoids a race with concurrent deletion;
            # a missing file is just another OSError.
            config_data = None
            try:
                with open(plugin_dir / 'config.json', 'r', encoding='utf-8') as f:
                    loaded = json.load(f)
            except (OSError, ValueError):
                # ValueError covers both json.JSONDecodeError and
                # UnicodeDecodeError (file is not valid UTF-8).
                loaded = None
            # Callers use config.get(...), so anything but a JSON object is
            # treated the same as an unreadable config.
            if isinstance(loaded, dict):
                config_data = loaded

            plugins[plugin_id] = {
                'id': plugin_id,
                'name': plugin_dir.name,
                'path': str(plugin_dir),
                'source': source,
                'hooks': [str(h.name) for h in hooks],
                'config': config_data,
            }
    return plugins
@render_with_table_view
def binaries_list_view(request: HttpRequest, **kwargs) -> TableContext:
    """List every known binary with its version, provider and absolute path.

    One row is produced per canonical binary name found in the database,
    sorted by name. Binaries that are not available show placeholder values.

    Raises:
        AssertionError: If the requesting user is not a superuser.
    """
    # Explicit raise instead of `assert`: assert statements are removed when
    # Python runs with -O, which would silently disable this access check.
    # AssertionError is kept so existing callers see the same exception type.
    if not is_superuser(request):
        raise AssertionError('Must be a superuser to view configuration settings.')

    rows = {
        "Binary Name": [],
        "Found Version": [],
        "Provided By": [],
        "Found Abspath": [],
    }

    db_binaries = get_db_binaries_by_name()
    for name in sorted(db_binaries):
        merged = serialize_binary_record(name, db_binaries[name])
        rows['Binary Name'].append(ItemLink(name, key=name))
        if merged['is_available']:
            rows['Found Version'].append(f"✅ {merged['version']}" if merged['version'] else '✅ found')
            rows['Provided By'].append(merged['binprovider'] or '-')
            rows['Found Abspath'].append(merged['abspath'] or '-')
        else:
            rows['Found Version'].append('❌ missing')
            rows['Provided By'].append('-')
            rows['Found Abspath'].append('-')

    return TableContext(
        title="Binaries",
        table=rows,
    )
@render_with_item_view
def binary_detail_view(request: HttpRequest, key: str, **kwargs) -> ItemContext:
    """Show the details of a single binary, looked up by canonical name.

    Args:
        request: Incoming request; must belong to a superuser.
        key: Binary name or alias; it is canonicalized before the lookup.

    Returns:
        An ``ItemContext`` with one section. Available binaries get the full
        field set (including ``sha256``) and a rendered description; anything
        else gets placeholder values.

    Raises:
        AssertionError: If the requesting user is not a superuser.
    """
    # Explicit raise instead of `assert`: assert statements are removed when
    # Python runs with -O, which would silently disable this access check.
    # AssertionError is kept so existing callers see the same exception type.
    if not is_superuser(request):
        raise AssertionError('Must be a superuser to view configuration settings.')

    key = canonical_binary_name(key)
    db_binary = get_db_binaries_by_name().get(key)
    merged = serialize_binary_record(key, db_binary)

    if merged['is_available']:
        section: SectionData = {
            "name": key,
            "description": mark_safe(render_binary_detail_description(key, merged, db_binary)),
            "fields": {
                'name': key,
                'binprovider': merged['binprovider'] or '-',
                'abspath': merged['abspath'] or 'not found',
                'version': merged['version'] or 'unknown',
                'sha256': merged['sha256'],
                'status': merged['status'],
            },
            "help_texts": {},
        }
    else:
        section = {
            "name": key,
            "description": "No persisted Binary record found",
            "fields": {
                'name': key,
                'binprovider': merged['binprovider'] or 'not recorded',
                'abspath': merged['abspath'] or 'not recorded',
                'version': merged['version'] or 'N/A',
                'status': merged['status'] or 'unrecorded',
            },
            "help_texts": {},
        }

    return ItemContext(
        slug=key,
        title=key,
        data=[section],
    )
@render_with_table_view
def plugins_list_view(request: HttpRequest, **kwargs) -> TableContext:
    """List every plugin discovered on the filesystem.

    Each row shows the plugin name, source, path, hook file names and a short
    config summary. When nothing is discovered, a single placeholder row is
    shown instead.

    Raises:
        AssertionError: If the requesting user is not a superuser.
    """
    # Explicit raise instead of `assert`: assert statements are removed when
    # Python runs with -O, which would silently disable this access check.
    # AssertionError is kept so existing callers see the same exception type.
    if not is_superuser(request):
        raise AssertionError('Must be a superuser to view configuration settings.')

    rows = {
        "Name": [],
        "Source": [],
        "Path": [],
        "Hooks": [],
        "Config": [],
    }

    plugins = get_filesystem_plugins()
    for plugin_id, plugin in plugins.items():
        rows['Name'].append(ItemLink(plugin['name'], key=plugin_id))
        rows['Source'].append(plugin['source'])
        rows['Path'].append(format_html('{}', plugin['path']))
        rows['Hooks'].append(', '.join(plugin['hooks']) or '(none)')

        # Show config status
        config = plugin.get('config')
        if config:
            # Tolerate a non-object config or a null "properties" entry so one
            # malformed config.json cannot break the whole listing.
            config_properties = (config.get('properties') if isinstance(config, dict) else None) or {}
            config_count = len(config_properties)
            rows['Config'].append(f'✅ {config_count} properties' if config_count > 0 else '✅ present')
        else:
            rows['Config'].append('❌ none')

    if not plugins:
        # Show a helpful message when no plugins found
        rows['Name'].append('(no plugins found)')
        rows['Source'].append('-')
        rows['Path'].append(mark_safe('abx_plugins/plugins/ or data/custom_plugins/'))
        rows['Hooks'].append('-')
        rows['Config'].append('-')

    return TableContext(
        title="Installed plugins",
        table=rows,
    )
@render_with_item_view
def plugin_detail_view(request: HttpRequest, key: str, **kwargs) -> ItemContext:
    """Show the details of one filesystem plugin.

    Args:
        request: Incoming request; must belong to a superuser.
        key: Plugin id as produced by ``get_filesystem_plugins``.

    Returns:
        An ``ItemContext`` whose sections are, in order: the base plugin info,
        "Hooks" (only if the plugin has hooks), then "Plugin Metadata",
        "config.json" and "Config Properties" (only if the plugin has a config;
        the last one only if it declares properties). An unknown ``key`` yields
        a context with a "Plugin not found" title and no sections.
    """
    # NOTE(review): `assert` is removed when Python runs with -O, so this check
    # would not run in that mode -- confirm that is acceptable.
    assert is_superuser(request), 'Must be a superuser to view configuration settings.'
    plugins = get_filesystem_plugins()
    plugin = plugins.get(key)
    if not plugin:
        return ItemContext(
            slug=key,
            title=f'Plugin not found: {key}',
            data=[],
        )
    # Base fields that all plugins have
    docs_url = get_plugin_docs_url(plugin['name'])
    machine_admin_url = get_machine_admin_url()
    fields = {
        "id": plugin['id'],
        "name": plugin['name'],
        "source": plugin['source'],
    }
    sections: list[SectionData] = [{
        "name": plugin['name'],
        "description": format_html(
            '{}
ABX Plugin Docs',
            plugin['path'],
            docs_url,
        ),
        "fields": fields,
        "help_texts": {},
    }]
    if plugin['hooks']:
        sections.append({
            "name": "Hooks",
            "description": mark_safe(render_hook_links_html(plugin['name'], plugin['hooks'], plugin['source'])),
            "fields": {},
            "help_texts": {},
        })
    if plugin.get('config'):
        sections.append({
            "name": "Plugin Metadata",
            "description": mark_safe(render_plugin_metadata_html(plugin['config'])),
            "fields": {},
            "help_texts": {},
        })
        sections.append({
            "name": "config.json",
            "description": mark_safe(render_highlighted_json_block(plugin['config'])),
            "fields": {},
            "help_texts": {},
        })
        # The properties section is only added when at least one is declared.
        config_properties = plugin['config'].get('properties', {})
        if config_properties:
            sections.append({
                "name": "Config Properties",
                "description": mark_safe(render_config_properties_html(config_properties, machine_admin_url)),
                "fields": {},
                "help_texts": {},
            })
    return ItemContext(
        slug=key,
        title=plugin['name'],
        data=sections,
    )
@render_with_table_view
def worker_list_view(request: HttpRequest, **kwargs) -> TableContext:
    """List supervisord itself plus every process it reports.

    Returns:
        A ``TableContext`` with one row for supervisord followed by one row
        per entry from ``getAllProcessInfo()``. If no supervisord process is
        found, an empty table titled "No running worker processes" is returned.

    Raises:
        AssertionError: If the requesting user is not a superuser.
    """
    # Explicit raise instead of `assert`: assert statements are removed when
    # Python runs with -O, which would silently disable this access check.
    # AssertionError is kept so existing callers see the same exception type.
    if not is_superuser(request):
        raise AssertionError("Must be a superuser to view configuration settings.")

    rows = {
        "Name": [],
        "State": [],
        "PID": [],
        "Started": [],
        "Command": [],
        "Logfile": [],
        "Exit Status": [],
    }

    from archivebox.workers.supervisord_util import get_existing_supervisord_process

    supervisor = get_existing_supervisord_process()
    if supervisor is None:
        return TableContext(
            title="No running worker processes",
            table=rows,
        )

    # Index the per-process config by name; RPC results are validated
    # defensively because their shape is not guaranteed here.
    all_config: dict[str, dict[str, object]] = {}
    config_items = supervisor.getAllConfigInfo()
    if not isinstance(config_items, list):
        config_items = []
    for config_data in config_items:
        if not isinstance(config_data, dict):
            continue
        config_name = config_data.get("name")
        if not isinstance(config_name, str):
            continue
        all_config[config_name] = config_data

    # Add top row for supervisord process manager
    rows["Name"].append(ItemLink('supervisord', key='supervisord'))
    supervisor_state = supervisor.getState()
    rows["State"].append(str(supervisor_state.get('statename') if isinstance(supervisor_state, dict) else ''))
    rows['PID'].append(str(supervisor.getPID()))
    rows["Started"].append('-')
    rows["Command"].append('supervisord --configuration=tmp/supervisord.conf')
    rows["Logfile"].append(
        format_html(
            '{}',
            'supervisord',
            'logs/supervisord.log',
        )
    )
    rows['Exit Status'].append('0')

    # Add a row for each worker process managed by supervisord
    process_items = supervisor.getAllProcessInfo()
    if not isinstance(process_items, list):
        process_items = []
    for proc_data in process_items:
        if not isinstance(proc_data, dict):
            continue
        proc_name = str(proc_data.get("name") or "")
        proc_description = str(proc_data.get("description") or "")
        proc_start = proc_data.get("start")
        proc_logfile = str(proc_data.get("stdout_logfile") or "")
        proc_config = all_config.get(proc_name, {})

        rows["Name"].append(ItemLink(proc_name, key=proc_name))
        rows["State"].append(str(proc_data.get("statename") or ""))
        rows['PID'].append(proc_description.replace('pid ', ''))
        rows["Started"].append(format_parsed_datetime(proc_start))
        rows["Command"].append(str(proc_config.get("command") or ""))
        rows["Logfile"].append(
            format_html(
                '{}',
                # Log file base name without directory or extension.
                proc_logfile.split("/")[-1].split('.')[0],
                proc_logfile,
            )
        )
        rows["Exit Status"].append(str(proc_data.get("exitstatus") or ""))

    return TableContext(
        title="Running worker processes",
        table=rows,
    )
@render_with_item_view
def worker_detail_view(request: HttpRequest, key: str, **kwargs) -> ItemContext:
    """Show state, config and logs for supervisord or one of its workers.

    Args:
        request: Incoming request; must belong to a superuser.
        key: ``'supervisord'`` for the process manager itself, otherwise the
            name of a supervised process.

    Returns:
        An ``ItemContext`` with a single section, or an error context with no
        sections when no supervisord process is found.

    Raises:
        AssertionError: If the requesting user is not a superuser.
    """
    # Explicit raise instead of `assert`: assert statements are removed when
    # Python runs with -O, which would silently disable this access check.
    # AssertionError is kept so existing callers see the same exception type.
    if not is_superuser(request):
        raise AssertionError("Must be a superuser to view configuration settings.")

    from archivebox.workers.supervisord_util import get_existing_supervisord_process, get_worker, get_sock_file, CONFIG_FILE_NAME

    SOCK_FILE = get_sock_file()
    CONFIG_FILE = SOCK_FILE.parent / CONFIG_FILE_NAME

    supervisor = get_existing_supervisord_process()
    if supervisor is None:
        return ItemContext(
            slug='none',
            title='error: No running supervisord process.',
            data=[],
        )

    all_config: list[dict[str, object]] = []
    config_items = supervisor.getAllConfigInfo()
    if not isinstance(config_items, list):
        config_items = []
    for config_data in config_items:
        if isinstance(config_data, dict):
            all_config.append(config_data)

    if key == 'supervisord':
        relevant_config = CONFIG_FILE.read_text()
        relevant_logs = str(supervisor.readLog(0, 10_000_000))

        # The start time is taken from the most recent "initialized" log line.
        # That line may be absent (e.g. rotated or truncated log), in which
        # case start time and uptime are left blank instead of raising.
        init_lines = [
            line
            for line in relevant_logs.split("\n")
            if "RPC interface 'supervisor' initialized" in line
        ]
        start_ts = init_lines[-1].split(",", 1)[0] if init_lines else None
        start_dt = parse_date(start_ts) if start_ts else None
        uptime = str(timezone.now() - start_dt).split(".")[0] if start_dt else ""

        supervisor_state = supervisor.getState()
        proc: Dict[str, object] = {
            "name": "supervisord",
            "pid": supervisor.getPID(),
            "statename": str(supervisor_state.get("statename") if isinstance(supervisor_state, dict) else ""),
            "start": start_ts,
            "stop": None,
            "exitstatus": "",
            "stdout_logfile": "logs/supervisord.log",
            "description": f'pid 000, uptime {uptime}',
        }
    else:
        worker_data = get_worker(supervisor, key)
        proc = worker_data if isinstance(worker_data, dict) else {}
        relevant_config = next((config for config in all_config if config.get('name') == key), {})
        log_result = supervisor.tailProcessStdoutLog(key, 0, 10_000_000)
        relevant_logs = str(log_result[0] if isinstance(log_result, tuple) else log_result)

    section: SectionData = {
        "name": key,
        "description": key,
        "fields": {
            "Command": str(proc.get("name") or ""),
            "PID": str(proc.get("pid") or ""),
            "State": str(proc.get("statename") or ""),
            "Started": format_parsed_datetime(proc.get("start")),
            "Stopped": format_parsed_datetime(proc.get("stop")),
            "Exit Status": str(proc.get("exitstatus") or ""),
            "Logfile": str(proc.get("stdout_logfile") or ""),
            # The description looks like "pid N, uptime H:MM:SS"; keep the tail.
            "Uptime": str(str(proc.get("description") or "").split("uptime ", 1)[-1]),
            "Config": obj_to_yaml(relevant_config) if isinstance(relevant_config, dict) else str(relevant_config),
            "Logs": relevant_logs,
        },
        "help_texts": {"Uptime": "How long the process has been running ([days:]hours:minutes:seconds)"},
    }

    return ItemContext(
        slug=key,
        title=key,
        data=[section],
    )
@render_with_table_view
def log_list_view(request: HttpRequest, **kwargs) -> TableContext:
    """List the ``*.log`` files in ``CONSTANTS.LOGS_DIR``, newest first.

    Each row shows the file name, modification time, size and the last
    non-blank line found in the final 1024 bytes of the file (blank when the
    file is empty or that tail contains only whitespace).

    Raises:
        AssertionError: If the requesting user is not a superuser.
    """
    # Explicit raise instead of `assert`: assert statements are removed when
    # Python runs with -O, which would silently disable this access check.
    # AssertionError is kept so existing callers see the same exception type.
    if not is_superuser(request):
        raise AssertionError("Must be a superuser to view configuration settings.")

    log_files: list[Path] = [
        logfile
        for logfile in sorted(CONSTANTS.LOGS_DIR.glob("*.log"), key=os.path.getmtime)[::-1]
        if isinstance(logfile, Path)
    ]

    rows = {
        "Name": [],
        "Last Updated": [],
        "Size": [],
        "Most Recent Lines": [],
    }

    # Add a row for each log file
    for logfile in log_files:
        st = logfile.stat()
        rows["Name"].append(ItemLink("logs" + str(logfile).rsplit("/logs", 1)[-1], key=logfile.name))
        rows["Last Updated"].append(format_parsed_datetime(st.st_mtime))
        rows["Size"].append(f'{st.st_size//1000} kb')

        with open(logfile, 'rb') as f:
            try:
                # Only the tail is needed; seeking before the start of a short
                # file raises OSError, in which case the whole file is read.
                f.seek(-1024, os.SEEK_END)
            except OSError:
                f.seek(0)
            tail_text = f.read().decode('utf-8', errors='replace')

        non_empty_lines = [line for line in tail_text.split("\n") if line.strip()]
        # An empty (or whitespace-only) log has no last line; show a blank
        # cell rather than failing the whole page with an IndexError.
        rows["Most Recent Lines"].append(non_empty_lines[-1] if non_empty_lines else '')

    return TableContext(
        title="Debug Log files",
        table=rows,
    )
@render_with_item_view
def log_detail_view(request: HttpRequest, key: str, **kwargs) -> ItemContext:
    """Show the path, size, tail and full contents of one log file.

    Args:
        request: Incoming request; must belong to a superuser.
        key: Log file name. An exact file-name match in ``CONSTANTS.LOGS_DIR``
            is preferred; otherwise the first ``*.log`` file whose name
            contains ``key`` is used.

    Returns:
        An ``ItemContext`` with a single section, or a context with a
        "Log not found" title and no sections when nothing matches.

    Raises:
        AssertionError: If the requesting user is not a superuser.
    """
    # Explicit raise instead of `assert`: assert statements are removed when
    # Python runs with -O, which would silently disable this access check.
    # AssertionError is kept so existing callers see the same exception type.
    if not is_superuser(request):
        raise AssertionError("Must be a superuser to view configuration settings.")

    candidates = [logfile for logfile in CONSTANTS.LOGS_DIR.glob('*.log') if key in logfile.name]
    # Prefer the exact name so a key like "worker.log" cannot resolve to
    # "other_worker.log" just because it happens to be globbed first.
    log_file = next((logfile for logfile in candidates if logfile.name == key), None)
    if log_file is None and candidates:
        log_file = candidates[0]
    if log_file is None:
        return ItemContext(
            slug=key,
            title=f'Log not found: {key}',
            data=[],
        )

    # Logs may contain arbitrary bytes; replace undecodable ones instead of
    # raising, matching how the log list view decodes file tails.
    log_text = log_file.read_text(encoding='utf-8', errors='replace')
    log_stat = log_file.stat()

    section: SectionData = {
        "name": key,
        "description": key,
        "fields": {
            "Path": str(log_file),
            "Size": f"{log_stat.st_size//1000} kb",
            "Last Updated": format_parsed_datetime(log_stat.st_mtime),
            # Last 20 lines taken from the final 10,000 characters.
            "Tail": "\n".join(log_text[-10_000:].split("\n")[-20:]),
            "Full Log": log_text,
        },
        "help_texts": {},
    }

    return ItemContext(
        slug=key,
        title=key,
        data=[section],
    )