mirror of https://github.com/ArchiveBox/ArchiveBox.git (synced 2026-04-06 07:47:53 +10:00)
move config into dedicated global app
archivebox/config/__init__.py (new file, 26 lines)
@@ -0,0 +1,26 @@
__package__ = 'archivebox.config'

from .constants import CONSTANTS, PACKAGE_DIR, DATA_DIR, ARCHIVE_DIR, VERSION
from .defaults import (
    SHELL_CONFIG,
    STORAGE_CONFIG,
    GENERAL_CONFIG,
    SERVER_CONFIG,
    ARCHIVING_CONFIG,
    SEARCH_BACKEND_CONFIG,
)


__all__ = [
    'CONSTANTS',
    'PACKAGE_DIR',
    'DATA_DIR',
    'ARCHIVE_DIR',
    'VERSION',
    'SHELL_CONFIG',
    'STORAGE_CONFIG',
    'GENERAL_CONFIG',
    'SERVER_CONFIG',
    'ARCHIVING_CONFIG',
    'SEARCH_BACKEND_CONFIG',
]
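With this facade in place, downstream modules can pull constants and config sets from a single import. A minimal usage sketch (editor's illustration, not part of the commit):

    # sketch: consuming the new archivebox.config facade
    from archivebox.config import CONSTANTS, VERSION, ARCHIVING_CONFIG

    print(f'ArchiveBox v{VERSION} storing snapshots in {CONSTANTS.ARCHIVE_DIR}')
    print(f'default extractor timeout: {ARCHIVING_CONFIG.TIMEOUT}s')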
archivebox/config/apps.py (new file, 58 lines)
@@ -0,0 +1,58 @@
__package__ = 'archivebox.config'

from typing import List
from pydantic import InstanceOf

from abx.archivebox.base_plugin import BasePlugin
from abx.archivebox.base_hook import BaseHook


from .constants import CONSTANTS, CONSTANTS_CONFIG, PACKAGE_DIR, DATA_DIR, ARCHIVE_DIR    # noqa
from .defaults import (
    ShellConfig,            # noqa: F401
    StorageConfig,          # noqa: F401
    GeneralConfig,          # noqa: F401
    ServerConfig,           # noqa: F401
    ArchivingConfig,        # noqa: F401
    SearchBackendConfig,    # noqa: F401
    SHELL_CONFIG,
    STORAGE_CONFIG,
    GENERAL_CONFIG,
    SERVER_CONFIG,
    ARCHIVING_CONFIG,
    SEARCH_BACKEND_CONFIG,
)

###################### Config ##########################


class ConfigPlugin(BasePlugin):
    app_label: str = 'CONFIG'
    verbose_name: str = 'Configuration'

    hooks: List[InstanceOf[BaseHook]] = [
        SHELL_CONFIG,
        GENERAL_CONFIG,
        STORAGE_CONFIG,
        SERVER_CONFIG,
        ARCHIVING_CONFIG,
        SEARCH_BACKEND_CONFIG,
    ]


PLUGIN = ConfigPlugin()
DJANGO_APP = PLUGIN.AppConfig


# # register django apps
# @abx.hookimpl
# def get_INSTALLED_APPS():
#     return [DJANGO_APP.name]

# # register configs
# @abx.hookimpl
# def register_CONFIG():
#     return PLUGIN.HOOKS_BY_TYPE['CONFIG'].values()
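For reference, the commented-out registration hooks above would look like this if re-enabled (a sketch assuming abx exposes a pluggy-style hookimpl decorator, as the comments imply):

    import abx   # assumed available; the commented code references abx.hookimpl

    @abx.hookimpl
    def get_INSTALLED_APPS():
        # register this plugin's Django app so settings.py can discover it
        return [DJANGO_APP.name]

    @abx.hookimpl
    def register_CONFIG():
        # expose this plugin's ConfigSet hooks to the global config registry
        return PLUGIN.HOOKS_BY_TYPE['CONFIG'].values()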
archivebox/config/check_for_update.py (new file, 47 lines)
@@ -0,0 +1,47 @@
# def get_versions_available_on_github(config):
#     """
#     returns a dictionary containing the ArchiveBox GitHub release info for
#     the recommended upgrade version and the currently installed version
#     """

#     # we only want to perform the (relatively expensive) check for new versions
#     # when its most relevant, e.g. when the user runs a long-running command
#     subcommand_run_by_user = sys.argv[3] if len(sys.argv) > 3 else 'help'
#     long_running_commands = ('add', 'schedule', 'update', 'status', 'server')
#     if subcommand_run_by_user not in long_running_commands:
#         return None

#     github_releases_api = "https://api.github.com/repos/ArchiveBox/ArchiveBox/releases"
#     response = requests.get(github_releases_api)
#     if response.status_code != 200:
#         stderr(f'[!] Warning: GitHub API call to check for new ArchiveBox version failed! (status={response.status_code})', color='lightyellow', config=config)
#         return None
#     all_releases = response.json()

#     installed_version = parse_version_string(config['VERSION'])

#     # find current version or nearest older version (to link to)
#     current_version = None
#     for idx, release in enumerate(all_releases):
#         release_version = parse_version_string(release['tag_name'])
#         if release_version <= installed_version:
#             current_version = release
#             break

#     current_version = current_version or all_releases[-1]

#     # recommended version is whatever comes after current_version in the release list
#     # (perhaps too conservative to only recommend upgrading one version at a time, but it's safest)
#     try:
#         recommended_version = all_releases[idx+1]
#     except IndexError:
#         recommended_version = None

#     return {'recommended_version': recommended_version, 'current_version': current_version}

# def can_upgrade(config):
#     if config['VERSIONS_AVAILABLE'] and config['VERSIONS_AVAILABLE']['recommended_version']:
#         recommended_version = parse_version_string(config['VERSIONS_AVAILABLE']['recommended_version']['tag_name'])
#         current_version = parse_version_string(config['VERSIONS_AVAILABLE']['current_version']['tag_name'])
#         return recommended_version > current_version
#     return False
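The commented-out logic above relies on a parse_version_string() helper that is not included in this diff. A minimal reconstruction might look like this (hypothetical, assuming plain 'vX.Y.Z' tag names):

    # hypothetical helper assumed by the commented-out code above; not part of this diff
    def parse_version_string(version: str) -> tuple:
        # parse a tag like 'v0.7.2' into a comparable (0, 7, 2) tuple
        base = version.strip().lstrip('v').split('+')[0]   # drop 'v' prefix and build metadata
        return tuple(int(part) for part in base.split('.')[:3])

    assert parse_version_string('v0.7.2') < parse_version_string('v0.8.0')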
archivebox/config/config_stubs.py (new file, 121 lines)
@@ -0,0 +1,121 @@
from pathlib import Path
from typing import Optional, Dict, Union, Tuple, Callable, Pattern, Type, Any, List
from mypy_extensions import TypedDict

from benedict import benedict

SimpleConfigValue = Union[str, bool, int, None, Pattern, Dict[str, Any]]
SimpleConfigValueDict = Dict[str, SimpleConfigValue]
SimpleConfigValueGetter = Callable[[], SimpleConfigValue]
ConfigValue = Union[SimpleConfigValue, SimpleConfigValueDict, SimpleConfigValueGetter]

# class AttrDict(dict):
#     def __init__(self, *args, **kwargs):
#         super().__init__(*args, **kwargs)
#         self.__dict__ = self
AttrDict = benedict    # https://github.com/fabiocaccamo/python-benedict/


class BaseConfig(TypedDict):
    pass

class ConfigDict(BaseConfig, AttrDict, total=False):
    """
    # Regenerate by pasting this quine into `archivebox shell` 🥚
    from archivebox.config import ConfigDict, CONFIG_DEFAULTS
    print('class ConfigDict(BaseConfig, total=False):')
    print('    ' + '"'*3 + ConfigDict.__doc__ + '"'*3)
    for section, configs in CONFIG_DEFAULTS.items():
        for key, attrs in configs.items():
            Type, default = attrs['type'], attrs['default']
            if default is None:
                print(f'    {key}: Optional[{Type.__name__}]')
            else:
                print(f'    {key}: {Type.__name__}')
        print()
    """

    IS_TTY: bool
    USE_COLOR: bool
    SHOW_PROGRESS: bool
    IN_DOCKER: bool

    PACKAGE_DIR: Path
    OUTPUT_DIR: Path
    CONFIG_FILE: Path
    ONLY_NEW: bool
    TIMEOUT: int
    MEDIA_TIMEOUT: int
    OUTPUT_PERMISSIONS: str
    RESTRICT_FILE_NAMES: str
    URL_DENYLIST: str

    SECRET_KEY: Optional[str]
    BIND_ADDR: str
    ALLOWED_HOSTS: str
    DEBUG: bool
    PUBLIC_INDEX: bool
    PUBLIC_SNAPSHOTS: bool
    FOOTER_INFO: str

    SAVE_TITLE: bool
    SAVE_FAVICON: bool
    SAVE_WGET: bool
    SAVE_WGET_REQUISITES: bool
    SAVE_SINGLEFILE: bool
    SAVE_READABILITY: bool
    SAVE_MERCURY: bool
    SAVE_PDF: bool
    SAVE_SCREENSHOT: bool
    SAVE_DOM: bool
    SAVE_WARC: bool
    SAVE_GIT: bool
    SAVE_MEDIA: bool
    SAVE_ARCHIVE_DOT_ORG: bool

    RESOLUTION: str
    GIT_DOMAINS: str
    CHECK_SSL_VALIDITY: bool
    CURL_USER_AGENT: str
    WGET_USER_AGENT: str
    CHROME_USER_AGENT: str
    COOKIES_FILE: Union[str, Path, None]
    CHROME_USER_DATA_DIR: Union[str, Path, None]
    CHROME_TIMEOUT: int
    CHROME_HEADLESS: bool
    CHROME_SANDBOX: bool

    USE_CURL: bool
    USE_WGET: bool
    USE_SINGLEFILE: bool
    USE_READABILITY: bool
    USE_MERCURY: bool
    USE_GIT: bool
    USE_CHROME: bool
    USE_YOUTUBEDL: bool
    CURL_BINARY: str
    GIT_BINARY: str
    WGET_BINARY: str
    SINGLEFILE_BINARY: str
    READABILITY_BINARY: str
    MERCURY_BINARY: str
    YOUTUBEDL_BINARY: str
    CHROME_BINARY: Optional[str]

    YOUTUBEDL_ARGS: List[str]
    WGET_ARGS: List[str]
    CURL_ARGS: List[str]
    GIT_ARGS: List[str]
    TAG_SEPARATOR_PATTERN: str


ConfigDefaultValueGetter = Callable[[ConfigDict], ConfigValue]
ConfigDefaultValue = Union[ConfigValue, ConfigDefaultValueGetter]

ConfigDefault = TypedDict('ConfigDefault', {
    'default': ConfigDefaultValue,
    'type': Optional[Type],
    'aliases': Optional[Tuple[str, ...]],
}, total=False)

ConfigDefaultDict = Dict[str, ConfigDefault]
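A schema entry built from these stubs combines a default, a type, and optional legacy aliases; for example (entries copied from the schema in legacy.py below):

    EXAMPLE_SCHEMA: ConfigDefaultDict = {
        'SAVE_TITLE': {'type': bool, 'default': True, 'aliases': ('FETCH_TITLE',)},
        'CURL_USER_AGENT': {'type': str, 'default': lambda c: c['USER_AGENT']},   # derived from another key
    }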
archivebox/config/constants.py (new file, 267 lines)
@@ -0,0 +1,267 @@
__package__ = 'archivebox.config'


import os
import re
from typing import Dict
from pathlib import Path
import importlib.metadata

from benedict import benedict

from ..misc.logging import DEFAULT_CLI_COLORS

###################### Config ##########################

PACKAGE_DIR = Path(__file__).resolve().parent.parent    # archivebox source code dir
DATA_DIR = Path(os.curdir).resolve()                    # archivebox user data dir
ARCHIVE_DIR = DATA_DIR / 'archive'                      # archivebox snapshot data dir

def _detect_installed_version():
    """Autodetect the installed archivebox version by using pip package metadata or pyproject.toml file"""
    try:
        return importlib.metadata.version(__package__ or 'archivebox')
    except importlib.metadata.PackageNotFoundError:
        try:
            pyproject_config = (PACKAGE_DIR / 'pyproject.toml').read_text()
            for line in pyproject_config.splitlines():   # iterate over lines, not characters
                if line.startswith('version = '):
                    return line.split(' = ', 1)[-1].strip('"')
        except FileNotFoundError:
            # building docs, pyproject.toml is not available
            return 'dev'

    raise Exception('Failed to detect installed archivebox version!')

VERSION = _detect_installed_version()
__version__ = VERSION


PACKAGE_DIR_NAME: str = PACKAGE_DIR.name
TEMPLATES_DIR_NAME: str = 'templates'
TEMPLATES_DIR: Path = PACKAGE_DIR / TEMPLATES_DIR_NAME
STATIC_DIR: Path = TEMPLATES_DIR / 'static'
USER_PLUGINS_DIR_NAME: str = 'user_plugins'
CUSTOM_TEMPLATES_DIR_NAME: str = 'user_templates'

ARCHIVE_DIR_NAME: str = 'archive'
SOURCES_DIR_NAME: str = 'sources'
PERSONAS_DIR_NAME: str = 'personas'
CRONTABS_DIR_NAME: str = 'crontabs'
CACHE_DIR_NAME: str = 'cache'
LOGS_DIR_NAME: str = 'logs'
LIB_DIR_NAME: str = 'lib'
TMP_DIR_NAME: str = 'tmp'

OUTPUT_DIR: Path = DATA_DIR
ARCHIVE_DIR: Path = DATA_DIR / ARCHIVE_DIR_NAME
SOURCES_DIR: Path = DATA_DIR / SOURCES_DIR_NAME
PERSONAS_DIR: Path = DATA_DIR / PERSONAS_DIR_NAME
CACHE_DIR: Path = DATA_DIR / CACHE_DIR_NAME
LOGS_DIR: Path = DATA_DIR / LOGS_DIR_NAME
LIB_DIR: Path = DATA_DIR / LIB_DIR_NAME
TMP_DIR: Path = DATA_DIR / TMP_DIR_NAME
CUSTOM_TEMPLATES_DIR: Path = DATA_DIR / CUSTOM_TEMPLATES_DIR_NAME
USER_PLUGINS_DIR: Path = DATA_DIR / USER_PLUGINS_DIR_NAME

LIB_PIP_DIR: Path = LIB_DIR / 'pip'
LIB_NPM_DIR: Path = LIB_DIR / 'npm'
LIB_BROWSERS_DIR: Path = LIB_DIR / 'browsers'
LIB_BIN_DIR: Path = LIB_DIR / 'bin'
BIN_DIR: Path = LIB_BIN_DIR

CONFIG_FILENAME: str = 'ArchiveBox.conf'
SQL_INDEX_FILENAME: str = 'index.sqlite3'

CONFIG_FILE: Path = DATA_DIR / CONFIG_FILENAME
DATABASE_FILE: Path = DATA_DIR / SQL_INDEX_FILENAME
QUEUE_DATABASE_FILE: Path = DATA_DIR / SQL_INDEX_FILENAME.replace('index.', 'queue.')

JSON_INDEX_FILENAME: str = 'index.json'
HTML_INDEX_FILENAME: str = 'index.html'
ROBOTS_TXT_FILENAME: str = 'robots.txt'
FAVICON_FILENAME: str = 'favicon.ico'

TIMEZONE: str = 'UTC'
DEFAULT_CLI_COLORS: Dict[str, str] = DEFAULT_CLI_COLORS
DISABLED_CLI_COLORS: Dict[str, str] = benedict({k: '' for k in DEFAULT_CLI_COLORS})

ALLOWDENYLIST_REGEX_FLAGS: int = re.IGNORECASE | re.UNICODE | re.MULTILINE

STATICFILE_EXTENSIONS: frozenset[str] = frozenset((
    # 99.999% of the time, URLs ending in these extensions are static files
    # that can be downloaded as-is, not html pages that need to be rendered
    'gif', 'jpeg', 'jpg', 'png', 'tif', 'tiff', 'wbmp', 'ico', 'jng', 'bmp',
    'svg', 'svgz', 'webp', 'ps', 'eps', 'ai',
    'mp3', 'mp4', 'm4a', 'mpeg', 'mpg', 'mkv', 'mov', 'webm', 'm4v',
    'flv', 'wmv', 'avi', 'ogg', 'ts', 'm3u8',
    'pdf', 'txt', 'rtf', 'rtfd', 'doc', 'docx', 'ppt', 'pptx', 'xls', 'xlsx',
    'atom', 'rss', 'css', 'js', 'json',
    'dmg', 'iso', 'img',
    'rar', 'war', 'hqx', 'zip', 'gz', 'bz2', '7z',

    # Less common extensions to consider adding later
    # jar, swf, bin, com, exe, dll, deb
    # ear, hqx, eot, wmlc, kml, kmz, cco, jardiff, jnlp, run, msi, msp, msm,
    # pl pm, prc pdb, rar, rpm, sea, sit, tcl tk, der, pem, crt, xpi, xspf,
    # ra, mng, asx, asf, 3gpp, 3gp, mid, midi, kar, jad, wml, htc, mml

    # These are always treated as pages, not as static files, never add them:
    # html, htm, shtml, xhtml, xml, aspx, php, cgi
))

IGNORED_PATHS: frozenset[str] = frozenset((
    ".git",
    ".svn",
    ".DS_Store",
    ".gitignore",
    "lost+found",
    ".env",
    "Dockerfile",
))
PIP_RELATED_NAMES: frozenset[str] = frozenset((
    ".venv",
    "venv",
    "virtualenv",
    ".virtualenv",
))
NPM_RELATED_NAMES: frozenset[str] = frozenset((
    "node_modules",
    "package.json",
    "package-lock.json",
    "yarn.lock",
))

DATA_DIR_NAMES: frozenset[str] = frozenset((
    ARCHIVE_DIR_NAME,
    SOURCES_DIR_NAME,
    LOGS_DIR_NAME,
    CACHE_DIR_NAME,
    LIB_DIR_NAME,
    PERSONAS_DIR_NAME,
    CUSTOM_TEMPLATES_DIR_NAME,
    USER_PLUGINS_DIR_NAME,
))
DATA_DIRS: frozenset[Path] = frozenset(DATA_DIR / dirname for dirname in DATA_DIR_NAMES)
DATA_FILE_NAMES: frozenset[str] = frozenset((
    CONFIG_FILENAME,
    f"{CONFIG_FILENAME}.bak",
    SQL_INDEX_FILENAME,
    f"{SQL_INDEX_FILENAME}-wal",
    f"{SQL_INDEX_FILENAME}-shm",
    "queue.sqlite3",
    "queue.sqlite3-wal",
    "queue.sqlite3-shm",
    "search.sqlite3",
    JSON_INDEX_FILENAME,
    HTML_INDEX_FILENAME,
    ROBOTS_TXT_FILENAME,
    FAVICON_FILENAME,
    "static_index.json",
))

# When initializing archivebox in a new directory, we check to make sure the dir is
# actually empty so that we don't clobber someone's home directory or desktop by accident.
# These files are exceptions to the is_empty check when we're trying to init a new dir,
# as they could be from a previous archivebox version, system artifacts, dependencies, etc.
ALLOWED_IN_OUTPUT_DIR: frozenset[str] = frozenset((
    *IGNORED_PATHS,
    *PIP_RELATED_NAMES,
    *NPM_RELATED_NAMES,
    *DATA_DIR_NAMES,
    *DATA_FILE_NAMES,
    "static",    # created by old static exports <v0.6.0
    "sonic",     # created by docker bind mount
))

CODE_LOCATIONS = benedict({
    'PACKAGE_DIR': {
        'path': (PACKAGE_DIR).resolve(),
        'enabled': True,
        'is_valid': (PACKAGE_DIR / '__main__.py').exists(),
    },
    'LIB_DIR': {
        'path': LIB_DIR.resolve(),
        'enabled': True,
        'is_valid': LIB_DIR.is_dir(),
    },
    'RUNTIME_CONFIG': {
        'path': TMP_DIR.resolve(),
        'enabled': True,
        'is_valid': TMP_DIR.is_dir(),
    },
    'TEMPLATES_DIR': {
        'path': TEMPLATES_DIR.resolve(),
        'enabled': True,
        'is_valid': STATIC_DIR.exists(),
    },
    'CUSTOM_TEMPLATES_DIR': {
        'path': CUSTOM_TEMPLATES_DIR.resolve(),
        'enabled': True,
        'is_valid': CUSTOM_TEMPLATES_DIR.is_dir(),
    },
})

DATA_LOCATIONS = benedict({
    "OUTPUT_DIR": {
        "path": DATA_DIR.resolve(),
        "enabled": True,
        "is_valid": DATABASE_FILE.exists(),
        "is_mount": os.path.ismount(DATA_DIR.resolve()),
    },
    "CONFIG_FILE": {
        "path": CONFIG_FILE.resolve(),
        "enabled": True,
        "is_valid": CONFIG_FILE.exists(),
    },
    "SQL_INDEX": {
        "path": DATABASE_FILE.resolve(),
        "enabled": True,
        "is_valid": DATABASE_FILE.exists(),
        "is_mount": os.path.ismount(DATABASE_FILE.resolve()),
    },
    "QUEUE_DATABASE": {
        "path": QUEUE_DATABASE_FILE.resolve(),
        "enabled": True,
        "is_valid": QUEUE_DATABASE_FILE.exists(),
        "is_mount": os.path.ismount(QUEUE_DATABASE_FILE.resolve()),
    },
    "ARCHIVE_DIR": {
        "path": ARCHIVE_DIR.resolve(),
        "enabled": True,
        "is_valid": ARCHIVE_DIR.exists(),
        "is_mount": os.path.ismount(ARCHIVE_DIR.resolve()),
    },
    "SOURCES_DIR": {
        "path": SOURCES_DIR.resolve(),
        "enabled": True,
        "is_valid": SOURCES_DIR.exists(),
    },
    "PERSONAS_DIR": {
        "path": PERSONAS_DIR.resolve(),
        "enabled": PERSONAS_DIR.exists(),
        "is_valid": PERSONAS_DIR.exists(),
    },
    "LOGS_DIR": {
        "path": LOGS_DIR.resolve(),
        "enabled": True,
        "is_valid": LOGS_DIR.is_dir(),
    },
    "CACHE_DIR": {
        "path": CACHE_DIR.resolve(),
        "enabled": True,
        "is_valid": CACHE_DIR.is_dir(),
    },
})



CONSTANTS = benedict({
    key: value
    for key, value in globals().items()
    if key.isupper() and not key.startswith('_')
})
CONSTANTS_CONFIG = CONSTANTS
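Because CONSTANTS is a benedict, every UPPERCASE module global collected above is reachable by key or by attribute:

    from archivebox.config.constants import CONSTANTS

    assert CONSTANTS['ARCHIVE_DIR'] == CONSTANTS.ARCHIVE_DIR   # benedict allows both styles
    print(CONSTANTS.CONFIG_FILENAME)                           # -> 'ArchiveBox.conf'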
archivebox/config/defaults.py (new file, 226 lines)
@@ -0,0 +1,226 @@
__package__ = 'archivebox.config'

import os
import sys
import shutil

from typing import ClassVar, Dict, Optional
from datetime import datetime
from pathlib import Path

from rich import print
from pydantic import Field, field_validator, model_validator, computed_field
from django.utils.crypto import get_random_string

from abx.archivebox.base_configset import BaseConfigSet, ConfigSectionName


from .constants import CONSTANTS, PACKAGE_DIR

###################### Config ##########################


class ShellConfig(BaseConfigSet):
    section: ClassVar[ConfigSectionName] = 'SHELL_CONFIG'

    DEBUG: bool = Field(default=lambda: '--debug' in sys.argv)

    IS_TTY: bool = Field(default=sys.stdout.isatty())
    USE_COLOR: bool = Field(default=lambda c: c.IS_TTY)
    SHOW_PROGRESS: bool = Field(default=lambda c: c.IS_TTY)

    IN_DOCKER: bool = Field(default=False)
    IN_QEMU: bool = Field(default=False)

    USER: str = Field(default=Path('~').expanduser().resolve().name)
    PUID: int = Field(default=os.getuid())
    PGID: int = Field(default=os.getgid())

    PYTHON_ENCODING: str = Field(default=(sys.__stdout__ or sys.stdout or sys.__stderr__ or sys.stderr).encoding.upper().replace('UTF8', 'UTF-8'))

    ANSI: Dict[str, str] = Field(default=lambda c: CONSTANTS.DEFAULT_CLI_COLORS if c.USE_COLOR else CONSTANTS.DISABLED_CLI_COLORS)

    VERSIONS_AVAILABLE: bool = False    # .check_for_update.get_versions_available_on_github(c)},
    CAN_UPGRADE: bool = False           # .check_for_update.can_upgrade(c)},


    @computed_field
    @property
    def TERM_WIDTH(self) -> int:
        return shutil.get_terminal_size((100, 10)).columns

    @computed_field
    @property
    def COMMIT_HASH(self) -> Optional[str]:
        try:
            git_dir = PACKAGE_DIR / '../.git'
            ref = (git_dir / 'HEAD').read_text().strip().split(' ')[-1]
            commit_hash = git_dir.joinpath(ref).read_text().strip()
            return commit_hash
        except Exception:
            pass

        try:
            return list((PACKAGE_DIR / '../.git/refs/heads/').glob('*'))[0].read_text().strip()
        except Exception:
            pass

        return None

    @computed_field
    @property
    def BUILD_TIME(self) -> str:
        if self.IN_DOCKER:
            docker_build_end_time = Path('/VERSION.txt').read_text().rsplit('BUILD_END_TIME=')[-1].split('\n', 1)[0]
            return docker_build_end_time

        src_last_modified_unix_timestamp = (PACKAGE_DIR / 'package.json').stat().st_mtime
        return datetime.fromtimestamp(src_last_modified_unix_timestamp).strftime('%Y-%m-%d %H:%M:%S %s')


    @model_validator(mode='after')
    def validate_not_running_as_root(self):
        attempted_command = ' '.join(sys.argv[:3])
        if self.PUID == 0 and attempted_command != 'setup':
            # stderr('[!] ArchiveBox should never be run as root!', color='red')
            # stderr('    For more information, see the security overview documentation:')
            # stderr('        https://github.com/ArchiveBox/ArchiveBox/wiki/Security-Overview#do-not-run-as-root')
            print('[red][!] ArchiveBox should never be run as root![/red]', file=sys.stderr)
            print('    For more information, see the security overview documentation:', file=sys.stderr)
            print('        https://github.com/ArchiveBox/ArchiveBox/wiki/Security-Overview#do-not-run-as-root', file=sys.stderr)

            if self.IN_DOCKER:
                print('[red][!] When using Docker, you must run commands with [green]docker run[/green] instead of [yellow3]docker exec[/yellow3], e.g.:', file=sys.stderr)
                print(f'        docker compose run archivebox {attempted_command}', file=sys.stderr)
                print(f'        docker run -it -v $PWD/data:/data archivebox/archivebox {attempted_command}', file=sys.stderr)
                print('        or:', file=sys.stderr)
                print(f'        docker compose exec --user=archivebox archivebox /bin/bash -c "archivebox {attempted_command}"', file=sys.stderr)
                print(f'        docker exec -it --user=archivebox <container id> /bin/bash -c "archivebox {attempted_command}"', file=sys.stderr)
            raise SystemExit(2)

        # check python locale
        if self.PYTHON_ENCODING != 'UTF-8':
            print(f'[red][X] Your system is running python3 scripts with a bad locale setting: {self.PYTHON_ENCODING} (it should be UTF-8).[/red]', file=sys.stderr)
            print('    To fix it, add the line "export PYTHONIOENCODING=UTF-8" to your ~/.bashrc file (without quotes)', file=sys.stderr)
            print('    Or if you\'re using ubuntu/debian, run "dpkg-reconfigure locales"', file=sys.stderr)
            print('')
            print('    Confirm that it\'s fixed by opening a new shell and running:', file=sys.stderr)
            print('        python3 -c "import sys; print(sys.stdout.encoding)"   # should output UTF-8', file=sys.stderr)
            raise SystemExit(2)

        return self

SHELL_CONFIG = ShellConfig()


class StorageConfig(BaseConfigSet):
    section: ClassVar[ConfigSectionName] = 'STORAGE_CONFIG'

    OUTPUT_PERMISSIONS: str = Field(default='644')
    RESTRICT_FILE_NAMES: str = Field(default='windows')
    ENFORCE_ATOMIC_WRITES: bool = Field(default=True)

    # not supposed to be user settable:
    DIR_OUTPUT_PERMISSIONS: str = Field(default=lambda c: c['OUTPUT_PERMISSIONS'].replace('6', '7').replace('4', '5'))


STORAGE_CONFIG = StorageConfig()


class GeneralConfig(BaseConfigSet):
    section: ClassVar[ConfigSectionName] = 'GENERAL_CONFIG'

    TAG_SEPARATOR_PATTERN: str = Field(default=r'[,]')


GENERAL_CONFIG = GeneralConfig()


class ServerConfig(BaseConfigSet):
    section: ClassVar[ConfigSectionName] = 'SERVER_CONFIG'

    SECRET_KEY: str = Field(default=lambda: get_random_string(50, 'abcdefghijklmnopqrstuvwxyz0123456789_'))
    BIND_ADDR: str = Field(default=lambda: ['127.0.0.1:8000', '0.0.0.0:8000'][SHELL_CONFIG.IN_DOCKER])
    ALLOWED_HOSTS: str = Field(default='*')
    CSRF_TRUSTED_ORIGINS: str = Field(default=lambda c: 'http://localhost:8000,http://127.0.0.1:8000,http://0.0.0.0:8000,http://{}'.format(c.BIND_ADDR))

    SNAPSHOTS_PER_PAGE: int = Field(default=40)
    FOOTER_INFO: str = Field(default='Content is hosted for personal archiving purposes only. Contact server owner for any takedown requests.')
    CUSTOM_TEMPLATES_DIR: Path = Field(default=None)

    PUBLIC_INDEX: bool = Field(default=True)
    PUBLIC_SNAPSHOTS: bool = Field(default=True)
    PUBLIC_ADD_VIEW: bool = Field(default=False)

    ADMIN_USERNAME: str = Field(default=None)
    ADMIN_PASSWORD: str = Field(default=None)
    REVERSE_PROXY_USER_HEADER: str = Field(default='Remote-User')
    REVERSE_PROXY_WHITELIST: str = Field(default='')
    LOGOUT_REDIRECT_URL: str = Field(default='/')
    PREVIEW_ORIGINALS: bool = Field(default=True)

SERVER_CONFIG = ServerConfig()


class ArchivingConfig(BaseConfigSet):
    section: ClassVar[ConfigSectionName] = 'ARCHIVING_CONFIG'

    ONLY_NEW: bool = Field(default=True)

    TIMEOUT: int = Field(default=60)
    MEDIA_TIMEOUT: int = Field(default=3600)

    MEDIA_MAX_SIZE: str = Field(default='750m')
    RESOLUTION: str = Field(default='1440,2000')
    CHECK_SSL_VALIDITY: bool = Field(default=True)
    USER_AGENT: str = Field(default='Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/128.0.0.0 Safari/537.36 ArchiveBox/{VERSION} (+https://github.com/ArchiveBox/ArchiveBox/)')
    COOKIES_FILE: Path | None = Field(default=None)

    URL_DENYLIST: str = Field(default=r'\.(css|js|otf|ttf|woff|woff2|gstatic\.com|googleapis\.com/css)(\?.*)?$', alias='URL_BLACKLIST')
    URL_ALLOWLIST: str | None = Field(default=None, alias='URL_WHITELIST')

    # GIT_DOMAINS: str = Field(default='github.com,bitbucket.org,gitlab.com,gist.github.com,codeberg.org,gitea.com,git.sr.ht')
    # WGET_USER_AGENT: str = Field(default=lambda c: c['USER_AGENT'] + ' wget/{WGET_VERSION}')
    # CURL_USER_AGENT: str = Field(default=lambda c: c['USER_AGENT'] + ' curl/{CURL_VERSION}')
    # CHROME_USER_AGENT: str = Field(default=lambda c: c['USER_AGENT'])
    # CHROME_USER_DATA_DIR: str | None = Field(default=None)
    # CHROME_TIMEOUT: int = Field(default=0)
    # CHROME_HEADLESS: bool = Field(default=True)
    # CHROME_SANDBOX: bool = Field(default=lambda: not SHELL_CONFIG.IN_DOCKER)

    @field_validator('TIMEOUT', mode='after')
    def validate_timeout(cls, v):
        if int(v) < 5:    # guard added: only warn when the configured timeout is actually too low
            print(f'[red][!] Warning: TIMEOUT is set too low! (currently set to TIMEOUT={v} seconds)[/red]', file=sys.stderr)
            print('    You must allow *at least* 5 seconds for indexing and archive methods to run successfully.', file=sys.stderr)
            print('    (Setting it to somewhere between 30 and 3000 seconds is recommended)', file=sys.stderr)
            print(file=sys.stderr)
            print('    If you want to make ArchiveBox run faster, disable specific archive methods instead:', file=sys.stderr)
            print('        https://github.com/ArchiveBox/ArchiveBox/wiki/Configuration#archive-method-toggles', file=sys.stderr)
            print(file=sys.stderr)
        return v

    @field_validator('CHECK_SSL_VALIDITY', mode='after')
    def validate_check_ssl_validity(cls, v):
        """SIDE EFFECT: disable "you really shouldnt disable ssl" warnings emitted by requests"""
        if not v:
            import requests
            import urllib3
            requests.packages.urllib3.disable_warnings(requests.packages.urllib3.exceptions.InsecureRequestWarning)
            urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
        return v

ARCHIVING_CONFIG = ArchivingConfig()


class SearchBackendConfig(BaseConfigSet):
    section: ClassVar[ConfigSectionName] = 'SEARCH_BACKEND_CONFIG'

    USE_INDEXING_BACKEND: bool = Field(default=True)
    USE_SEARCHING_BACKEND: bool = Field(default=True)

    SEARCH_BACKEND_ENGINE: str = Field(default='ripgrep')
    SEARCH_PROCESS_HTML: bool = Field(default=True)
    SEARCH_BACKEND_TIMEOUT: int = Field(default=10)

SEARCH_BACKEND_CONFIG = SearchBackendConfig()
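Each BaseConfigSet above resolves its values through the usual precedence chain (CLI args, environment variables, ArchiveBox.conf, then the Field defaults). A hedged sketch, assuming BaseConfigSet consults os.environ on instantiation the same way the legacy loader does:

    import os

    os.environ['MEDIA_TIMEOUT'] = '600'
    config = ArchivingConfig()
    assert config.MEDIA_TIMEOUT == 600    # assumed: env value coerced to int, wins over Field(default=3600)
    assert config.ONLY_NEW is True        # untouched keys keep their defaults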
archivebox/config/legacy.py (new file, 883 lines)
@@ -0,0 +1,883 @@
"""
ArchiveBox config definitions (including defaults and dynamic config options).

Config Usage Example:

    archivebox config --set MEDIA_TIMEOUT=600
    env MEDIA_TIMEOUT=600 USE_COLOR=False ... archivebox [subcommand] ...

Config Precedence Order:

    1. cli args                 (--update-all / --index-only / etc.)
    2. shell environment vars   (env USE_COLOR=False archivebox add '...')
    3. config file              (echo "SAVE_FAVICON=False" >> ArchiveBox.conf)
    4. defaults                 (defined below in Python)

Documentation:

    https://github.com/ArchiveBox/ArchiveBox/wiki/Configuration

"""

__package__ = 'archivebox.config'

import os
import io
import re
import sys
import json
import shutil

from hashlib import md5
from pathlib import Path
from datetime import datetime, timezone
from typing import Optional, Type, Tuple, Dict
from subprocess import run, PIPE, DEVNULL, STDOUT, TimeoutExpired
from configparser import ConfigParser

from rich.progress import Progress
from rich.console import Console
from benedict import benedict
from pydantic_pkgr import SemVer

import django
from django.db.backends.sqlite3.base import Database as sqlite3


from .constants import CONSTANTS, TIMEZONE, OUTPUT_DIR
from .constants import *
from .config_stubs import (
    ConfigValue,
    ConfigDefaultValue,
    ConfigDefaultDict,
)
from ..misc.logging import (
    stderr,
    hint,    # noqa
)

from .defaults import SHELL_CONFIG, GENERAL_CONFIG, ARCHIVING_CONFIG, SERVER_CONFIG, SEARCH_BACKEND_CONFIG, STORAGE_CONFIG
from ..plugins_auth.ldap.apps import LDAP_CONFIG
from ..plugins_extractor.favicon.apps import FAVICON_CONFIG

ANSI = SHELL_CONFIG.ANSI
LDAP = LDAP_CONFIG.LDAP_ENABLED

############################### Config Schema ##################################

CONFIG_SCHEMA: Dict[str, ConfigDefaultDict] = {
    'SHELL_CONFIG': SHELL_CONFIG.as_legacy_config_schema(),

    'SERVER_CONFIG': SERVER_CONFIG.as_legacy_config_schema(),

    'GENERAL_CONFIG': GENERAL_CONFIG.as_legacy_config_schema(),

    'ARCHIVING_CONFIG': ARCHIVING_CONFIG.as_legacy_config_schema(),

    'SEARCH_BACKEND_CONFIG': SEARCH_BACKEND_CONFIG.as_legacy_config_schema(),

    'STORAGE_CONFIG': STORAGE_CONFIG.as_legacy_config_schema(),

    'LDAP_CONFIG': LDAP_CONFIG.as_legacy_config_schema(),

    'FAVICON_CONFIG': FAVICON_CONFIG.as_legacy_config_schema(),


    'ARCHIVE_METHOD_TOGGLES': {
        'SAVE_TITLE': {'type': bool, 'default': True, 'aliases': ('FETCH_TITLE',)},
        'SAVE_FAVICON': {'type': bool, 'default': True, 'aliases': ('FETCH_FAVICON',)},
        'SAVE_WGET': {'type': bool, 'default': True, 'aliases': ('FETCH_WGET',)},
        'SAVE_WGET_REQUISITES': {'type': bool, 'default': True, 'aliases': ('FETCH_WGET_REQUISITES',)},
        'SAVE_SINGLEFILE': {'type': bool, 'default': True, 'aliases': ('FETCH_SINGLEFILE',)},
        'SAVE_READABILITY': {'type': bool, 'default': True, 'aliases': ('FETCH_READABILITY',)},
        'SAVE_MERCURY': {'type': bool, 'default': True, 'aliases': ('FETCH_MERCURY',)},
        'SAVE_HTMLTOTEXT': {'type': bool, 'default': True, 'aliases': ('FETCH_HTMLTOTEXT',)},
        'SAVE_PDF': {'type': bool, 'default': True, 'aliases': ('FETCH_PDF',)},
        'SAVE_SCREENSHOT': {'type': bool, 'default': True, 'aliases': ('FETCH_SCREENSHOT',)},
        'SAVE_DOM': {'type': bool, 'default': True, 'aliases': ('FETCH_DOM',)},
        'SAVE_HEADERS': {'type': bool, 'default': True, 'aliases': ('FETCH_HEADERS',)},
        'SAVE_WARC': {'type': bool, 'default': True, 'aliases': ('FETCH_WARC',)},
        'SAVE_GIT': {'type': bool, 'default': True, 'aliases': ('FETCH_GIT',)},
        'SAVE_MEDIA': {'type': bool, 'default': True, 'aliases': ('FETCH_MEDIA',)},
        'SAVE_ARCHIVE_DOT_ORG': {'type': bool, 'default': True, 'aliases': ('SUBMIT_ARCHIVE_DOT_ORG',)},
        'SAVE_ALLOWLIST': {'type': dict, 'default': {},},
        'SAVE_DENYLIST': {'type': dict, 'default': {},},
    },

    'ARCHIVE_METHOD_OPTIONS': {
        'RESOLUTION': {'type': str, 'default': '1440,2000', 'aliases': ('SCREENSHOT_RESOLUTION', 'WINDOW_SIZE')},
        'GIT_DOMAINS': {'type': str, 'default': 'github.com,bitbucket.org,gitlab.com,gist.github.com,codeberg.org,gitea.com,git.sr.ht'},
        'CHECK_SSL_VALIDITY': {'type': bool, 'default': True},
        'MEDIA_MAX_SIZE': {'type': str, 'default': '750m'},

        'USER_AGENT': {'type': str, 'default': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/128.0.0.0 Safari/537.36 ArchiveBox/{VERSION} (+https://github.com/ArchiveBox/ArchiveBox/)'},
        'CURL_USER_AGENT': {'type': str, 'default': lambda c: c['USER_AGENT']},    # + ' curl/{CURL_VERSION}'},
        'WGET_USER_AGENT': {'type': str, 'default': lambda c: c['USER_AGENT']},    # + ' wget/{WGET_VERSION}'},

        'COOKIES_FILE': {'type': str, 'default': None},

        'YOUTUBEDL_ARGS': {'type': list, 'default': lambda c: [
            '--restrict-filenames',
            '--trim-filenames', '128',
            '--write-description',
            '--write-info-json',
            '--write-annotations',
            '--write-thumbnail',
            '--no-call-home',
            '--write-sub',
            '--write-auto-subs',
            '--convert-subs=srt',
            '--yes-playlist',
            '--continue',
            # This flag doesn't exist in youtube-dl, only in yt-dlp
            '--no-abort-on-error',
            # --ignore-errors must come AFTER --no-abort-on-error
            # https://github.com/yt-dlp/yt-dlp/issues/4914
            '--ignore-errors',
            '--geo-bypass',
            '--add-metadata',
            '--format=(bv*+ba/b)[filesize<={}][filesize_approx<=?{}]/(bv*+ba/b)'.format(c['MEDIA_MAX_SIZE'], c['MEDIA_MAX_SIZE']),
        ]},
        'YOUTUBEDL_EXTRA_ARGS': {'type': list, 'default': None},


        'WGET_ARGS': {'type': list, 'default': [
            '--no-verbose',
            '--adjust-extension',
            '--convert-links',
            '--force-directories',
            '--backup-converted',
            '--span-hosts',
            '--no-parent',
            '-e', 'robots=off',
        ]},
        'WGET_EXTRA_ARGS': {'type': list, 'default': None},
        'CURL_ARGS': {'type': list, 'default': [
            '--silent',
            '--location',
            '--compressed',
        ]},
        'CURL_EXTRA_ARGS': {'type': list, 'default': None},
        'GIT_ARGS': {'type': list, 'default': ['--recursive']},
        'SINGLEFILE_ARGS': {'type': list, 'default': None},
        'SINGLEFILE_EXTRA_ARGS': {'type': list, 'default': None},
        'MERCURY_ARGS': {'type': list, 'default': ['--format=text']},
        'MERCURY_EXTRA_ARGS': {'type': list, 'default': None},
    },

    'DEPENDENCY_CONFIG': {
        'USE_CURL': {'type': bool, 'default': True},
        'USE_WGET': {'type': bool, 'default': True},
        'USE_SINGLEFILE': {'type': bool, 'default': True},
        'USE_READABILITY': {'type': bool, 'default': True},
        'USE_MERCURY': {'type': bool, 'default': True},
        'USE_GIT': {'type': bool, 'default': True},
        'USE_CHROME': {'type': bool, 'default': True},
        'USE_YOUTUBEDL': {'type': bool, 'default': True},
        'USE_RIPGREP': {'type': bool, 'default': True},

        'CURL_BINARY': {'type': str, 'default': 'curl'},
        'GIT_BINARY': {'type': str, 'default': 'git'},
        'WGET_BINARY': {'type': str, 'default': 'wget'},    # also can accept wget2
        'MERCURY_BINARY': {'type': str, 'default': lambda c: bin_path('postlight-parser')},
        'NODE_BINARY': {'type': str, 'default': 'node'},
        # 'YOUTUBEDL_BINARY': {'type': str, 'default': 'yt-dlp'},    # also can accept youtube-dl
        # 'SINGLEFILE_BINARY': {'type': str, 'default': lambda c: bin_path('single-file')},
        # 'READABILITY_BINARY': {'type': str, 'default': lambda c: bin_path('readability-extractor')},
        # 'RIPGREP_BINARY': {'type': str, 'default': 'rg'},

        'POCKET_CONSUMER_KEY': {'type': str, 'default': None},
        'POCKET_ACCESS_TOKENS': {'type': dict, 'default': {}},

        'READWISE_READER_TOKENS': {'type': dict, 'default': {}},
    },
}


########################## Backwards-Compatibility #############################


# for backwards compatibility with old config files, check old/deprecated names for each key
CONFIG_ALIASES = {
    alias: key
    for section in CONFIG_SCHEMA.values()
    for key, default in section.items()
    for alias in default.get('aliases', ())
}
USER_CONFIG = {key: section[key] for section in CONFIG_SCHEMA.values() for key in section.keys()}

def get_real_name(key: str) -> str:
    """get the current canonical name for a given deprecated config key"""
    return CONFIG_ALIASES.get(key.upper().strip(), key.upper().strip())
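For example, a deprecated key from an old ArchiveBox.conf resolves to its canonical modern name:

    assert get_real_name('FETCH_TITLE') == 'SAVE_TITLE'    # deprecated alias -> canonical key
    assert get_real_name(' save_media ') == 'SAVE_MEDIA'   # case and whitespace are normalized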


# These are derived/computed values calculated *after* all user-provided config values are ingested
# they appear in `archivebox config` output and are intended to be read-only for the user
DYNAMIC_CONFIG_SCHEMA: ConfigDefaultDict = {
    'PACKAGE_DIR':          {'default': lambda c: CONSTANTS.PACKAGE_DIR.resolve()},
    'TEMPLATES_DIR':        {'default': lambda c: c['PACKAGE_DIR'] / CONSTANTS.TEMPLATES_DIR_NAME},
    'CUSTOM_TEMPLATES_DIR': {'default': lambda c: c['CUSTOM_TEMPLATES_DIR'] and Path(c['CUSTOM_TEMPLATES_DIR'])},


    'URL_DENYLIST_PTN':     {'default': lambda c: c['URL_DENYLIST'] and re.compile(c['URL_DENYLIST'] or '', CONSTANTS.ALLOWDENYLIST_REGEX_FLAGS)},
    'URL_ALLOWLIST_PTN':    {'default': lambda c: c['URL_ALLOWLIST'] and re.compile(c['URL_ALLOWLIST'] or '', CONSTANTS.ALLOWDENYLIST_REGEX_FLAGS)},


    'USE_CURL':             {'default': lambda c: c['USE_CURL'] and (c['SAVE_FAVICON'] or c['SAVE_TITLE'] or c['SAVE_ARCHIVE_DOT_ORG'])},
    'CURL_VERSION':         {'default': lambda c: bin_version(c['CURL_BINARY']) if c['USE_CURL'] else None},
    # 'CURL_USER_AGENT':    {'default': lambda c: c['CURL_USER_AGENT'].format(**c)},
    'CURL_ARGS':            {'default': lambda c: c['CURL_ARGS'] or []},
    'CURL_EXTRA_ARGS':      {'default': lambda c: c['CURL_EXTRA_ARGS'] or []},
    'SAVE_FAVICON':         {'default': lambda c: c['USE_CURL'] and c['SAVE_FAVICON']},
    'SAVE_ARCHIVE_DOT_ORG': {'default': lambda c: c['USE_CURL'] and c['SAVE_ARCHIVE_DOT_ORG']},

    'USE_WGET':             {'default': lambda c: c['USE_WGET'] and (c['SAVE_WGET'] or c['SAVE_WARC'])},
    'WGET_VERSION':         {'default': lambda c: bin_version(c['WGET_BINARY']) if c['USE_WGET'] else None},
    'WGET_AUTO_COMPRESSION': {'default': lambda c: wget_supports_compression(c) if c['USE_WGET'] else False},
    # 'WGET_USER_AGENT':    {'default': lambda c: c['WGET_USER_AGENT'].format(**c)},
    'SAVE_WGET':            {'default': lambda c: c['USE_WGET'] and c['SAVE_WGET']},
    'SAVE_WARC':            {'default': lambda c: c['USE_WGET'] and c['SAVE_WARC']},
    'WGET_ARGS':            {'default': lambda c: c['WGET_ARGS'] or []},
    'WGET_EXTRA_ARGS':      {'default': lambda c: c['WGET_EXTRA_ARGS'] or []},

    'USE_MERCURY':          {'default': lambda c: c['USE_MERCURY'] and c['SAVE_MERCURY']},
    'SAVE_MERCURY':         {'default': lambda c: c['USE_MERCURY']},
    'MERCURY_VERSION':      {'default': lambda c: '1.0.0' if shutil.which(str(bin_path(c['MERCURY_BINARY']))) else None},    # mercury doesnt expose version info until this is merged https://github.com/postlight/parser/pull/750
    'MERCURY_ARGS':         {'default': lambda c: c['MERCURY_ARGS'] or []},
    'MERCURY_EXTRA_ARGS':   {'default': lambda c: c['MERCURY_EXTRA_ARGS'] or []},

    'USE_GIT':              {'default': lambda c: c['USE_GIT'] and c['SAVE_GIT']},
    'GIT_VERSION':          {'default': lambda c: bin_version(c['GIT_BINARY']) if c['USE_GIT'] else None},
    'SAVE_GIT':             {'default': lambda c: c['USE_GIT'] and c['SAVE_GIT']},


    'DEPENDENCIES':         {'default': lambda c: get_dependency_info(c)},
    # 'CODE_LOCATIONS':     {'default': lambda c: get_code_locations(c)},
    # 'DATA_LOCATIONS':     {'default': lambda c: get_data_locations(c)},

    'SAVE_ALLOWLIST_PTN':   {'default': lambda c: c['SAVE_ALLOWLIST'] and {re.compile(k, CONSTANTS.ALLOWDENYLIST_REGEX_FLAGS): v for k, v in c['SAVE_ALLOWLIST'].items()}},
    'SAVE_DENYLIST_PTN':    {'default': lambda c: c['SAVE_DENYLIST'] and {re.compile(k, CONSTANTS.ALLOWDENYLIST_REGEX_FLAGS): v for k, v in c['SAVE_DENYLIST'].items()}},
}


# print("FINISHED DEFINING SCHEMAS")

################################### Helpers ####################################


def load_config_val(key: str,
                    default: ConfigDefaultValue=None,
                    type: Optional[Type]=None,
                    aliases: Optional[Tuple[str, ...]]=None,
                    config: Optional[benedict]=None,
                    env_vars: Optional[os._Environ]=None,
                    config_file_vars: Optional[Dict[str, str]]=None) -> ConfigValue:
    """parse bool, int, and str key=value pairs from env"""

    assert isinstance(config, dict)

    is_read_only = type is None
    if is_read_only:
        if callable(default):
            return default(config)
        return default

    # get value from environment variables or config files
    config_keys_to_check = (key, *(aliases or ()))
    val = None
    for key in config_keys_to_check:
        if env_vars:
            val = env_vars.get(key)
            if val:
                break

        if config_file_vars:
            val = config_file_vars.get(key)
            if val:
                break

    is_unset = val is None
    if is_unset:
        if callable(default):
            return default(config)
        return default

    # calculate value based on expected type
    BOOL_TRUEIES = ('true', 'yes', '1')
    BOOL_FALSEIES = ('false', 'no', '0')

    if type is bool:
        if val.lower() in BOOL_TRUEIES:
            return True
        elif val.lower() in BOOL_FALSEIES:
            return False
        else:
            raise ValueError(f'Invalid configuration option {key}={val} (expected a boolean: True/False)')

    elif type is str:
        if val.lower() in (*BOOL_TRUEIES, *BOOL_FALSEIES):
            raise ValueError(f'Invalid configuration option {key}={val} (expected a string, but value looks like a boolean)')
        return val.strip()

    elif type is int:
        if not val.strip().isdigit():
            raise ValueError(f'Invalid configuration option {key}={val} (expected an integer)')
        return int(val.strip())

    elif type is list or type is dict:
        return json.loads(val)

    raise Exception('Config values can only be str, bool, int, or json')
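load_config_val() is the workhorse that coerces raw strings from the environment or config file into typed values, e.g.:

    val = load_config_val('TIMEOUT', default=60, type=int,
                          config=benedict({}), env_vars={'TIMEOUT': '120'})
    assert val == 120   # the string '120' is validated and parsed as an int
    # booleans are parsed strictly: only 'true'/'yes'/'1' and 'false'/'no'/'0' are accepted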


def load_config_file(out_dir: str | None=CONSTANTS.DATA_DIR) -> Optional[benedict]:
    """load the ini-formatted config file from OUTPUT_DIR/ArchiveBox.conf"""

    config_path = CONSTANTS.CONFIG_FILE
    if config_path.exists():
        config_file = ConfigParser()
        config_file.optionxform = str
        config_file.read(config_path)
        # flatten into one namespace
        config_file_vars = benedict({
            key.upper(): val
            for section, options in config_file.items()
            for key, val in options.items()
        })
        # print('[i] Loaded config file', os.path.abspath(config_path))
        # print(config_file_vars)
        return config_file_vars
    return None


def write_config_file(config: Dict[str, str], out_dir: str | None=CONSTANTS.DATA_DIR) -> benedict:
    """write the given key=value pairs into the ini-formatted config file at OUTPUT_DIR/ArchiveBox.conf"""

    from ..system import atomic_write

    CONFIG_HEADER = (
        """# This is the config file for your ArchiveBox collection.
#
# You can add options here manually in INI format, or automatically by running:
#    archivebox config --set KEY=VALUE
#
# If you modify this file manually, make sure to update your archive after by running:
#    archivebox init
#
# A list of all possible config with documentation and examples can be found here:
#    https://github.com/ArchiveBox/ArchiveBox/wiki/Configuration

""")

    config_path = CONSTANTS.CONFIG_FILE

    if not config_path.exists():
        atomic_write(config_path, CONFIG_HEADER)

    config_file = ConfigParser()
    config_file.optionxform = str
    config_file.read(config_path)

    with open(config_path, 'r', encoding='utf-8') as old:
        atomic_write(f'{config_path}.bak', old.read())

    find_section = lambda key: [name for name, opts in CONFIG_SCHEMA.items() if key in opts][0]

    # Set up sections in empty config file
    for key, val in config.items():
        section = find_section(key)
        if section in config_file:
            existing_config = dict(config_file[section])
        else:
            existing_config = {}
        config_file[section] = benedict({**existing_config, key: val})

    # always make sure there's a SECRET_KEY defined for Django
    existing_secret_key = None
    if 'SERVER_CONFIG' in config_file and 'SECRET_KEY' in config_file['SERVER_CONFIG']:
        existing_secret_key = config_file['SERVER_CONFIG']['SECRET_KEY']

    if (not existing_secret_key) or ('not a valid secret' in existing_secret_key):
        from django.utils.crypto import get_random_string
        chars = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_'
        random_secret_key = get_random_string(50, chars)
        if 'SERVER_CONFIG' in config_file:
            config_file['SERVER_CONFIG']['SECRET_KEY'] = random_secret_key
        else:
            config_file['SERVER_CONFIG'] = {'SECRET_KEY': random_secret_key}

    with open(config_path, 'w+', encoding='utf-8') as new:
        config_file.write(new)

    try:
        # validate the config by attempting to re-parse it
        CONFIG = load_all_config()
    except BaseException:    # lgtm [py/catch-base-exception]
        # something went horribly wrong, revert to the previous version
        with open(f'{config_path}.bak', 'r', encoding='utf-8') as old:
            atomic_write(config_path, old.read())

        raise

    if Path(f'{config_path}.bak').exists():
        os.remove(f'{config_path}.bak')

    return benedict({
        key.upper(): CONFIG.get(key.upper())
        for key in config.keys()
    })
|
||||
|
||||
|
||||
def load_config(defaults: ConfigDefaultDict,
|
||||
config: Optional[benedict]=None,
|
||||
out_dir: Optional[str]=None,
|
||||
env_vars: Optional[os._Environ]=None,
|
||||
config_file_vars: Optional[Dict[str, str]]=None) -> benedict:
|
||||
|
||||
env_vars = env_vars or os.environ
|
||||
config_file_vars = config_file_vars or load_config_file(out_dir=out_dir)
|
||||
|
||||
extended_config = benedict(config.copy() if config else {})
|
||||
for key, default in defaults.items():
|
||||
try:
|
||||
# print('LOADING CONFIG KEY:', key, 'DEFAULT=', default)
|
||||
extended_config[key] = load_config_val(
|
||||
key,
|
||||
default=default['default'],
|
||||
type=default.get('type'),
|
||||
aliases=default.get('aliases'),
|
||||
config=extended_config,
|
||||
env_vars=env_vars,
|
||||
config_file_vars=config_file_vars,
|
||||
)
|
||||
except KeyboardInterrupt:
|
||||
raise SystemExit(0)
|
||||
except Exception as e:
|
||||
stderr()
|
||||
stderr(f'[X] Error while loading configuration value: {key}', color='red', config=extended_config)
|
||||
stderr(' {}: {}'.format(e.__class__.__name__, e))
|
||||
stderr()
|
||||
stderr(' Check your config for mistakes and try again (your archive data is unaffected).')
|
||||
stderr()
|
||||
stderr(' For config documentation and examples see:')
|
||||
stderr(' https://github.com/ArchiveBox/ArchiveBox/wiki/Configuration')
|
||||
stderr()
|
||||
# raise
|
||||
# raise SystemExit(2)
|
||||
|
||||
return benedict(extended_config)
|
||||
|
||||
|
||||
|
||||
# Dependency Metadata Helpers
|
||||
def bin_version(binary: Optional[str], cmd: Optional[str]=None, timeout: int=3) -> Optional[str]:
|
||||
"""check the presence and return valid version line of a specified binary"""
|
||||
|
||||
abspath = bin_path(binary)
|
||||
if not binary or not abspath:
|
||||
return None
|
||||
|
||||
return '999.999.999'
|
||||
|
||||
# Now handled by new BinProvider plugin system, no longer needed:
|
||||
|
||||
try:
|
||||
bin_env = os.environ | {'LANG': 'C'}
|
||||
is_cmd_str = cmd and isinstance(cmd, str)
|
||||
version_str = (
|
||||
run(cmd or [abspath, "--version"], timeout=timeout, shell=is_cmd_str, stdout=PIPE, stderr=STDOUT, env=bin_env)
|
||||
.stdout.strip()
|
||||
.decode()
|
||||
)
|
||||
if not version_str:
|
||||
version_str = (
|
||||
run(cmd or [abspath, "--version"], timeout=timeout, shell=is_cmd_str, stdout=PIPE, stderr=STDOUT)
|
||||
.stdout.strip()
|
||||
.decode()
|
||||
)
|
||||
|
||||
# take first 3 columns of first line of version info
|
||||
semver = SemVer.parse(version_str)
|
||||
if semver:
|
||||
return str(semver)
|
||||
except (OSError, TimeoutExpired):
|
||||
pass
|
||||
# stderr(f'[X] Unable to find working version of dependency: {binary}', color='red')
|
||||
# stderr(' Make sure it\'s installed, then confirm it\'s working by running:')
|
||||
# stderr(f' {binary} --version')
|
||||
# stderr()
|
||||
# stderr(' If you don\'t want to install it, you can disable it via config. See here for more info:')
|
||||
# stderr(' https://github.com/ArchiveBox/ArchiveBox/wiki/Install')
|
||||
return None
|
||||
|
||||
def bin_path(binary: Optional[str]) -> Optional[str]:
|
||||
if binary is None:
|
||||
return None
|
||||
|
||||
node_modules_bin = Path('.') / 'node_modules' / '.bin' / binary
|
||||
if node_modules_bin.exists():
|
||||
return str(node_modules_bin.resolve())
|
||||
|
||||
return shutil.which(str(Path(binary).expanduser())) or shutil.which(str(binary)) or binary
|
||||
|
||||
def bin_hash(binary: Optional[str]) -> Optional[str]:
|
||||
return 'UNUSED'
|
||||
# DEPRECATED: now handled by new BinProvider plugin system, no longer needed:
|
||||
|
||||
if binary is None:
|
||||
return None
|
||||
abs_path = bin_path(binary)
|
||||
if abs_path is None or not Path(abs_path).exists():
|
||||
return None
|
||||
|
||||
file_hash = md5()
|
||||
with io.open(abs_path, mode='rb') as f:
|
||||
for chunk in iter(lambda: f.read(io.DEFAULT_BUFFER_SIZE), b''):
|
||||
file_hash.update(chunk)
|
||||
|
||||
return f'md5:{file_hash.hexdigest()}'
|
||||
|
||||
def find_chrome_binary() -> Optional[str]:
|
||||
"""find any installed chrome binaries in the default locations"""
|
||||
# Precedence: Chromium, Chrome, Beta, Canary, Unstable, Dev
|
||||
# make sure data dir finding precedence order always matches binary finding order
|
||||
default_executable_paths = (
|
||||
# '~/Library/Caches/ms-playwright/chromium-*/chrome-mac/Chromium.app/Contents/MacOS/Chromium',
|
||||
'chromium-browser',
|
||||
'chromium',
|
||||
'/Applications/Chromium.app/Contents/MacOS/Chromium',
|
||||
'chrome',
|
||||
'google-chrome',
|
||||
'/Applications/Google Chrome.app/Contents/MacOS/Google Chrome',
|
||||
'google-chrome-stable',
|
||||
'google-chrome-beta',
|
||||
'google-chrome-canary',
|
||||
'/Applications/Google Chrome Canary.app/Contents/MacOS/Google Chrome Canary',
|
||||
'google-chrome-unstable',
|
||||
'google-chrome-dev',
|
||||
)
|
||||
for name in default_executable_paths:
|
||||
full_path_exists = shutil.which(name)
|
||||
if full_path_exists:
|
||||
return name
|
||||
|
||||
return None
|
||||
|
||||
def find_chrome_data_dir() -> Optional[str]:
|
||||
"""find any installed chrome user data directories in the default locations"""
|
||||
# deprecated because this is DANGEROUS, do not re-implement/uncomment this behavior.
|
||||
|
||||
# Going forward we want to discourage people from using their main chrome profile for archiving.
|
||||
# Session tokens, personal data, and cookies are often returned in server responses,
|
||||
# when they get archived, they are essentially burned as anyone who can view the archive
|
||||
# can use that data to masquerade as the logged-in user that did the archiving.
|
||||
# For this reason users should always create dedicated burner profiles for archiving and not use
|
||||
# their daily driver main accounts.
|
||||
|
||||
# # Precedence: Chromium, Chrome, Beta, Canary, Unstable, Dev
|
||||
# # make sure data dir finding precedence order always matches binary finding order
|
||||
# default_profile_paths = (
|
||||
# '~/.config/chromium',
|
||||
# '~/Library/Application Support/Chromium',
|
||||
# '~/AppData/Local/Chromium/User Data',
|
||||
# '~/.config/chrome',
|
||||
# '~/.config/google-chrome',
|
||||
# '~/Library/Application Support/Google/Chrome',
|
||||
# '~/AppData/Local/Google/Chrome/User Data',
|
||||
# '~/.config/google-chrome-stable',
|
||||
# '~/.config/google-chrome-beta',
|
||||
# '~/Library/Application Support/Google/Chrome Canary',
|
||||
# '~/AppData/Local/Google/Chrome SxS/User Data',
|
||||
# '~/.config/google-chrome-unstable',
|
||||
# '~/.config/google-chrome-dev',
|
||||
# )
|
||||
# for path in default_profile_paths:
|
||||
# full_path = Path(path).resolve()
|
||||
# if full_path.exists():
|
||||
# return full_path
|
||||
return None
|
||||
|
||||
def wget_supports_compression(config):
|
||||
try:
|
||||
cmd = [
|
||||
config['WGET_BINARY'],
|
||||
"--compression=auto",
|
||||
"--help",
|
||||
]
|
||||
return not run(cmd, stdout=DEVNULL, stderr=DEVNULL).returncode
|
||||
except (FileNotFoundError, OSError):
|
||||
return False
|
||||
|
||||
|
||||
def get_dependency_info(config: benedict) -> ConfigValue:
    return {
        # 'PYTHON_BINARY': {
        #     'path': bin_path(config['PYTHON_BINARY']),
        #     'version': config['PYTHON_VERSION'],
        #     'hash': bin_hash(config['PYTHON_BINARY']),
        #     'enabled': True,
        #     'is_valid': bool(config['PYTHON_VERSION']),
        # },
        # 'SQLITE_BINARY': {
        #     'path': bin_path(config['SQLITE_BINARY']),
        #     'version': config['SQLITE_VERSION'],
        #     'hash': bin_hash(config['SQLITE_BINARY']),
        #     'enabled': True,
        #     'is_valid': bool(config['SQLITE_VERSION']),
        # },
        # 'DJANGO_BINARY': {
        #     'path': bin_path(config['DJANGO_BINARY']),
        #     'version': config['DJANGO_VERSION'],
        #     'hash': bin_hash(config['DJANGO_BINARY']),
        #     'enabled': True,
        #     'is_valid': bool(config['DJANGO_VERSION']),
        # },
        # 'ARCHIVEBOX_BINARY': {
        #     'path': bin_path(config['ARCHIVEBOX_BINARY']),
        #     'version': config['VERSION'],
        #     'hash': bin_hash(config['ARCHIVEBOX_BINARY']),
        #     'enabled': True,
        #     'is_valid': True,
        # },

        'CURL_BINARY': {
            'path': bin_path(config['CURL_BINARY']),
            'version': config['CURL_VERSION'],
            'hash': bin_hash(config['CURL_BINARY']),
            'enabled': config['USE_CURL'],
            'is_valid': bool(config['CURL_VERSION']),
        },
        'WGET_BINARY': {
            'path': bin_path(config['WGET_BINARY']),
            'version': config['WGET_VERSION'],
            'hash': bin_hash(config['WGET_BINARY']),
            'enabled': config['USE_WGET'],
            'is_valid': bool(config['WGET_VERSION']),
        },
        # 'NODE_BINARY': {
        #     'path': bin_path(config['NODE_BINARY']),
        #     'version': config['NODE_VERSION'],
        #     'hash': bin_hash(config['NODE_BINARY']),
        #     'enabled': config['USE_NODE'],
        #     'is_valid': bool(config['NODE_VERSION']),
        # },
        'MERCURY_BINARY': {
            'path': bin_path(config['MERCURY_BINARY']),
            'version': config['MERCURY_VERSION'],
            'hash': bin_hash(config['MERCURY_BINARY']),
            'enabled': config['USE_MERCURY'],
            'is_valid': bool(config['MERCURY_VERSION']),
        },
        'GIT_BINARY': {
            'path': bin_path(config['GIT_BINARY']),
            'version': config['GIT_VERSION'],
            'hash': bin_hash(config['GIT_BINARY']),
            'enabled': config['USE_GIT'],
            'is_valid': bool(config['GIT_VERSION']),
        },
        # 'SINGLEFILE_BINARY': {
        #     'path': bin_path(config['SINGLEFILE_BINARY']),
        #     'version': config['SINGLEFILE_VERSION'],
        #     'hash': bin_hash(config['SINGLEFILE_BINARY']),
        #     'enabled': config['USE_SINGLEFILE'],
        #     'is_valid': bool(config['SINGLEFILE_VERSION']),
        # },
        # 'READABILITY_BINARY': {
        #     'path': bin_path(config['READABILITY_BINARY']),
        #     'version': config['READABILITY_VERSION'],
        #     'hash': bin_hash(config['READABILITY_BINARY']),
        #     'enabled': config['USE_READABILITY'],
        #     'is_valid': bool(config['READABILITY_VERSION']),
        # },
        # 'YOUTUBEDL_BINARY': {
        #     'path': bin_path(config['YOUTUBEDL_BINARY']),
        #     'version': config['YOUTUBEDL_VERSION'],
        #     'hash': bin_hash(config['YOUTUBEDL_BINARY']),
        #     'enabled': config['USE_YOUTUBEDL'],
        #     'is_valid': bool(config['YOUTUBEDL_VERSION']),
        # },
        # 'CHROME_BINARY': {
        #     'path': bin_path(config['CHROME_BINARY']),
        #     'version': config['CHROME_VERSION'],
        #     'hash': bin_hash(config['CHROME_BINARY']),
        #     'enabled': config['USE_CHROME'],
        #     'is_valid': bool(config['CHROME_VERSION']),
        # },
        # 'RIPGREP_BINARY': {
        #     'path': bin_path(config['RIPGREP_BINARY']),
        #     'version': config['RIPGREP_VERSION'],
        #     'hash': bin_hash(config['RIPGREP_BINARY']),
        #     'enabled': config['USE_RIPGREP'],
        #     'is_valid': bool(config['RIPGREP_VERSION']),
        # },
        # 'SONIC_BINARY': {
        #     'path': bin_path(config['SONIC_BINARY']),
        #     'version': config['SONIC_VERSION'],
        #     'hash': bin_hash(config['SONIC_BINARY']),
        #     'enabled': config['USE_SONIC'],
        #     'is_valid': bool(config['SONIC_VERSION']),
        # },
    }
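
# Illustrative usage sketch (an assumption, not part of the original file): surface any
# enabled dependencies that failed to resolve to a working binary on this system.
def example_print_missing_dependencies(config: benedict) -> None:
    for name, info in get_dependency_info(config).items():
        if info['enabled'] and not info['is_valid']:
            print(f'[X] Missing dependency: {name} (expected at {info["path"]})')
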

# ******************************************************************************
# ******************************************************************************
# ******************************** Load Config *********************************
# ******* (compile the defaults, configs, and metadata all into CONFIG) ********
# ******************************************************************************
# ******************************************************************************

def load_all_config():
    CONFIG = benedict()
    for section_name, section_config in CONFIG_SCHEMA.items():
        # print('LOADING CONFIG SECTION:', section_name)
        CONFIG = load_config(section_config, CONFIG)

    # print("LOADING CONFIG SECTION:", 'DYNAMIC')
    return load_config(DYNAMIC_CONFIG_SCHEMA, CONFIG)

# add all final config values in CONFIG to globals in this file
CONFIG: benedict = load_all_config()
globals().update(CONFIG)
# this lets us do: from .config import DEBUG, MEDIA_TIMEOUT, ...

# print("FINISHED LOADING CONFIG USING SCHEMAS + FILE + ENV")
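
# Note (illustrative, not part of the original file): because globals().update(CONFIG) runs at
# import time, every merged key becomes an importable module attribute:
#
#   from .config import DEBUG, MEDIA_TIMEOUT
#
# DYNAMIC_CONFIG_SCHEMA is applied last, after all CONFIG_SCHEMA sections, so its computed
# values can reference any of the statically loaded ones.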

# ******************************************************************************
# ******************************************************************************
# ******************************************************************************
# ******************************************************************************
# ******************************************************************************



########################### System Environment Setup ###########################

# Set timezone to UTC and umask to OUTPUT_PERMISSIONS
assert TIMEZONE == 'UTC', f'The server timezone should always be set to UTC (got {TIMEZONE})'   # noqa: F821
os.environ["TZ"] = TIMEZONE                                                                     # noqa: F821
os.umask(0o777 - int(STORAGE_CONFIG.DIR_OUTPUT_PERMISSIONS, base=8))                            # noqa: F821
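
# Worked example (illustrative, not part of the original file): if DIR_OUTPUT_PERMISSIONS
# is '755', the umask becomes 0o777 - 0o755 == 0o022, which makes newly created dirs 755;
# a stricter '700' would yield umask 0o077.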

########################### Config Validity Checkers ###########################

if not SHELL_CONFIG.USE_COLOR:
    os.environ['NO_COLOR'] = '1'
if not SHELL_CONFIG.SHOW_PROGRESS:
    os.environ['TERM'] = 'dumb'

# recreate rich console obj based on new config values
CONSOLE = Console()
from ..misc import logging
logging.CONSOLE = CONSOLE

INITIAL_STARTUP_PROGRESS = None
INITIAL_STARTUP_PROGRESS_TASK = 0

def bump_startup_progress_bar():
    global INITIAL_STARTUP_PROGRESS
    global INITIAL_STARTUP_PROGRESS_TASK
    if INITIAL_STARTUP_PROGRESS:
        INITIAL_STARTUP_PROGRESS.update(INITIAL_STARTUP_PROGRESS_TASK, advance=1)   # type: ignore

def setup_django_minimal():
    # sys.path.append(str(CONSTANTS.PACKAGE_DIR))
    # os.environ.setdefault('OUTPUT_DIR', str(CONSTANTS.DATA_DIR))
    # os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'core.settings')
    # django.setup()
    raise Exception('dont use this anymore')

DJANGO_SET_UP = False

def setup_django(out_dir: Path | None=None, check_db=False, config: benedict=CONFIG, in_memory_db=False) -> None:
    global INITIAL_STARTUP_PROGRESS
    global INITIAL_STARTUP_PROGRESS_TASK
    global DJANGO_SET_UP

    if DJANGO_SET_UP:
        raise Exception('django is already set up!')

    with Progress(transient=True, expand=True, console=CONSOLE) as INITIAL_STARTUP_PROGRESS:
        INITIAL_STARTUP_PROGRESS_TASK = INITIAL_STARTUP_PROGRESS.add_task("[green]Loading modules...", total=25)

        output_dir = out_dir or CONSTANTS.DATA_DIR

        assert isinstance(output_dir, Path) and isinstance(CONSTANTS.PACKAGE_DIR, Path)

        bump_startup_progress_bar()
        try:
            from django.core.management import call_command

            bump_startup_progress_bar()

            if in_memory_db:
                raise Exception('dont use this anymore')

                # some commands (e.g. oneshot) dont store a long-lived sqlite3 db file on disk.
                # in those cases we create a temporary in-memory db and run the migrations
                # immediately to get a usable in-memory-database at startup
                os.environ.setdefault("ARCHIVEBOX_DATABASE_NAME", ":memory:")
                django.setup()

                bump_startup_progress_bar()
                call_command("migrate", interactive=False, verbosity=0)
            else:
                # Otherwise use default sqlite3 file-based database and initialize django
                # without running migrations automatically (user runs them manually by calling init)
                django.setup()

            bump_startup_progress_bar()

            from django.conf import settings

            # log startup message to the error log
            with open(settings.ERROR_LOG, "a", encoding='utf-8') as f:
                command = ' '.join(sys.argv)
                ts = datetime.now(timezone.utc).strftime('%Y-%m-%d__%H:%M:%S')
                f.write(f"\n> {command}; TS={ts} VERSION={CONSTANTS.VERSION} IN_DOCKER={SHELL_CONFIG.IN_DOCKER} IS_TTY={SHELL_CONFIG.IS_TTY}\n")

            if check_db:
                # Create cache table in DB if needed
                try:
                    from django.core.cache import cache
                    cache.get('test', None)
                except django.db.utils.OperationalError:
                    call_command("createcachetable", verbosity=0)

                bump_startup_progress_bar()

                # if archivebox gets imported multiple times, we have to close
                # the sqlite3 whenever we init from scratch to avoid multiple threads
                # sharing the same connection by accident
                from django.db import connections
                for conn in connections.all():
                    conn.close_if_unusable_or_obsolete()

                sql_index_path = CONSTANTS.DATABASE_FILE
                assert sql_index_path.exists(), (
                    f'No database file {sql_index_path} found in: {CONSTANTS.DATA_DIR} (Are you in an ArchiveBox collection directory?)')

                bump_startup_progress_bar()

                # https://docs.pydantic.dev/logfire/integrations/django/ Logfire Debugging
                if settings.DEBUG_LOGFIRE:
                    from opentelemetry.instrumentation.sqlite3 import SQLite3Instrumentor
                    SQLite3Instrumentor().instrument()

                    import logfire

                    logfire.configure()
                    logfire.instrument_django(is_sql_commentor_enabled=True)
                    logfire.info(f'Started ArchiveBox v{CONSTANTS.VERSION}', argv=sys.argv)

        except KeyboardInterrupt:
            raise SystemExit(2)

    DJANGO_SET_UP = True

    INITIAL_STARTUP_PROGRESS = None
    INITIAL_STARTUP_PROGRESS_TASK = None

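# Illustrative usage sketch (an assumption, not part of the original file): entrypoints call
# setup_django() exactly once before touching any Django models, e.g.:
#
#   setup_django(check_db=True)   # falls back to CONSTANTS.DATA_DIR when out_dir is None
#
# calling it a second time raises 'django is already set up!', so guard with DJANGO_SET_UP.
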
442
archivebox/config/views.py
Normal file
@@ -0,0 +1,442 @@
__package__ = 'archivebox.config'

import os
import inspect
from typing import Any, List, Dict, cast
from benedict import benedict

from django.http import HttpRequest
from django.conf import settings
from django.utils import timezone
from django.utils.html import format_html, mark_safe

from admin_data_views.typing import TableContext, ItemContext
from admin_data_views.utils import render_with_table_view, render_with_item_view, ItemLink

from archivebox.config import CONSTANTS
from archivebox.util import parse_date

def obj_to_yaml(obj: Any, indent: int=0) -> str:
    indent_str = " " * indent
    if indent == 0:
        indent_str = '\n'     # put extra newline between top-level entries

    if isinstance(obj, dict):
        if not obj:
            return "{}"
        result = "\n"
        for key, value in obj.items():
            result += f"{indent_str}{key}:{obj_to_yaml(value, indent + 1)}\n"
        return result

    elif isinstance(obj, list):
        if not obj:
            return "[]"
        result = "\n"
        for item in obj:
            result += f"{indent_str}- {obj_to_yaml(item, indent + 1).lstrip()}\n"
        return result.rstrip()

    elif isinstance(obj, str):
        if "\n" in obj:
            # render multi-line strings as YAML-style block scalars
            return f" |\n{indent_str} " + obj.replace("\n", f"\n{indent_str} ")
        else:
            return f" {obj}"

    elif isinstance(obj, (int, float, bool)):
        return f" {str(obj)}"

    elif callable(obj):
        # show the source of lambdas/functions instead of their repr
        source = '\n'.join(
            '' if 'def ' in line else line
            for line in inspect.getsource(obj).split('\n')
            if line.strip()
        ).split('lambda: ')[-1].rstrip(',')
        return f" {indent_str} " + source.replace("\n", f"\n{indent_str} ")

    else:
        return f" {str(obj)}"

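# Illustrative example (an assumption, not part of the original file) of the YAML-ish output:
#
#   obj_to_yaml({'name': 'wget', 'args': ['-r', '-N'], 'enabled': True})
#
# renders roughly as (with blank lines between top-level entries):
#
#   name: wget
#   args:
#    - -r
#    - -N
#   enabled: True
#
# it favors readability in the admin UI over strict YAML compliance.
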
@render_with_table_view
def binaries_list_view(request: HttpRequest, **kwargs) -> TableContext:

    assert request.user.is_superuser, 'Must be a superuser to view configuration settings.'

    rows = {
        "Binary": [],
        "Found Version": [],
        "From Plugin": [],
        "Provided By": [],
        "Found Abspath": [],
        "Related Configuration": [],
        # "Overrides": [],
        # "Description": [],
    }

    relevant_configs = {
        key: val
        for key, val in settings.CONFIG.items()
        if '_BINARY' in key or '_VERSION' in key
    }

    for plugin in settings.PLUGINS.values():
        for binary in plugin.HOOKS_BY_TYPE.get('BINARY', {}).values():
            try:
                binary = binary.load()
            except Exception as e:
                print(e)

            rows['Binary'].append(ItemLink(binary.name, key=binary.name))
            rows['Found Version'].append(f'✅ {binary.loaded_version}' if binary.loaded_version else '❌ missing')
            rows['From Plugin'].append(plugin.plugin_module)
            rows['Provided By'].append(
                ', '.join(
                    f'[{binprovider.name}]' if binprovider.name == getattr(binary.loaded_binprovider, 'name', None) else binprovider.name
                    for binprovider in binary.binproviders_supported
                    if binprovider
                )
                # binary.loaded_binprovider.name
                # if binary.loaded_binprovider else
                # ', '.join(getattr(provider, 'name', str(provider)) for provider in binary.binproviders_supported)
            )
            rows['Found Abspath'].append(str(binary.loaded_abspath or '❌ missing'))
            rows['Related Configuration'].append(mark_safe(', '.join(
                f'<a href="/admin/environment/config/{config_key}/">{config_key}</a>'
                for config_key, config_value in relevant_configs.items()
                if str(binary.name).lower().replace('-', '').replace('_', '').replace('ytdlp', 'youtubedl') in config_key.lower()
                # or binary.name.lower().replace('-', '').replace('_', '') in str(config_value).lower()
            )))
            # if not binary.provider_overrides:
            #     import ipdb; ipdb.set_trace()
            # rows['Overrides'].append(str(obj_to_yaml(binary.provider_overrides) or str(binary.provider_overrides))[:200])
            # rows['Description'].append(binary.description)

    return TableContext(
        title="Binaries",
        table=rows,
    )

@render_with_item_view
def binary_detail_view(request: HttpRequest, key: str, **kwargs) -> ItemContext:

    assert request.user.is_superuser, 'Must be a superuser to view configuration settings.'

    binary = None
    plugin = None
    for loaded_plugin in settings.PLUGINS.values():
        for loaded_binary in loaded_plugin.HOOKS_BY_TYPE.get('BINARY', {}).values():
            if loaded_binary.name == key:
                binary = loaded_binary
                plugin = loaded_plugin

    assert plugin and binary, f'Could not find a binary matching the specified name: {key}'

    try:
        binary = binary.load()
    except Exception as e:
        print(e)

    return ItemContext(
        slug=key,
        title=key,
        data=[
            {
                "name": binary.name,
                "description": binary.abspath,
                "fields": {
                    'plugin': plugin.name,
                    'binprovider': binary.loaded_binprovider,
                    'abspath': binary.loaded_abspath,
                    'version': binary.loaded_version,
                    'overrides': obj_to_yaml(binary.provider_overrides),
                    'providers': obj_to_yaml(binary.binproviders_supported),
                },
                "help_texts": {
                    # TODO
                },
            },
        ],
    )


@render_with_table_view
def plugins_list_view(request: HttpRequest, **kwargs) -> TableContext:

    assert request.user.is_superuser, 'Must be a superuser to view configuration settings.'

    rows = {
        "Name": [],
        "verbose_name": [],
        "module": [],
        "source_code": [],
        "hooks": [],
    }

    for plugin in settings.PLUGINS.values():
        # try:
        #     plugin.load_binaries()
        # except Exception as e:
        #     print(e)

        rows['Name'].append(ItemLink(plugin.id, key=plugin.id))
        rows['verbose_name'].append(mark_safe(f'<a href="{plugin.docs_url}" target="_blank">{plugin.verbose_name}</a>'))
        rows['module'].append(str(plugin.plugin_module))
        rows['source_code'].append(str(plugin.plugin_dir))
        rows['hooks'].append(mark_safe(', '.join(
            f'<a href="{hook.admin_url}">{hook.id}</a>'
            for hook in plugin.hooks
        )))

    return TableContext(
        title="Installed plugins",
        table=rows,
    )

@render_with_item_view
def plugin_detail_view(request: HttpRequest, key: str, **kwargs) -> ItemContext:

    assert request.user.is_superuser, 'Must be a superuser to view configuration settings.'

    plugin = None
    for loaded_plugin in settings.PLUGINS.values():
        if loaded_plugin.id == key:
            plugin = loaded_plugin

    assert plugin, f'Could not find a plugin matching the specified name: {key}'

    try:
        plugin = plugin.load_binaries()
    except Exception as e:
        print(e)

    return ItemContext(
        slug=key,
        title=key,
        data=[
            {
                "name": plugin.id,
                "description": plugin.verbose_name,
                "fields": {
                    "hooks": plugin.hooks,
                    "schema": obj_to_yaml(plugin.model_dump(include=("name", "verbose_name", "app_label", "hooks"))),
                },
                "help_texts": {
                    # TODO
                },
            },
        ],
    )


@render_with_table_view
def worker_list_view(request: HttpRequest, **kwargs) -> TableContext:
    assert request.user.is_superuser, "Must be a superuser to view configuration settings."

    rows = {
        "Name": [],
        "State": [],
        "PID": [],
        "Started": [],
        "Command": [],
        "Logfile": [],
        "Exit Status": [],
    }

    from queues.supervisor_util import get_existing_supervisord_process

    supervisor = get_existing_supervisord_process()
    if supervisor is None:
        return TableContext(
            title="No running worker processes",
            table=rows,
        )

    all_config_entries = cast(List[Dict[str, Any]], supervisor.getAllConfigInfo() or [])
    all_config = {config["name"]: benedict(config) for config in all_config_entries}

    # Add top row for the supervisord process manager itself
    rows["Name"].append(ItemLink('supervisord', key='supervisord'))
    rows["State"].append(supervisor.getState()['statename'])
    rows['PID'].append(str(supervisor.getPID()))
    rows["Started"].append('-')
    rows["Command"].append('supervisord --configuration=tmp/supervisord.conf')
    rows["Logfile"].append(
        format_html(
            '<a href="/admin/environment/logs/{}/">{}</a>',
            'supervisord',
            'logs/supervisord.log',
        )
    )
    rows['Exit Status'].append('0')

    # Add a row for each worker process managed by supervisord
    for proc in cast(List[Dict[str, Any]], supervisor.getAllProcessInfo()):
        proc = benedict(proc)
        # {
        #     "name": "daphne",
        #     "group": "daphne",
        #     "start": 1725933056,
        #     "stop": 0,
        #     "now": 1725933438,
        #     "state": 20,
        #     "statename": "RUNNING",
        #     "spawnerr": "",
        #     "exitstatus": 0,
        #     "logfile": "logs/server.log",
        #     "stdout_logfile": "logs/server.log",
        #     "stderr_logfile": "",
        #     "pid": 33283,
        #     "description": "pid 33283, uptime 0:06:22",
        # }
        rows["Name"].append(ItemLink(proc.name, key=proc.name))
        rows["State"].append(proc.statename)
        rows['PID'].append(proc.description.replace('pid ', ''))
        rows["Started"].append(parse_date(proc.start).strftime("%Y-%m-%d %H:%M:%S") if proc.start else '')
        rows["Command"].append(all_config[proc.name].command)
        rows["Logfile"].append(
            format_html(
                '<a href="/admin/environment/logs/{}/">{}</a>',
                proc.stdout_logfile.split("/")[-1].split('.')[0],
                proc.stdout_logfile,
            )
        )
        rows["Exit Status"].append(str(proc.exitstatus))

    return TableContext(
        title="Running worker processes",
        table=rows,
    )


@render_with_item_view
def worker_detail_view(request: HttpRequest, key: str, **kwargs) -> ItemContext:
    assert request.user.is_superuser, "Must be a superuser to view configuration settings."

    from queues.supervisor_util import get_existing_supervisord_process, get_worker
    from queues.settings import CONFIG_FILE

    supervisor = get_existing_supervisord_process()
    if supervisor is None:
        return ItemContext(
            slug='none',
            title='error: No running supervisord process.',
            data=[],
        )

    all_config = cast(List[Dict[str, Any]], supervisor.getAllConfigInfo() or [])

    if key == 'supervisord':
        relevant_config = CONFIG_FILE.read_text()
        relevant_logs = cast(str, supervisor.readLog(0, 10_000_000))
        start_ts = [line for line in relevant_logs.split("\n") if "RPC interface 'supervisor' initialized" in line][-1].split(",", 1)[0]
        uptime = str(timezone.now() - parse_date(start_ts)).split(".")[0]

        proc = benedict(
            {
                "name": "supervisord",
                "pid": supervisor.getPID(),
                "statename": supervisor.getState()["statename"],
                "start": start_ts,
                "stop": None,
                "exitstatus": "",
                "stdout_logfile": "logs/supervisord.log",
                "description": f'pid 000, uptime {uptime}',
            }
        )
    else:
        proc = benedict(get_worker(supervisor, key) or {})
        relevant_config = [config for config in all_config if config['name'] == key][0]
        relevant_logs = supervisor.tailProcessStdoutLog(key, 0, 10_000_000)[0]

    return ItemContext(
        slug=key,
        title=key,
        data=[
            {
                "name": key,
                "description": key,
                "fields": {
                    "Command": proc.name,
                    "PID": proc.pid,
                    "State": proc.statename,
                    "Started": parse_date(proc.start).strftime("%Y-%m-%d %H:%M:%S") if proc.start else "",
                    "Stopped": parse_date(proc.stop).strftime("%Y-%m-%d %H:%M:%S") if proc.stop else "",
                    "Exit Status": str(proc.exitstatus),
                    "Logfile": proc.stdout_logfile,
                    "Uptime": (proc.description or "").split("uptime ", 1)[-1],
                    "Config": relevant_config,
                    "Logs": relevant_logs,
                },
                "help_texts": {"Uptime": "How long the process has been running ([days:]hours:minutes:seconds)"},
            },
        ],
    )


@render_with_table_view
def log_list_view(request: HttpRequest, **kwargs) -> TableContext:
    assert request.user.is_superuser, "Must be a superuser to view configuration settings."

    log_files = CONSTANTS.LOGS_DIR.glob("*.log")
    log_files = sorted(log_files, key=os.path.getmtime)[::-1]

    rows = {
        "Name": [],
        "Last Updated": [],
        "Size": [],
        "Most Recent Lines": [],
    }

    # Add a row for each log file found in the logs dir
    for logfile in log_files:
        st = logfile.stat()
        rows["Name"].append(ItemLink("logs" + str(logfile).rsplit("/logs", 1)[-1], key=logfile.name))
        rows["Last Updated"].append(parse_date(st.st_mtime).strftime("%Y-%m-%d %H:%M:%S"))
        rows["Size"].append(f'{st.st_size//1000} kb')

        # read only the last ~1KB of the file to grab its most recent non-empty line
        with open(logfile, 'rb') as f:
            try:
                f.seek(-1024, os.SEEK_END)
            except OSError:
                # file is shorter than 1KB, read it from the start
                f.seek(0)
            last_lines = f.read().decode('utf-8', errors='replace').split("\n")
            non_empty_lines = [line for line in last_lines if line.strip()]
            rows["Most Recent Lines"].append(non_empty_lines[-1] if non_empty_lines else '')

    return TableContext(
        title="Debug Log files",
        table=rows,
    )


@render_with_item_view
def log_detail_view(request: HttpRequest, key: str, **kwargs) -> ItemContext:
    assert request.user.is_superuser, "Must be a superuser to view configuration settings."

    from django.conf import settings

    log_file = [logfile for logfile in CONSTANTS.LOGS_DIR.glob('*.log') if key in logfile.name][0]

    log_text = log_file.read_text()
    log_stat = log_file.stat()

    return ItemContext(
        slug=key,
        title=key,
        data=[
            {
                "name": key,
                "description": key,
                "fields": {
                    "Path": str(log_file),
                    "Size": f"{log_stat.st_size//1000} kb",
                    "Last Updated": parse_date(log_stat.st_mtime).strftime("%Y-%m-%d %H:%M:%S"),
                    "Tail": "\n".join(log_text[-10_000:].split("\n")[-20:]),
                    "Full Log": log_text,
                },
            },
        ],
    )