diff --git a/archivebox/__init__.py b/archivebox/__init__.py
index bb2a9806..69df1876 100755
--- a/archivebox/__init__.py
+++ b/archivebox/__init__.py
@@ -15,7 +15,7 @@ import os
import sys
from pathlib import Path
-
+from typing import cast
ASCII_LOGO = """
█████╗ ██████╗ ██████╗██╗ ██╗██╗██╗ ██╗███████╗ ██████╗ ██████╗ ██╗ ██╗
██╔══██╗██╔══██╗██╔════╝██║ ██║██║██║ ██║██╔════╝ ██╔══██╗██╔═══██╗╚██╗██╔╝
@@ -52,6 +52,50 @@ load_vendored_libs()
# print('DONE LOADING VENDORED LIBRARIES')
+import abx # noqa
+import abx_spec_archivebox # noqa
+import abx_spec_config # noqa
+import abx_spec_pydantic_pkgr # noqa
+import abx_spec_django # noqa
+import abx_spec_searchbackend # noqa
+
+
+abx.pm.add_hookspecs(abx_spec_config.PLUGIN_SPEC)
+abx.pm.register(abx_spec_config.PLUGIN_SPEC())
+
+abx.pm.add_hookspecs(abx_spec_pydantic_pkgr.PLUGIN_SPEC)
+abx.pm.register(abx_spec_pydantic_pkgr.PLUGIN_SPEC())
+
+abx.pm.add_hookspecs(abx_spec_django.PLUGIN_SPEC)
+abx.pm.register(abx_spec_django.PLUGIN_SPEC())
+
+abx.pm.add_hookspecs(abx_spec_searchbackend.PLUGIN_SPEC)
+abx.pm.register(abx_spec_searchbackend.PLUGIN_SPEC())
+
+
+abx.pm = cast(abx.ABXPluginManager[abx_spec_archivebox.ArchiveBoxPluginSpec], abx.pm)
+pm = abx.pm
+
+
+# Collect all pip-installed ABX-compatible plugins (group='abx'); they are loaded further below
+ABX_ECOSYSTEM_PLUGINS = abx.get_pip_installed_plugins(group='abx')
+# Locations of the ArchiveBox built-in plugins (only paths are listed here; loading happens below)
+ARCHIVEBOX_BUILTIN_PLUGINS = {
+ 'config': PACKAGE_DIR / 'config',
+ 'core': PACKAGE_DIR / 'core',
+ # 'search': PACKAGE_DIR / 'search',
+ # 'core': PACKAGE_DIR / 'core',
+}
+# Find user-defined ArchiveBox plugins in ./user_plugins under the current working directory
+USER_PLUGINS = abx.find_plugins_in_dir(Path(os.getcwd()) / 'user_plugins')
+# Merge all plugins together
+ALL_PLUGINS = {**ABX_ECOSYSTEM_PLUGINS, **ARCHIVEBOX_BUILTIN_PLUGINS, **USER_PLUGINS}
+
+
+# Load ArchiveBox plugins
+LOADED_PLUGINS = abx.load_plugins(ALL_PLUGINS)
+
+
from .config.constants import CONSTANTS # noqa
from .config.paths import PACKAGE_DIR, DATA_DIR, ARCHIVE_DIR # noqa
from .config.version import VERSION # noqa
diff --git a/archivebox/config/__init__.py b/archivebox/config/__init__.py
index a3e96681..55a76384 100644
--- a/archivebox/config/__init__.py
+++ b/archivebox/config/__init__.py
@@ -1,4 +1,5 @@
-__package__ = 'archivebox.config'
+__package__ = 'config'
+__order__ = 200
from .paths import (
PACKAGE_DIR, # noqa
@@ -9,30 +10,3 @@ from .constants import CONSTANTS, CONSTANTS_CONFIG, PACKAGE_DIR, DATA_DIR, ARCHI
from .version import VERSION # noqa
-import abx
-
-
-# @abx.hookimpl
-# def get_INSTALLED_APPS():
-# return ['config']
-
-
-@abx.hookimpl
-def get_CONFIG():
- from .common import (
- SHELL_CONFIG,
- STORAGE_CONFIG,
- GENERAL_CONFIG,
- SERVER_CONFIG,
- ARCHIVING_CONFIG,
- SEARCH_BACKEND_CONFIG,
- )
- return {
- 'SHELL_CONFIG': SHELL_CONFIG,
- 'STORAGE_CONFIG': STORAGE_CONFIG,
- 'GENERAL_CONFIG': GENERAL_CONFIG,
- 'SERVER_CONFIG': SERVER_CONFIG,
- 'ARCHIVING_CONFIG': ARCHIVING_CONFIG,
- 'SEARCHBACKEND_CONFIG': SEARCH_BACKEND_CONFIG,
- }
-
diff --git a/archivebox/config/configfile.py b/archivebox/config/collection.py
similarity index 94%
rename from archivebox/config/configfile.py
rename to archivebox/config/collection.py
index 911e1559..d0c5a273 100644
--- a/archivebox/config/configfile.py
+++ b/archivebox/config/collection.py
@@ -9,6 +9,8 @@ from configparser import ConfigParser
from benedict import benedict
+import archivebox
+
from archivebox.config.constants import CONSTANTS
from archivebox.misc.logging import stderr
@@ -16,9 +18,9 @@ from archivebox.misc.logging import stderr
def get_real_name(key: str) -> str:
"""get the up-to-date canonical name for a given old alias or current key"""
- from django.conf import settings
+ CONFIGS = archivebox.pm.hook.get_CONFIGS()
- for section in settings.CONFIGS.values():
+ for section in CONFIGS.values():
try:
return section.aliases[key]
except KeyError:
@@ -115,17 +117,15 @@ def load_config_file() -> Optional[benedict]:
def section_for_key(key: str) -> Any:
- from django.conf import settings
- for config_section in settings.CONFIGS.values():
+ for config_section in archivebox.pm.hook.get_CONFIGS().values():
if hasattr(config_section, key):
return config_section
- return None
+ raise ValueError(f'No config section found for key: {key}')
def write_config_file(config: Dict[str, str]) -> benedict:
"""load the ini-formatted config file from DATA_DIR/Archivebox.conf"""
- import abx.archivebox.reads
from archivebox.misc.system import atomic_write
CONFIG_HEADER = (
@@ -175,7 +175,7 @@ def write_config_file(config: Dict[str, str]) -> benedict:
updated_config = {}
try:
# validate the updated_config by attempting to re-parse it
- updated_config = {**load_all_config(), **abx.archivebox.reads.get_FLAT_CONFIG()}
+ updated_config = {**load_all_config(), **archivebox.pm.hook.get_FLAT_CONFIG()}
except BaseException: # lgtm [py/catch-base-exception]
# something went horribly wrong, revert to the previous version
with open(f'{config_path}.bak', 'r', encoding='utf-8') as old:
@@ -233,11 +233,11 @@ def load_config(defaults: Dict[str, Any],
return benedict(extended_config)
def load_all_config():
- import abx.archivebox.reads
+ import abx
flat_config = benedict()
- for config_section in abx.archivebox.reads.get_CONFIGS().values():
+ for config_section in abx.pm.hook.get_CONFIGS().values():
config_section.__init__()
flat_config.update(config_section.model_dump())
diff --git a/archivebox/config/common.py b/archivebox/config/common.py
index 2deccb0d..238fcfac 100644
--- a/archivebox/config/common.py
+++ b/archivebox/config/common.py
@@ -7,10 +7,10 @@ from typing import Dict, Optional, List
from pathlib import Path
from rich import print
-from pydantic import Field, field_validator, computed_field
+from pydantic import Field, field_validator
from django.utils.crypto import get_random_string
-from abx.archivebox.base_configset import BaseConfigSet
+from abx_spec_config.base_configset import BaseConfigSet
from .constants import CONSTANTS
from .version import get_COMMIT_HASH, get_BUILD_TIME
@@ -31,22 +31,19 @@ class ShellConfig(BaseConfigSet):
ANSI: Dict[str, str] = Field(default=lambda c: CONSTANTS.DEFAULT_CLI_COLORS if c.USE_COLOR else CONSTANTS.DISABLED_CLI_COLORS)
- VERSIONS_AVAILABLE: bool = False # .check_for_update.get_versions_available_on_github(c)},
- CAN_UPGRADE: bool = False # .check_for_update.can_upgrade(c)},
+ # VERSIONS_AVAILABLE: bool = False # .check_for_update.get_versions_available_on_github(c)},
+ # CAN_UPGRADE: bool = False # .check_for_update.can_upgrade(c)},
- @computed_field
@property
def TERM_WIDTH(self) -> int:
if not self.IS_TTY:
return 200
return shutil.get_terminal_size((140, 10)).columns
- @computed_field
@property
def COMMIT_HASH(self) -> Optional[str]:
return get_COMMIT_HASH()
- @computed_field
@property
def BUILD_TIME(self) -> str:
return get_BUILD_TIME()
diff --git a/archivebox/config/django.py b/archivebox/config/django.py
index eb79ab43..ad3d17c1 100644
--- a/archivebox/config/django.py
+++ b/archivebox/config/django.py
@@ -97,7 +97,7 @@ def setup_django(check_db=False, in_memory_db=False) -> None:
except Exception as e:
bump_startup_progress_bar(advance=1000)
- is_using_meta_cmd = any(ignored_subcommand in sys.argv for ignored_subcommand in ('help', 'version', '--help', '--version', 'init'))
+ is_using_meta_cmd = any(ignored_subcommand in sys.argv for ignored_subcommand in ('help', 'version', '--help', '--version'))
if not is_using_meta_cmd:
# show error message to user only if they're not running a meta command / just trying to get help
STDERR.print()
diff --git a/archivebox/config/views.py b/archivebox/config/views.py
index db2c7eaa..975ef7ff 100644
--- a/archivebox/config/views.py
+++ b/archivebox/config/views.py
@@ -14,8 +14,8 @@ from django.utils.html import format_html, mark_safe
from admin_data_views.typing import TableContext, ItemContext
from admin_data_views.utils import render_with_table_view, render_with_item_view, ItemLink
-import abx.archivebox.reads
-
+import abx
+import archivebox
from archivebox.config import CONSTANTS
from archivebox.misc.util import parse_date
@@ -65,7 +65,7 @@ def obj_to_yaml(obj: Any, indent: int=0) -> str:
@render_with_table_view
def binaries_list_view(request: HttpRequest, **kwargs) -> TableContext:
-
+ FLAT_CONFIG = archivebox.pm.hook.get_FLAT_CONFIG()
assert request.user.is_superuser, 'Must be a superuser to view configuration settings.'
rows = {
@@ -81,12 +81,11 @@ def binaries_list_view(request: HttpRequest, **kwargs) -> TableContext:
relevant_configs = {
key: val
- for key, val in settings.FLAT_CONFIG.items()
+ for key, val in FLAT_CONFIG.items()
if '_BINARY' in key or '_VERSION' in key
}
- for plugin_id, plugin in abx.archivebox.reads.get_PLUGINS().items():
- plugin = abx.archivebox.reads.get_PLUGIN(plugin_id)
+ for plugin_id, plugin in abx.get_all_plugins().items():
if not plugin.hooks.get('get_BINARIES'):
continue
@@ -131,17 +130,16 @@ def binaries_list_view(request: HttpRequest, **kwargs) -> TableContext:
@render_with_item_view
def binary_detail_view(request: HttpRequest, key: str, **kwargs) -> ItemContext:
- assert request.user.is_superuser, 'Must be a superuser to view configuration settings.'
+ assert request.user and request.user.is_superuser, 'Must be a superuser to view configuration settings.'
binary = None
plugin = None
- for plugin_id in abx.archivebox.reads.get_PLUGINS().keys():
- loaded_plugin = abx.archivebox.reads.get_PLUGIN(plugin_id)
+ for plugin_id, loaded_plugin in abx.get_all_plugins().items():  # separate loop name so the `plugin` result is not clobbered on every iteration
try:
- for loaded_binary in loaded_plugin.hooks.get_BINARIES().values():
+ for loaded_binary in loaded_plugin['hooks'].get_BINARIES().values():
if loaded_binary.name == key:
binary = loaded_binary
- plugin = loaded_plugin
+ plugin = loaded_plugin  # remember the plugin that actually provides the matched binary
# break # last write wins
except Exception as e:
print(e)
@@ -161,7 +159,7 @@ def binary_detail_view(request: HttpRequest, key: str, **kwargs) -> ItemContext:
"name": binary.name,
"description": binary.abspath,
"fields": {
- 'plugin': plugin.package,
+ 'plugin': plugin['package'],
'binprovider': binary.loaded_binprovider,
'abspath': binary.loaded_abspath,
'version': binary.loaded_version,
@@ -215,9 +213,7 @@ def plugins_list_view(request: HttpRequest, **kwargs) -> TableContext:
return color
return 'black'
- for plugin_id in settings.PLUGINS.keys():
-
- plugin = abx.archivebox.reads.get_PLUGIN(plugin_id)
+ for plugin_id, plugin in abx.get_all_plugins().items():
plugin.hooks.get_BINPROVIDERS = plugin.hooks.get('get_BINPROVIDERS', lambda: {})
plugin.hooks.get_BINARIES = plugin.hooks.get('get_BINARIES', lambda: {})
plugin.hooks.get_CONFIG = plugin.hooks.get('get_CONFIG', lambda: {})
@@ -263,7 +259,7 @@ def plugin_detail_view(request: HttpRequest, key: str, **kwargs) -> ItemContext:
assert plugin_id, f'Could not find a plugin matching the specified name: {key}'
- plugin = abx.archivebox.reads.get_PLUGIN(plugin_id)
+ plugin = abx.get_plugin(plugin_id)
return ItemContext(
slug=key,
diff --git a/archivebox/core/__init__.py b/archivebox/core/__init__.py
index ac3ec769..9a301977 100644
--- a/archivebox/core/__init__.py
+++ b/archivebox/core/__init__.py
@@ -1,2 +1,31 @@
__package__ = 'archivebox.core'
+import abx
+
+@abx.hookimpl
+def register_admin(admin_site):
+ """Register the core.models views (Snapshot, ArchiveResult, Tag, etc.) with the admin site"""
+ from core.admin import register_admin
+ register_admin(admin_site)
+
+
+
+@abx.hookimpl
+def get_CONFIG():
+ from archivebox.config.common import (
+ SHELL_CONFIG,
+ STORAGE_CONFIG,
+ GENERAL_CONFIG,
+ SERVER_CONFIG,
+ ARCHIVING_CONFIG,
+ SEARCH_BACKEND_CONFIG,
+ )
+ return {
+ 'SHELL_CONFIG': SHELL_CONFIG,
+ 'STORAGE_CONFIG': STORAGE_CONFIG,
+ 'GENERAL_CONFIG': GENERAL_CONFIG,
+ 'SERVER_CONFIG': SERVER_CONFIG,
+ 'ARCHIVING_CONFIG': ARCHIVING_CONFIG,
+ 'SEARCHBACKEND_CONFIG': SEARCH_BACKEND_CONFIG,
+ }
+
diff --git a/archivebox/core/admin_site.py b/archivebox/core/admin_site.py
index de92db8c..7aea2cf5 100644
--- a/archivebox/core/admin_site.py
+++ b/archivebox/core/admin_site.py
@@ -2,7 +2,7 @@ __package__ = 'archivebox.core'
from django.contrib import admin
-import abx.django.use
+import archivebox
class ArchiveBoxAdmin(admin.AdminSite):
site_header = 'ArchiveBox'
@@ -37,6 +37,6 @@ def register_admin_site():
sites.site = archivebox_admin
# register all plugins admin classes
- abx.django.use.register_admin(archivebox_admin)
+ archivebox.pm.hook.register_admin(admin_site=archivebox_admin)
return archivebox_admin
diff --git a/archivebox/core/apps.py b/archivebox/core/apps.py
index 870a77f8..b516678f 100644
--- a/archivebox/core/apps.py
+++ b/archivebox/core/apps.py
@@ -2,7 +2,7 @@ __package__ = 'archivebox.core'
from django.apps import AppConfig
-import abx
+import archivebox
class CoreConfig(AppConfig):
@@ -10,16 +10,11 @@ class CoreConfig(AppConfig):
def ready(self):
"""Register the archivebox.core.admin_site as the main django admin site"""
+ from django.conf import settings
+ archivebox.pm.hook.ready(settings=settings)
+
from core.admin_site import register_admin_site
register_admin_site()
- abx.pm.hook.ready()
-
-
-@abx.hookimpl
-def register_admin(admin_site):
- """Register the core.models views (Snapshot, ArchiveResult, Tag, etc.) with the admin site"""
- from core.admin import register_admin
- register_admin(admin_site)
diff --git a/archivebox/core/settings.py b/archivebox/core/settings.py
index 88858156..06cfa8b2 100644
--- a/archivebox/core/settings.py
+++ b/archivebox/core/settings.py
@@ -9,10 +9,12 @@ from pathlib import Path
from django.utils.crypto import get_random_string
import abx
+import archivebox
-from archivebox.config import DATA_DIR, PACKAGE_DIR, ARCHIVE_DIR, CONSTANTS
+from archivebox.config import DATA_DIR, PACKAGE_DIR, ARCHIVE_DIR, CONSTANTS # noqa
from archivebox.config.common import SHELL_CONFIG, SERVER_CONFIG # noqa
+
IS_MIGRATING = 'makemigrations' in sys.argv[:3] or 'migrate' in sys.argv[:3]
IS_TESTING = 'test' in sys.argv[:3] or 'PYTEST_CURRENT_TEST' in os.environ
IS_SHELL = 'shell' in sys.argv[:3] or 'shell_plus' in sys.argv[:3]
@@ -22,24 +24,8 @@ IS_GETTING_VERSION_OR_HELP = 'version' in sys.argv or 'help' in sys.argv or '--v
### ArchiveBox Plugin Settings
################################################################################
-PLUGIN_HOOKSPECS = [
- 'abx_spec_django',
- 'abx_spec_pydantic_pkgr',
- 'abx_spec_config',
- 'abx_spec_archivebox',
-]
-abx.register_hookspecs(PLUGIN_HOOKSPECS)
-
-SYSTEM_PLUGINS = abx.get_pip_installed_plugins(group='abx')
-USER_PLUGINS = abx.find_plugins_in_dir(DATA_DIR / 'user_plugins')
-
-ALL_PLUGINS = {**SYSTEM_PLUGINS, **USER_PLUGINS}
-
-# Load ArchiveBox plugins
-abx.load_plugins(ALL_PLUGINS)
-
-# # Load ArchiveBox config from plugins
-
+ALL_PLUGINS = archivebox.ALL_PLUGINS
+LOADED_PLUGINS = archivebox.LOADED_PLUGINS
################################################################################
### Django Core Settings
@@ -101,6 +87,7 @@ INSTALLED_APPS = [
+
MIDDLEWARE = [
'core.middleware.TimezoneMiddleware',
'django.middleware.security.SecurityMiddleware',
diff --git a/archivebox/core/settings_logging.py b/archivebox/core/settings_logging.py
index d9fc28bd..d292e15a 100644
--- a/archivebox/core/settings_logging.py
+++ b/archivebox/core/settings_logging.py
@@ -163,11 +163,6 @@ SETTINGS_LOGGING = {
"level": "DEBUG",
"propagate": False,
},
- "plugins_extractor": {
- "handlers": ["default", "logfile"],
- "level": "DEBUG",
- "propagate": False,
- },
"httpx": {
"handlers": ["outbound_webhooks"],
"level": "INFO",
diff --git a/archivebox/core/views.py b/archivebox/core/views.py
index d423c146..bb1c234f 100644
--- a/archivebox/core/views.py
+++ b/archivebox/core/views.py
@@ -21,6 +21,7 @@ from django.utils.decorators import method_decorator
from admin_data_views.typing import TableContext, ItemContext
from admin_data_views.utils import render_with_table_view, render_with_item_view, ItemLink
+import archivebox
from core.models import Snapshot
from core.forms import AddLinkForm
@@ -32,9 +33,8 @@ from archivebox.config.common import SHELL_CONFIG, SERVER_CONFIG
from archivebox.misc.util import base_url, htmlencode, ts_to_date_str
from archivebox.misc.serve_static import serve_static_with_byterange_support
-from ..plugins_extractor.archivedotorg.config import ARCHIVEDOTORG_CONFIG
-from ..logging_util import printable_filesize
-from ..search import query_search_index
+from archivebox.logging_util import printable_filesize
+from archivebox.search import query_search_index
class HomepageView(View):
@@ -154,7 +154,7 @@ class SnapshotView(View):
'status_color': 'success' if link.is_archived else 'danger',
'oldest_archive_date': ts_to_date_str(link.oldest_archive_date),
'warc_path': warc_path,
- 'SAVE_ARCHIVE_DOT_ORG': ARCHIVEDOTORG_CONFIG.SAVE_ARCHIVE_DOT_ORG,
+ 'SAVE_ARCHIVE_DOT_ORG': archivebox.pm.hook.get_FLAT_CONFIG().SAVE_ARCHIVE_DOT_ORG,
'PREVIEW_ORIGINALS': SERVER_CONFIG.PREVIEW_ORIGINALS,
'archiveresults': sorted(archiveresults.values(), key=lambda r: all_types.index(r['name']) if r['name'] in all_types else -r['size']),
'best_result': best_result,
@@ -500,21 +500,25 @@ class HealthCheckView(View):
def find_config_section(key: str) -> str:
+ CONFIGS = archivebox.pm.hook.get_CONFIGS()
+
if key in CONSTANTS_CONFIG:
return 'CONSTANT'
matching_sections = [
- section_id for section_id, section in settings.CONFIGS.items() if key in section.model_fields
+ section_id for section_id, section in CONFIGS.items() if key in section.model_fields
]
section = matching_sections[0] if matching_sections else 'DYNAMIC'
return section
def find_config_default(key: str) -> str:
+ CONFIGS = archivebox.pm.hook.get_CONFIGS()
+
if key in CONSTANTS_CONFIG:
return str(CONSTANTS_CONFIG[key])
default_val = None
- for config in settings.CONFIGS.values():
+ for config in CONFIGS.values():
if key in config.model_fields:
default_val = config.model_fields[key].default
break
@@ -530,7 +534,9 @@ def find_config_default(key: str) -> str:
return default_val
def find_config_type(key: str) -> str:
- for config in settings.CONFIGS.values():
+ CONFIGS = archivebox.pm.hook.get_CONFIGS()
+
+ for config in CONFIGS.values():
if hasattr(config, key):
type_hints = get_type_hints(config)
try:
@@ -547,7 +553,8 @@ def key_is_safe(key: str) -> bool:
@render_with_table_view
def live_config_list_view(request: HttpRequest, **kwargs) -> TableContext:
-
+ CONFIGS = archivebox.pm.hook.get_CONFIGS()
+
assert request.user.is_superuser, 'Must be a superuser to view configuration settings.'
rows = {
@@ -560,7 +567,7 @@ def live_config_list_view(request: HttpRequest, **kwargs) -> TableContext:
# "Aliases": [],
}
- for section_id, section in reversed(list(settings.CONFIGS.items())):
+ for section_id, section in reversed(list(CONFIGS.items())):
for key, field in section.model_fields.items():
rows['Section'].append(section_id) # section.replace('_', ' ').title().replace(' Config', '')
rows['Key'].append(ItemLink(key, key=key))
@@ -570,7 +577,6 @@ def live_config_list_view(request: HttpRequest, **kwargs) -> TableContext:
# rows['Documentation'].append(mark_safe(f'Wiki: {key}'))
# rows['Aliases'].append(', '.join(find_config_aliases(key)))
-
section = 'CONSTANT'
for key in CONSTANTS_CONFIG.keys():
rows['Section'].append(section) # section.replace('_', ' ').title().replace(' Config', '')
@@ -589,7 +595,9 @@ def live_config_list_view(request: HttpRequest, **kwargs) -> TableContext:
@render_with_item_view
def live_config_value_view(request: HttpRequest, key: str, **kwargs) -> ItemContext:
-
+ CONFIGS = archivebox.pm.hook.get_CONFIGS()
+ FLAT_CONFIG = archivebox.pm.hook.get_FLAT_CONFIG()
+
assert request.user.is_superuser, 'Must be a superuser to view configuration settings.'
# aliases = USER_CONFIG.get(key, {}).get("aliases", [])
@@ -597,7 +605,7 @@ def live_config_value_view(request: HttpRequest, key: str, **kwargs) -> ItemCont
if key in CONSTANTS_CONFIG:
section_header = mark_safe(f'[CONSTANTS] {key} (read-only, hardcoded by ArchiveBox)')
- elif key in settings.FLAT_CONFIG:
+ elif key in FLAT_CONFIG:
section_header = mark_safe(f'data / ArchiveBox.conf [{find_config_section(key)}] {key}')
else:
section_header = mark_safe(f'[DYNAMIC CONFIG] {key} (read-only, calculated at runtime)')
@@ -613,7 +621,7 @@ def live_config_value_view(request: HttpRequest, key: str, **kwargs) -> ItemCont
"fields": {
'Key': key,
'Type': find_config_type(key),
- 'Value': settings.FLAT_CONFIG.get(key, settings.CONFIGS.get(key, None)) if key_is_safe(key) else '********',
+ 'Value': FLAT_CONFIG.get(key, CONFIGS.get(key, None)) if key_is_safe(key) else '********',
},
"help_texts": {
'Key': mark_safe(f'''
@@ -635,13 +643,13 @@ def live_config_value_view(request: HttpRequest, key: str, **kwargs) -> ItemCont
{find_config_default(key) or '↗️ See in ArchiveBox source code...'}
-
+
To change this value, edit data/ArchiveBox.conf or run:
archivebox config --set {key}="{
val.strip("'")
if (val := find_config_default(key)) else
- (repr(settings.FLAT_CONFIG[key] if key_is_safe(key) else '********')).strip("'")
+ (repr(FLAT_CONFIG[key] if key_is_safe(key) else '********')).strip("'")
}"
'''),
diff --git a/archivebox/extractors/__init__.py b/archivebox/extractors/__init__.py
index 07ebb415..42f9d6c7 100644
--- a/archivebox/extractors/__init__.py
+++ b/archivebox/extractors/__init__.py
@@ -27,43 +27,29 @@ from ..logging_util import (
log_archive_method_finished,
)
-from .title import should_save_title, save_title
-from .favicon import should_save_favicon, save_favicon
-from .wget import should_save_wget, save_wget
-from .singlefile import should_save_singlefile, save_singlefile
-from .readability import should_save_readability, save_readability
-from .mercury import should_save_mercury, save_mercury
-from .htmltotext import should_save_htmltotext, save_htmltotext
-from .pdf import should_save_pdf, save_pdf
-from .screenshot import should_save_screenshot, save_screenshot
-from .dom import should_save_dom, save_dom
-from .git import should_save_git, save_git
-from .media import should_save_media, save_media
-from .archive_org import should_save_archive_dot_org, save_archive_dot_org
-from .headers import should_save_headers, save_headers
-
ShouldSaveFunction = Callable[[Link, Optional[Path], Optional[bool]], bool]
SaveFunction = Callable[[Link, Optional[Path], int], ArchiveResult]
ArchiveMethodEntry = tuple[str, ShouldSaveFunction, SaveFunction]
def get_default_archive_methods() -> List[ArchiveMethodEntry]:
+ # TODO: move to abx.pm.hook.get_EXTRACTORS()
return [
- ('favicon', should_save_favicon, save_favicon),
- ('headers', should_save_headers, save_headers),
- ('singlefile', should_save_singlefile, save_singlefile),
- ('pdf', should_save_pdf, save_pdf),
- ('screenshot', should_save_screenshot, save_screenshot),
- ('dom', should_save_dom, save_dom),
- ('wget', should_save_wget, save_wget),
- # keep title, readability, and htmltotext below wget and singlefile, as they depend on them
- ('title', should_save_title, save_title),
- ('readability', should_save_readability, save_readability),
- ('mercury', should_save_mercury, save_mercury),
- ('htmltotext', should_save_htmltotext, save_htmltotext),
- ('git', should_save_git, save_git),
- ('media', should_save_media, save_media),
- ('archive_org', should_save_archive_dot_org, save_archive_dot_org),
+ # ('favicon', should_save_favicon, save_favicon),
+ # ('headers', should_save_headers, save_headers),
+ # ('singlefile', should_save_singlefile, save_singlefile),
+ # ('pdf', should_save_pdf, save_pdf),
+ # ('screenshot', should_save_screenshot, save_screenshot),
+ # ('dom', should_save_dom, save_dom),
+ # ('wget', should_save_wget, save_wget),
+ # # keep title, readability, and htmltotext below wget and singlefile, as they depend on them
+ # ('title', should_save_title, save_title),
+ # ('readability', should_save_readability, save_readability),
+ # ('mercury', should_save_mercury, save_mercury),
+ # ('htmltotext', should_save_htmltotext, save_htmltotext),
+ # ('git', should_save_git, save_git),
+ # ('media', should_save_media, save_media),
+ # ('archive_org', should_save_archive_dot_org, save_archive_dot_org),
]
ARCHIVE_METHODS_INDEXING_PRECEDENCE = [
diff --git a/archivebox/index/html.py b/archivebox/index/html.py
index eae93e67..24cad5c0 100644
--- a/archivebox/index/html.py
+++ b/archivebox/index/html.py
@@ -8,6 +8,8 @@ from typing import List, Optional, Iterator, Mapping
from django.utils.html import format_html, mark_safe # type: ignore
from django.core.cache import cache
+import abx
+
from archivebox.misc.system import atomic_write
from archivebox.misc.util import (
enforce_types,
@@ -19,7 +21,6 @@ from archivebox.misc.util import (
from archivebox.config import CONSTANTS, DATA_DIR, VERSION
from archivebox.config.common import SERVER_CONFIG
from archivebox.config.version import get_COMMIT_HASH
-from archivebox.plugins_extractor.archivedotorg.config import ARCHIVEDOTORG_CONFIG
from .schema import Link
from ..logging_util import printable_filesize
@@ -79,8 +80,10 @@ def write_html_link_details(link: Link, out_dir: Optional[str]=None) -> None:
@enforce_types
def link_details_template(link: Link) -> str:
-
- from ..extractors.wget import wget_output_path
+
+ from abx_plugin_wget.wget import wget_output_path  # same module path as the import in index/schema.py
+
+ SAVE_ARCHIVE_DOT_ORG = abx.pm.hook.get_FLAT_CONFIG().SAVE_ARCHIVE_DOT_ORG
link_info = link._asdict(extended=True)
@@ -102,7 +105,7 @@ def link_details_template(link: Link) -> str:
'status': 'archived' if link.is_archived else 'not yet archived',
'status_color': 'success' if link.is_archived else 'danger',
'oldest_archive_date': ts_to_date_str(link.oldest_archive_date),
- 'SAVE_ARCHIVE_DOT_ORG': ARCHIVEDOTORG_CONFIG.SAVE_ARCHIVE_DOT_ORG,
+ 'SAVE_ARCHIVE_DOT_ORG': SAVE_ARCHIVE_DOT_ORG,
'PREVIEW_ORIGINALS': SERVER_CONFIG.PREVIEW_ORIGINALS,
})
diff --git a/archivebox/index/json.py b/archivebox/index/json.py
index eaa93c2e..0a484c75 100644
--- a/archivebox/index/json.py
+++ b/archivebox/index/json.py
@@ -8,7 +8,7 @@ from pathlib import Path
from datetime import datetime, timezone
from typing import List, Optional, Iterator, Any, Union
-import abx.archivebox.reads
+import abx
from archivebox.config import VERSION, DATA_DIR, CONSTANTS
from archivebox.config.common import SERVER_CONFIG, SHELL_CONFIG
@@ -33,7 +33,7 @@ def generate_json_index_from_links(links: List[Link], with_headers: bool):
'docs': 'https://github.com/ArchiveBox/ArchiveBox/wiki',
'source': 'https://github.com/ArchiveBox/ArchiveBox',
'issues': 'https://github.com/ArchiveBox/ArchiveBox/issues',
- 'dependencies': dict(abx.archivebox.reads.get_BINARIES()),
+ 'dependencies': dict(abx.pm.hook.get_BINARIES()),
},
}
diff --git a/archivebox/index/schema.py b/archivebox/index/schema.py
index a3c0e967..78e80ef9 100644
--- a/archivebox/index/schema.py
+++ b/archivebox/index/schema.py
@@ -17,9 +17,9 @@ from dataclasses import dataclass, asdict, field, fields
from django.utils.functional import cached_property
-from archivebox.config import ARCHIVE_DIR, CONSTANTS
+import abx
-from plugins_extractor.favicon.config import FAVICON_CONFIG
+from archivebox.config import ARCHIVE_DIR, CONSTANTS
from archivebox.misc.system import get_dir_size
from archivebox.misc.util import ts_to_date_str, parse_date
@@ -426,7 +426,10 @@ class Link:
def canonical_outputs(self) -> Dict[str, Optional[str]]:
"""predict the expected output paths that should be present after archiving"""
- from ..extractors.wget import wget_output_path
+ from abx_plugin_wget.wget import wget_output_path
+
+ FAVICON_CONFIG = abx.pm.hook.get_CONFIGS().favicon
+
# TODO: banish this awful duplication from the codebase and import these
# from their respective extractor files
canonical = {
diff --git a/archivebox/machine/models.py b/archivebox/machine/models.py
index 229e1d83..7686b73e 100644
--- a/archivebox/machine/models.py
+++ b/archivebox/machine/models.py
@@ -8,9 +8,10 @@ from django.db import models
from django.utils import timezone
from django.utils.functional import cached_property
-import abx.archivebox.reads
+import abx
+import archivebox
-from abx.archivebox.base_binary import BaseBinary, BaseBinProvider
+from pydantic_pkgr import Binary, BinProvider
from archivebox.abid_utils.models import ABIDModel, ABIDField, AutoDateTimeField, ModelWithHealthStats
from .detect import get_host_guid, get_os_info, get_vm_info, get_host_network, get_host_stats
@@ -180,7 +181,7 @@ class NetworkInterface(ABIDModel, ModelWithHealthStats):
class InstalledBinaryManager(models.Manager):
- def get_from_db_or_cache(self, binary: BaseBinary) -> 'InstalledBinary':
+ def get_from_db_or_cache(self, binary: Binary) -> 'InstalledBinary':
"""Get or create an InstalledBinary record for a Binary on the local machine"""
global _CURRENT_BINARIES
@@ -216,7 +217,7 @@ class InstalledBinaryManager(models.Manager):
# if binary was not yet loaded from filesystem, do it now
# this is expensive, we have to find it's abspath, version, and sha256, but it's necessary
# to make sure we have a good, up-to-date record of it in the DB & in-memroy cache
- binary = binary.load(fresh=True)
+ binary = archivebox.pm.hook.binary_load(binary=binary, fresh=True)
assert binary.loaded_binprovider and binary.loaded_abspath and binary.loaded_version and binary.loaded_sha256, f'Failed to load binary {binary.name} abspath, version, and sha256'
@@ -291,8 +292,8 @@ class InstalledBinary(ABIDModel, ModelWithHealthStats):
if not hasattr(self, 'machine'):
self.machine = Machine.objects.current()
if not self.binprovider:
- all_known_binproviders = list(abx.archivebox.reads.get_BINPROVIDERS().values())
- binary = BaseBinary(name=self.name, binproviders=all_known_binproviders).load(fresh=True)
+ all_known_binproviders = list(abx.as_dict(archivebox.pm.hook.get_BINPROVIDERS()).values())
+ binary = archivebox.pm.hook.binary_load(binary=Binary(name=self.name, binproviders=all_known_binproviders), fresh=True)
self.binprovider = binary.loaded_binprovider.name if binary.loaded_binprovider else None
if not self.abspath:
self.abspath = self.BINPROVIDER.get_abspath(self.name)
@@ -304,16 +305,16 @@ class InstalledBinary(ABIDModel, ModelWithHealthStats):
super().clean(*args, **kwargs)
@cached_property
- def BINARY(self) -> BaseBinary:
- for binary in abx.archivebox.reads.get_BINARIES().values():
+ def BINARY(self) -> Binary:
+ for binary in abx.as_dict(archivebox.pm.hook.get_BINARIES()).values():
if binary.name == self.name:
return binary
raise Exception(f'Orphaned InstalledBinary {self.name} {self.binprovider} was found in DB, could not find any plugin that defines it')
# TODO: we could technically reconstruct it from scratch, but why would we ever want to do that?
@cached_property
- def BINPROVIDER(self) -> BaseBinProvider:
- for binprovider in abx.archivebox.reads.get_BINPROVIDERS().values():
+ def BINPROVIDER(self) -> BinProvider:
+ for binprovider in abx.as_dict(archivebox.pm.hook.get_BINPROVIDERS()).values():
if binprovider.name == self.binprovider:
return binprovider
raise Exception(f'Orphaned InstalledBinary(name={self.name}) was found in DB, could not find any plugin that defines BinProvider(name={self.binprovider})')
@@ -321,7 +322,7 @@ class InstalledBinary(ABIDModel, ModelWithHealthStats):
# maybe not a good idea to provide this? Binary in DB is a record of the binary's config
# whereas a loaded binary is a not-yet saved instance that may not have the same config
# why would we want to load a binary record from the db when it could be freshly loaded?
- def load_from_db(self) -> BaseBinary:
+ def load_from_db(self) -> Binary:
# TODO: implement defaults arg in pydantic_pkgr
# return self.BINARY.load(defaults={
# 'binprovider': self.BINPROVIDER,
@@ -330,7 +331,7 @@ class InstalledBinary(ABIDModel, ModelWithHealthStats):
# 'sha256': self.sha256,
# })
- return BaseBinary.model_validate({
+ return Binary.model_validate({
**self.BINARY.model_dump(),
'abspath': self.abspath and Path(self.abspath),
'version': self.version,
@@ -340,5 +341,5 @@ class InstalledBinary(ABIDModel, ModelWithHealthStats):
'overrides': self.BINARY.overrides,
})
- def load_fresh(self) -> BaseBinary:
- return self.BINARY.load(fresh=True)
+ def load_fresh(self) -> Binary:
+ return archivebox.pm.hook.binary_load(binary=self.BINARY, fresh=True)
diff --git a/archivebox/main.py b/archivebox/main.py
index fab99dc9..ce6347b2 100755
--- a/archivebox/main.py
+++ b/archivebox/main.py
@@ -14,6 +14,10 @@ from crontab import CronTab, CronSlices
from django.db.models import QuerySet
from django.utils import timezone
+from pydantic_pkgr import Binary
+
+import abx
+import archivebox
from archivebox.misc.checks import check_data_folder
from archivebox.misc.util import enforce_types # type: ignore
from archivebox.misc.system import get_dir_size, dedupe_cron_jobs, CRON_COMMENT
@@ -197,13 +201,13 @@ def version(quiet: bool=False,
from django.conf import settings
- from abx.archivebox.base_binary import BaseBinary, apt, brew, env
+ from abx_plugin_default_binproviders import apt, brew, env
from archivebox.config.version import get_COMMIT_HASH, get_BUILD_TIME
from archivebox.config.permissions import ARCHIVEBOX_USER, ARCHIVEBOX_GROUP, RUNNING_AS_UID, RUNNING_AS_GID
from archivebox.config.paths import get_data_locations, get_code_locations
- from plugins_auth.ldap.config import LDAP_CONFIG
+ LDAP_ENABLED = archivebox.pm.hook.get_FLAT_CONFIG().LDAP_ENABLED
# 0.7.1
@@ -242,7 +246,7 @@ def version(quiet: bool=False,
f'SUDO={CONSTANTS.IS_ROOT}',
f'ID={CONSTANTS.MACHINE_ID}:{CONSTANTS.COLLECTION_ID}',
f'SEARCH_BACKEND={SEARCH_BACKEND_CONFIG.SEARCH_BACKEND_ENGINE}',
- f'LDAP={LDAP_CONFIG.LDAP_ENABLED}',
+ f'LDAP={LDAP_ENABLED}',
#f'DB=django.db.backends.sqlite3 (({CONFIG["SQLITE_JOURNAL_MODE"]})', # add this if we have more useful info to show eventually
)
prnt()
@@ -264,7 +268,8 @@ def version(quiet: bool=False,
prnt('[pale_green1][i] Binary Dependencies:[/pale_green1]')
failures = []
- for name, binary in list(settings.BINARIES.items()):
+ BINARIES = abx.as_dict(archivebox.pm.hook.get_BINARIES())
+ for name, binary in list(BINARIES.items()):
if binary.name == 'archivebox':
continue
@@ -295,14 +300,15 @@ def version(quiet: bool=False,
prnt()
prnt('[gold3][i] Package Managers:[/gold3]')
- for name, binprovider in list(settings.BINPROVIDERS.items()):
+ BINPROVIDERS = abx.as_dict(archivebox.pm.hook.get_BINPROVIDERS())
+ for name, binprovider in list(BINPROVIDERS.items()):
err = None
if binproviders and binprovider.name not in binproviders:
continue
# TODO: implement a BinProvider.BINARY() method that gets the loaded binary for a binprovider's INSTALLER_BIN
- loaded_bin = binprovider.INSTALLER_BINARY or BaseBinary(name=binprovider.INSTALLER_BIN, binproviders=[env, apt, brew])
+ loaded_bin = binprovider.INSTALLER_BINARY or Binary(name=binprovider.INSTALLER_BIN, binproviders=[env, apt, brew])
abspath = None
if loaded_bin.abspath:
@@ -1050,10 +1056,7 @@ def install(out_dir: Path=DATA_DIR, binproviders: Optional[List[str]]=None, bina
# - recommend user re-run with sudo if any deps need to be installed as root
from rich import print
- from django.conf import settings
-
- import abx.archivebox.reads
from archivebox.config.permissions import IS_ROOT, ARCHIVEBOX_USER, ARCHIVEBOX_GROUP
from archivebox.config.paths import get_or_create_working_lib_dir
@@ -1076,11 +1079,11 @@ def install(out_dir: Path=DATA_DIR, binproviders: Optional[List[str]]=None, bina
package_manager_names = ', '.join(
f'[yellow]{binprovider.name}[/yellow]'
- for binprovider in reversed(list(abx.archivebox.reads.get_BINPROVIDERS().values()))
+ for binprovider in reversed(list(abx.as_dict(abx.pm.hook.get_BINPROVIDERS()).values()))
if not binproviders or (binproviders and binprovider.name in binproviders)
)
print(f'[+] Setting up package managers {package_manager_names}...')
- for binprovider in reversed(list(abx.archivebox.reads.get_BINPROVIDERS().values())):
+ for binprovider in reversed(list(abx.as_dict(abx.pm.hook.get_BINPROVIDERS()).values())):
if binproviders and binprovider.name not in binproviders:
continue
try:
@@ -1093,7 +1096,7 @@ def install(out_dir: Path=DATA_DIR, binproviders: Optional[List[str]]=None, bina
print()
- for binary in reversed(list(abx.archivebox.reads.get_BINARIES().values())):
+ for binary in reversed(list(abx.as_dict(abx.pm.hook.get_BINARIES()).values())):
if binary.name in ('archivebox', 'django', 'sqlite', 'python'):
# obviously must already be installed if we are running
continue
@@ -1123,7 +1126,8 @@ def install(out_dir: Path=DATA_DIR, binproviders: Optional[List[str]]=None, bina
result = binary.install(binproviders=[binprovider_name], dry_run=dry_run).model_dump(exclude={'overrides', 'bin_dir', 'hook_type'})
sys.stderr.write("\033[00m\n") # reset
else:
- result = binary.load_or_install(binproviders=[binprovider_name], fresh=True, dry_run=dry_run, quiet=False).model_dump(exclude={'overrides', 'bin_dir', 'hook_type'})
+ loaded_binary = archivebox.pm.hook.binary_load_or_install(binary=binary, binproviders=[binprovider_name], fresh=True, dry_run=dry_run, quiet=False)
+ result = loaded_binary.model_dump(exclude={'overrides', 'bin_dir', 'hook_type'})
if result and result['loaded_version']:
break
except Exception as e:
@@ -1134,7 +1138,8 @@ def install(out_dir: Path=DATA_DIR, binproviders: Optional[List[str]]=None, bina
binary.install(dry_run=dry_run).model_dump(exclude={'overrides', 'bin_dir', 'hook_type'})
sys.stderr.write("\033[00m\n") # reset
else:
- binary.load_or_install(fresh=True, dry_run=dry_run).model_dump(exclude={'overrides', 'bin_dir', 'hook_type'})
+ loaded_binary = archivebox.pm.hook.binary_load_or_install(binary=binary, fresh=True, dry_run=dry_run)
+ result = loaded_binary.model_dump(exclude={'overrides', 'bin_dir', 'hook_type'})
if IS_ROOT and LIB_DIR:
with SudoPermission(uid=0):
if ARCHIVEBOX_USER == 0:
@@ -1158,7 +1163,7 @@ def install(out_dir: Path=DATA_DIR, binproviders: Optional[List[str]]=None, bina
print('\n[green][√] Set up ArchiveBox and its dependencies successfully.[/green]\n', file=sys.stderr)
- from plugins_pkg.pip.binaries import ARCHIVEBOX_BINARY
+ from abx_plugin_pip.binaries import ARCHIVEBOX_BINARY
extra_args = []
if binproviders:
@@ -1184,8 +1189,6 @@ def config(config_options_str: Optional[str]=None,
out_dir: Path=DATA_DIR) -> None:
"""Get and set your ArchiveBox project configuration values"""
- import abx.archivebox.reads
-
from rich import print
check_data_folder()
@@ -1199,7 +1202,8 @@ def config(config_options_str: Optional[str]=None,
elif config_options_str:
config_options = config_options_str.split('\n')
- from django.conf import settings
+ FLAT_CONFIG = archivebox.pm.hook.get_FLAT_CONFIG()
+ CONFIGS = archivebox.pm.hook.get_CONFIGS()
config_options = config_options or []
@@ -1209,8 +1213,8 @@ def config(config_options_str: Optional[str]=None,
if search:
if config_options:
config_options = [get_real_name(key) for key in config_options]
- matching_config = {key: settings.FLAT_CONFIG[key] for key in config_options if key in settings.FLAT_CONFIG}
- for config_section in settings.CONFIGS.values():
+ matching_config = {key: FLAT_CONFIG[key] for key in config_options if key in FLAT_CONFIG}
+ for config_section in CONFIGS.values():
aliases = config_section.aliases
for search_key in config_options:
@@ -1229,15 +1233,15 @@ def config(config_options_str: Optional[str]=None,
elif get or no_args:
if config_options:
config_options = [get_real_name(key) for key in config_options]
- matching_config = {key: settings.FLAT_CONFIG[key] for key in config_options if key in settings.FLAT_CONFIG}
- failed_config = [key for key in config_options if key not in settings.FLAT_CONFIG]
+ matching_config = {key: FLAT_CONFIG[key] for key in config_options if key in FLAT_CONFIG}
+ failed_config = [key for key in config_options if key not in FLAT_CONFIG]
if failed_config:
stderr()
stderr('[X] These options failed to get', color='red')
stderr(' {}'.format('\n '.join(config_options)))
raise SystemExit(1)
else:
- matching_config = settings.FLAT_CONFIG
+ matching_config = FLAT_CONFIG
print(printable_config(matching_config))
raise SystemExit(not matching_config)
@@ -1258,20 +1262,20 @@ def config(config_options_str: Optional[str]=None,
if key != raw_key:
stderr(f'[i] Note: The config option {raw_key} has been renamed to {key}, please use the new name going forwards.', color='lightyellow')
- if key in settings.FLAT_CONFIG:
+ if key in FLAT_CONFIG:
new_config[key] = val.strip()
else:
failed_options.append(line)
if new_config:
- before = settings.FLAT_CONFIG
+ before = FLAT_CONFIG
matching_config = write_config_file(new_config)
- after = {**load_all_config(), **abx.archivebox.reads.get_FLAT_CONFIG()}
+ after = {**load_all_config(), **archivebox.pm.hook.get_FLAT_CONFIG()}
print(printable_config(matching_config))
side_effect_changes = {}
for key, val in after.items():
- if key in settings.FLAT_CONFIG and (str(before[key]) != str(after[key])) and (key not in matching_config):
+ if key in FLAT_CONFIG and (str(before[key]) != str(after[key])) and (key not in matching_config):
side_effect_changes[key] = after[key]
# import ipdb; ipdb.set_trace()
@@ -1313,7 +1317,7 @@ def schedule(add: bool=False,
"""Set ArchiveBox to regularly import URLs at specific times using cron"""
check_data_folder()
- from archivebox.plugins_pkg.pip.binaries import ARCHIVEBOX_BINARY
+ from abx_plugin_pip.binaries import ARCHIVEBOX_BINARY
from archivebox.config.permissions import USER
Path(CONSTANTS.LOGS_DIR).mkdir(exist_ok=True)
diff --git a/archivebox/misc/checks.py b/archivebox/misc/checks.py
index b0322a1e..8a2894fe 100644
--- a/archivebox/misc/checks.py
+++ b/archivebox/misc/checks.py
@@ -201,6 +201,7 @@ def check_tmp_dir(tmp_dir=None, throw=False, quiet=False, must_exist=True):
def check_lib_dir(lib_dir: Path | None = None, throw=False, quiet=False, must_exist=True):
+ import archivebox
from archivebox.config.permissions import ARCHIVEBOX_USER, ARCHIVEBOX_GROUP
from archivebox.misc.logging import STDERR
from archivebox.config.paths import dir_is_writable, get_or_create_working_lib_dir
@@ -209,6 +210,8 @@ def check_lib_dir(lib_dir: Path | None = None, throw=False, quiet=False, must_ex
lib_dir = lib_dir or STORAGE_CONFIG.LIB_DIR
+ assert lib_dir == archivebox.pm.hook.get_LIB_DIR(), "lib_dir is not the same as the one in the flat config"
+
if not must_exist and not os.path.isdir(lib_dir):
return True
diff --git a/archivebox/misc/shell_welcome_message.py b/archivebox/misc/shell_welcome_message.py
index 5b85e6bd..26314dc0 100644
--- a/archivebox/misc/shell_welcome_message.py
+++ b/archivebox/misc/shell_welcome_message.py
@@ -23,7 +23,7 @@ from archivebox import CONSTANTS # noqa
from ..main import * # noqa
from ..cli import CLI_SUBCOMMANDS
-CONFIG = settings.FLAT_CONFIG
+CONFIG = archivebox.pm.hook.get_FLAT_CONFIG()
CLI_COMMAND_NAMES = ", ".join(CLI_SUBCOMMANDS.keys())
if __name__ == '__main__':
@@ -55,6 +55,5 @@ if __name__ == '__main__':
prnt(' add[blink][deep_sky_blue4]?[/deep_sky_blue4][/blink] [grey53]# add ? after anything to get help[/]')
prnt(' add("https://example.com/some/new/url") [grey53]# call CLI methods from the shell[/]')
prnt(' snap = Snapshot.objects.filter(url__contains="https://example.com").last() [grey53]# query for individual snapshots[/]')
- prnt(' archivebox.plugins_extractor.wget.apps.WGET_EXTRACTOR.extract(snap.id) [grey53]# call an extractor directly[/]')
prnt(' snap.archiveresult_set.all() [grey53]# see extractor results[/]')
prnt(' bool(re.compile(CONFIG.URL_DENYLIST).search("https://example.com/abc.exe")) [grey53]# test out a config change[/]')
diff --git a/archivebox/parsers/pocket_api.py b/archivebox/parsers/pocket_api.py
index 9b88d958..52dbba17 100644
--- a/archivebox/parsers/pocket_api.py
+++ b/archivebox/parsers/pocket_api.py
@@ -6,8 +6,7 @@ import re
from typing import IO, Iterable, Optional
from configparser import ConfigParser
-from pocket import Pocket
-
+import archivebox
from archivebox.config import CONSTANTS
from archivebox.misc.util import enforce_types
from archivebox.misc.system import atomic_write
@@ -22,7 +21,7 @@ API_DB_PATH = CONSTANTS.SOURCES_DIR / 'pocket_api.db'
_BROKEN_PROTOCOL_RE = re.compile('^(http[s]?)(:/(?!/))')
-def get_pocket_articles(api: Pocket, since=None, page=0):
+def get_pocket_articles(api, since=None, page=0):
body, headers = api.get(
state='archive',
sort='oldest',
@@ -94,7 +93,9 @@ def should_parse_as_pocket_api(text: str) -> bool:
def parse_pocket_api_export(input_buffer: IO[str], **_kwargs) -> Iterable[Link]:
"""Parse bookmarks from the Pocket API"""
- from archivebox.plugins_extractor.pocket.config import POCKET_CONFIG
+ from pocket import Pocket
+
+ FLAT_CONFIG = archivebox.pm.hook.get_FLAT_CONFIG()
input_buffer.seek(0)
pattern = re.compile(r"^pocket:\/\/(\w+)")
@@ -102,7 +103,7 @@ def parse_pocket_api_export(input_buffer: IO[str], **_kwargs) -> Iterable[Link]:
if should_parse_as_pocket_api(line):
username = pattern.search(line).group(1)
- api = Pocket(POCKET_CONFIG.POCKET_CONSUMER_KEY, POCKET_CONFIG.POCKET_ACCESS_TOKENS[username])
+ api = Pocket(FLAT_CONFIG.POCKET_CONSUMER_KEY, FLAT_CONFIG.POCKET_ACCESS_TOKENS[username])
api.last_since = None
for article in get_pocket_articles(api, since=read_since(username)):
diff --git a/archivebox/parsers/readwise_reader_api.py b/archivebox/parsers/readwise_reader_api.py
index ad464537..20a792f3 100644
--- a/archivebox/parsers/readwise_reader_api.py
+++ b/archivebox/parsers/readwise_reader_api.py
@@ -8,9 +8,10 @@ from datetime import datetime
from typing import IO, Iterable, Optional
from configparser import ConfigParser
+import abx
+
from archivebox.misc.util import enforce_types
from archivebox.misc.system import atomic_write
-from archivebox.plugins_extractor.readwise.config import READWISE_CONFIG
from ..index.schema import Link
@@ -62,26 +63,30 @@ def link_from_article(article: dict, sources: list):
def write_cursor(username: str, since: str):
- if not READWISE_CONFIG.READWISE_DB_PATH.exists():
- atomic_write(READWISE_CONFIG.READWISE_DB_PATH, "")
+ READWISE_DB_PATH = abx.pm.hook.get_FLAT_CONFIG().READWISE_DB_PATH # get_CONFIG() impls return {name: configset} dicts, so per-option lookup goes through the flat config
+
+ if not READWISE_DB_PATH.exists():
+ atomic_write(READWISE_DB_PATH, "")
since_file = ConfigParser()
since_file.optionxform = str
- since_file.read(READWISE_CONFIG.READWISE_DB_PATH)
+ since_file.read(READWISE_DB_PATH)
since_file[username] = {"since": since}
- with open(READWISE_CONFIG.READWISE_DB_PATH, "w+") as new:
+ with open(READWISE_DB_PATH, "w+") as new:
since_file.write(new)
def read_cursor(username: str) -> Optional[str]:
- if not READWISE_CONFIG.READWISE_DB_PATH.exists():
- atomic_write(READWISE_CONFIG.READWISE_DB_PATH, "")
+ READWISE_DB_PATH = abx.pm.hook.get_FLAT_CONFIG().READWISE_DB_PATH # get_CONFIG() impls return {name: configset} dicts, so per-option lookup goes through the flat config
+
+ if not READWISE_DB_PATH.exists():
+ atomic_write(READWISE_DB_PATH, "")
config_file = ConfigParser()
config_file.optionxform = str
- config_file.read(READWISE_CONFIG.READWISE_DB_PATH)
+ config_file.read(READWISE_DB_PATH)
return config_file.get(username, "since", fallback=None)
@@ -97,12 +102,14 @@ def should_parse_as_readwise_reader_api(text: str) -> bool:
def parse_readwise_reader_api_export(input_buffer: IO[str], **_kwargs) -> Iterable[Link]:
"""Parse bookmarks from the Readwise Reader API"""
+ READWISE_READER_TOKENS = abx.pm.hook.get_FLAT_CONFIG().READWISE_READER_TOKENS # get_CONFIG() impls return {name: configset} dicts, so per-option lookup goes through the flat config
+
input_buffer.seek(0)
pattern = re.compile(r"^readwise-reader:\/\/(\w+)")
for line in input_buffer:
if should_parse_as_readwise_reader_api(line):
username = pattern.search(line).group(1)
- api = ReadwiseReaderAPI(READWISE_CONFIG.READWISE_READER_TOKENS[username], cursor=read_cursor(username))
+ api = ReadwiseReaderAPI(READWISE_READER_TOKENS[username], cursor=read_cursor(username))
for article in get_readwise_reader_articles(api):
yield link_from_article(article, sources=[line])
diff --git a/archivebox/search/__init__.py b/archivebox/search/__init__.py
index 2e7d4f69..921c074f 100644
--- a/archivebox/search/__init__.py
+++ b/archivebox/search/__init__.py
@@ -6,8 +6,8 @@ from typing import List, Union
from django.db.models import QuerySet
from django.conf import settings
-import abx.archivebox.reads
-
+import abx
+import archivebox
from archivebox.index.schema import Link
from archivebox.misc.util import enforce_types
from archivebox.misc.logging import stderr
@@ -57,7 +57,7 @@ def get_indexable_content(results: QuerySet):
def import_backend():
- for backend in abx.archivebox.reads.get_SEARCHBACKENDS().values():
+ for backend in abx.as_dict(archivebox.pm.hook.get_SEARCHBACKENDS()).values():
if backend.name == SEARCH_BACKEND_CONFIG.SEARCH_BACKEND_ENGINE:
return backend
raise Exception(f'Could not load {SEARCH_BACKEND_CONFIG.SEARCH_BACKEND_ENGINE} as search backend')
diff --git a/archivebox/vendor/__init__.py b/archivebox/vendor/__init__.py
index fcd93405..de31354a 100644
--- a/archivebox/vendor/__init__.py
+++ b/archivebox/vendor/__init__.py
@@ -4,23 +4,27 @@ from pathlib import Path
VENDOR_DIR = Path(__file__).parent
-VENDORED_LIBS = {
- # sys.path dir: library name
- #'python-atomicwrites': 'atomicwrites',
- #'django-taggit': 'taggit',
- # 'pydantic-pkgr': 'pydantic_pkgr',
- # 'pocket': 'pocket',
- #'base32-crockford': 'base32_crockford',
-}
+VENDORED_LIBS = [
+ 'abx',
+ 'pydantic-pkgr',
+ 'pocket',
+]
+
+for subdir in reversed(sorted(VENDOR_DIR.iterdir())):
+ if subdir.is_dir() and subdir.name not in VENDORED_LIBS and not subdir.name.startswith('_'):
+ VENDORED_LIBS.append(subdir.name)
def load_vendored_libs():
- for lib_subdir, lib_name in VENDORED_LIBS.items():
- lib_dir = VENDOR_DIR / lib_subdir
- assert lib_dir.is_dir(), 'Expected vendor libary {lib_name} could not be found in {lib_dir}'
+ if str(VENDOR_DIR) not in sys.path:
+ sys.path.append(str(VENDOR_DIR))
+
+ for lib_name in VENDORED_LIBS:
+ lib_dir = VENDOR_DIR / lib_name
+ assert lib_dir.is_dir(), f'Expected vendor libary {lib_name} could not be found in {lib_dir}'
try:
lib = importlib.import_module(lib_name)
- # print(f"Successfully imported lib from environment {lib_name}: {inspect.getfile(lib)}")
+ # print(f"Successfully imported lib from environment {lib_name}")
except ImportError:
sys.path.append(str(lib_dir))
try:
diff --git a/packages/abx-plugin-archivedotorg-extractor/README.md b/archivebox/vendor/abx-plugin-archivedotorg/README.md
similarity index 100%
rename from packages/abx-plugin-archivedotorg-extractor/README.md
rename to archivebox/vendor/abx-plugin-archivedotorg/README.md
diff --git a/archivebox/vendor/abx-plugin-archivedotorg/abx_plugin_archivedotorg/__init__.py b/archivebox/vendor/abx-plugin-archivedotorg/abx_plugin_archivedotorg/__init__.py
new file mode 100644
index 00000000..025d83bf
--- /dev/null
+++ b/archivebox/vendor/abx-plugin-archivedotorg/abx_plugin_archivedotorg/__init__.py
@@ -0,0 +1,21 @@
+__label__ = 'Archive.org'
+__homepage__ = 'https://archive.org'
+
+import abx
+
+@abx.hookimpl
+def get_CONFIG():
+ from .config import ARCHIVEDOTORG_CONFIG
+
+ return {
+ 'ARCHIVEDOTORG_CONFIG': ARCHIVEDOTORG_CONFIG
+ }
+
+
+# @abx.hookimpl
+# def get_EXTRACTORS():
+# from .extractors import ARCHIVEDOTORG_EXTRACTOR
+#
+# return {
+# 'archivedotorg': ARCHIVEDOTORG_EXTRACTOR,
+# }
diff --git a/archivebox/extractors/archive_org.py b/archivebox/vendor/abx-plugin-archivedotorg/abx_plugin_archivedotorg/archive_org.py
similarity index 100%
rename from archivebox/extractors/archive_org.py
rename to archivebox/vendor/abx-plugin-archivedotorg/abx_plugin_archivedotorg/archive_org.py
diff --git a/packages/abx-plugin-archivedotorg-extractor/config.py b/archivebox/vendor/abx-plugin-archivedotorg/abx_plugin_archivedotorg/config.py
similarity index 54%
rename from packages/abx-plugin-archivedotorg-extractor/config.py
rename to archivebox/vendor/abx-plugin-archivedotorg/abx_plugin_archivedotorg/config.py
index bebb6c98..f4c146ab 100644
--- a/packages/abx-plugin-archivedotorg-extractor/config.py
+++ b/archivebox/vendor/abx-plugin-archivedotorg/abx_plugin_archivedotorg/config.py
@@ -1,7 +1,4 @@
-__package__ = 'plugins_extractor.archivedotorg'
-
-
-from abx.archivebox.base_configset import BaseConfigSet
+from abx_spec_config.base_configset import BaseConfigSet
class ArchivedotorgConfig(BaseConfigSet):
diff --git a/archivebox/vendor/abx-plugin-archivedotorg/pyproject.toml b/archivebox/vendor/abx-plugin-archivedotorg/pyproject.toml
new file mode 100644
index 00000000..36c91f3c
--- /dev/null
+++ b/archivebox/vendor/abx-plugin-archivedotorg/pyproject.toml
@@ -0,0 +1,18 @@
+[project]
+name = "abx-plugin-archivedotorg"
+version = "2024.10.28"
+description = "Add your description here"
+readme = "README.md"
+requires-python = ">=3.10"
+dependencies = [
+ "abx>=0.1.0",
+ "abx-spec-config>=0.1.0",
+ "abx-plugin-curl>=2024.10.24",
+]
+
+[build-system]
+requires = ["hatchling"]
+build-backend = "hatchling.build"
+
+[project.entry-points.abx]
+abx_plugin_archivedotorg = "abx_plugin_archivedotorg"
diff --git a/packages/abx-plugin-chrome-extractor/README.md b/archivebox/vendor/abx-plugin-chrome/README.md
similarity index 100%
rename from packages/abx-plugin-chrome-extractor/README.md
rename to archivebox/vendor/abx-plugin-chrome/README.md
diff --git a/archivebox/vendor/abx-plugin-chrome/abx_plugin_chrome/__init__.py b/archivebox/vendor/abx-plugin-chrome/abx_plugin_chrome/__init__.py
new file mode 100644
index 00000000..c300bd13
--- /dev/null
+++ b/archivebox/vendor/abx-plugin-chrome/abx_plugin_chrome/__init__.py
@@ -0,0 +1,34 @@
+__label__ = 'Chrome'
+__author__ = 'ArchiveBox'
+
+import abx
+
+@abx.hookimpl
+def get_CONFIG():
+ from .config import CHROME_CONFIG
+
+ return {
+ 'CHROME_CONFIG': CHROME_CONFIG
+ }
+
+@abx.hookimpl
+def get_BINARIES():
+ from .binaries import CHROME_BINARY
+
+ return {
+ 'chrome': CHROME_BINARY,
+ }
+
+@abx.hookimpl
+def ready():
+ from .config import CHROME_CONFIG
+ CHROME_CONFIG.validate()
+
+
+# @abx.hookimpl
+# def get_EXTRACTORS():
+# return {
+# 'pdf': PDF_EXTRACTOR,
+# 'screenshot': SCREENSHOT_EXTRACTOR,
+# 'dom': DOM_EXTRACTOR,
+# }
diff --git a/packages/abx-plugin-chrome-extractor/binaries.py b/archivebox/vendor/abx-plugin-chrome/abx_plugin_chrome/binaries.py
similarity index 92%
rename from packages/abx-plugin-chrome-extractor/binaries.py
rename to archivebox/vendor/abx-plugin-chrome/abx_plugin_chrome/binaries.py
index a79b66a2..f315c992 100644
--- a/packages/abx-plugin-chrome-extractor/binaries.py
+++ b/archivebox/vendor/abx-plugin-chrome/abx_plugin_chrome/binaries.py
@@ -1,5 +1,3 @@
-__package__ = 'plugins_extractor.chrome'
-
import os
import platform
from pathlib import Path
@@ -7,17 +5,18 @@ from typing import List, Optional
from pydantic import InstanceOf
from pydantic_pkgr import (
+ Binary,
BinProvider,
BinName,
BinaryOverrides,
bin_abspath,
)
-import abx.archivebox.reads
-from abx.archivebox.base_binary import BaseBinary, env, apt, brew
+import abx
-from abx_puppeteer_binprovider.binproviders import PUPPETEER_BINPROVIDER
-from abx_playwright_binprovider.binproviders import PLAYWRIGHT_BINPROVIDER
+from abx_plugin_default_binproviders import apt, brew, env
+from abx_plugin_puppeteer.binproviders import PUPPETEER_BINPROVIDER
+from abx_plugin_playwright.binproviders import PLAYWRIGHT_BINPROVIDER
from .config import CHROME_CONFIG
@@ -81,7 +80,7 @@ def create_macos_app_symlink(target: Path, shortcut: Path):
###################### Config ##########################
-class ChromeBinary(BaseBinary):
+class ChromeBinary(Binary):
name: BinName = CHROME_CONFIG.CHROME_BINARY
binproviders_supported: List[InstanceOf[BinProvider]] = [PUPPETEER_BINPROVIDER, env, PLAYWRIGHT_BINPROVIDER, apt, brew]
@@ -105,7 +104,7 @@ class ChromeBinary(BaseBinary):
@staticmethod
def symlink_to_lib(binary, bin_dir=None) -> None:
- bin_dir = bin_dir or abx.archivebox.reads.get_CONFIGS().STORAGE_CONFIG.LIB_DIR / 'bin'
+ bin_dir = bin_dir or abx.pm.hook.get_BIN_DIR()
if not (binary.abspath and os.path.isfile(binary.abspath)):
return
diff --git a/packages/abx-plugin-chrome-extractor/config.py b/archivebox/vendor/abx-plugin-chrome/abx_plugin_chrome/config.py
similarity index 98%
rename from packages/abx-plugin-chrome-extractor/config.py
rename to archivebox/vendor/abx-plugin-chrome/abx_plugin_chrome/config.py
index be62f360..fb1d9095 100644
--- a/packages/abx-plugin-chrome-extractor/config.py
+++ b/archivebox/vendor/abx-plugin-chrome/abx_plugin_chrome/config.py
@@ -1,5 +1,3 @@
-__package__ = 'plugins_extractor.chrome'
-
import os
from pathlib import Path
from typing import List, Optional
@@ -7,8 +5,8 @@ from typing import List, Optional
from pydantic import Field
from pydantic_pkgr import bin_abspath
-from abx.archivebox.base_configset import BaseConfigSet
-from abx.archivebox.base_binary import env
+from abx_spec_config.base_configset import BaseConfigSet
+from abx_plugin_default_binproviders import env
from archivebox.config import CONSTANTS
from archivebox.config.common import ARCHIVING_CONFIG, SHELL_CONFIG
diff --git a/archivebox/extractors/dom.py b/archivebox/vendor/abx-plugin-chrome/abx_plugin_chrome/dom.py
similarity index 100%
rename from archivebox/extractors/dom.py
rename to archivebox/vendor/abx-plugin-chrome/abx_plugin_chrome/dom.py
diff --git a/archivebox/extractors/pdf.py b/archivebox/vendor/abx-plugin-chrome/abx_plugin_chrome/pdf.py
similarity index 100%
rename from archivebox/extractors/pdf.py
rename to archivebox/vendor/abx-plugin-chrome/abx_plugin_chrome/pdf.py
diff --git a/archivebox/extractors/screenshot.py b/archivebox/vendor/abx-plugin-chrome/abx_plugin_chrome/screenshot.py
similarity index 100%
rename from archivebox/extractors/screenshot.py
rename to archivebox/vendor/abx-plugin-chrome/abx_plugin_chrome/screenshot.py
diff --git a/archivebox/vendor/abx-plugin-chrome/pyproject.toml b/archivebox/vendor/abx-plugin-chrome/pyproject.toml
new file mode 100644
index 00000000..da26078d
--- /dev/null
+++ b/archivebox/vendor/abx-plugin-chrome/pyproject.toml
@@ -0,0 +1,18 @@
+[project]
+name = "abx-plugin-chrome"
+version = "2024.10.28"
+description = "Add your description here"
+readme = "README.md"
+requires-python = ">=3.10"
+dependencies = [
+ "abx>=0.1.0",
+ "abx-spec-config>=0.1.0",
+ "abx-spec-pydantic-pkgr>=0.1.0",
+]
+
+[build-system]
+requires = ["hatchling"]
+build-backend = "hatchling.build"
+
+[project.entry-points.abx]
+abx_plugin_chrome = "abx_plugin_chrome"
diff --git a/packages/abx-plugin-curl-extractor/README.md b/archivebox/vendor/abx-plugin-curl/README.md
similarity index 100%
rename from packages/abx-plugin-curl-extractor/README.md
rename to archivebox/vendor/abx-plugin-curl/README.md
diff --git a/archivebox/vendor/abx-plugin-curl/abx_plugin_curl/__init__.py b/archivebox/vendor/abx-plugin-curl/abx_plugin_curl/__init__.py
new file mode 100644
index 00000000..7988ef5e
--- /dev/null
+++ b/archivebox/vendor/abx-plugin-curl/abx_plugin_curl/__init__.py
@@ -0,0 +1,18 @@
+import abx
+
+
+@abx.hookimpl
+def get_CONFIG():
+ from .config import CURL_CONFIG
+
+ return {
+ 'curl': CURL_CONFIG
+ }
+
+@abx.hookimpl
+def get_BINARIES():
+ from .binaries import CURL_BINARY
+
+ return {
+ 'curl': CURL_BINARY,
+ }
diff --git a/packages/abx-plugin-curl-extractor/binaries.py b/archivebox/vendor/abx-plugin-curl/abx_plugin_curl/binaries.py
similarity index 57%
rename from packages/abx-plugin-curl-extractor/binaries.py
rename to archivebox/vendor/abx-plugin-curl/abx_plugin_curl/binaries.py
index 41ff9616..32628248 100644
--- a/packages/abx-plugin-curl-extractor/binaries.py
+++ b/archivebox/vendor/abx-plugin-curl/abx_plugin_curl/binaries.py
@@ -1,17 +1,17 @@
-__package__ = 'plugins_extractor.curl'
+__package__ = 'abx_plugin_curl'
from typing import List
from pydantic import InstanceOf
-from pydantic_pkgr import BinProvider, BinName
+from pydantic_pkgr import BinProvider, BinName, Binary
-from abx.archivebox.base_binary import BaseBinary, env, apt, brew
+from abx_plugin_default_binproviders import apt, brew, env
from .config import CURL_CONFIG
-class CurlBinary(BaseBinary):
+class CurlBinary(Binary):
name: BinName = CURL_CONFIG.CURL_BINARY
binproviders_supported: List[InstanceOf[BinProvider]] = [apt, brew, env]
diff --git a/packages/abx-plugin-curl-extractor/config.py b/archivebox/vendor/abx-plugin-curl/abx_plugin_curl/config.py
similarity index 90%
rename from packages/abx-plugin-curl-extractor/config.py
rename to archivebox/vendor/abx-plugin-curl/abx_plugin_curl/config.py
index 14996f66..69f4a637 100644
--- a/packages/abx-plugin-curl-extractor/config.py
+++ b/archivebox/vendor/abx-plugin-curl/abx_plugin_curl/config.py
@@ -1,11 +1,11 @@
-__package__ = 'plugins_extractor.curl'
+__package__ = 'abx_plugin_curl'
from typing import List, Optional
from pathlib import Path
from pydantic import Field
-from abx.archivebox.base_configset import BaseConfigSet
+from abx_spec_config.base_configset import BaseConfigSet
from archivebox.config.common import ARCHIVING_CONFIG
diff --git a/archivebox/extractors/headers.py b/archivebox/vendor/abx-plugin-curl/abx_plugin_curl/headers.py
similarity index 100%
rename from archivebox/extractors/headers.py
rename to archivebox/vendor/abx-plugin-curl/abx_plugin_curl/headers.py
diff --git a/archivebox/vendor/abx-plugin-curl/pyproject.toml b/archivebox/vendor/abx-plugin-curl/pyproject.toml
new file mode 100644
index 00000000..f3c6ad55
--- /dev/null
+++ b/archivebox/vendor/abx-plugin-curl/pyproject.toml
@@ -0,0 +1,18 @@
+[project]
+name = "abx-plugin-curl"
+version = "2024.10.24"
+description = "Add your description here"
+readme = "README.md"
+requires-python = ">=3.10"
+dependencies = [
+ "abx>=0.1.0",
+ "abx-spec-config>=0.1.0",
+ "abx-spec-pydantic-pkgr>=0.1.0",
+]
+
+[build-system]
+requires = ["hatchling"]
+build-backend = "hatchling.build"
+
+[project.entry-points.abx]
+abx_plugin_curl = "abx_plugin_curl"
diff --git a/packages/abx-plugin-default-binproviders/README.md b/archivebox/vendor/abx-plugin-default-binproviders/README.md
similarity index 100%
rename from packages/abx-plugin-default-binproviders/README.md
rename to archivebox/vendor/abx-plugin-default-binproviders/README.md
diff --git a/packages/abx-plugin-default-binproviders/abx_plugin_default_binproviders.py b/archivebox/vendor/abx-plugin-default-binproviders/abx_plugin_default_binproviders.py
similarity index 99%
rename from packages/abx-plugin-default-binproviders/abx_plugin_default_binproviders.py
rename to archivebox/vendor/abx-plugin-default-binproviders/abx_plugin_default_binproviders.py
index 2a628a4e..58dbdac9 100644
--- a/packages/abx-plugin-default-binproviders/abx_plugin_default_binproviders.py
+++ b/archivebox/vendor/abx-plugin-default-binproviders/abx_plugin_default_binproviders.py
@@ -16,7 +16,6 @@ env = ENV_BINPROVIDER = EnvProvider()
@abx.hookimpl(tryfirst=True)
def get_BINPROVIDERS() -> Dict[str, BinProvider]:
-
return {
'apt': APT_BINPROVIDER,
'brew': BREW_BINPROVIDER,
diff --git a/packages/abx-plugin-default-binproviders/pyproject.toml b/archivebox/vendor/abx-plugin-default-binproviders/pyproject.toml
similarity index 100%
rename from packages/abx-plugin-default-binproviders/pyproject.toml
rename to archivebox/vendor/abx-plugin-default-binproviders/pyproject.toml
diff --git a/packages/abx-plugin-favicon-extractor/README.md b/archivebox/vendor/abx-plugin-favicon/README.md
similarity index 100%
rename from packages/abx-plugin-favicon-extractor/README.md
rename to archivebox/vendor/abx-plugin-favicon/README.md
diff --git a/archivebox/vendor/abx-plugin-favicon/abx_plugin_favicon/__init__.py b/archivebox/vendor/abx-plugin-favicon/abx_plugin_favicon/__init__.py
new file mode 100644
index 00000000..75004e3d
--- /dev/null
+++ b/archivebox/vendor/abx-plugin-favicon/abx_plugin_favicon/__init__.py
@@ -0,0 +1,29 @@
+__label__ = 'Favicon'
+__version__ = '2024.10.24'
+__author__ = 'ArchiveBox'
+__homepage__ = 'https://github.com/ArchiveBox/archivebox'
+__dependencies__ = [
+ 'abx>=0.1.0',
+ 'abx-spec-config>=0.1.0',
+ 'abx-plugin-curl>=2024.10.24', # distribution renamed from abx-plugin-curl-extractor
+]
+
+import abx
+
+
+@abx.hookimpl
+def get_CONFIG():
+ from .config import FAVICON_CONFIG
+
+ return {
+ 'FAVICON_CONFIG': FAVICON_CONFIG
+ }
+
+
+# @abx.hookimpl
+# def get_EXTRACTORS():
+# from .extractors import FAVICON_EXTRACTOR
+
+# return {
+# 'favicon': FAVICON_EXTRACTOR,
+# }
diff --git a/packages/abx-plugin-favicon-extractor/config.py b/archivebox/vendor/abx-plugin-favicon/abx_plugin_favicon/config.py
similarity index 64%
rename from packages/abx-plugin-favicon-extractor/config.py
rename to archivebox/vendor/abx-plugin-favicon/abx_plugin_favicon/config.py
index 6073ef87..8b97d758 100644
--- a/packages/abx-plugin-favicon-extractor/config.py
+++ b/archivebox/vendor/abx-plugin-favicon/abx_plugin_favicon/config.py
@@ -1,7 +1,4 @@
-__package__ = 'plugins_extractor.favicon'
-
-
-from abx.archivebox.base_configset import BaseConfigSet
+from abx_spec_config.base_configset import BaseConfigSet
class FaviconConfig(BaseConfigSet):
diff --git a/archivebox/extractors/favicon.py b/archivebox/vendor/abx-plugin-favicon/abx_plugin_favicon/favicon.py
similarity index 100%
rename from archivebox/extractors/favicon.py
rename to archivebox/vendor/abx-plugin-favicon/abx_plugin_favicon/favicon.py
diff --git a/archivebox/vendor/abx-plugin-favicon/pyproject.toml b/archivebox/vendor/abx-plugin-favicon/pyproject.toml
new file mode 100644
index 00000000..cad10890
--- /dev/null
+++ b/archivebox/vendor/abx-plugin-favicon/pyproject.toml
@@ -0,0 +1,18 @@
+[project]
+name = "abx-plugin-favicon"
+version = "2024.10.28"
+description = "Favicon extractor plugin for ABX"
+readme = "README.md"
+requires-python = ">=3.10"
+dependencies = [
+ "abx>=0.1.0",
+ "abx-spec-config>=0.1.0",
+ "abx-plugin-curl>=2024.10.28",
+]
+
+[build-system]
+requires = ["hatchling"]
+build-backend = "hatchling.build"
+
+[project.entry-points.abx]
+abx_plugin_favicon = "abx_plugin_favicon"
diff --git a/packages/abx-plugin-git-extractor/README.md b/archivebox/vendor/abx-plugin-git/README.md
similarity index 100%
rename from packages/abx-plugin-git-extractor/README.md
rename to archivebox/vendor/abx-plugin-git/README.md
diff --git a/archivebox/vendor/abx-plugin-git/abx_plugin_git/__init__.py b/archivebox/vendor/abx-plugin-git/abx_plugin_git/__init__.py
new file mode 100644
index 00000000..61c04b9c
--- /dev/null
+++ b/archivebox/vendor/abx-plugin-git/abx_plugin_git/__init__.py
@@ -0,0 +1,29 @@
+__package__ = 'abx_plugin_git'
+__label__ = 'Git'
+
+import abx
+
+
+@abx.hookimpl
+def get_CONFIG():
+    """Hook implementation: return this plugin's config set keyed as 'GIT_CONFIG'."""
+    # Imported on call rather than at module import time.
+    from .config import GIT_CONFIG as git_config
+
+    return {'GIT_CONFIG': git_config}
+
+@abx.hookimpl
+def get_BINARIES():
+    """Hook implementation: return this plugin's binaries, i.e. {'git': GIT_BINARY}."""
+    # Imported on call rather than at module import time.
+    from .binaries import GIT_BINARY as git_binary
+
+    return {'git': git_binary}
+
+@abx.hookimpl
+def get_EXTRACTORS():
+    try:  # extractors.py is still a commented-out stub that defines no GIT_EXTRACTOR
+        from .extractors import GIT_EXTRACTOR
+    except ImportError:
+        return {}  # nothing to register yet; avoids raising whenever the hook is called
+    return {'git': GIT_EXTRACTOR}
diff --git a/packages/abx-plugin-git-extractor/binaries.py b/archivebox/vendor/abx-plugin-git/abx_plugin_git/binaries.py
similarity index 57%
rename from packages/abx-plugin-git-extractor/binaries.py
rename to archivebox/vendor/abx-plugin-git/abx_plugin_git/binaries.py
index 8d990769..f352fd99 100644
--- a/packages/abx-plugin-git-extractor/binaries.py
+++ b/archivebox/vendor/abx-plugin-git/abx_plugin_git/binaries.py
@@ -1,17 +1,17 @@
-__package__ = 'plugins_extractor.git'
+__package__ = 'abx_plugin_git'
from typing import List
from pydantic import InstanceOf
-from pydantic_pkgr import BinProvider, BinName
+from pydantic_pkgr import BinProvider, BinName, Binary
-from abx.archivebox.base_binary import BaseBinary, env, apt, brew
+from abx_plugin_default_binproviders import apt, brew, env
from .config import GIT_CONFIG
-class GitBinary(BaseBinary):
+class GitBinary(Binary):
name: BinName = GIT_CONFIG.GIT_BINARY
binproviders_supported: List[InstanceOf[BinProvider]] = [apt, brew, env]
diff --git a/packages/abx-plugin-git-extractor/config.py b/archivebox/vendor/abx-plugin-git/abx_plugin_git/config.py
similarity index 87%
rename from packages/abx-plugin-git-extractor/config.py
rename to archivebox/vendor/abx-plugin-git/abx_plugin_git/config.py
index 3d890d62..d8a9ca17 100644
--- a/packages/abx-plugin-git-extractor/config.py
+++ b/archivebox/vendor/abx-plugin-git/abx_plugin_git/config.py
@@ -1,10 +1,10 @@
-__package__ = 'plugins_extractor.git'
+__package__ = 'abx_plugin_git'
from typing import List
from pydantic import Field
-from abx.archivebox.base_configset import BaseConfigSet
+from abx_spec_config.base_configset import BaseConfigSet
from archivebox.config.common import ARCHIVING_CONFIG
diff --git a/archivebox/vendor/abx-plugin-git/abx_plugin_git/extractors.py b/archivebox/vendor/abx-plugin-git/abx_plugin_git/extractors.py
new file mode 100644
index 00000000..4863d031
--- /dev/null
+++ b/archivebox/vendor/abx-plugin-git/abx_plugin_git/extractors.py
@@ -0,0 +1,15 @@
+__package__ = 'abx_plugin_git'
+
+# from pathlib import Path
+
+# from .binaries import GIT_BINARY
+
+
+# class GitExtractor(BaseExtractor):
+# name: ExtractorName = 'git'
+# binary: str = GIT_BINARY.name
+
+# def get_output_path(self, snapshot) -> Path | None:
+# return snapshot.as_link() / 'git'
+
+# GIT_EXTRACTOR = GitExtractor()
diff --git a/archivebox/extractors/git.py b/archivebox/vendor/abx-plugin-git/abx_plugin_git/git.py
similarity index 95%
rename from archivebox/extractors/git.py
rename to archivebox/vendor/abx-plugin-git/abx_plugin_git/git.py
index 9ac71d3e..128ba0e7 100644
--- a/archivebox/extractors/git.py
+++ b/archivebox/vendor/abx-plugin-git/abx_plugin_git/git.py
@@ -16,8 +16,8 @@ from archivebox.misc.util import (
from ..logging_util import TimedProgress
from ..index.schema import Link, ArchiveResult, ArchiveOutput, ArchiveError
-from archivebox.plugins_extractor.git.config import GIT_CONFIG
-from archivebox.plugins_extractor.git.binaries import GIT_BINARY
+from abx_plugin_git.config import GIT_CONFIG
+from abx_plugin_git.binaries import GIT_BINARY
def get_output_path():
diff --git a/archivebox/vendor/abx-plugin-git/pyproject.toml b/archivebox/vendor/abx-plugin-git/pyproject.toml
new file mode 100644
index 00000000..384599b7
--- /dev/null
+++ b/archivebox/vendor/abx-plugin-git/pyproject.toml
@@ -0,0 +1,19 @@
+[project]
+name = "abx-plugin-git"
+version = "2024.10.28"
+description = "Git extractor plugin for ABX"
+readme = "README.md"
+requires-python = ">=3.10"
+dependencies = [
+ "abx>=0.1.0",
+ "abx-spec-config>=0.1.0",
+ "abx-spec-pydantic-pkgr>=0.1.0",
+ "abx-plugin-default-binproviders>=2024.10.24",
+]
+
+[build-system]
+requires = ["hatchling"]
+build-backend = "hatchling.build"
+
+[project.entry-points.abx]
+abx_plugin_git = "abx_plugin_git"
diff --git a/packages/abx-plugin-htmltotext-extractor/README.md b/archivebox/vendor/abx-plugin-htmltotext/README.md
similarity index 100%
rename from packages/abx-plugin-htmltotext-extractor/README.md
rename to archivebox/vendor/abx-plugin-htmltotext/README.md
diff --git a/archivebox/vendor/abx-plugin-htmltotext/abx_plugin_htmltotext/__init__.py b/archivebox/vendor/abx-plugin-htmltotext/abx_plugin_htmltotext/__init__.py
new file mode 100644
index 00000000..ebbc6800
--- /dev/null
+++ b/archivebox/vendor/abx-plugin-htmltotext/abx_plugin_htmltotext/__init__.py
@@ -0,0 +1,22 @@
+__package__ = 'abx_plugin_htmltotext'
+__label__ = 'HTML-to-Text'
+
+import abx
+
+
+@abx.hookimpl
+def get_CONFIG():
+    """Hook implementation: return this plugin's config set keyed as 'HTMLTOTEXT_CONFIG'."""
+    # Imported on call rather than at module import time.
+    from .config import HTMLTOTEXT_CONFIG as htmltotext_config
+
+    return {'HTMLTOTEXT_CONFIG': htmltotext_config}
+
+
+# @abx.hookimpl
+# def get_EXTRACTORS():
+#     from .extractors import HTMLTOTEXT_EXTRACTOR
+
+#     return {
+#         'htmltotext': HTMLTOTEXT_EXTRACTOR,
+#     }
diff --git a/packages/abx-plugin-htmltotext-extractor/config.py b/archivebox/vendor/abx-plugin-htmltotext/abx_plugin_htmltotext/config.py
similarity index 52%
rename from packages/abx-plugin-htmltotext-extractor/config.py
rename to archivebox/vendor/abx-plugin-htmltotext/abx_plugin_htmltotext/config.py
index 31b9bff5..bd3aabc6 100644
--- a/packages/abx-plugin-htmltotext-extractor/config.py
+++ b/archivebox/vendor/abx-plugin-htmltotext/abx_plugin_htmltotext/config.py
@@ -1,7 +1,4 @@
-__package__ = 'plugins_extractor.htmltotext'
-
-
-from abx.archivebox.base_configset import BaseConfigSet
+from abx_spec_config.base_configset import BaseConfigSet
class HtmltotextConfig(BaseConfigSet):
diff --git a/archivebox/extractors/htmltotext.py b/archivebox/vendor/abx-plugin-htmltotext/abx_plugin_htmltotext/htmltotext.py
similarity index 100%
rename from archivebox/extractors/htmltotext.py
rename to archivebox/vendor/abx-plugin-htmltotext/abx_plugin_htmltotext/htmltotext.py
diff --git a/packages/abx-plugin-ldap-auth/pyproject.toml b/archivebox/vendor/abx-plugin-htmltotext/pyproject.toml
similarity index 52%
rename from packages/abx-plugin-ldap-auth/pyproject.toml
rename to archivebox/vendor/abx-plugin-htmltotext/pyproject.toml
index 1db98ebd..46ebaa46 100644
--- a/packages/abx-plugin-ldap-auth/pyproject.toml
+++ b/archivebox/vendor/abx-plugin-htmltotext/pyproject.toml
@@ -1,22 +1,17 @@
[project]
-name = "abx-ldap-auth"
-version = "0.1.0"
+name = "abx-plugin-htmltotext"
+version = "2024.10.28"
description = "Add your description here"
readme = "README.md"
requires-python = ">=3.10"
-dependencies = []
-
-
-[project.entry-points.abx]
-ldap = "abx_ldap_auth"
-
+dependencies = [
+ "abx>=0.1.0",
+ "abx-spec-config>=0.1.0",
+]
[build-system]
requires = ["hatchling"]
build-backend = "hatchling.build"
-[tool.hatch.build.targets.sdist]
-packages = ["."]
-
-[tool.hatch.build.targets.wheel]
-packages = ["."]
+[project.entry-points.abx]
+abx_plugin_htmltotext = "abx_plugin_htmltotext"
diff --git a/packages/abx-plugin-ldap-auth/README.md b/archivebox/vendor/abx-plugin-ldap-auth/README.md
similarity index 100%
rename from packages/abx-plugin-ldap-auth/README.md
rename to archivebox/vendor/abx-plugin-ldap-auth/README.md
diff --git a/packages/abx-plugin-ldap-auth/__init__.py b/archivebox/vendor/abx-plugin-ldap-auth/abx_plugin_ldap_auth/__init__.py
similarity index 68%
rename from packages/abx-plugin-ldap-auth/__init__.py
rename to archivebox/vendor/abx-plugin-ldap-auth/abx_plugin_ldap_auth/__init__.py
index 6ba43b90..d4ac6431 100644
--- a/packages/abx-plugin-ldap-auth/__init__.py
+++ b/archivebox/vendor/abx-plugin-ldap-auth/abx_plugin_ldap_auth/__init__.py
@@ -1,36 +1,15 @@
-__package__ = 'plugins_auth.ldap'
-__id__ = 'ldap'
+__package__ = 'abx_plugin_ldap_auth'
__label__ = 'LDAP'
-__version__ = '2024.10.14'
-__author__ = 'ArchiveBox'
__homepage__ = 'https://github.com/django-auth-ldap/django-auth-ldap'
-__dependencies__ = ['pip']
import abx
-
-@abx.hookimpl
-def get_PLUGIN():
- return {
- __id__: {
- 'id': __id__,
- 'package': __package__,
- 'label': __label__,
- 'version': __version__,
- 'author': __author__,
- 'homepage': __homepage__,
- 'dependencies': __dependencies__,
- }
- }
-
-
-
@abx.hookimpl
def get_CONFIG():
from .config import LDAP_CONFIG
return {
- __id__: LDAP_CONFIG
+ 'LDAP_CONFIG': LDAP_CONFIG
}
@abx.hookimpl
@@ -48,12 +27,12 @@ def create_superuser_from_ldap_user(sender, user=None, ldap_user=None, **kwargs)
ArchiveBox requires staff/superuser status to view the admin at all, so we must create a user
+ set staff and superuser when LDAP authenticates a new person.
"""
- from django.conf import settings
+ from .config import LDAP_CONFIG
if user is None:
return # not authenticated at all
- if not user.id and settings.CONFIGS.ldap.LDAP_CREATE_SUPERUSER:
+ if not user.id and LDAP_CONFIG.LDAP_CREATE_SUPERUSER:
user.is_superuser = True # authenticated via LDAP, but user is not set up in DB yet
user.is_staff = True
@@ -69,9 +48,7 @@ def ready():
LDAP_CONFIG.validate()
- from django.conf import settings
-
- if settings.CONFIGS.ldap.LDAP_ENABLED:
+ if LDAP_CONFIG.LDAP_ENABLED:
# tell django-auth-ldap to call our function when a user is authenticated via LDAP
import django_auth_ldap.backend
django_auth_ldap.backend.populate_user.connect(create_superuser_from_ldap_user)
diff --git a/packages/abx-plugin-ldap-auth/binaries.py b/archivebox/vendor/abx-plugin-ldap-auth/abx_plugin_ldap_auth/binaries.py
similarity index 78%
rename from packages/abx-plugin-ldap-auth/binaries.py
rename to archivebox/vendor/abx-plugin-ldap-auth/abx_plugin_ldap_auth/binaries.py
index cc932183..8ea4776d 100644
--- a/packages/abx-plugin-ldap-auth/binaries.py
+++ b/archivebox/vendor/abx-plugin-ldap-auth/abx_plugin_ldap_auth/binaries.py
@@ -1,5 +1,4 @@
-__package__ = 'plugins_auth.ldap'
-
+__package__ = 'abx_plugin_ldap_auth'
import inspect
@@ -7,12 +6,10 @@ from typing import List
from pathlib import Path
from pydantic import InstanceOf
-from pydantic_pkgr import BinaryOverrides, SemVer
+from pydantic_pkgr import BinaryOverrides, SemVer, Binary, BinProvider
-
-from abx.archivebox.base_binary import BaseBinary, BaseBinProvider, apt
-
-from plugins_pkg.pip.binproviders import SYS_PIP_BINPROVIDER, VENV_PIP_BINPROVIDER, LIB_PIP_BINPROVIDER, VENV_SITE_PACKAGES, LIB_SITE_PACKAGES, USER_SITE_PACKAGES, SYS_SITE_PACKAGES
+from abx_plugin_default_binproviders import apt
+from abx_plugin_pip.binproviders import SYS_PIP_BINPROVIDER, VENV_PIP_BINPROVIDER, LIB_PIP_BINPROVIDER, VENV_SITE_PACKAGES, LIB_SITE_PACKAGES, USER_SITE_PACKAGES, SYS_SITE_PACKAGES
from .config import get_ldap_lib
@@ -39,10 +36,10 @@ def get_LDAP_LIB_version():
return LDAP_LIB and SemVer(LDAP_LIB.__version__)
-class LdapBinary(BaseBinary):
+class LdapBinary(Binary):
name: str = 'ldap'
description: str = 'LDAP Authentication'
- binproviders_supported: List[InstanceOf[BaseBinProvider]] = [VENV_PIP_BINPROVIDER, SYS_PIP_BINPROVIDER, LIB_PIP_BINPROVIDER, apt]
+ binproviders_supported: List[InstanceOf[BinProvider]] = [VENV_PIP_BINPROVIDER, SYS_PIP_BINPROVIDER, LIB_PIP_BINPROVIDER, apt]
overrides: BinaryOverrides = {
LIB_PIP_BINPROVIDER.name: {
diff --git a/packages/abx-plugin-ldap-auth/config.py b/archivebox/vendor/abx-plugin-ldap-auth/abx_plugin_ldap_auth/config.py
similarity index 96%
rename from packages/abx-plugin-ldap-auth/config.py
rename to archivebox/vendor/abx-plugin-ldap-auth/abx_plugin_ldap_auth/config.py
index 2094dc68..451c9da8 100644
--- a/packages/abx-plugin-ldap-auth/config.py
+++ b/archivebox/vendor/abx-plugin-ldap-auth/abx_plugin_ldap_auth/config.py
@@ -1,11 +1,11 @@
-__package__ = 'plugins_auth.ldap'
+__package__ = 'abx_plugin_ldap_auth'
import sys
from typing import Dict, List, Optional
-from pydantic import Field, model_validator, computed_field
+from pydantic import Field, computed_field
-from abx.archivebox.base_configset import BaseConfigSet
+from abx_spec_config.base_configset import BaseConfigSet
LDAP_LIB = None
LDAP_SEARCH = None
diff --git a/archivebox/vendor/abx-plugin-ldap-auth/pyproject.toml b/archivebox/vendor/abx-plugin-ldap-auth/pyproject.toml
new file mode 100644
index 00000000..a89d0cbc
--- /dev/null
+++ b/archivebox/vendor/abx-plugin-ldap-auth/pyproject.toml
@@ -0,0 +1,23 @@
+[project]
+name = "abx-plugin-ldap-auth"
+version = "2024.10.28"
+description = "LDAP authentication plugin for ABX"
+readme = "README.md"
+requires-python = ">=3.10"
+dependencies = [
+    "abx>=0.1.0",
+    "abx-spec-config>=0.1.0",
+    "abx-spec-django>=0.1.0",
+    "abx-spec-pydantic-pkgr>=0.1.0",                # binaries.py imports pydantic_pkgr
+    "abx-plugin-default-binproviders>=2024.10.24",  # binaries.py imports apt
+    "abx-plugin-pip>=2024.10.24",                   # binaries.py imports the pip binproviders
+]
+
+
+[build-system]
+requires = ["hatchling"]
+build-backend = "hatchling.build"
+
+
+[project.entry-points.abx]
+abx_plugin_ldap_auth = "abx_plugin_ldap_auth"
diff --git a/packages/abx-plugin-mercury-extractor/README.md b/archivebox/vendor/abx-plugin-mercury/README.md
similarity index 100%
rename from packages/abx-plugin-mercury-extractor/README.md
rename to archivebox/vendor/abx-plugin-mercury/README.md
diff --git a/archivebox/vendor/abx-plugin-mercury/abx_plugin_mercury/__init__.py b/archivebox/vendor/abx-plugin-mercury/abx_plugin_mercury/__init__.py
new file mode 100644
index 00000000..7b6fcfd6
--- /dev/null
+++ b/archivebox/vendor/abx-plugin-mercury/abx_plugin_mercury/__init__.py
@@ -0,0 +1,29 @@
+__package__ = 'abx_plugin_mercury'
+__label__ = 'Postlight Parser'
+__homepage__ = 'https://github.com/postlight/mercury-parser'
+
+import abx
+
+@abx.hookimpl
+def get_CONFIG():
+    """Hook implementation: return this plugin's config set keyed as 'MERCURY_CONFIG'."""
+    # Imported on call rather than at module import time.
+    from .config import MERCURY_CONFIG as mercury_config
+
+    return {'MERCURY_CONFIG': mercury_config}
+
+@abx.hookimpl
+def get_BINARIES():
+    """Hook implementation: return this plugin's binaries, i.e. {'mercury': MERCURY_BINARY}."""
+    # Imported on call rather than at module import time.
+    from .binaries import MERCURY_BINARY as mercury_binary
+
+    return {'mercury': mercury_binary}
+
+@abx.hookimpl
+def get_EXTRACTORS():
+    try:  # extractors.py is still a commented-out stub that defines no MERCURY_EXTRACTOR
+        from .extractors import MERCURY_EXTRACTOR
+    except ImportError:
+        return {}  # nothing to register yet; avoids raising whenever the hook is called
+    return {'mercury': MERCURY_EXTRACTOR}
diff --git a/packages/abx-plugin-mercury-extractor/binaries.py b/archivebox/vendor/abx-plugin-mercury/abx_plugin_mercury/binaries.py
similarity index 78%
rename from packages/abx-plugin-mercury-extractor/binaries.py
rename to archivebox/vendor/abx-plugin-mercury/abx_plugin_mercury/binaries.py
index b07055fd..f015a7ca 100644
--- a/packages/abx-plugin-mercury-extractor/binaries.py
+++ b/archivebox/vendor/abx-plugin-mercury/abx_plugin_mercury/binaries.py
@@ -1,18 +1,18 @@
-__package__ = 'plugins_extractor.mercury'
+__package__ = 'abx_plugin_mercury'
from typing import List
from pydantic import InstanceOf
-from pydantic_pkgr import BinProvider, BinName, BinaryOverrides, bin_abspath
+from pydantic_pkgr import BinProvider, BinName, BinaryOverrides, bin_abspath, Binary
-from abx.archivebox.base_binary import BaseBinary, env
+from abx_plugin_default_binproviders import env
-from archivebox.plugins_pkg.npm.binproviders import SYS_NPM_BINPROVIDER, LIB_NPM_BINPROVIDER
+from abx_plugin_npm.binproviders import SYS_NPM_BINPROVIDER, LIB_NPM_BINPROVIDER
from .config import MERCURY_CONFIG
-class MercuryBinary(BaseBinary):
+class MercuryBinary(Binary):
name: BinName = MERCURY_CONFIG.MERCURY_BINARY
binproviders_supported: List[InstanceOf[BinProvider]] = [LIB_NPM_BINPROVIDER, SYS_NPM_BINPROVIDER, env]
diff --git a/packages/abx-plugin-mercury-extractor/config.py b/archivebox/vendor/abx-plugin-mercury/abx_plugin_mercury/config.py
similarity index 90%
rename from packages/abx-plugin-mercury-extractor/config.py
rename to archivebox/vendor/abx-plugin-mercury/abx_plugin_mercury/config.py
index 49c92b73..00fa82a4 100644
--- a/packages/abx-plugin-mercury-extractor/config.py
+++ b/archivebox/vendor/abx-plugin-mercury/abx_plugin_mercury/config.py
@@ -1,11 +1,11 @@
-__package__ = 'plugins_extractor.mercury'
+__package__ = 'abx_plugin_mercury'
from typing import List, Optional
from pathlib import Path
from pydantic import Field
-from abx.archivebox.base_configset import BaseConfigSet
+from abx_spec_config.base_configset import BaseConfigSet
from archivebox.config.common import ARCHIVING_CONFIG, STORAGE_CONFIG
diff --git a/archivebox/vendor/abx-plugin-mercury/abx_plugin_mercury/extractors.py b/archivebox/vendor/abx-plugin-mercury/abx_plugin_mercury/extractors.py
new file mode 100644
index 00000000..36a17f3a
--- /dev/null
+++ b/archivebox/vendor/abx-plugin-mercury/abx_plugin_mercury/extractors.py
@@ -0,0 +1,17 @@
+__package__ = 'abx_plugin_mercury'
+
+# from pathlib import Path
+
+# from .binaries import MERCURY_BINARY
+
+
+
+# class MercuryExtractor(BaseExtractor):
+# name: ExtractorName = 'mercury'
+# binary: str = MERCURY_BINARY.name
+
+# def get_output_path(self, snapshot) -> Path | None:
+# return snapshot.link_dir / 'mercury' / 'content.html'
+
+
+# MERCURY_EXTRACTOR = MercuryExtractor()
diff --git a/archivebox/extractors/mercury.py b/archivebox/vendor/abx-plugin-mercury/abx_plugin_mercury/mercury.py
similarity index 100%
rename from archivebox/extractors/mercury.py
rename to archivebox/vendor/abx-plugin-mercury/abx_plugin_mercury/mercury.py
diff --git a/archivebox/vendor/abx-plugin-mercury/pyproject.toml b/archivebox/vendor/abx-plugin-mercury/pyproject.toml
new file mode 100644
index 00000000..c740008b
--- /dev/null
+++ b/archivebox/vendor/abx-plugin-mercury/pyproject.toml
@@ -0,0 +1,20 @@
+[project]
+name = "abx-plugin-mercury"
+version = "2024.10.28"
+description = "Mercury (Postlight Parser) extractor plugin for ABX"
+readme = "README.md"
+requires-python = ">=3.10"
+dependencies = [
+    "abx>=0.1.0",
+    "abx-spec-config>=0.1.0",
+    "abx-spec-pydantic-pkgr>=0.1.0",                # binaries.py imports pydantic_pkgr
+    "abx-plugin-default-binproviders>=2024.10.24",  # binaries.py imports env
+    "abx-plugin-npm>=2024.10.24",                   # binaries.py imports the npm binproviders
+]
+
+[build-system]
+requires = ["hatchling"]
+build-backend = "hatchling.build"
+
+[project.entry-points.abx]
+abx_plugin_mercury = "abx_plugin_mercury"
diff --git a/packages/abx-plugin-npm-binprovider/README.md b/archivebox/vendor/abx-plugin-npm/README.md
similarity index 100%
rename from packages/abx-plugin-npm-binprovider/README.md
rename to archivebox/vendor/abx-plugin-npm/README.md
diff --git a/packages/abx-plugin-npm-binprovider/abx_plugin_npm_binprovider/__init__.py b/archivebox/vendor/abx-plugin-npm/abx_plugin_npm/__init__.py
similarity index 86%
rename from packages/abx-plugin-npm-binprovider/abx_plugin_npm_binprovider/__init__.py
rename to archivebox/vendor/abx-plugin-npm/abx_plugin_npm/__init__.py
index 3901516e..d1f56f35 100644
--- a/packages/abx-plugin-npm-binprovider/abx_plugin_npm_binprovider/__init__.py
+++ b/archivebox/vendor/abx-plugin-npm/abx_plugin_npm/__init__.py
@@ -1,5 +1,3 @@
-__package__ = 'abx_plugin_npm_binprovider'
-__id__ = 'npm'
__label__ = 'NPM'
__author__ = 'ArchiveBox'
__homepage__ = 'https://www.npmjs.com/'
@@ -10,9 +8,8 @@ import abx
@abx.hookimpl
def get_CONFIG():
from .config import NPM_CONFIG
-
return {
- __id__: NPM_CONFIG,
+ 'NPM_CONFIG': NPM_CONFIG,
}
@abx.hookimpl
diff --git a/packages/abx-plugin-npm-binprovider/abx_plugin_npm_binprovider/binaries.py b/archivebox/vendor/abx-plugin-npm/abx_plugin_npm/binaries.py
similarity index 100%
rename from packages/abx-plugin-npm-binprovider/abx_plugin_npm_binprovider/binaries.py
rename to archivebox/vendor/abx-plugin-npm/abx_plugin_npm/binaries.py
diff --git a/packages/abx-plugin-npm-binprovider/abx_plugin_npm_binprovider/binproviders.py b/archivebox/vendor/abx-plugin-npm/abx_plugin_npm/binproviders.py
similarity index 94%
rename from packages/abx-plugin-npm-binprovider/abx_plugin_npm_binprovider/binproviders.py
rename to archivebox/vendor/abx-plugin-npm/abx_plugin_npm/binproviders.py
index e0b26a90..dd56e3a9 100644
--- a/packages/abx-plugin-npm-binprovider/abx_plugin_npm_binprovider/binproviders.py
+++ b/archivebox/vendor/abx-plugin-npm/abx_plugin_npm/binproviders.py
@@ -26,8 +26,7 @@ class LibNpmBinProvider(NpmProvider):
def setup(self) -> None:
# update paths from config at runtime
- LIB_DIR = abx.pm.hook.get_CONFIG().LIB_DIR
-
+ LIB_DIR = abx.pm.hook.get_LIB_DIR()
self.npm_prefix = LIB_DIR / 'npm'
self.PATH = f'{LIB_DIR / "npm" / "node_modules" / ".bin"}:{NEW_NODE_BIN_PATH}:{OLD_NODE_BIN_PATH}'
diff --git a/packages/abx-plugin-npm-binprovider/abx_plugin_npm_binprovider/config.py b/archivebox/vendor/abx-plugin-npm/abx_plugin_npm/config.py
similarity index 100%
rename from packages/abx-plugin-npm-binprovider/abx_plugin_npm_binprovider/config.py
rename to archivebox/vendor/abx-plugin-npm/abx_plugin_npm/config.py
diff --git a/packages/abx-plugin-npm-binprovider/pyproject.toml b/archivebox/vendor/abx-plugin-npm/pyproject.toml
similarity index 81%
rename from packages/abx-plugin-npm-binprovider/pyproject.toml
rename to archivebox/vendor/abx-plugin-npm/pyproject.toml
index 5d614f90..1371b2c4 100644
--- a/packages/abx-plugin-npm-binprovider/pyproject.toml
+++ b/archivebox/vendor/abx-plugin-npm/pyproject.toml
@@ -1,5 +1,5 @@
[project]
-name = "abx-plugin-npm-binprovider"
+name = "abx-plugin-npm"
version = "2024.10.24"
description = "NPM binary provider plugin for ABX"
readme = "README.md"
@@ -17,4 +17,4 @@ requires = ["hatchling"]
build-backend = "hatchling.build"
[project.entry-points.abx]
-abx_plugin_npm_binprovider = "abx_plugin_npm_binprovider"
+abx_plugin_npm = "abx_plugin_npm"
diff --git a/packages/abx-plugin-pip-binprovider/README.md b/archivebox/vendor/abx-plugin-pip/README.md
similarity index 100%
rename from packages/abx-plugin-pip-binprovider/README.md
rename to archivebox/vendor/abx-plugin-pip/README.md
diff --git a/packages/abx-plugin-pip-binprovider/abx_plugin_pip_binprovider/.plugin_order b/archivebox/vendor/abx-plugin-pip/abx_plugin_pip/.plugin_order
similarity index 100%
rename from packages/abx-plugin-pip-binprovider/abx_plugin_pip_binprovider/.plugin_order
rename to archivebox/vendor/abx-plugin-pip/abx_plugin_pip/.plugin_order
diff --git a/packages/abx-plugin-pip-binprovider/abx_plugin_pip_binprovider/__init__.py b/archivebox/vendor/abx-plugin-pip/abx_plugin_pip/__init__.py
similarity index 90%
rename from packages/abx-plugin-pip-binprovider/abx_plugin_pip_binprovider/__init__.py
rename to archivebox/vendor/abx-plugin-pip/abx_plugin_pip/__init__.py
index 8445055f..eebcdb5b 100644
--- a/packages/abx-plugin-pip-binprovider/abx_plugin_pip_binprovider/__init__.py
+++ b/archivebox/vendor/abx-plugin-pip/abx_plugin_pip/__init__.py
@@ -1,5 +1,4 @@
-__package__ = 'abx_plugin_pip_binprovider'
-__id__ = 'pip'
+__package__ = 'abx_plugin_pip'
__label__ = 'PIP'
import abx
@@ -10,7 +9,7 @@ def get_CONFIG():
from .config import PIP_CONFIG
return {
- __id__: PIP_CONFIG
+ 'PIP_CONFIG': PIP_CONFIG
}
@abx.hookimpl(tryfirst=True)
diff --git a/packages/abx-plugin-pip-binprovider/abx_plugin_pip_binprovider/binaries.py b/archivebox/vendor/abx-plugin-pip/abx_plugin_pip/binaries.py
similarity index 99%
rename from packages/abx-plugin-pip-binprovider/abx_plugin_pip_binprovider/binaries.py
rename to archivebox/vendor/abx-plugin-pip/abx_plugin_pip/binaries.py
index b1974250..18e5f34f 100644
--- a/packages/abx-plugin-pip-binprovider/abx_plugin_pip_binprovider/binaries.py
+++ b/archivebox/vendor/abx-plugin-pip/abx_plugin_pip/binaries.py
@@ -1,4 +1,4 @@
-__package__ = 'abx_plugin_pip_binprovider'
+__package__ = 'abx_plugin_pip'
import sys
from pathlib import Path
diff --git a/packages/abx-plugin-pip-binprovider/abx_plugin_pip_binprovider/binproviders.py b/archivebox/vendor/abx-plugin-pip/abx_plugin_pip/binproviders.py
similarity index 98%
rename from packages/abx-plugin-pip-binprovider/abx_plugin_pip_binprovider/binproviders.py
rename to archivebox/vendor/abx-plugin-pip/abx_plugin_pip/binproviders.py
index 1c245b62..c29798b0 100644
--- a/packages/abx-plugin-pip-binprovider/abx_plugin_pip_binprovider/binproviders.py
+++ b/archivebox/vendor/abx-plugin-pip/abx_plugin_pip/binproviders.py
@@ -58,7 +58,7 @@ class LibPipBinProvider(PipProvider):
def setup(self) -> None:
# update venv path to match most up-to-date LIB_DIR based on runtime config
- LIB_DIR = abx.pm.hook.get_FLAT_CONFIG().LIB_DIR
+ LIB_DIR = abx.pm.hook.get_LIB_DIR()
self.pip_venv = LIB_DIR / 'pip' / 'venv'
super().setup()
diff --git a/packages/abx-plugin-pip-binprovider/abx_plugin_pip_binprovider/config.py b/archivebox/vendor/abx-plugin-pip/abx_plugin_pip/config.py
similarity index 86%
rename from packages/abx-plugin-pip-binprovider/abx_plugin_pip_binprovider/config.py
rename to archivebox/vendor/abx-plugin-pip/abx_plugin_pip/config.py
index 26cf0f8e..f7464810 100644
--- a/packages/abx-plugin-pip-binprovider/abx_plugin_pip_binprovider/config.py
+++ b/archivebox/vendor/abx-plugin-pip/abx_plugin_pip/config.py
@@ -3,7 +3,7 @@ __package__ = 'pip'
from typing import List, Optional
from pydantic import Field
-from abx.archivebox.base_configset import BaseConfigSet
+from abx_spec_config.base_configset import BaseConfigSet
class PipDependencyConfigs(BaseConfigSet):
diff --git a/packages/abx-plugin-pip-binprovider/pyproject.toml b/archivebox/vendor/abx-plugin-pip/pyproject.toml
similarity index 82%
rename from packages/abx-plugin-pip-binprovider/pyproject.toml
rename to archivebox/vendor/abx-plugin-pip/pyproject.toml
index 3f6364e0..03f88d0b 100644
--- a/packages/abx-plugin-pip-binprovider/pyproject.toml
+++ b/archivebox/vendor/abx-plugin-pip/pyproject.toml
@@ -1,5 +1,5 @@
[project]
-name = "abx-plugin-pip-binprovider"
+name = "abx-plugin-pip"
version = "2024.10.24"
description = "Add your description here"
readme = "README.md"
@@ -19,4 +19,4 @@ requires = ["hatchling"]
build-backend = "hatchling.build"
[project.entry-points.abx]
-abx_plugin_pip_binprovider = "abx_plugin_pip_binprovider"
+abx_plugin_pip = "abx_plugin_pip"
diff --git a/packages/abx-plugin-playwright-binprovider/README.md b/archivebox/vendor/abx-plugin-playwright/README.md
similarity index 100%
rename from packages/abx-plugin-playwright-binprovider/README.md
rename to archivebox/vendor/abx-plugin-playwright/README.md
diff --git a/packages/abx-plugin-playwright-binprovider/abx_plugin_playwright_binprovider/__init__.py b/archivebox/vendor/abx-plugin-playwright/abx_plugin_playwright/__init__.py
similarity index 78%
rename from packages/abx-plugin-playwright-binprovider/abx_plugin_playwright_binprovider/__init__.py
rename to archivebox/vendor/abx-plugin-playwright/abx_plugin_playwright/__init__.py
index 557f12c0..6d3ed715 100644
--- a/packages/abx-plugin-playwright-binprovider/abx_plugin_playwright_binprovider/__init__.py
+++ b/archivebox/vendor/abx-plugin-playwright/abx_plugin_playwright/__init__.py
@@ -1,7 +1,4 @@
-__package__ = 'abx_plugin_playwright_binprovider'
-__id__ = 'playwright'
__label__ = 'Playwright'
-__author__ = 'ArchiveBox'
__homepage__ = 'https://github.com/microsoft/playwright-python'
import abx
@@ -10,9 +7,8 @@ import abx
@abx.hookimpl
def get_CONFIG():
from .config import PLAYWRIGHT_CONFIG
-
return {
- __id__: PLAYWRIGHT_CONFIG
+ 'PLAYWRIGHT_CONFIG': PLAYWRIGHT_CONFIG
}
@abx.hookimpl
diff --git a/packages/abx-plugin-playwright-binprovider/abx_plugin_playwright_binprovider/binaries.py b/archivebox/vendor/abx-plugin-playwright/abx_plugin_playwright/binaries.py
similarity index 73%
rename from packages/abx-plugin-playwright-binprovider/abx_plugin_playwright_binprovider/binaries.py
rename to archivebox/vendor/abx-plugin-playwright/abx_plugin_playwright/binaries.py
index 333da054..4b77d9d4 100644
--- a/packages/abx-plugin-playwright-binprovider/abx_plugin_playwright_binprovider/binaries.py
+++ b/archivebox/vendor/abx-plugin-playwright/abx_plugin_playwright/binaries.py
@@ -1,4 +1,4 @@
-__package__ = 'abx_plugin_playwright_binprovider'
+__package__ = 'abx_plugin_playwright'
from typing import List
@@ -6,7 +6,7 @@ from pydantic import InstanceOf
from pydantic_pkgr import BinName, BinProvider, Binary
-from abx_plugin_pip_binprovider.binproviders import LIB_PIP_BINPROVIDER, VENV_PIP_BINPROVIDER, SYS_PIP_BINPROVIDER
+from abx_plugin_pip.binproviders import LIB_PIP_BINPROVIDER, VENV_PIP_BINPROVIDER, SYS_PIP_BINPROVIDER
from abx_plugin_default_binproviders import env
from .config import PLAYWRIGHT_CONFIG
diff --git a/packages/abx-plugin-playwright-binprovider/abx_plugin_playwright_binprovider/binproviders.py b/archivebox/vendor/abx-plugin-playwright/abx_plugin_playwright/binproviders.py
similarity index 98%
rename from packages/abx-plugin-playwright-binprovider/abx_plugin_playwright_binprovider/binproviders.py
rename to archivebox/vendor/abx-plugin-playwright/abx_plugin_playwright/binproviders.py
index 8e472988..6bc44815 100644
--- a/packages/abx-plugin-playwright-binprovider/abx_plugin_playwright_binprovider/binproviders.py
+++ b/archivebox/vendor/abx-plugin-playwright/abx_plugin_playwright/binproviders.py
@@ -1,4 +1,4 @@
-__package__ = 'abx_plugin_playwright_binprovider'
+__package__ = 'abx_plugin_playwright'
import os
import shutil
@@ -59,7 +59,7 @@ class PlaywrightBinProvider(BinProvider):
def setup(self) -> None:
# update paths from config at runtime
- LIB_DIR = abx.pm.hook.get_FLAT_CONFIG().LIB_DIR
+ LIB_DIR = abx.pm.hook.get_LIB_DIR()
self.PATH = f"{LIB_DIR / 'bin'}:{DEFAULT_ENV_PATH}"
diff --git a/packages/abx-plugin-playwright-binprovider/abx_plugin_playwright_binprovider/config.py b/archivebox/vendor/abx-plugin-playwright/abx_plugin_playwright/config.py
similarity index 100%
rename from packages/abx-plugin-playwright-binprovider/abx_plugin_playwright_binprovider/config.py
rename to archivebox/vendor/abx-plugin-playwright/abx_plugin_playwright/config.py
diff --git a/packages/abx-plugin-playwright-binprovider/pyproject.toml b/archivebox/vendor/abx-plugin-playwright/pyproject.toml
similarity index 72%
rename from packages/abx-plugin-playwright-binprovider/pyproject.toml
rename to archivebox/vendor/abx-plugin-playwright/pyproject.toml
index a6c8937b..0ad0d995 100644
--- a/packages/abx-plugin-playwright-binprovider/pyproject.toml
+++ b/archivebox/vendor/abx-plugin-playwright/pyproject.toml
@@ -1,6 +1,6 @@
[project]
-name = "abx-plugin-playwright-binprovider"
-version = "2024.10.24"
+name = "abx-plugin-playwright"
+version = "2024.10.28"
description = "Add your description here"
readme = "README.md"
requires-python = ">=3.10"
@@ -17,4 +17,4 @@ requires = ["hatchling"]
build-backend = "hatchling.build"
[project.entry-points.abx]
-abx_plugin_playwright_binprovider = "abx_plugin_playwright_binprovider"
+abx_plugin_playwright = "abx_plugin_playwright"
diff --git a/packages/abx-plugin-pocket-extractor/README.md b/archivebox/vendor/abx-plugin-pocket/README.md
similarity index 100%
rename from packages/abx-plugin-pocket-extractor/README.md
rename to archivebox/vendor/abx-plugin-pocket/README.md
diff --git a/archivebox/vendor/abx-plugin-pocket/abx_plugin_pocket/__init__.py b/archivebox/vendor/abx-plugin-pocket/abx_plugin_pocket/__init__.py
new file mode 100644
index 00000000..09e5dc8f
--- /dev/null
+++ b/archivebox/vendor/abx-plugin-pocket/abx_plugin_pocket/__init__.py
@@ -0,0 +1,18 @@
+__package__ = 'abx_plugin_pocket'
+__label__ = 'Pocket'
+
+import abx
+
+
+@abx.hookimpl
+def get_CONFIG():
+    """Return a dict mapping 'POCKET_CONFIG' to this plugin's config set.
+
+    The config module is imported inside the hook body, so it is only
+    loaded when the hook is actually invoked.
+    """
+    from .config import POCKET_CONFIG as pocket_config
+
+    return dict(POCKET_CONFIG=pocket_config)
+
+@abx.hookimpl
+def ready():
+    """Call validate() on the Pocket config set when the `ready` hook runs."""
+    from .config import POCKET_CONFIG as pocket_config
+
+    pocket_config.validate()
diff --git a/packages/abx-plugin-pocket-extractor/config.py b/archivebox/vendor/abx-plugin-pocket/abx_plugin_pocket/config.py
similarity index 76%
rename from packages/abx-plugin-pocket-extractor/config.py
rename to archivebox/vendor/abx-plugin-pocket/abx_plugin_pocket/config.py
index 7866a1f6..2db072a1 100644
--- a/packages/abx-plugin-pocket-extractor/config.py
+++ b/archivebox/vendor/abx-plugin-pocket/abx_plugin_pocket/config.py
@@ -1,10 +1,10 @@
-__package__ = 'plugins_extractor.pocket'
+__package__ = 'abx_plugin_pocket'
from typing import Dict
from pydantic import Field
-from abx.archivebox.base_configset import BaseConfigSet
+from abx_spec_config.base_configset import BaseConfigSet
class PocketConfig(BaseConfigSet):
diff --git a/archivebox/vendor/abx-plugin-pocket/pyproject.toml b/archivebox/vendor/abx-plugin-pocket/pyproject.toml
new file mode 100644
index 00000000..999fa098
--- /dev/null
+++ b/archivebox/vendor/abx-plugin-pocket/pyproject.toml
@@ -0,0 +1,18 @@
+[project]
+name = "abx-plugin-pocket"
+version = "2024.10.28"
+description = "Add your description here"
+readme = "README.md"
+requires-python = ">=3.10"
+dependencies = [
+ "abx>=0.1.0",
+ "abx-spec-config>=0.1.0",
+ "pocket>=0.3.6",
+]
+
+[build-system]
+requires = ["hatchling"]
+build-backend = "hatchling.build"
+
+[project.entry-points.abx]
+abx_plugin_pocket = "abx_plugin_pocket"
diff --git a/packages/abx-plugin-puppeteer-binprovider/README.md b/archivebox/vendor/abx-plugin-puppeteer/README.md
similarity index 100%
rename from packages/abx-plugin-puppeteer-binprovider/README.md
rename to archivebox/vendor/abx-plugin-puppeteer/README.md
diff --git a/archivebox/vendor/abx-plugin-puppeteer/abx_plugin_puppeteer/__init__.py b/archivebox/vendor/abx-plugin-puppeteer/abx_plugin_puppeteer/__init__.py
new file mode 100644
index 00000000..1ee876d6
--- /dev/null
+++ b/archivebox/vendor/abx-plugin-puppeteer/abx_plugin_puppeteer/__init__.py
@@ -0,0 +1,30 @@
+__package__ = 'abx_plugin_puppeteer'
+__label__ = 'Puppeteer'
+__homepage__ = 'https://github.com/puppeteer/puppeteer'
+
+import abx
+
+
+@abx.hookimpl
+def get_CONFIG():
+    """Return a dict mapping 'PUPPETEER_CONFIG' to this plugin's config set."""
+    # Imported here rather than at module level so .config loads only on hook invocation.
+    from .config import PUPPETEER_CONFIG as puppeteer_config
+
+    config_sets = {}
+    config_sets['PUPPETEER_CONFIG'] = puppeteer_config
+    return config_sets
+
+@abx.hookimpl
+def get_BINARIES():
+    """Return a dict mapping 'puppeteer' to PUPPETEER_BINARY."""
+    # Imported here rather than at module level so .binaries loads only on hook invocation.
+    from .binaries import PUPPETEER_BINARY as puppeteer_binary
+
+    return dict(puppeteer=puppeteer_binary)
+
+@abx.hookimpl
+def get_BINPROVIDERS():
+    """Return a dict mapping 'puppeteer' to PUPPETEER_BINPROVIDER."""
+    # Imported here rather than at module level so .binproviders loads only on hook invocation.
+    from .binproviders import PUPPETEER_BINPROVIDER as puppeteer_binprovider
+
+    binproviders = {'puppeteer': puppeteer_binprovider}
+    return binproviders
diff --git a/packages/abx-plugin-puppeteer-binprovider/binaries.py b/archivebox/vendor/abx-plugin-puppeteer/abx_plugin_puppeteer/binaries.py
similarity index 54%
rename from packages/abx-plugin-puppeteer-binprovider/binaries.py
rename to archivebox/vendor/abx-plugin-puppeteer/abx_plugin_puppeteer/binaries.py
index 7e592bba..8afd484f 100644
--- a/packages/abx-plugin-puppeteer-binprovider/binaries.py
+++ b/archivebox/vendor/abx-plugin-puppeteer/abx_plugin_puppeteer/binaries.py
@@ -1,20 +1,20 @@
-__package__ = 'plugins_pkg.puppeteer'
+__package__ = 'abx_plugin_puppeteer'
from typing import List
from pydantic import InstanceOf
-from pydantic_pkgr import BinProvider, BinName
+from pydantic_pkgr import BinProvider, BinName, Binary
-from abx.archivebox.base_binary import BaseBinary, env
+from abx_plugin_default_binproviders import env
-from plugins_pkg.npm.binproviders import LIB_NPM_BINPROVIDER, SYS_NPM_BINPROVIDER
+from abx_plugin_npm.binproviders import LIB_NPM_BINPROVIDER, SYS_NPM_BINPROVIDER
###################### Config ##########################
-class PuppeteerBinary(BaseBinary):
+class PuppeteerBinary(Binary):
name: BinName = "puppeteer"
binproviders_supported: List[InstanceOf[BinProvider]] = [LIB_NPM_BINPROVIDER, SYS_NPM_BINPROVIDER, env]
diff --git a/packages/abx-plugin-puppeteer-binprovider/binproviders.py b/archivebox/vendor/abx-plugin-puppeteer/abx_plugin_puppeteer/binproviders.py
similarity index 93%
rename from packages/abx-plugin-puppeteer-binprovider/binproviders.py
rename to archivebox/vendor/abx-plugin-puppeteer/abx_plugin_puppeteer/binproviders.py
index 0fa9ca33..e7b697bd 100644
--- a/packages/abx-plugin-puppeteer-binprovider/binproviders.py
+++ b/archivebox/vendor/abx-plugin-puppeteer/abx_plugin_puppeteer/binproviders.py
@@ -1,5 +1,3 @@
-__package__ = 'plugins_pkg.puppeteer'
-
import os
import platform
from pathlib import Path
@@ -7,6 +5,7 @@ from typing import List, Optional, Dict, ClassVar
from pydantic import Field
from pydantic_pkgr import (
+ BinProvider,
BinName,
BinProviderName,
BinProviderOverrides,
@@ -15,15 +14,15 @@ from pydantic_pkgr import (
HostBinPath,
)
+import abx
+
from archivebox.config import CONSTANTS
from archivebox.config.permissions import ARCHIVEBOX_USER
-from abx.archivebox.base_binary import BaseBinProvider
-
-from plugins_pkg.npm.binproviders import SYS_NPM_BINPROVIDER
+from abx_plugin_npm.binproviders import SYS_NPM_BINPROVIDER
-class PuppeteerBinProvider(BaseBinProvider):
+class PuppeteerBinProvider(BinProvider):
name: BinProviderName = "puppeteer"
INSTALLER_BIN: BinName = "npx"
@@ -44,9 +43,10 @@ class PuppeteerBinProvider(BaseBinProvider):
def setup(self) -> None:
# update paths from config, don't do this lazily because we dont want to import archivebox.config.common at import-time
# we want to avoid depending on archivebox from abx code if at all possible
- from archivebox.config.common import STORAGE_CONFIG
- self.puppeteer_browsers_dir = STORAGE_CONFIG.LIB_DIR / 'browsers'
- self.PATH = str(STORAGE_CONFIG.LIB_DIR / 'bin')
+ LIB_DIR = abx.pm.hook.get_LIB_DIR()
+ BIN_DIR = abx.pm.hook.get_BIN_DIR()
+ self.puppeteer_browsers_dir = LIB_DIR / 'browsers'
+ self.PATH = str(BIN_DIR)
assert SYS_NPM_BINPROVIDER.INSTALLER_BIN_ABSPATH, "NPM bin provider not initialized"
diff --git a/packages/abx-plugin-puppeteer-binprovider/config.py b/archivebox/vendor/abx-plugin-puppeteer/abx_plugin_puppeteer/config.py
similarity index 79%
rename from packages/abx-plugin-puppeteer-binprovider/config.py
rename to archivebox/vendor/abx-plugin-puppeteer/abx_plugin_puppeteer/config.py
index b76d0779..f09e7062 100644
--- a/packages/abx-plugin-puppeteer-binprovider/config.py
+++ b/archivebox/vendor/abx-plugin-puppeteer/abx_plugin_puppeteer/config.py
@@ -1,7 +1,7 @@
-__package__ = 'plugins_pkg.puppeteer'
+__package__ = 'abx_plugin_puppeteer'
-from abx.archivebox.base_configset import BaseConfigSet
+from abx_spec_config.base_configset import BaseConfigSet
###################### Config ##########################
diff --git a/archivebox/vendor/abx-plugin-puppeteer/pyproject.toml b/archivebox/vendor/abx-plugin-puppeteer/pyproject.toml
new file mode 100644
index 00000000..2633b481
--- /dev/null
+++ b/archivebox/vendor/abx-plugin-puppeteer/pyproject.toml
@@ -0,0 +1,19 @@
+[project]
+name = "abx-plugin-puppeteer"
+version = "2024.10.28"
+description = "Add your description here"
+readme = "README.md"
+requires-python = ">=3.10"
+dependencies = [
+ "abx>=0.1.0",
+ "abx-spec-config>=0.1.0",
+ "abx-spec-pydantic-pkgr>=0.1.0",
+ "pydantic-pkgr>=0.5.4",
+]
+
+[build-system]
+requires = ["hatchling"]
+build-backend = "hatchling.build"
+
+[project.entry-points.abx]
+abx_plugin_puppeteer = "abx_plugin_puppeteer"
diff --git a/packages/abx-plugin-readability-extractor/README.md b/archivebox/vendor/abx-plugin-readability/README.md
similarity index 100%
rename from packages/abx-plugin-readability-extractor/README.md
rename to archivebox/vendor/abx-plugin-readability/README.md
diff --git a/archivebox/vendor/abx-plugin-readability/abx_plugin_readability/__init__.py b/archivebox/vendor/abx-plugin-readability/abx_plugin_readability/__init__.py
new file mode 100644
index 00000000..cb7d35af
--- /dev/null
+++ b/archivebox/vendor/abx-plugin-readability/abx_plugin_readability/__init__.py
@@ -0,0 +1,30 @@
+__package__ = 'abx_plugin_readability'
+__label__ = 'Readability'
+__homepage__ = 'https://github.com/ArchiveBox/readability-extractor'
+
+import abx
+
+
+@abx.hookimpl
+def get_CONFIG():
+    """Return a dict mapping 'READABILITY_CONFIG' to this plugin's config set."""
+    # Imported here rather than at module level so .config loads only on hook invocation.
+    from .config import READABILITY_CONFIG as readability_config
+
+    return dict(READABILITY_CONFIG=readability_config)
+
+@abx.hookimpl
+def get_BINARIES():
+    """Return a dict mapping 'readability' to READABILITY_BINARY."""
+    # Imported here rather than at module level so .binaries loads only on hook invocation.
+    from .binaries import READABILITY_BINARY as readability_binary
+
+    binaries = {}
+    binaries['readability'] = readability_binary
+    return binaries
+
+@abx.hookimpl
+def get_EXTRACTORS():
+    """Return a dict mapping 'readability' to READABILITY_EXTRACTOR.
+
+    Returns an empty dict when the ``.extractors`` module does not define
+    READABILITY_EXTRACTOR, instead of raising ImportError when the hook is
+    invoked. Errors raised while importing ``.extractors`` itself still
+    propagate.
+    """
+    from . import extractors
+
+    # extractors.py may not define the extractor (e.g. while its class
+    # definition is commented out), so look the name up defensively.
+    extractor = getattr(extractors, 'READABILITY_EXTRACTOR', None)
+    if extractor is None:
+        return {}
+
+    return {
+        'readability': extractor,
+    }
diff --git a/packages/abx-plugin-readability-extractor/binaries.py b/archivebox/vendor/abx-plugin-readability/abx_plugin_readability/binaries.py
similarity index 69%
rename from packages/abx-plugin-readability-extractor/binaries.py
rename to archivebox/vendor/abx-plugin-readability/abx_plugin_readability/binaries.py
index 43343924..65ecf57c 100644
--- a/packages/abx-plugin-readability-extractor/binaries.py
+++ b/archivebox/vendor/abx-plugin-readability/abx_plugin_readability/binaries.py
@@ -1,20 +1,19 @@
-__package__ = 'plugins_extractor.readability'
+__package__ = 'abx_plugin_readability'
from typing import List
from pydantic import InstanceOf
-from pydantic_pkgr import BinProvider, BinaryOverrides, BinName
+from pydantic_pkgr import Binary, BinProvider, BinaryOverrides, BinName
-from abx.archivebox.base_binary import BaseBinary, env
-
-from plugins_pkg.npm.binproviders import SYS_NPM_BINPROVIDER, LIB_NPM_BINPROVIDER
+from abx_plugin_default_binproviders import env
+from abx_plugin_npm.binproviders import SYS_NPM_BINPROVIDER, LIB_NPM_BINPROVIDER
from .config import READABILITY_CONFIG
READABILITY_PACKAGE_NAME = 'github:ArchiveBox/readability-extractor'
-class ReadabilityBinary(BaseBinary):
+class ReadabilityBinary(Binary):
name: BinName = READABILITY_CONFIG.READABILITY_BINARY
binproviders_supported: List[InstanceOf[BinProvider]] = [LIB_NPM_BINPROVIDER, SYS_NPM_BINPROVIDER, env]
diff --git a/packages/abx-plugin-readability-extractor/config.py b/archivebox/vendor/abx-plugin-readability/abx_plugin_readability/config.py
similarity index 83%
rename from packages/abx-plugin-readability-extractor/config.py
rename to archivebox/vendor/abx-plugin-readability/abx_plugin_readability/config.py
index 8066d56c..726295fe 100644
--- a/packages/abx-plugin-readability-extractor/config.py
+++ b/archivebox/vendor/abx-plugin-readability/abx_plugin_readability/config.py
@@ -1,8 +1,6 @@
-__package__ = 'plugins_extractor.readability'
-
from pydantic import Field
-from abx.archivebox.base_configset import BaseConfigSet
+from abx_spec_config.base_configset import BaseConfigSet
from archivebox.config.common import ARCHIVING_CONFIG
diff --git a/archivebox/vendor/abx-plugin-readability/abx_plugin_readability/extractors.py b/archivebox/vendor/abx-plugin-readability/abx_plugin_readability/extractors.py
new file mode 100644
index 00000000..64d712ed
--- /dev/null
+++ b/archivebox/vendor/abx-plugin-readability/abx_plugin_readability/extractors.py
@@ -0,0 +1,19 @@
+# __package__ = 'abx_plugin_readability'
+
+# from pathlib import Path
+
+# from pydantic_pkgr import BinName
+
+
+# from .binaries import READABILITY_BINARY
+
+
+# class ReadabilityExtractor(BaseExtractor):
+# name: str = 'readability'
+# binary: BinName = READABILITY_BINARY.name
+
+# def get_output_path(self, snapshot) -> Path:
+# return Path(snapshot.link_dir) / 'readability' / 'content.html'
+
+
+# READABILITY_EXTRACTOR = ReadabilityExtractor()
diff --git a/archivebox/extractors/readability.py b/archivebox/vendor/abx-plugin-readability/abx_plugin_readability/readability.py
similarity index 100%
rename from archivebox/extractors/readability.py
rename to archivebox/vendor/abx-plugin-readability/abx_plugin_readability/readability.py
diff --git a/archivebox/vendor/abx-plugin-readability/pyproject.toml b/archivebox/vendor/abx-plugin-readability/pyproject.toml
new file mode 100644
index 00000000..59a2db64
--- /dev/null
+++ b/archivebox/vendor/abx-plugin-readability/pyproject.toml
@@ -0,0 +1,17 @@
+[project]
+name = "abx-plugin-readability"
+version = "2024.10.28"
+description = "Add your description here"
+readme = "README.md"
+requires-python = ">=3.10"
+dependencies = [
+ "abx>=0.1.0",
+ "abx-spec-config>=0.1.0",
+]
+
+[build-system]
+requires = ["hatchling"]
+build-backend = "hatchling.build"
+
+[project.entry-points.abx]
+abx_plugin_readability = "abx_plugin_readability"
diff --git a/packages/abx-plugin-readwise-extractor/README.md b/archivebox/vendor/abx-plugin-readwise/README.md
similarity index 100%
rename from packages/abx-plugin-readwise-extractor/README.md
rename to archivebox/vendor/abx-plugin-readwise/README.md
diff --git a/archivebox/vendor/abx-plugin-readwise/abx_plugin_readwise.py b/archivebox/vendor/abx-plugin-readwise/abx_plugin_readwise.py
new file mode 100644
index 00000000..ea31cd14
--- /dev/null
+++ b/archivebox/vendor/abx-plugin-readwise/abx_plugin_readwise.py
@@ -0,0 +1,35 @@
+__package__ = 'abx_plugin_readwise_extractor'
+__id__ = 'abx_plugin_readwise_extractor'
+__label__ = 'Readwise API'
+__version__ = '2024.10.27'
+__author__ = 'ArchiveBox'
+__homepage__ = 'https://github.com/ArchiveBox/ArchiveBox/tree/dev/archivebox/plugins_extractor/readwise'
+__dependencies__ = []
+
+import abx
+
+from typing import Dict
+from pathlib import Path
+
+from pydantic import Field
+
+from abx_spec_config.base_configset import BaseConfigSet
+
+SOURCES_DIR = abx.pm.hook.get_CONFIG().SOURCES_DIR
+
+
+class ReadwiseConfig(BaseConfigSet):
+    """Config set for the Readwise plugin."""
+
+    # Path to the database file; defaults to readwise_reader_api.db under SOURCES_DIR.
+    READWISE_DB_PATH: Path = Field(default=SOURCES_DIR / "readwise_reader_api.db")
+    # default_factory (not default) so the default value is a new empty dict
+    # rather than a callable object.
+    # NOTE(review): presumably maps {<username>: <access_token>, ...} -- confirm against callers.
+    READWISE_READER_TOKENS: Dict[str, str] = Field(default_factory=dict)
+
+
+@abx.hookimpl
+def get_CONFIG():
+    """Return a newly constructed ReadwiseConfig keyed by this module's ``__id__``."""
+    readwise_config = ReadwiseConfig()
+    return {__id__: readwise_config}
+
+@abx.hookimpl
+def ready():
+    """Fetch this plugin's entry from the get_CONFIG() hook result and call validate() on it."""
+    hook_configs = abx.pm.hook.get_CONFIG()
+    hook_configs[__id__].validate()
diff --git a/archivebox/vendor/abx-plugin-readwise/pyproject.toml b/archivebox/vendor/abx-plugin-readwise/pyproject.toml
new file mode 100644
index 00000000..c85d489f
--- /dev/null
+++ b/archivebox/vendor/abx-plugin-readwise/pyproject.toml
@@ -0,0 +1,18 @@
+[project]
+name = "abx-plugin-readwise"
+version = "2024.10.28"
+description = "Readwise API Extractor"
+readme = "README.md"
+requires-python = ">=3.10"
+dependencies = [
+ "abx>=0.1.0",
+ "abx-spec-config>=0.1.0",
+]
+
+[build-system]
+requires = ["hatchling"]
+build-backend = "hatchling.build"
+
+[project.entry-points.abx]
+abx_plugin_readwise = "abx_plugin_readwise"
+
diff --git a/packages/abx-plugin-ripgrep-search/README.md b/archivebox/vendor/abx-plugin-ripgrep-search/README.md
similarity index 100%
rename from packages/abx-plugin-ripgrep-search/README.md
rename to archivebox/vendor/abx-plugin-ripgrep-search/README.md
diff --git a/archivebox/vendor/abx-plugin-ripgrep-search/abx_plugin_ripgrep_search/__init__.py b/archivebox/vendor/abx-plugin-ripgrep-search/abx_plugin_ripgrep_search/__init__.py
new file mode 100644
index 00000000..91347523
--- /dev/null
+++ b/archivebox/vendor/abx-plugin-ripgrep-search/abx_plugin_ripgrep_search/__init__.py
@@ -0,0 +1,31 @@
+__package__ = 'abx_plugin_ripgrep_search'
+__label__ = 'Ripgrep Search'
+__homepage__ = 'https://github.com/BurntSushi/ripgrep'
+
+import abx
+
+@abx.hookimpl
+def get_CONFIG():
+    """Return a dict mapping 'RIPGREP_CONFIG' to this plugin's config set."""
+    # Imported here rather than at module level so .config loads only on hook invocation.
+    from .config import RIPGREP_CONFIG as ripgrep_config
+
+    config_sets = {'RIPGREP_CONFIG': ripgrep_config}
+    return config_sets
+
+
+@abx.hookimpl
+def get_BINARIES():
+    """Return a dict mapping 'ripgrep' to RIPGREP_BINARY."""
+    # Imported here rather than at module level so .binaries loads only on hook invocation.
+    from .binaries import RIPGREP_BINARY as ripgrep_binary
+
+    return dict(ripgrep=ripgrep_binary)
+
+
+@abx.hookimpl
+def get_SEARCHBACKENDS():
+    """Return a dict mapping 'ripgrep' to RIPGREP_SEARCH_BACKEND."""
+    # Imported here rather than at module level so .searchbackend loads only on hook invocation.
+    from .searchbackend import RIPGREP_SEARCH_BACKEND as ripgrep_backend
+
+    backends = {}
+    backends['ripgrep'] = ripgrep_backend
+    return backends
diff --git a/packages/abx-plugin-ripgrep-search/binaries.py b/archivebox/vendor/abx-plugin-ripgrep-search/abx_plugin_ripgrep_search/binaries.py
similarity index 65%
rename from packages/abx-plugin-ripgrep-search/binaries.py
rename to archivebox/vendor/abx-plugin-ripgrep-search/abx_plugin_ripgrep_search/binaries.py
index 710a1ef0..ef9217ad 100644
--- a/packages/abx-plugin-ripgrep-search/binaries.py
+++ b/archivebox/vendor/abx-plugin-ripgrep-search/abx_plugin_ripgrep_search/binaries.py
@@ -1,17 +1,17 @@
-__package__ = 'plugins_search.ripgrep'
+__package__ = 'abx_plugin_ripgrep_search'
from typing import List
from pydantic import InstanceOf
-from pydantic_pkgr import BinProvider, BinaryOverrides, BinName
+from pydantic_pkgr import BinProvider, BinaryOverrides, BinName, Binary
-from abx.archivebox.base_binary import BaseBinary, env, apt, brew
+from abx_plugin_default_binproviders import apt, brew, env
from .config import RIPGREP_CONFIG
-class RipgrepBinary(BaseBinary):
+class RipgrepBinary(Binary):
name: BinName = RIPGREP_CONFIG.RIPGREP_BINARY
binproviders_supported: List[InstanceOf[BinProvider]] = [apt, brew, env]
diff --git a/packages/abx-plugin-ripgrep-search/config.py b/archivebox/vendor/abx-plugin-ripgrep-search/abx_plugin_ripgrep_search/config.py
similarity index 89%
rename from packages/abx-plugin-ripgrep-search/config.py
rename to archivebox/vendor/abx-plugin-ripgrep-search/abx_plugin_ripgrep_search/config.py
index 726c21e8..e0fd3b28 100644
--- a/packages/abx-plugin-ripgrep-search/config.py
+++ b/archivebox/vendor/abx-plugin-ripgrep-search/abx_plugin_ripgrep_search/config.py
@@ -1,11 +1,11 @@
-__package__ = 'plugins_search.ripgrep'
+__package__ = 'abx_plugin_ripgrep_search'
from pathlib import Path
from typing import List
from pydantic import Field
-from abx.archivebox.base_configset import BaseConfigSet
+from abx_spec_config.base_configset import BaseConfigSet
from archivebox.config import CONSTANTS
from archivebox.config.common import SEARCH_BACKEND_CONFIG
diff --git a/packages/abx-plugin-ripgrep-search/searchbackend.py b/archivebox/vendor/abx-plugin-ripgrep-search/abx_plugin_ripgrep_search/searchbackend.py
similarity index 93%
rename from packages/abx-plugin-ripgrep-search/searchbackend.py
rename to archivebox/vendor/abx-plugin-ripgrep-search/abx_plugin_ripgrep_search/searchbackend.py
index 3c30af85..ed3965ba 100644
--- a/packages/abx-plugin-ripgrep-search/searchbackend.py
+++ b/archivebox/vendor/abx-plugin-ripgrep-search/abx_plugin_ripgrep_search/searchbackend.py
@@ -1,11 +1,11 @@
-__package__ = 'plugins_search.ripgrep'
+__package__ = 'abx_plugin_ripgrep_search'
import re
import subprocess
from typing import List, Iterable
-from abx.archivebox.base_searchbackend import BaseSearchBackend
+from abx_spec_searchbackend import BaseSearchBackend
from .binaries import RIPGREP_BINARY
from .config import RIPGREP_CONFIG
diff --git a/archivebox/vendor/abx-plugin-ripgrep-search/pyproject.toml b/archivebox/vendor/abx-plugin-ripgrep-search/pyproject.toml
new file mode 100644
index 00000000..67245c48
--- /dev/null
+++ b/archivebox/vendor/abx-plugin-ripgrep-search/pyproject.toml
@@ -0,0 +1,18 @@
+[project]
+name = "abx-plugin-ripgrep-search"
+version = "2024.10.28"
+description = "Add your description here"
+readme = "README.md"
+requires-python = ">=3.10"
+dependencies = [
+ "abx>=0.1.0",
+ "abx-spec-config>=0.1.0",
+ "abx-spec-searchbackend>=0.1.0",
+]
+
+[build-system]
+requires = ["hatchling"]
+build-backend = "hatchling.build"
+
+[project.entry-points.abx]
+abx_plugin_ripgrep_search = "abx_plugin_ripgrep_search"
diff --git a/packages/abx-plugin-singlefile-extractor/README.md b/archivebox/vendor/abx-plugin-singlefile/README.md
similarity index 100%
rename from packages/abx-plugin-singlefile-extractor/README.md
rename to archivebox/vendor/abx-plugin-singlefile/README.md
diff --git a/packages/abx-plugin-singlefile-extractor/__init__.py b/archivebox/vendor/abx-plugin-singlefile/abx_plugin_singlefile/__init__.py
similarity index 53%
rename from packages/abx-plugin-singlefile-extractor/__init__.py
rename to archivebox/vendor/abx-plugin-singlefile/abx_plugin_singlefile/__init__.py
index cd72adb8..ddfb4236 100644
--- a/packages/abx-plugin-singlefile-extractor/__init__.py
+++ b/archivebox/vendor/abx-plugin-singlefile/abx_plugin_singlefile/__init__.py
@@ -1,32 +1,16 @@
-__package__ = 'plugins_extractor.singlefile'
-__label__ = 'singlefile'
-__version__ = '2024.10.14'
-__author__ = 'ArchiveBox'
+__package__ = 'abx_plugin_singlefile'
+__label__ = 'Singlefile'
__homepage__ = 'https://github.com/gildas-lormeau/singlefile'
-__dependencies__ = ['npm']
import abx
-@abx.hookimpl
-def get_PLUGIN():
- return {
- 'singlefile': {
- 'PACKAGE': __package__,
- 'LABEL': __label__,
- 'VERSION': __version__,
- 'AUTHOR': __author__,
- 'HOMEPAGE': __homepage__,
- 'DEPENDENCIES': __dependencies__,
- }
- }
-
@abx.hookimpl
def get_CONFIG():
from .config import SINGLEFILE_CONFIG
return {
- 'singlefile': SINGLEFILE_CONFIG
+ 'SINGLEFILE_CONFIG': SINGLEFILE_CONFIG
}
@abx.hookimpl
diff --git a/packages/abx-plugin-singlefile-extractor/binaries.py b/archivebox/vendor/abx-plugin-singlefile/abx_plugin_singlefile/binaries.py
similarity index 84%
rename from packages/abx-plugin-singlefile-extractor/binaries.py
rename to archivebox/vendor/abx-plugin-singlefile/abx_plugin_singlefile/binaries.py
index 0c8a1bab..7af784a3 100644
--- a/packages/abx-plugin-singlefile-extractor/binaries.py
+++ b/archivebox/vendor/abx-plugin-singlefile/abx_plugin_singlefile/binaries.py
@@ -1,13 +1,10 @@
-__package__ = 'plugins_extractor.singlefile'
-
from typing import List
from pydantic import InstanceOf
-from pydantic_pkgr import BinProvider, BinaryOverrides, BinName, bin_abspath
+from pydantic_pkgr import Binary, BinProvider, BinaryOverrides, BinName, bin_abspath
-from abx.archivebox.base_binary import BaseBinary, env
-
-from plugins_pkg.npm.binproviders import SYS_NPM_BINPROVIDER, LIB_NPM_BINPROVIDER
+from abx_plugin_default_binproviders import env
+from abx_plugin_npm.binproviders import SYS_NPM_BINPROVIDER, LIB_NPM_BINPROVIDER
from .config import SINGLEFILE_CONFIG
@@ -16,7 +13,7 @@ SINGLEFILE_MIN_VERSION = '1.1.54'
SINGLEFILE_MAX_VERSION = '1.1.60'
-class SinglefileBinary(BaseBinary):
+class SinglefileBinary(Binary):
name: BinName = SINGLEFILE_CONFIG.SINGLEFILE_BINARY
binproviders_supported: List[InstanceOf[BinProvider]] = [LIB_NPM_BINPROVIDER, SYS_NPM_BINPROVIDER, env]
diff --git a/packages/abx-plugin-singlefile-extractor/config.py b/archivebox/vendor/abx-plugin-singlefile/abx_plugin_singlefile/config.py
similarity index 88%
rename from packages/abx-plugin-singlefile-extractor/config.py
rename to archivebox/vendor/abx-plugin-singlefile/abx_plugin_singlefile/config.py
index 7d27031e..0d2164ba 100644
--- a/packages/abx-plugin-singlefile-extractor/config.py
+++ b/archivebox/vendor/abx-plugin-singlefile/abx_plugin_singlefile/config.py
@@ -1,11 +1,9 @@
-__package__ = 'plugins_extractor.singlefile'
-
from pathlib import Path
from typing import List, Optional
from pydantic import Field
-from abx.archivebox.base_configset import BaseConfigSet
+from abx_spec_config.base_configset import BaseConfigSet
from archivebox.config.common import ARCHIVING_CONFIG
diff --git a/archivebox/vendor/abx-plugin-singlefile/abx_plugin_singlefile/extractors.py b/archivebox/vendor/abx-plugin-singlefile/abx_plugin_singlefile/extractors.py
new file mode 100644
index 00000000..07b674ac
--- /dev/null
+++ b/archivebox/vendor/abx-plugin-singlefile/abx_plugin_singlefile/extractors.py
@@ -0,0 +1,18 @@
+__package__ = 'abx_plugin_singlefile'
+
+# from pathlib import Path
+
+# from pydantic_pkgr import BinName
+
+# from .binaries import SINGLEFILE_BINARY
+
+
+# class SinglefileExtractor(BaseExtractor):
+# name: str = 'singlefile'
+# binary: BinName = SINGLEFILE_BINARY.name
+
+# def get_output_path(self, snapshot) -> Path:
+# return Path(snapshot.link_dir) / 'singlefile.html'
+
+
+# SINGLEFILE_EXTRACTOR = SinglefileExtractor()
diff --git a/packages/abx-plugin-singlefile-extractor/models.py b/archivebox/vendor/abx-plugin-singlefile/abx_plugin_singlefile/models.py
similarity index 100%
rename from packages/abx-plugin-singlefile-extractor/models.py
rename to archivebox/vendor/abx-plugin-singlefile/abx_plugin_singlefile/models.py
diff --git a/archivebox/extractors/singlefile.py b/archivebox/vendor/abx-plugin-singlefile/abx_plugin_singlefile/singlefile.py
similarity index 100%
rename from archivebox/extractors/singlefile.py
rename to archivebox/vendor/abx-plugin-singlefile/abx_plugin_singlefile/singlefile.py
diff --git a/archivebox/vendor/abx-plugin-singlefile/pyproject.toml b/archivebox/vendor/abx-plugin-singlefile/pyproject.toml
new file mode 100644
index 00000000..7cecd40a
--- /dev/null
+++ b/archivebox/vendor/abx-plugin-singlefile/pyproject.toml
@@ -0,0 +1,19 @@
+[project]
+name = "abx-plugin-singlefile"
+version = "2024.10.28"
+description = "Add your description here"
+readme = "README.md"
+requires-python = ">=3.10"
+dependencies = [
+ "abx>=0.1.0",
+ "abx-spec-config>=0.1.0",
+ "abx-spec-pydantic-pkgr>=0.1.0",
+ "pydantic-pkgr>=0.5.4",
+]
+
+[build-system]
+requires = ["hatchling"]
+build-backend = "hatchling.build"
+
+[project.entry-points.abx]
+abx_plugin_singlefile = "abx_plugin_singlefile"
diff --git a/packages/abx-plugin-sonic-search/README.md b/archivebox/vendor/abx-plugin-sonic-search/README.md
similarity index 100%
rename from packages/abx-plugin-sonic-search/README.md
rename to archivebox/vendor/abx-plugin-sonic-search/README.md
diff --git a/packages/abx-plugin-sonic-search/__init__.py b/archivebox/vendor/abx-plugin-sonic-search/abx_plugin_sonic_search/__init__.py
similarity index 53%
rename from packages/abx-plugin-sonic-search/__init__.py
rename to archivebox/vendor/abx-plugin-sonic-search/abx_plugin_sonic_search/__init__.py
index a899679b..1a92a8d2 100644
--- a/packages/abx-plugin-sonic-search/__init__.py
+++ b/archivebox/vendor/abx-plugin-sonic-search/abx_plugin_sonic_search/__init__.py
@@ -1,32 +1,16 @@
-__package__ = 'plugins_search.sonic'
-__label__ = 'sonic'
-__version__ = '2024.10.14'
-__author__ = 'ArchiveBox'
+__package__ = 'abx_plugin_sonic_search'
+__label__ = 'Sonic Search'
__homepage__ = 'https://github.com/valeriansaliou/sonic'
-__dependencies__ = []
import abx
-@abx.hookimpl
-def get_PLUGIN():
- return {
- 'sonic': {
- 'PACKAGE': __package__,
- 'LABEL': __label__,
- 'VERSION': __version__,
- 'AUTHOR': __author__,
- 'HOMEPAGE': __homepage__,
- 'DEPENDENCIES': __dependencies__,
- }
- }
-
@abx.hookimpl
def get_CONFIG():
from .config import SONIC_CONFIG
return {
- 'sonic': SONIC_CONFIG
+ 'SONIC_CONFIG': SONIC_CONFIG
}
diff --git a/packages/abx-plugin-sonic-search/binaries.py b/archivebox/vendor/abx-plugin-sonic-search/abx_plugin_sonic_search/binaries.py
similarity index 80%
rename from packages/abx-plugin-sonic-search/binaries.py
rename to archivebox/vendor/abx-plugin-sonic-search/abx_plugin_sonic_search/binaries.py
index eab987c5..2e8fb536 100644
--- a/packages/abx-plugin-sonic-search/binaries.py
+++ b/archivebox/vendor/abx-plugin-sonic-search/abx_plugin_sonic_search/binaries.py
@@ -1,16 +1,16 @@
-__package__ = 'plugins_search.sonic'
+__package__ = 'abx_plugin_sonic_search'
from typing import List
from pydantic import InstanceOf
-from pydantic_pkgr import BinProvider, BinaryOverrides, BinName
+from pydantic_pkgr import BinProvider, BinaryOverrides, BinName, Binary
-from abx.archivebox.base_binary import BaseBinary, env, brew
+from abx_plugin_default_binproviders import brew, env
from .config import SONIC_CONFIG
-class SonicBinary(BaseBinary):
+class SonicBinary(Binary):
name: BinName = SONIC_CONFIG.SONIC_BINARY
binproviders_supported: List[InstanceOf[BinProvider]] = [brew, env] # TODO: add cargo
diff --git a/packages/abx-plugin-sonic-search/config.py b/archivebox/vendor/abx-plugin-sonic-search/abx_plugin_sonic_search/config.py
similarity index 93%
rename from packages/abx-plugin-sonic-search/config.py
rename to archivebox/vendor/abx-plugin-sonic-search/abx_plugin_sonic_search/config.py
index d54ed568..97cc7b3a 100644
--- a/packages/abx-plugin-sonic-search/config.py
+++ b/archivebox/vendor/abx-plugin-sonic-search/abx_plugin_sonic_search/config.py
@@ -1,10 +1,10 @@
-__package__ = 'plugins_search.sonic'
+__package__ = 'abx_plugin_sonic_search'
import sys
from pydantic import Field
-from abx.archivebox.base_configset import BaseConfigSet
+from abx_spec_config.base_configset import BaseConfigSet
from archivebox.config.common import SEARCH_BACKEND_CONFIG
diff --git a/packages/abx-plugin-sonic-search/searchbackend.py b/archivebox/vendor/abx-plugin-sonic-search/abx_plugin_sonic_search/searchbackend.py
similarity index 97%
rename from packages/abx-plugin-sonic-search/searchbackend.py
rename to archivebox/vendor/abx-plugin-sonic-search/abx_plugin_sonic_search/searchbackend.py
index 1662e5b2..a63a0132 100644
--- a/packages/abx-plugin-sonic-search/searchbackend.py
+++ b/archivebox/vendor/abx-plugin-sonic-search/abx_plugin_sonic_search/searchbackend.py
@@ -2,7 +2,7 @@ __package__ = 'plugins_search.sonic'
from typing import List, Generator, cast
-from abx.archivebox.base_searchbackend import BaseSearchBackend
+from abx_spec_searchbackend import BaseSearchBackend
from .config import SONIC_CONFIG, SONIC_LIB
diff --git a/archivebox/vendor/abx-plugin-sonic-search/pyproject.toml b/archivebox/vendor/abx-plugin-sonic-search/pyproject.toml
new file mode 100644
index 00000000..b6551b52
--- /dev/null
+++ b/archivebox/vendor/abx-plugin-sonic-search/pyproject.toml
@@ -0,0 +1,20 @@
+[project]
+name = "abx-plugin-sonic-search"
+version = "2024.10.28"
+description = "Add your description here"
+readme = "README.md"
+requires-python = ">=3.10"
+dependencies = [
+ "abx>=0.1.0",
+ "abx-spec-config>=0.1.0",
+ "abx-spec-pydantic-pkgr>=0.1.0",
+ "abx-spec-searchbackend>=0.1.0",
+ "pydantic-pkgr>=0.5.4",
+]
+
+[build-system]
+requires = ["hatchling"]
+build-backend = "hatchling.build"
+
+[project.entry-points.abx]
+abx_plugin_sonic_search = "abx_plugin_sonic_search"
diff --git a/packages/abx-plugin-sqlitefts-search/README.md b/archivebox/vendor/abx-plugin-sqlitefts-search/README.md
similarity index 100%
rename from packages/abx-plugin-sqlitefts-search/README.md
rename to archivebox/vendor/abx-plugin-sqlitefts-search/README.md
diff --git a/archivebox/vendor/abx-plugin-sqlitefts-search/abx_plugin_sqlitefts_search/__init__.py b/archivebox/vendor/abx-plugin-sqlitefts-search/abx_plugin_sqlitefts_search/__init__.py
new file mode 100644
index 00000000..5d5ed6de
--- /dev/null
+++ b/archivebox/vendor/abx-plugin-sqlitefts-search/abx_plugin_sqlitefts_search/__init__.py
@@ -0,0 +1,21 @@
+__package__ = 'abx_plugin_sqlitefts_search'
+__label__ = 'SQLiteFTS Search'
+
+import abx
+
+@abx.hookimpl
+def get_CONFIG():
+    """Return a dict mapping 'SQLITEFTS_CONFIG' to this plugin's config set."""
+    # Imported here rather than at module level so .config loads only on hook invocation.
+    from .config import SQLITEFTS_CONFIG as sqlitefts_config
+
+    return dict(SQLITEFTS_CONFIG=sqlitefts_config)
+
+
+@abx.hookimpl
+def get_SEARCHBACKENDS():
+    """Return a dict mapping 'sqlitefts' to SQLITEFTS_SEARCH_BACKEND."""
+    # Imported here rather than at module level so .searchbackend loads only on hook invocation.
+    from .searchbackend import SQLITEFTS_SEARCH_BACKEND as sqlitefts_backend
+
+    backends = {'sqlitefts': sqlitefts_backend}
+    return backends
diff --git a/packages/abx-plugin-sqlitefts-search/config.py b/archivebox/vendor/abx-plugin-sqlitefts-search/abx_plugin_sqlitefts_search/config.py
similarity index 96%
rename from packages/abx-plugin-sqlitefts-search/config.py
rename to archivebox/vendor/abx-plugin-sqlitefts-search/abx_plugin_sqlitefts_search/config.py
index 5690dc6c..789ff114 100644
--- a/packages/abx-plugin-sqlitefts-search/config.py
+++ b/archivebox/vendor/abx-plugin-sqlitefts-search/abx_plugin_sqlitefts_search/config.py
@@ -1,4 +1,4 @@
-__package__ = 'plugins_search.sqlitefts'
+__package__ = 'abx_plugin_sqlitefts_search'
import sys
import sqlite3
@@ -8,7 +8,7 @@ from django.core.exceptions import ImproperlyConfigured
from pydantic import Field
-from abx.archivebox.base_configset import BaseConfigSet
+from abx_spec_config.base_configset import BaseConfigSet
from archivebox.config.common import SEARCH_BACKEND_CONFIG
diff --git a/packages/abx-plugin-sqlitefts-search/searchbackend.py b/archivebox/vendor/abx-plugin-sqlitefts-search/abx_plugin_sqlitefts_search/searchbackend.py
similarity index 98%
rename from packages/abx-plugin-sqlitefts-search/searchbackend.py
rename to archivebox/vendor/abx-plugin-sqlitefts-search/abx_plugin_sqlitefts_search/searchbackend.py
index 630bdd4c..2ae7c9cf 100644
--- a/packages/abx-plugin-sqlitefts-search/searchbackend.py
+++ b/archivebox/vendor/abx-plugin-sqlitefts-search/abx_plugin_sqlitefts_search/searchbackend.py
@@ -1,10 +1,10 @@
-__package__ = 'plugins_search.sqlitefts'
+__package__ = 'abx_plugin_sqlitefts_search'
import codecs
import sqlite3
from typing import List, Iterable
-from abx.archivebox.base_searchbackend import BaseSearchBackend
+from abx_spec_searchbackend import BaseSearchBackend
from .config import SQLITEFTS_CONFIG
diff --git a/archivebox/vendor/abx-plugin-sqlitefts-search/pyproject.toml b/archivebox/vendor/abx-plugin-sqlitefts-search/pyproject.toml
new file mode 100644
index 00000000..abc6181a
--- /dev/null
+++ b/archivebox/vendor/abx-plugin-sqlitefts-search/pyproject.toml
@@ -0,0 +1,18 @@
+[project]
+name = "abx-plugin-sqlitefts-search"
+version = "2024.10.28"
+description = "SQLite FTS search backend plugin for ArchiveBox"
+readme = "README.md"
+requires-python = ">=3.10"
+dependencies = [
+ "abx>=0.1.0",
+ "abx-spec-config>=0.1.0",
+ "abx-spec-searchbackend>=0.1.0",
+]
+
+[build-system]
+requires = ["hatchling"]
+build-backend = "hatchling.build"
+
+[project.entry-points.abx]
+abx_plugin_sqlitefts_search = "abx_plugin_sqlitefts_search"
diff --git a/packages/abx-plugin-wget-extractor/README.md b/archivebox/vendor/abx-plugin-title/README.md
similarity index 100%
rename from packages/abx-plugin-wget-extractor/README.md
rename to archivebox/vendor/abx-plugin-title/README.md
diff --git a/archivebox/vendor/abx-plugin-title/abx_plugin_title/__init__.py b/archivebox/vendor/abx-plugin-title/abx_plugin_title/__init__.py
new file mode 100644
index 00000000..d3e5cac5
--- /dev/null
+++ b/archivebox/vendor/abx-plugin-title/abx_plugin_title/__init__.py
@@ -0,0 +1,9 @@
+import abx
+
+# @abx.hookimpl
+# def get_CONFIG():
+# from .config import TITLE_EXTRACTOR_CONFIG
+
+# return {
+# 'title_extractor': TITLE_EXTRACTOR_CONFIG
+# }
diff --git a/archivebox/extractors/title.py b/archivebox/vendor/abx-plugin-title/abx_plugin_title/extractor.py
similarity index 97%
rename from archivebox/extractors/title.py
rename to archivebox/vendor/abx-plugin-title/abx_plugin_title/extractor.py
index ceefb699..a8ef52cf 100644
--- a/archivebox/extractors/title.py
+++ b/archivebox/vendor/abx-plugin-title/abx_plugin_title/extractor.py
@@ -11,8 +11,8 @@ from archivebox.misc.util import (
htmldecode,
dedupe,
)
-from archivebox.plugins_extractor.curl.config import CURL_CONFIG
-from archivebox.plugins_extractor.curl.binaries import CURL_BINARY
+from abx_plugin_curl_extractor.config import CURL_CONFIG
+from abx_plugin_curl_extractor.binaries import CURL_BINARY
from ..index.schema import Link, ArchiveResult, ArchiveOutput, ArchiveError
from ..logging_util import TimedProgress
diff --git a/archivebox/vendor/abx-plugin-title/pyproject.toml b/archivebox/vendor/abx-plugin-title/pyproject.toml
new file mode 100644
index 00000000..a9737b3a
--- /dev/null
+++ b/archivebox/vendor/abx-plugin-title/pyproject.toml
@@ -0,0 +1,18 @@
+[project]
+name = "abx-plugin-title"
+version = "2024.10.27"
+description = "Title Extractor"
+readme = "README.md"
+requires-python = ">=3.10"
+dependencies = [
+ "abx>=0.1.0",
+ "abx-spec-config>=0.1.0",
+ "abx-plugin-curl>=2024.10.28",
+]
+
+[build-system]
+requires = ["hatchling"]
+build-backend = "hatchling.build"
+
+[project.entry-points.abx]
+abx_plugin_title = "abx_plugin_title"
diff --git a/packages/abx-plugin-ytdlp-extractor/README.md b/archivebox/vendor/abx-plugin-wget/README.md
similarity index 100%
rename from packages/abx-plugin-ytdlp-extractor/README.md
rename to archivebox/vendor/abx-plugin-wget/README.md
diff --git a/archivebox/vendor/abx-plugin-wget/abx_plugin_wget/__init__.py b/archivebox/vendor/abx-plugin-wget/abx_plugin_wget/__init__.py
new file mode 100644
index 00000000..a32987ee
--- /dev/null
+++ b/archivebox/vendor/abx-plugin-wget/abx_plugin_wget/__init__.py
@@ -0,0 +1,35 @@
+__package__ = 'abx_plugin_wget'
+__label__ = 'WGET'
+
+import abx
+
+
+@abx.hookimpl
+def get_CONFIG():
+ from .config import WGET_CONFIG
+
+ return {
+ 'WGET_CONFIG': WGET_CONFIG
+ }
+
+@abx.hookimpl
+def get_BINARIES():
+ from .binaries import WGET_BINARY
+
+ return {
+ 'wget': WGET_BINARY,
+ }
+
+@abx.hookimpl
+def get_EXTRACTORS():
+ from .extractors import WGET_EXTRACTOR, WARC_EXTRACTOR
+
+ return {
+ 'wget': WGET_EXTRACTOR,
+ 'warc': WARC_EXTRACTOR,
+ }
+
+@abx.hookimpl
+def ready():
+ from .config import WGET_CONFIG
+ WGET_CONFIG.validate()
diff --git a/packages/abx-plugin-wget-extractor/binaries.py b/archivebox/vendor/abx-plugin-wget/abx_plugin_wget/binaries.py
similarity index 57%
rename from packages/abx-plugin-wget-extractor/binaries.py
rename to archivebox/vendor/abx-plugin-wget/abx_plugin_wget/binaries.py
index 6198beac..39cbe111 100644
--- a/packages/abx-plugin-wget-extractor/binaries.py
+++ b/archivebox/vendor/abx-plugin-wget/abx_plugin_wget/binaries.py
@@ -1,17 +1,17 @@
-__package__ = 'plugins_extractor.wget'
+__package__ = 'abx_plugin_wget'
from typing import List
from pydantic import InstanceOf
-from pydantic_pkgr import BinProvider, BinName
+from pydantic_pkgr import BinProvider, BinName, Binary
-from abx.archivebox.base_binary import BaseBinary, env, apt, brew
+from abx_plugin_default_binproviders import apt, brew, env
from .config import WGET_CONFIG
-class WgetBinary(BaseBinary):
+class WgetBinary(Binary):
name: BinName = WGET_CONFIG.WGET_BINARY
binproviders_supported: List[InstanceOf[BinProvider]] = [apt, brew, env]
diff --git a/packages/abx-plugin-wget-extractor/config.py b/archivebox/vendor/abx-plugin-wget/abx_plugin_wget/config.py
similarity index 96%
rename from packages/abx-plugin-wget-extractor/config.py
rename to archivebox/vendor/abx-plugin-wget/abx_plugin_wget/config.py
index 12edf672..1dfd1b07 100644
--- a/packages/abx-plugin-wget-extractor/config.py
+++ b/archivebox/vendor/abx-plugin-wget/abx_plugin_wget/config.py
@@ -1,12 +1,10 @@
-__package__ = 'plugins_extractor.wget'
-
import subprocess
from typing import List, Optional
from pathlib import Path
from pydantic import Field
-from abx.archivebox.base_configset import BaseConfigSet
+from abx_spec_config.base_configset import BaseConfigSet
from archivebox.config.common import ARCHIVING_CONFIG, STORAGE_CONFIG
from archivebox.misc.logging import STDERR
diff --git a/archivebox/vendor/abx-plugin-wget/abx_plugin_wget/extractors.py b/archivebox/vendor/abx-plugin-wget/abx_plugin_wget/extractors.py
new file mode 100644
index 00000000..4d4d0243
--- /dev/null
+++ b/archivebox/vendor/abx-plugin-wget/abx_plugin_wget/extractors.py
@@ -0,0 +1,35 @@
+__package__ = 'abx_plugin_wget'
+
+# from pathlib import Path
+
+# from pydantic_pkgr import BinName
+
+# from .binaries import WGET_BINARY
+# from .wget_util import wget_output_path
+
+# class WgetExtractor(BaseExtractor):
+# name: ExtractorName = 'wget'
+# binary: BinName = WGET_BINARY.name
+
+# def get_output_path(self, snapshot) -> Path | None:
+# wget_index_path = wget_output_path(snapshot.as_link())
+# if wget_index_path:
+# return Path(wget_index_path)
+# return None
+
+# WGET_EXTRACTOR = WgetExtractor()
+
+
+# class WarcExtractor(BaseExtractor):
+# name: ExtractorName = 'warc'
+# binary: BinName = WGET_BINARY.name
+
+# def get_output_path(self, snapshot) -> Path | None:
+# warc_files = list((Path(snapshot.link_dir) / 'warc').glob('*.warc.gz'))
+# if warc_files:
+# return sorted(warc_files, key=lambda x: x.stat().st_size, reverse=True)[0]
+# return None
+
+
+# WARC_EXTRACTOR = WarcExtractor()
+
diff --git a/archivebox/extractors/wget.py b/archivebox/vendor/abx-plugin-wget/abx_plugin_wget/wget.py
similarity index 97%
rename from archivebox/extractors/wget.py
rename to archivebox/vendor/abx-plugin-wget/abx_plugin_wget/wget.py
index 416e797e..caaaeaf6 100644
--- a/archivebox/extractors/wget.py
+++ b/archivebox/vendor/abx-plugin-wget/abx_plugin_wget/wget.py
@@ -1,4 +1,4 @@
-__package__ = 'archivebox.extractors'
+__package__ = 'abx_plugin_wget'  # must be the real package name: the relative imports below (.config, .binaries) resolve against it
import re
import os
@@ -17,10 +17,11 @@ from archivebox.misc.util import (
urldecode,
dedupe,
)
-from archivebox.plugins_extractor.wget.config import WGET_CONFIG
-from archivebox.plugins_extractor.wget.binaries import WGET_BINARY
-from ..logging_util import TimedProgress
-from ..index.schema import Link, ArchiveResult, ArchiveOutput, ArchiveError
+from .config import WGET_CONFIG
+from .binaries import WGET_BINARY
+
+from archivebox.logging_util import TimedProgress
+from archivebox.index.schema import Link, ArchiveResult, ArchiveOutput, ArchiveError
def get_output_path():
diff --git a/packages/abx-plugin-wget-extractor/wget_util.py b/archivebox/vendor/abx-plugin-wget/abx_plugin_wget/wget_util.py
similarity index 100%
rename from packages/abx-plugin-wget-extractor/wget_util.py
rename to archivebox/vendor/abx-plugin-wget/abx_plugin_wget/wget_util.py
diff --git a/archivebox/vendor/abx-plugin-wget/pyproject.toml b/archivebox/vendor/abx-plugin-wget/pyproject.toml
new file mode 100644
index 00000000..d401e52f
--- /dev/null
+++ b/archivebox/vendor/abx-plugin-wget/pyproject.toml
@@ -0,0 +1,18 @@
+[project]
+name = "abx-plugin-wget"
+version = "2024.10.28"
+description = "wget plugin for ArchiveBox"
+readme = "README.md"
+requires-python = ">=3.10"
+dependencies = [
+ "abx>=0.1.0",
+ "abx-spec-config>=0.1.0",
+ "abx-spec-pydantic-pkgr>=0.1.0",
+]
+
+[build-system]
+requires = ["hatchling"]
+build-backend = "hatchling.build"
+
+[project.entry-points.abx]
+abx_plugin_wget = "abx_plugin_wget"
diff --git a/packages/abx-spec-archivebox/README.md b/archivebox/vendor/abx-plugin-ytdlp/README.md
similarity index 100%
rename from packages/abx-spec-archivebox/README.md
rename to archivebox/vendor/abx-plugin-ytdlp/README.md
diff --git a/packages/abx-plugin-ytdlp-extractor/__init__.py b/archivebox/vendor/abx-plugin-ytdlp/abx_plugin_ytdlp/__init__.py
similarity index 53%
rename from packages/abx-plugin-ytdlp-extractor/__init__.py
rename to archivebox/vendor/abx-plugin-ytdlp/abx_plugin_ytdlp/__init__.py
index 1dc9ef99..5b1d9968 100644
--- a/packages/abx-plugin-ytdlp-extractor/__init__.py
+++ b/archivebox/vendor/abx-plugin-ytdlp/abx_plugin_ytdlp/__init__.py
@@ -1,30 +1,15 @@
-__package__ = 'plugins_extractor.ytdlp'
+__package__ = 'abx_plugin_ytdlp'
__label__ = 'YT-DLP'
-__version__ = '2024.10.14'
-__author__ = 'ArchiveBox'
__homepage__ = 'https://github.com/yt-dlp/yt-dlp'
import abx
-
-@abx.hookimpl
-def get_PLUGIN():
- return {
- 'ytdlp': {
- 'PACKAGE': __package__,
- 'LABEL': __label__,
- 'VERSION': __version__,
- 'AUTHOR': __author__,
- 'HOMEPAGE': __homepage__,
- }
- }
-
@abx.hookimpl
def get_CONFIG():
from .config import YTDLP_CONFIG
return {
- 'ytdlp': YTDLP_CONFIG
+ 'YTDLP_CONFIG': YTDLP_CONFIG
}
@abx.hookimpl
diff --git a/packages/abx-plugin-ytdlp-extractor/binaries.py b/archivebox/vendor/abx-plugin-ytdlp/abx_plugin_ytdlp/binaries.py
similarity index 77%
rename from packages/abx-plugin-ytdlp-extractor/binaries.py
rename to archivebox/vendor/abx-plugin-ytdlp/abx_plugin_ytdlp/binaries.py
index 730de2dc..69239515 100644
--- a/packages/abx-plugin-ytdlp-extractor/binaries.py
+++ b/archivebox/vendor/abx-plugin-ytdlp/abx_plugin_ytdlp/binaries.py
@@ -1,26 +1,25 @@
-__package__ = 'plugins_extractor.ytdlp'
+__package__ = 'abx_plugin_ytdlp'
import subprocess
from typing import List
from pydantic import InstanceOf
-from pydantic_pkgr import BinProvider, BinName, BinaryOverrides
+from pydantic_pkgr import BinProvider, BinName, BinaryOverrides, Binary
-from abx.archivebox.base_binary import BaseBinary, env, apt, brew
-
-from plugins_pkg.pip.binproviders import LIB_PIP_BINPROVIDER, VENV_PIP_BINPROVIDER, SYS_PIP_BINPROVIDER
+from abx_plugin_default_binproviders import apt, brew, env
+from abx_plugin_pip.binproviders import LIB_PIP_BINPROVIDER, VENV_PIP_BINPROVIDER, SYS_PIP_BINPROVIDER
from .config import YTDLP_CONFIG
-class YtdlpBinary(BaseBinary):
+class YtdlpBinary(Binary):
name: BinName = YTDLP_CONFIG.YTDLP_BINARY
binproviders_supported: List[InstanceOf[BinProvider]] = [LIB_PIP_BINPROVIDER, VENV_PIP_BINPROVIDER, SYS_PIP_BINPROVIDER, apt, brew, env]
YTDLP_BINARY = YtdlpBinary()
-class FfmpegBinary(BaseBinary):
+class FfmpegBinary(Binary):
name: BinName = 'ffmpeg'
binproviders_supported: List[InstanceOf[BinProvider]] = [apt, brew, env]
diff --git a/packages/abx-plugin-ytdlp-extractor/config.py b/archivebox/vendor/abx-plugin-ytdlp/abx_plugin_ytdlp/config.py
similarity index 97%
rename from packages/abx-plugin-ytdlp-extractor/config.py
rename to archivebox/vendor/abx-plugin-ytdlp/abx_plugin_ytdlp/config.py
index 0082df3d..b36d19d1 100644
--- a/packages/abx-plugin-ytdlp-extractor/config.py
+++ b/archivebox/vendor/abx-plugin-ytdlp/abx_plugin_ytdlp/config.py
@@ -4,7 +4,7 @@ from typing import List
from pydantic import Field, AliasChoices
-from abx.archivebox.base_configset import BaseConfigSet
+from abx_spec_config.base_configset import BaseConfigSet
from archivebox.config.common import ARCHIVING_CONFIG
from archivebox.misc.logging import STDERR
diff --git a/archivebox/extractors/media.py b/archivebox/vendor/abx-plugin-ytdlp/abx_plugin_ytdlp/media.py
similarity index 100%
rename from archivebox/extractors/media.py
rename to archivebox/vendor/abx-plugin-ytdlp/abx_plugin_ytdlp/media.py
diff --git a/archivebox/vendor/abx-plugin-ytdlp/pyproject.toml b/archivebox/vendor/abx-plugin-ytdlp/pyproject.toml
new file mode 100644
index 00000000..b45626bd
--- /dev/null
+++ b/archivebox/vendor/abx-plugin-ytdlp/pyproject.toml
@@ -0,0 +1,19 @@
+[project]
+name = "abx-plugin-ytdlp"
+version = "2024.10.28"
+description = "Add your description here"
+readme = "README.md"
+requires-python = ">=3.10"
+dependencies = [
+ "abx>=0.1.0",
+ "abx-spec-config>=0.1.0",
+ "abx-spec-pydantic-pkgr>=0.1.0",
+ "pydantic-pkgr>=0.5.4",
+]
+
+[build-system]
+requires = ["hatchling"]
+build-backend = "hatchling.build"
+
+[project.entry-points.abx]
+abx_plugin_ytdlp = "abx_plugin_ytdlp"
diff --git a/packages/abx-spec-django/README.md b/archivebox/vendor/abx-spec-archivebox/README.md
similarity index 100%
rename from packages/abx-spec-django/README.md
rename to archivebox/vendor/abx-spec-archivebox/README.md
diff --git a/archivebox/vendor/abx-spec-archivebox/abx_spec_archivebox/__init__.py b/archivebox/vendor/abx-spec-archivebox/abx_spec_archivebox/__init__.py
new file mode 100644
index 00000000..ab591c96
--- /dev/null
+++ b/archivebox/vendor/abx-spec-archivebox/abx_spec_archivebox/__init__.py
@@ -0,0 +1,28 @@
+__package__ = 'abx_spec_archivebox'
+__order__ = 400
+
+# from .effects import *
+# from .events import *
+# from .reads import *
+# from .writes import *
+# from .states import *
+
+from typing import cast
+
+import abx
+from abx_spec_config import ConfigPluginSpec
+from abx_spec_pydantic_pkgr import PydanticPkgrPluginSpec
+from abx_spec_django import DjangoPluginSpec
+from abx_spec_searchbackend import SearchBackendPluginSpec
+
+class ArchiveBoxPluginSpec(ConfigPluginSpec, PydanticPkgrPluginSpec, DjangoPluginSpec, SearchBackendPluginSpec):
+ """
+ ArchiveBox plugins can use any of the hooks from the Config, PydanticPkgr, Django, and SearchBackend plugin specs.
+ """
+ pass
+
+PLUGIN_SPEC = ArchiveBoxPluginSpec
+
+
+TypedPluginManager = abx.ABXPluginManager[ArchiveBoxPluginSpec]
+pm = cast(TypedPluginManager, abx.pm)
diff --git a/packages/abx-spec-archivebox/abx_spec_archivebox/effects.py b/archivebox/vendor/abx-spec-archivebox/abx_spec_archivebox/effects.py
similarity index 100%
rename from packages/abx-spec-archivebox/abx_spec_archivebox/effects.py
rename to archivebox/vendor/abx-spec-archivebox/abx_spec_archivebox/effects.py
diff --git a/packages/abx-spec-archivebox/abx_spec_archivebox/events.py b/archivebox/vendor/abx-spec-archivebox/abx_spec_archivebox/events.py
similarity index 100%
rename from packages/abx-spec-archivebox/abx_spec_archivebox/events.py
rename to archivebox/vendor/abx-spec-archivebox/abx_spec_archivebox/events.py
diff --git a/packages/abx-spec-archivebox/abx_spec_archivebox/reads.py b/archivebox/vendor/abx-spec-archivebox/abx_spec_archivebox/reads.py
similarity index 100%
rename from packages/abx-spec-archivebox/abx_spec_archivebox/reads.py
rename to archivebox/vendor/abx-spec-archivebox/abx_spec_archivebox/reads.py
diff --git a/packages/abx-spec-archivebox/abx_spec_archivebox/states.py b/archivebox/vendor/abx-spec-archivebox/abx_spec_archivebox/states.py
similarity index 100%
rename from packages/abx-spec-archivebox/abx_spec_archivebox/states.py
rename to archivebox/vendor/abx-spec-archivebox/abx_spec_archivebox/states.py
diff --git a/packages/abx-spec-archivebox/abx_spec_archivebox/writes.py b/archivebox/vendor/abx-spec-archivebox/abx_spec_archivebox/writes.py
similarity index 99%
rename from packages/abx-spec-archivebox/abx_spec_archivebox/writes.py
rename to archivebox/vendor/abx-spec-archivebox/abx_spec_archivebox/writes.py
index 1ca1ac7e..977543d2 100644
--- a/packages/abx-spec-archivebox/abx_spec_archivebox/writes.py
+++ b/archivebox/vendor/abx-spec-archivebox/abx_spec_archivebox/writes.py
@@ -8,7 +8,6 @@ from benedict import benedict
from django.conf import settings
import abx
-from .. import pm
@abx.hookimpl
diff --git a/packages/abx-spec-archivebox/pyproject.toml b/archivebox/vendor/abx-spec-archivebox/pyproject.toml
similarity index 100%
rename from packages/abx-spec-archivebox/pyproject.toml
rename to archivebox/vendor/abx-spec-archivebox/pyproject.toml
diff --git a/packages/abx-spec-extractor/README.md b/archivebox/vendor/abx-spec-config/README.md
similarity index 100%
rename from packages/abx-spec-extractor/README.md
rename to archivebox/vendor/abx-spec-config/README.md
diff --git a/archivebox/vendor/abx-spec-config/abx_spec_config/__init__.py b/archivebox/vendor/abx-spec-config/abx_spec_config/__init__.py
new file mode 100644
index 00000000..3feaab82
--- /dev/null
+++ b/archivebox/vendor/abx-spec-config/abx_spec_config/__init__.py
@@ -0,0 +1,66 @@
+__order__ = 100
+
+import os
+from pathlib import Path
+from typing import Dict, Any, cast
+
+from benedict import benedict
+
+
+import abx
+
+from .base_configset import BaseConfigSet, ConfigKeyStr
+
+
+class ConfigPluginSpec:
+ @abx.hookspec(firstresult=True)
+ @abx.hookimpl
+ def get_collection_config_path(self) -> Path:
+ return Path(os.getcwd()) / "ArchiveBox.conf"
+
+
+ @abx.hookspec(firstresult=True)
+ @abx.hookimpl
+ def get_system_config_path(self) -> Path:
+ return Path('~/.config/abx/abx.conf').expanduser()
+
+
+ @abx.hookspec
+ @abx.hookimpl
+ def get_CONFIG(self) -> Dict[abx.PluginId, BaseConfigSet]:
+ """Get the config for a single plugin -> {plugin_id: PluginConfigSet()}"""
+ return {
+ # override this in your plugin to return your plugin's config, e.g.
+ # 'ytdlp': YtdlpConfig(...),
+ }
+
+
+ @abx.hookspec(firstresult=True)
+ @abx.hookimpl
+ def get_CONFIGS(self) -> Dict[abx.PluginId, BaseConfigSet]:
+ """Get the config for all plugins by plugin_id -> {plugin_abc: PluginABCConfigSet(), plugin_xyz: PluginXYZConfigSet(), ...}"""
+ return abx.as_dict(pm.hook.get_CONFIG())
+
+
+ @abx.hookspec(firstresult=True)
+ @abx.hookimpl
+ def get_FLAT_CONFIG(self) -> Dict[ConfigKeyStr, Any]:
+ """Get the flat config assembled from all plugins config -> {SOME_KEY: 'someval', 'OTHER_KEY': 'otherval', ...}"""
+ return benedict({
+ key: value
+ for configset in pm.hook.get_CONFIGS().values()
+ for key, value in benedict(configset).items()
+ })
+
+
+ # TODO: add read_config_file(), write_config_file() hooks
+
+
+PLUGIN_SPEC = ConfigPluginSpec
+
+
+class ExpectedPluginSpec(ConfigPluginSpec):
+ pass
+
+TypedPluginManager = abx.ABXPluginManager[ExpectedPluginSpec]
+pm = cast(TypedPluginManager, abx.pm)
diff --git a/packages/abx-spec-config/abx_spec_config/base_configset.py b/archivebox/vendor/abx-spec-config/abx_spec_config/base_configset.py
similarity index 100%
rename from packages/abx-spec-config/abx_spec_config/base_configset.py
rename to archivebox/vendor/abx-spec-config/abx_spec_config/base_configset.py
diff --git a/packages/abx-spec-config/abx_spec_config/toml_util.py b/archivebox/vendor/abx-spec-config/abx_spec_config/toml_util.py
similarity index 100%
rename from packages/abx-spec-config/abx_spec_config/toml_util.py
rename to archivebox/vendor/abx-spec-config/abx_spec_config/toml_util.py
diff --git a/packages/abx-spec-config/pyproject.toml b/archivebox/vendor/abx-spec-config/pyproject.toml
similarity index 67%
rename from packages/abx-spec-config/pyproject.toml
rename to archivebox/vendor/abx-spec-config/pyproject.toml
index b85f675e..aa2f6eb4 100644
--- a/packages/abx-spec-config/pyproject.toml
+++ b/archivebox/vendor/abx-spec-config/pyproject.toml
@@ -1,6 +1,9 @@
[project]
name = "abx-spec-config"
-version = "0.0.1"
+version = "0.1.0"
+description = "The common shared interfaces for the ABX ArchiveBox plugin ecosystem."
+readme = "README.md"
+requires-python = ">=3.10"
dependencies = [
"abx>=0.1.0",
"python-benedict>=0.34.0",
diff --git a/packages/abx-spec-pydantic-pkgr/README.md b/archivebox/vendor/abx-spec-django/README.md
similarity index 100%
rename from packages/abx-spec-pydantic-pkgr/README.md
rename to archivebox/vendor/abx-spec-django/README.md
diff --git a/archivebox/vendor/abx-spec-django/abx_spec_django.py b/archivebox/vendor/abx-spec-django/abx_spec_django.py
new file mode 100644
index 00000000..562dad72
--- /dev/null
+++ b/archivebox/vendor/abx-spec-django/abx_spec_django.py
@@ -0,0 +1,118 @@
+__order__ = 300
+
+import abx
+from typing import List, Dict, Any, cast
+
+###########################################################################################
+
+class DjangoPluginSpec:
+    """Hookspecs through which plugins contribute Django settings entries, URL patterns, admin registrations, and a ready() callback."""
+
+    @abx.hookspec
+    def get_INSTALLED_APPS(self) -> List[str]:
+        return ['abx_spec_django']
+
+    @abx.hookspec
+    def get_TEMPLATE_DIRS(self) -> List[str]:
+        return []     # e.g. ['your_plugin_type/plugin_name/templates']
+
+
+    @abx.hookspec
+    def get_STATICFILES_DIRS(self) -> List[str]:
+        return []     # e.g. ['your_plugin_type/plugin_name/static']
+
+    # @abx.hookspec
+    # def register_STATICFILES_DIRS(STATICFILES_DIRS):
+    #     """Mutate STATICFILES_DIRS in place to add your static dirs in a specific position"""
+    #     # e.g. STATICFILES_DIRS.insert(0, 'your_plugin_type/plugin_name/static')
+    #     pass
+
+
+    @abx.hookspec
+    def get_MIDDLEWARES(self) -> List[str]:
+        return []     # e.g. ['your_plugin_type.plugin_name.middleware.YourMiddleware']
+
+    # @abx.hookspec
+    # def register_MIDDLEWARE(MIDDLEWARE):
+    #     """Mutate MIDDLEWARE in place to add your middleware in a specific position"""
+    #     # e.g. MIDDLEWARE.insert(0, 'your_plugin_type.plugin_name.middleware.YourMiddleware')
+    #     pass
+
+
+    @abx.hookspec
+    def get_AUTHENTICATION_BACKENDS(self) -> List[str]:
+        return []     # e.g. ['django_auth_ldap.backend.LDAPBackend']
+
+    # @abx.hookspec
+    # def register_AUTHENTICATION_BACKENDS(AUTHENTICATION_BACKENDS):
+    #     """Mutate AUTHENTICATION_BACKENDS in place to add your auth backends in a specific position"""
+    #     # e.g. AUTHENTICATION_BACKENDS.insert(0, 'your_plugin_type.plugin_name.backend.YourBackend')
+    #     pass
+
+    @abx.hookspec
+    def get_DJANGO_HUEY_QUEUES(self, QUEUE_DATABASE_NAME) -> Dict[str, Dict[str, Any]]:
+        return {}     # e.g. {'some_queue_name': {'filename': 'some_queue_name.sqlite3', 'store_none': True, 'results': True, ...}}
+
+    # @abx.hookspec
+    # def register_DJANGO_HUEY(DJANGO_HUEY):
+    #     """Mutate DJANGO_HUEY in place to add your huey queues in a specific position"""
+    #     # e.g. DJANGO_HUEY['queues']['some_queue_name']['some_setting'] = 'some_value'
+    #     pass
+
+
+    @abx.hookspec
+    def get_ADMIN_DATA_VIEWS_URLS(self) -> List[str]:
+        return []
+
+    # @abx.hookspec
+    # def register_ADMIN_DATA_VIEWS(ADMIN_DATA_VIEWS):
+    #     """Mutate ADMIN_DATA_VIEWS in place to add your admin data views in a specific position"""
+    #     # e.g. ADMIN_DATA_VIEWS['URLS'].insert(0, 'your_plugin_type/plugin_name/admin_data_views.py')
+    #     pass
+
+
+    # @abx.hookspec
+    # def register_settings(settings):
+    #     """Mutate settings in place to add your settings / modify existing settings"""
+    #     # settings.SOME_KEY = 'some_value'
+    #     pass
+
+
+    ###########################################################################################
+
+    @abx.hookspec
+    def get_urlpatterns(self) -> List[str]:
+        return []     # e.g. [path('your_plugin_type/plugin_name/url.py', your_view)]
+
+    # @abx.hookspec
+    # def register_urlpatterns(urlpatterns):
+    #     """Mutate urlpatterns in place to add your urlpatterns in a specific position"""
+    #     # e.g. urlpatterns.insert(0, path('your_plugin_type/plugin_name/url.py', your_view))
+    #     pass
+
+    ###########################################################################################
+
+    @abx.hookspec
+    def register_admin(self, admin_site) -> None:
+        """Register django admin views/models with the main django admin site instance"""
+        # e.g. admin_site.register(your_model, your_admin_class)
+        pass
+
+
+    ###########################################################################################
+
+
+    @abx.hookspec
+    def ready(self) -> None:
+        """Called when Django apps app.ready() are triggered"""
+        # e.g. abx.pm.hook.get_CONFIG().ytdlp.validate()
+        pass
+
+
+PLUGIN_SPEC = DjangoPluginSpec
+
+class ExpectedPluginSpec(DjangoPluginSpec):
+ pass
+
+TypedPluginManager = abx.ABXPluginManager[ExpectedPluginSpec]
+pm = cast(TypedPluginManager, abx.pm)
diff --git a/packages/abx-spec-django/pyproject.toml b/archivebox/vendor/abx-spec-django/pyproject.toml
similarity index 100%
rename from packages/abx-spec-django/pyproject.toml
rename to archivebox/vendor/abx-spec-django/pyproject.toml
diff --git a/packages/abx-spec-searchbackend/README.md b/archivebox/vendor/abx-spec-extractor/README.md
similarity index 100%
rename from packages/abx-spec-searchbackend/README.md
rename to archivebox/vendor/abx-spec-extractor/README.md
diff --git a/packages/abx-spec-extractor/abx_spec_extractor.py b/archivebox/vendor/abx-spec-extractor/abx_spec_extractor.py
similarity index 100%
rename from packages/abx-spec-extractor/abx_spec_extractor.py
rename to archivebox/vendor/abx-spec-extractor/abx_spec_extractor.py
diff --git a/packages/abx-spec-extractor/pyproject.toml b/archivebox/vendor/abx-spec-extractor/pyproject.toml
similarity index 100%
rename from packages/abx-spec-extractor/pyproject.toml
rename to archivebox/vendor/abx-spec-extractor/pyproject.toml
diff --git a/packages/abx/README.md b/archivebox/vendor/abx-spec-pydantic-pkgr/README.md
similarity index 100%
rename from packages/abx/README.md
rename to archivebox/vendor/abx-spec-pydantic-pkgr/README.md
diff --git a/archivebox/vendor/abx-spec-pydantic-pkgr/abx_spec_pydantic_pkgr.py b/archivebox/vendor/abx-spec-pydantic-pkgr/abx_spec_pydantic_pkgr.py
new file mode 100644
index 00000000..b95b3f33
--- /dev/null
+++ b/archivebox/vendor/abx-spec-pydantic-pkgr/abx_spec_pydantic_pkgr.py
@@ -0,0 +1,114 @@
+__order__ = 200
+
+import os
+
+from typing import Dict, cast
+from pathlib import Path
+
+from pydantic_pkgr import Binary, BinProvider
+
+import abx
+
+from abx_spec_config import ConfigPluginSpec
+
+###########################################################################################
+
+class PydanticPkgrPluginSpec:
+    """Hookspecs, each with a default hookimpl, for resolving LIB_DIR/BIN_DIR and for looking up, loading, and installing binaries."""
+
+    @abx.hookspec(firstresult=True)
+    @abx.hookimpl
+    def get_LIB_DIR(self) -> Path:
+        """Get the directory where shared runtime libraries/dependencies should be installed"""
+        FLAT_CONFIG = pm.hook.get_FLAT_CONFIG()
+        LIB_DIR = Path(FLAT_CONFIG.get('LIB_DIR', '/usr/local/share/abx'))
+        return LIB_DIR
+
+    @abx.hookspec(firstresult=True)
+    @abx.hookimpl
+    def get_BIN_DIR(self) -> Path:
+        """Get the directory where binaries should be symlinked to"""
+        FLAT_CONFIG = pm.hook.get_FLAT_CONFIG()
+        LIB_DIR = pm.hook.get_LIB_DIR()
+        BIN_DIR = Path(FLAT_CONFIG.get('BIN_DIR') or LIB_DIR / 'bin')
+        return BIN_DIR
+
+    @abx.hookspec
+    @abx.hookimpl
+    def get_BINPROVIDERS(self) -> Dict[str, BinProvider]:
+        return {
+            # to be implemented by plugins, e.g.:
+            # 'npm': NpmBinProvider(npm_prefix=Path('/usr/local/share/abx/npm')),
+        }
+
+    @abx.hookspec
+    @abx.hookimpl
+    def get_BINARIES(self) -> Dict[str, Binary]:
+        return {
+            # to be implemented by plugins, e.g.:
+            # 'yt-dlp': Binary(name='yt-dlp', binproviders=[npm]),
+        }
+
+    @abx.hookspec(firstresult=True)
+    @abx.hookimpl
+    def get_BINPROVIDER(self, binprovider_name: str) -> BinProvider:
+        """Get a specific BinProvider by name"""
+        return abx.as_dict(pm.hook.get_BINPROVIDERS())[binprovider_name]
+
+    @abx.hookspec(firstresult=True)
+    @abx.hookimpl
+    def get_BINARY(self, bin_name: str) -> Binary:
+        """Get a specific Binary by name"""
+        return abx.as_dict(pm.hook.get_BINARIES())[bin_name]
+
+    @abx.hookspec(firstresult=True)
+    @abx.hookimpl
+    def binary_load(self, binary: Binary, **kwargs) -> Binary:
+        """Load a binary from the filesystem (override to load a binary from a different source, e.g. DB, cache, etc.)"""
+        loaded_binary = binary.load(**kwargs)
+        pm.hook.binary_symlink_to_bin_dir(binary=loaded_binary)
+        return loaded_binary
+
+    @abx.hookspec(firstresult=True)
+    @abx.hookimpl
+    def binary_install(self, binary: Binary, **kwargs) -> Binary:
+        """Override to change how a binary is installed (e.g. by downloading from a remote source, etc.)"""
+        loaded_binary = binary.install(**kwargs)
+        pm.hook.binary_symlink_to_bin_dir(binary=loaded_binary)
+        return loaded_binary
+
+    @abx.hookspec(firstresult=True)
+    @abx.hookimpl
+    def binary_load_or_install(self, binary: Binary, **kwargs) -> Binary:
+        """Override to change how a binary is loaded or installed (e.g. by downloading from a remote source, etc.)"""
+        loaded_binary = binary.load_or_install(**kwargs)
+        pm.hook.binary_symlink_to_bin_dir(binary=loaded_binary)
+        return loaded_binary
+
+    @abx.hookspec(firstresult=True)
+    @abx.hookimpl
+    def binary_symlink_to_bin_dir(self, binary: Binary, bin_dir: Path | None=None):
+        """Best-effort: symlink binary.abspath into bin_dir (defaults to get_BIN_DIR()); does nothing if the file is missing, ignores failures"""
+        if not (binary.abspath and os.path.isfile(binary.abspath)):
+            return
+        # honor an explicitly passed bin_dir instead of silently ignoring the argument
+        BIN_DIR = bin_dir if bin_dir is not None else pm.hook.get_BIN_DIR()
+        try:
+            BIN_DIR.mkdir(parents=True, exist_ok=True)
+            symlink = BIN_DIR / binary.name
+            symlink.unlink(missing_ok=True)
+            symlink.symlink_to(binary.abspath)
+            symlink.chmod(0o777)   # make sure it's executable by everyone (NOTE(review): chmod presumably follows the link to the target binary - confirm intended)
+        except Exception:
+            # not actually needed, we can just run without it
+            pass
+
+
+PLUGIN_SPEC = PydanticPkgrPluginSpec
+
+
+class RequiredSpecsAvailable(ConfigPluginSpec, PydanticPkgrPluginSpec):
+ pass
+
+TypedPluginManager = abx.ABXPluginManager[RequiredSpecsAvailable]
+pm = cast(TypedPluginManager, abx.pm)
diff --git a/packages/abx-spec-pydantic-pkgr/pyproject.toml b/archivebox/vendor/abx-spec-pydantic-pkgr/pyproject.toml
similarity index 100%
rename from packages/abx-spec-pydantic-pkgr/pyproject.toml
rename to archivebox/vendor/abx-spec-pydantic-pkgr/pyproject.toml
diff --git a/archivebox/vendor/abx-spec-searchbackend/README.md b/archivebox/vendor/abx-spec-searchbackend/README.md
new file mode 100644
index 00000000..e69de29b
diff --git a/archivebox/vendor/abx-spec-searchbackend/abx_spec_searchbackend.py b/archivebox/vendor/abx-spec-searchbackend/abx_spec_searchbackend.py
new file mode 100644
index 00000000..8bc53eb8
--- /dev/null
+++ b/archivebox/vendor/abx-spec-searchbackend/abx_spec_searchbackend.py
@@ -0,0 +1,40 @@
+import abc
+from typing import Iterable, List, Dict, cast
+
+import abx
+from abx_spec_config import ConfigPluginSpec
+
+
+class BaseSearchBackend(abc.ABC):
+ name: str
+
+ @staticmethod
+ @abc.abstractmethod
+ def index(snapshot_id: str, texts: List[str]):
+ return
+
+ @staticmethod
+ @abc.abstractmethod
+ def flush(snapshot_ids: Iterable[str]):
+ return
+
+ @staticmethod
+ @abc.abstractmethod
+ def search(text: str) -> List[str]:
+ raise NotImplementedError("search method must be implemented by subclass")
+
+
+class SearchBackendPluginSpec:
+    @abx.hookspec
+    @abx.hookimpl
+    def get_SEARCHBACKENDS(self) -> Dict[abx.PluginId, BaseSearchBackend]:  # needs self: the spec is registered as an instance, so this default impl runs as a bound method
+        return {}  # default: no backends; plugins return their own {backend_id: backend} mapping
+
+
+class ExpectedPluginSpec(SearchBackendPluginSpec, ConfigPluginSpec):
+ pass
+
+PLUGIN_SPEC = SearchBackendPluginSpec
+
+TypedPluginManager = abx.ABXPluginManager[ExpectedPluginSpec]
+pm = cast(TypedPluginManager, abx.pm)
diff --git a/packages/abx-spec-searchbackend/pyproject.toml b/archivebox/vendor/abx-spec-searchbackend/pyproject.toml
similarity index 100%
rename from packages/abx-spec-searchbackend/pyproject.toml
rename to archivebox/vendor/abx-spec-searchbackend/pyproject.toml
diff --git a/archivebox/vendor/abx/README.md b/archivebox/vendor/abx/README.md
new file mode 100644
index 00000000..e69de29b
diff --git a/archivebox/vendor/abx/abx.py b/archivebox/vendor/abx/abx.py
new file mode 100644
index 00000000..990fe8e1
--- /dev/null
+++ b/archivebox/vendor/abx/abx.py
@@ -0,0 +1,483 @@
+# Module-level plugin metadata. Within this file, get_plugin() reads attributes such
+# as __label__, __author__ and __homepage__ from a plugin module, and
+# get_plugin_order() reads __order__ (falling back to 999 when it is missing).
+__package__ = 'abx'
+__id__ = 'abx'
+__label__ = 'ABX'
+__author__ = 'Nick Sweeting'
+__homepage__ = 'https://github.com/ArchiveBox'
+__order__ = 0
+
+
+import inspect
+import importlib
+import itertools
+from pathlib import Path
+from typing import Dict, Callable, List, Set, Tuple, Iterable, Any, TypeVar, TypedDict, Type, cast, Generic, Mapping, overload, Final, ParamSpec, Literal, Protocol
+from types import ModuleType
+from typing_extensions import Annotated
+from functools import cache
+
+from benedict import benedict
+from pydantic import AfterValidator
+
+from pluggy import HookimplMarker, PluginManager, HookimplOpts, HookspecOpts, HookCaller
+
+
+
+# Generic placeholders for a hook function's parameters and return type, used to
+# express how the hookspec decorator transforms the decorated function's signature.
+ParamsT = ParamSpec("ParamsT")
+ReturnT = TypeVar('ReturnT')
+
+class HookSpecDecoratorThatReturnsFirstResult(Protocol):
+    """Static type of a decorator that keeps the decorated function's return type (ReturnT)."""
+    def __call__(self, func: Callable[ParamsT, ReturnT]) -> Callable[ParamsT, ReturnT]: ...
+
+class HookSpecDecoratorThatReturnsListResults(Protocol):
+    """Static type of a decorator whose result returns List[ReturnT] instead of ReturnT."""
+    def __call__(self, func: Callable[ParamsT, ReturnT]) -> Callable[ParamsT, List[ReturnT]]: ...
+
+
+class TypedHookspecMarker:
+    """
+    Improved version of pluggy.HookspecMarker that supports type inference of hookspecs with firstresult=True|False correctly
+    https://github.com/pytest-dev/pluggy/issues/191
+
+    The instance is used as a decorator, either bare (``@hookspec``) or with
+    options (``@hookspec(firstresult=True)``). At runtime it only attaches the
+    hookspec options to the function; the overloads exist purely for type checkers.
+    """
+
+    __slots__ = ('project_name',)
+
+    def __init__(self, project_name: str) -> None:
+        # project_name is the prefix of the attribute name that the options are stored under
+        self.project_name: Final[str] = project_name
+
+    # handle @hookspec(firstresult=False) -> List[ReturnT] (test_firstresult_False_hookspec)
+    @overload
+    def __call__(
+        self,
+        function: None = ...,
+        firstresult: Literal[False] = ...,
+        historic: bool = ...,
+        warn_on_impl: Warning | None = ...,
+        warn_on_impl_args: Mapping[str, Warning] | None = ...,
+    ) -> HookSpecDecoratorThatReturnsListResults: ...
+
+    # handle @hookspec(firstresult=True) -> ReturnT (test_firstresult_True_hookspec)
+    @overload
+    def __call__(
+        self,
+        function: None = ...,
+        firstresult: Literal[True] = ...,
+        historic: bool = ...,
+        warn_on_impl: Warning | None = ...,
+        warn_on_impl_args: Mapping[str, Warning] | None = ...,
+    ) -> HookSpecDecoratorThatReturnsFirstResult: ...
+
+    # handle @hookspec -> List[ReturnT] (test_normal_hookspec)
+    # order matters!!! this one has to come last
+    @overload
+    def __call__(
+        self,
+        function: Callable[ParamsT, ReturnT] = ...,
+        firstresult: Literal[False] = ...,
+        historic: bool = ...,
+        warn_on_impl: None = ...,
+        warn_on_impl_args: None = ...,
+    ) -> Callable[ParamsT, List[ReturnT]]: ...
+
+    def __call__(
+        self,
+        function: Callable[ParamsT, ReturnT] | None = None,
+        firstresult: bool = False,
+        historic: bool = False,
+        warn_on_impl: Warning | None = None,
+        warn_on_impl_args: Mapping[str, Warning] | None = None,
+    ) -> Callable[ParamsT, List[ReturnT]] | HookSpecDecoratorThatReturnsListResults | HookSpecDecoratorThatReturnsFirstResult:
+        """Mark ``function`` as a hookspec, or return a decorator that does so.
+
+        When ``function`` is given, the options are attached to it and the same
+        function object is returned. When it is None, a decorator is returned
+        that attaches the options once it is applied.
+
+        Raises:
+            ValueError: if both ``historic`` and ``firstresult`` are true
+                (raised when the options are attached to a function).
+        """
+
+        def setattr_hookspec_opts(func) -> Callable:
+            # the combination is rejected here, i.e. only once a function is actually decorated
+            if historic and firstresult:
+                raise ValueError("cannot have a historic firstresult hook")
+            opts: HookspecOpts = {
+                "firstresult": firstresult,
+                "historic": historic,
+                "warn_on_impl": warn_on_impl,
+                "warn_on_impl_args": warn_on_impl_args,
+            }
+            # store the options on the function itself under '<project_name>_spec'
+            setattr(func, self.project_name + "_spec", opts)
+            return func
+
+        if function is not None:
+            # bare usage: @hookspec
+            return setattr_hookspec_opts(function)
+        else:
+            # parametrized usage: @hookspec(...)
+            return setattr_hookspec_opts
+
+
+
+
+# Decorators used to declare hook specifications and hook implementations for the
+# "abx" project; each marker is exposed under a short and a long name.
+spec = hookspec = TypedHookspecMarker("abx")
+impl = hookimpl = HookimplMarker("abx")
+
+
+def is_valid_attr_name(x: str) -> str:
+    """Return ``x`` unchanged if it is a public Python identifier.
+
+    The check is an ``assert`` statement, so a name that is not an identifier
+    or that starts with an underscore fails with AssertionError.
+    """
+    starts_private = x.startswith('_')
+    assert not starts_private and x.isidentifier()
+    return x
+
+def is_valid_module_name(x: str) -> str:
+    """Return ``x`` unchanged if it is a lowercase, public Python identifier.
+
+    The check is an ``assert`` statement, so any other value fails with
+    AssertionError.
+    """
+    is_public_identifier = x.isidentifier() and not x.startswith('_')
+    assert is_public_identifier and x.islower()
+    return x
+
+# String types carrying a pydantic AfterValidator:
+#   AttrName -> validated by is_valid_attr_name (public identifier)
+#   PluginId -> validated by is_valid_module_name (lowercase public identifier)
+AttrName = Annotated[str, AfterValidator(is_valid_attr_name)]
+PluginId = Annotated[str, AfterValidator(is_valid_module_name)]
+
+
+class PluginInfo(TypedDict, total=True):
+    """Metadata describing a single plugin, as assembled by get_plugin() in this file."""
+    # NOTE(review): get_plugin() also stores 'order' and 'plugin' keys that are not
+    # declared here, and load_plugins() sorts on 'order' -- consider declaring them.
+
+    id: PluginId                      # plugin package name with '.' replaced by '_'
+    package: AttrName                 # module.__package__ (or module.__name__ when empty)
+    label: str                        # module __label__, defaults to the plugin id
+    version: str                      # module __version__, defaults to '0.0.1'
+    author: str                       # module __author__, defaults to 'ArchiveBox'
+    homepage: str                     # module __homepage__, defaults to the ArchiveBox GitHub URL
+    dependencies: List[str]           # module __dependencies__, defaults to []
+
+    source_code: str                  # module.__file__
+    hooks: Dict[AttrName, Callable]   # result of get_plugin_hooks() for the plugin
+    module: ModuleType                # the module object backing the plugin
+
+
+
+PluginSpec = TypeVar("PluginSpec")
+
+class ABXPluginManager(PluginManager, Generic[PluginSpec]):
+    """
+    Patch to fix pluggy's PluginManager to work with pydantic models.
+    See: https://github.com/pytest-dev/pluggy/pull/536
+
+    The generic parameter is the hookspec class whose methods describe the
+    hooks available on ``self.hook``; it only affects static type checking.
+    """
+
+    # enable static type checking of pm.hook.call() calls
+    # https://stackoverflow.com/a/62871889/2156113
+    # https://github.com/pytest-dev/pluggy/issues/191
+    hook: PluginSpec
+
+    def create_typed_hookcaller(self, name: str, module_or_class: Type[PluginSpec], spec_opts: HookspecOpts) -> HookCaller:
+        """
+        create a new HookCaller subclass with a modified __signature__
+        so that the return type is correct and args are converted to kwargs
+
+        Args:
+            name: name of the hook (an attribute of ``module_or_class``).
+            module_or_class: object that defines the hookspec function.
+            spec_opts: hookspec options parsed for that function.
+
+        Returns:
+            An instance of a one-off HookCaller subclass named ``<name>_HookCaller``.
+        """
+        TypedHookCaller = type('TypedHookCaller', (HookCaller,), {})
+
+        hookspec_signature = inspect.signature(getattr(module_or_class, name))
+        hookspec_return_type = hookspec_signature.return_annotation
+
+        # replace return type with list if firstresult=False
+        hookcall_return_type = hookspec_return_type if spec_opts['firstresult'] else List[hookspec_return_type]
+
+        # replace each arg with kwarg equivalent (pm.hook.call() only accepts kwargs)
+        args_as_kwargs = [
+            param.replace(kind=inspect.Parameter.KEYWORD_ONLY) if param.name != 'self' else param
+            for param in hookspec_signature.parameters.values()
+        ]
+        TypedHookCaller.__signature__ = hookspec_signature.replace(parameters=args_as_kwargs, return_annotation=hookcall_return_type)
+        TypedHookCaller.__name__ = f'{name}_HookCaller'
+
+        return TypedHookCaller(name, self._hookexec, module_or_class, spec_opts)
+
+    def add_hookspecs(self, module_or_class: Type[PluginSpec]) -> None:
+        """Add HookSpecs from the given class, (generic type allows us to enforce types of pm.hook.call() statically)
+
+        Raises:
+            ValueError: if ``module_or_class`` defines no hookspecs for this project.
+        """
+        names = []
+        for name in dir(module_or_class):
+            spec_opts = self.parse_hookspec_opts(module_or_class, name)
+            if spec_opts is not None:
+                hc: HookCaller | None = getattr(self.hook, name, None)
+                if hc is None:
+                    # first time this hook is seen: create a caller with a typed signature
+                    hc = self.create_typed_hookcaller(name, module_or_class, spec_opts)
+                    setattr(self.hook, name, hc)
+                else:
+                    # Plugins registered this hook without knowing the spec.
+                    hc.set_specification(module_or_class, spec_opts)
+                    for hookfunction in hc.get_hookimpls():
+                        self._verify_hook(hc, hookfunction)
+                names.append(name)
+
+        if not names:
+            raise ValueError(
+                f"did not find any {self.project_name!r} hooks in {module_or_class!r}"
+            )
+
+    def parse_hookimpl_opts(self, plugin, name: str) -> HookimplOpts | None:
+        """Return the hookimpl options of ``plugin.<name>``, or None if it is not a hookimpl.
+
+        Properties are skipped without being evaluated, and an AttributeError
+        raised while inspecting the attribute is treated as "not a hookimpl".
+        """
+        # IMPORTANT: @property methods can have side effects, and are never hookimpl
+        # if attr is a property, skip it in advance.
+        # getattr_static() returns the raw descriptor without invoking it; a plain
+        # getattr() on an instance would run the property getter and return its
+        # value, so the property would neither be detected nor left untouched.
+        if isinstance(inspect.getattr_static(plugin, name, None), property):
+            return None
+
+        try:
+            return super().parse_hookimpl_opts(plugin, name)
+        except AttributeError:
+            return None
+
+
+# Module-level plugin manager for the "abx" project, used by the helper functions in this file.
+pm = ABXPluginManager("abx")
+
+
+
+def get_plugin_order(plugin: PluginId | Path | ModuleType | Type) -> Tuple[int, Path]:
+    """Return the ``(order, plugin_dir)`` sort key used to decide plugin load order.
+
+    The order is taken from a ``.plugin_order`` file in the plugin directory if
+    one exists, otherwise from the plugin module's ``__order__`` attribute,
+    otherwise it defaults to 999.
+
+    Args:
+        plugin: one of
+            - a path (str or Path) to a ``.py`` file: its parent directory is used
+            - a Path, or a str containing ``/``: used as the plugin directory
+            - an importable module name: the module is imported to locate it
+            - a module or a class: located through its source file
+
+    Raises:
+        ValueError: if ``plugin`` is none of the supported kinds of object.
+        AssertionError: if ``plugin`` is falsy, no plugin directory could be
+            determined, or ``.plugin_order`` holds an out-of-range value.
+    """
+    assert plugin
+    plugin_module = None
+    plugin_dir = None
+
+    if isinstance(plugin, (str, Path)):
+        plugin_str = str(plugin)
+        if plugin_str.endswith('.py'):
+            plugin_dir = Path(plugin).parent
+        elif isinstance(plugin, Path) or '/' in plugin_str:
+            # assume it's a path to a plugin directory
+            plugin_dir = Path(plugin)
+        elif plugin_str.isidentifier():
+            # a bare module name: import it so that both its directory and its
+            # __order__ attribute can be found (previously this case fell through
+            # and failed the plugin_dir assertion at the end)
+            plugin_module = importlib.import_module(plugin_str)
+            plugin_dir = Path(str(plugin_module.__file__)).parent
+
+    elif inspect.ismodule(plugin):
+        plugin_module = plugin
+        plugin_dir = Path(str(plugin_module.__file__)).parent
+    elif inspect.isclass(plugin):
+        # for a class, __order__ is read from the class itself
+        plugin_module = plugin
+        plugin_dir = Path(inspect.getfile(plugin)).parent
+    else:
+        raise ValueError(f'Invalid plugin, cannot get order: {plugin}')
+
+    if plugin_dir:
+        try:
+            # if .plugin_order file exists, use it to set the load priority
+            order = int((plugin_dir / '.plugin_order').read_text())
+        except (FileNotFoundError, NotADirectoryError):
+            # no override file: fall back to __order__ / the default below
+            pass
+        else:
+            assert -1000000 < order < 100000000
+            return (order, plugin_dir)
+
+    order = getattr(plugin_module, '__order__', 999) if plugin_module else 999
+
+    assert order is not None
+    assert plugin_dir, f'Could not determine the plugin directory for: {plugin}'
+
+    return (order, plugin_dir)
+
+
+# @cache
+def get_plugin(plugin: PluginId | ModuleType | Type) -> PluginInfo:
+    """Collect the metadata of one plugin into a PluginInfo mapping.
+
+    ``plugin`` may be an importable package name, a module, or a class. For a
+    name or a module, the plugin object is the module's ``PLUGIN_SPEC``
+    attribute, else its ``PLUGIN`` attribute, else the module itself.
+
+    Metadata is merged from the module's ``__dunder__`` attributes and from
+    the ``project`` table of a nearby pyproject.toml (which takes precedence).
+
+    Raises:
+        ValueError: if ``plugin`` is not a name, module or class.
+    """
+    assert plugin
+
+    if isinstance(plugin, str):
+        # import the plugin module by its name
+        module = importlib.import_module(plugin)
+        lookup_plugin_object = True
+    elif inspect.ismodule(plugin):
+        module = plugin
+        lookup_plugin_object = True
+    elif inspect.isclass(plugin):
+        module = inspect.getmodule(plugin)
+        lookup_plugin_object = False
+    else:
+        raise ValueError(f'Invalid plugin, must be a module, class, or plugin ID (package name): {plugin}')
+
+    if lookup_plugin_object:
+        fallback = getattr(module, 'PLUGIN', module)
+        plugin = getattr(module, 'PLUGIN_SPEC', fallback)
+
+    assert module
+
+    plugin_file = Path(inspect.getfile(module))
+    plugin_package = module.__package__ or module.__name__
+    plugin_id = plugin_package.replace('.', '_')
+
+    # load the plugin info from the plugin/__init__.py __attr__s if they exist
+    module_attr_defaults = (
+        ('label', '__label__', plugin_id),
+        ('version', '__version__', '0.0.1'),
+        ('author', '__author__', 'ArchiveBox'),
+        ('homepage', '__homepage__', 'https://github.com/ArchiveBox'),
+        ('dependencies', '__dependencies__', []),
+    )
+    plugin_module_attrs = {
+        info_key: getattr(module, dunder_name, default)
+        for info_key, dunder_name, default in module_attr_defaults
+    }
+
+    # load the plugin info from the plugin/pyproject.toml file if it has one:
+    #   1. ./pyproject.toml, in case the plugin is a bare python file not inside a package dir
+    #   2. ../pyproject.toml, in case the plugin is in a package dir
+    # the first location that loads successfully wins; failures are deliberately ignored
+    plugin_toml_info = {}
+    for toml_dir in (plugin_file.parent, plugin_file.parent.parent):
+        try:
+            plugin_toml_info = benedict.from_toml((toml_dir / 'pyproject.toml').read_text()).project
+        except Exception:
+            continue
+        break
+
+    assert plugin_id
+    assert plugin_package
+    assert module.__file__
+
+    # merge the plugin info from all sources + add dynamically calculated info
+    merged_info = {
+        'id': plugin_id,
+        **plugin_module_attrs,
+        **plugin_toml_info,
+        'package': plugin_package,
+        'source_code': module.__file__,
+        'order': get_plugin_order(plugin),
+        'hooks': get_plugin_hooks(plugin),
+        'module': module,
+        'plugin': plugin,
+    }
+    return cast(PluginInfo, benedict(PluginInfo(**merged_info)))
+
+
+def get_all_plugins() -> Dict[PluginId, PluginInfo]:
+    """Return the metadata of every plugin registered with the plugin manager, keyed by plugin id."""
+    info_by_id = {}
+    for registered_plugin in pm.get_plugins():
+        info = get_plugin(plugin=registered_plugin)
+        assert 'id' in info
+        info_by_id[info['id']] = info
+    return benedict(info_by_id)
+
+
+def get_all_hook_names() -> Set[str]:
+    """Return the names of all hook implementations found on the registered plugins."""
+    hook_names: Set[str] = set()
+    for registered_plugin in pm.get_plugins():
+        # get_plugin_hooks() returns a dict keyed by hook name
+        hook_names.update(get_plugin_hooks(registered_plugin))
+    return hook_names
+
+
+def get_all_hook_specs() -> Dict[str, Dict[str, Any]]:
+    """Get a set of all hookspec methods defined in all plugins (useful for type checking if a pm.hook.call() is valid)
+
+    Returns:
+        A dict keyed by hook name. Each value is a dict with the keys:
+        ``name``, ``method`` (a callable that forwards to ``pm.hook.<name>``),
+        ``signature`` (the hookspec signature with the call's return type),
+        ``hookspec_opts``, ``hookspec_signature`` and ``hookspec_plugin``.
+    """
+
+    def make_hook_caller(name: str) -> Callable:
+        # Bind the hook name per caller. A lambda defined inside the loop would
+        # look up the loop variable when called, so every caller would end up
+        # invoking whichever hook happened to be processed last.
+        def call_hook(*args, **kwargs):
+            return getattr(pm.hook, name)(*args, **kwargs)
+        return call_hook
+
+    hook_specs = {}
+    registered_plugins = pm.get_plugins()
+
+    for hook_name in get_all_hook_names():
+        for plugin_module in registered_plugins:
+            if not hasattr(plugin_module, hook_name):
+                continue
+            hookspecopts = pm.parse_hookspec_opts(plugin_module, hook_name)
+            if not hookspecopts:
+                continue
+
+            signature = inspect.signature(getattr(plugin_module, hook_name))
+            return_type = signature.return_annotation
+            if return_type is inspect.Signature.empty:
+                return_type = None
+            if not hookspecopts.get('firstresult'):
+                # if not firstresult, return_type is a sequence
+                return_type = List[return_type]
+            call_signature = signature.replace(return_annotation=return_type)
+
+            # plugins that are not modules (e.g. classes) have no __package__
+            plugin_package = getattr(plugin_module, '__package__', None) or getattr(plugin_module, '__module__', None)
+
+            method = make_hook_caller(hook_name)
+            method.__signature__ = call_signature
+            method.__name__ = hook_name
+            method.__package__ = plugin_package
+
+            hook_specs[hook_name] = {
+                'name': hook_name,
+                'method': method,
+                'signature': call_signature,
+                'hookspec_opts': hookspecopts,
+                'hookspec_signature': signature,
+                'hookspec_plugin': plugin_package,
+            }
+    return hook_specs
+
+
+
+###### PLUGIN DISCOVERY AND LOADING ########################################################
+
+
+def find_plugins_in_dir(plugins_dir: Path) -> Dict[PluginId, Path]:
+    """
+    Find all the plugins in a given directory. Just looks for an __init__.py file.
+
+    Returns a dict of ``{directory name: directory path}`` in load order.
+    Directories named 'abx' or 'core' are left out.
+    """
+    entrypoints = list(plugins_dir.glob("*/__init__.py"))
+    # get_plugin_order() returns an (order, path) tuple, used directly as the sort key
+    entrypoints.sort(key=lambda entrypoint: get_plugin_order(plugin=entrypoint))
+
+    excluded_names = ('abx', 'core')
+    found_plugins = {}
+    for entrypoint in entrypoints:
+        package_dir = entrypoint.parent
+        if package_dir.name in excluded_names:
+            continue
+        found_plugins[package_dir.name] = package_dir
+    return found_plugins
+
+
+def get_pip_installed_plugins(group: PluginId='abx') -> Dict[PluginId, Path]:
+    """replaces pm.load_setuptools_entrypoints("abx"), finds plugins that registered entrypoints via pip
+
+    Returns a dict of ``{entrypoint name: directory of the loaded entrypoint}``
+    for every entrypoint in ``group`` that is not blocked in the plugin manager.
+    Plugins are only located here; they are not registered.
+    """
+    import importlib.metadata
+
+    detected_plugins = {}                      # module_name: module_dir_path
+    for dist in list(importlib.metadata.distributions()):
+        for entrypoint in dist.entry_points:
+            is_wanted = entrypoint.group == group and not pm.is_blocked(entrypoint.name)
+            if is_wanted:
+                loaded_entrypoint = entrypoint.load()
+                detected_plugins[entrypoint.name] = Path(loaded_entrypoint.__file__).parent
+    return detected_plugins
+
+
+
+# Load all plugins from pip packages, archivebox built-ins, and user plugins
+def load_plugins(plugins: Iterable[PluginId | ModuleType | Type] | Dict[PluginId, Path]):
+    """
+    Load all the plugins from a dictionary of module names and directory paths.
+
+    Each item yielded by iterating ``plugins`` is resolved with get_plugin().
+    Plugins whose module is not registered yet are registered with the plugin
+    manager in ascending 'order'. Returns the metadata of all given plugins,
+    keyed by plugin id.
+    """
+    pending = []
+    loaded = {}
+
+    for plugin in plugins:
+        plugin_info = get_plugin(plugin)
+        assert plugin_info, f'No plugin metadata found for {plugin}'
+        assert 'id' in plugin_info and 'module' in plugin_info
+
+        already_registered = plugin_info['module'] in pm.get_plugins()
+        if already_registered:
+            loaded[plugin_info['id']] = plugin_info
+        else:
+            pending.append(plugin_info)
+
+    # register the remaining plugins in load order
+    pending.sort(key=lambda info: info['order'])
+    for plugin_info in pending:
+        pm.register(plugin_info['module'])
+        loaded[plugin_info['id']] = plugin_info
+
+    return benedict(loaded)
+
+@cache
+def get_plugin_hooks(plugin: PluginId | ModuleType | Type | None) -> Dict[AttrName, Callable]:
+    """Get all the functions marked with @hookimpl on a module.
+
+    ``plugin`` may be an importable module name, a module, or a class; a falsy
+    value yields an empty dict. Errors raised while inspecting an attribute
+    are printed and that attribute is skipped.
+
+    The result is cached per ``plugin``, so the same dict object is returned
+    on every call with the same argument.
+
+    Raises:
+        ValueError: if ``plugin`` is truthy but not a name, module or class.
+    """
+    if not plugin:
+        return {}
+
+    if isinstance(plugin, str):
+        plugin_module = importlib.import_module(plugin)
+    elif inspect.ismodule(plugin) or inspect.isclass(plugin):
+        plugin_module = plugin
+    else:
+        raise ValueError(f'Invalid plugin, cannot get hooks: {plugin}')
+
+    hooks = {}
+    public_names = (name for name in dir(plugin_module) if not name.startswith('_'))
+    for attr_name in public_names:
+        try:
+            attr = getattr(plugin_module, attr_name)
+            is_hookimpl = isinstance(attr, Callable) and pm.parse_hookimpl_opts(plugin_module, attr_name)
+            if is_hookimpl:
+                hooks[attr_name] = attr
+        except Exception as e:
+            print(f'Error getting hookimpls for {plugin}: {e}')
+
+    return hooks
+
+ReturnT = TypeVar('ReturnT')
+
+def as_list(results: List[List[ReturnT]]) -> List[ReturnT]:
+    """Flatten a list of lists returned by a pm.hook.call() into a single list"""
+    flattened = itertools.chain.from_iterable(results)
+    return list(flattened)
+
+
+def as_dict(results: List[Dict[PluginId, ReturnT]]) -> Dict[PluginId, ReturnT]:
+    """Flatten a list of dicts returned by a pm.hook.call() into a single dict
+
+    If ``results`` is itself a dict, its values are merged instead. When the
+    same key appears more than once, the value seen last wins.
+    """
+    per_plugin_results = results.values() if isinstance(results, (dict, benedict)) else results
+
+    merged = {}
+    for plugin_results in per_plugin_results:
+        for result_id, result in plugin_results.items():
+            merged[result_id] = result
+    return benedict(merged)
diff --git a/packages/abx/pyproject.toml b/archivebox/vendor/abx/pyproject.toml
similarity index 100%
rename from packages/abx/pyproject.toml
rename to archivebox/vendor/abx/pyproject.toml
diff --git a/archivebox/vendor/pocket b/archivebox/vendor/pocket
new file mode 160000
index 00000000..e7970b63
--- /dev/null
+++ b/archivebox/vendor/pocket
@@ -0,0 +1 @@
+Subproject commit e7970b63feafc8941c325111c5ce3706698a18b5
diff --git a/packages/pydantic-pkgr b/archivebox/vendor/pydantic-pkgr
similarity index 100%
rename from packages/pydantic-pkgr
rename to archivebox/vendor/pydantic-pkgr
diff --git a/packages/abx-plugin-archivedotorg-extractor/__init__.py b/packages/abx-plugin-archivedotorg-extractor/__init__.py
deleted file mode 100644
index a5c24932..00000000
--- a/packages/abx-plugin-archivedotorg-extractor/__init__.py
+++ /dev/null
@@ -1,39 +0,0 @@
-__package__ = 'plugins_extractor.archivedotorg'
-__label__ = 'archivedotorg'
-__version__ = '2024.10.14'
-__author__ = 'ArchiveBox'
-__homepage__ = 'https://archive.org'
-__dependencies__ = []
-
-import abx
-
-
-@abx.hookimpl
-def get_PLUGIN():
- return {
- 'archivedotorg': {
- 'PACKAGE': __package__,
- 'LABEL': __label__,
- 'VERSION': __version__,
- 'AUTHOR': __author__,
- 'HOMEPAGE': __homepage__,
- 'DEPENDENCIES': __dependencies__,
- }
- }
-
-@abx.hookimpl
-def get_CONFIG():
- from .config import ARCHIVEDOTORG_CONFIG
-
- return {
- 'archivedotorg': ARCHIVEDOTORG_CONFIG
- }
-
-
-# @abx.hookimpl
-# def get_EXTRACTORS():
-# from .extractors import ARCHIVEDOTORG_EXTRACTOR
-#
-# return {
-# 'archivedotorg': ARCHIVEDOTORG_EXTRACTOR,
-# }
diff --git a/packages/abx-plugin-archivedotorg-extractor/pyproject.toml b/packages/abx-plugin-archivedotorg-extractor/pyproject.toml
deleted file mode 100644
index 8754b4bd..00000000
--- a/packages/abx-plugin-archivedotorg-extractor/pyproject.toml
+++ /dev/null
@@ -1,7 +0,0 @@
-[project]
-name = "abx-archivedotorg-extractor"
-version = "0.1.0"
-description = "Add your description here"
-readme = "README.md"
-requires-python = ">=3.10"
-dependencies = []
diff --git a/packages/abx-plugin-chrome-extractor/__init__.py b/packages/abx-plugin-chrome-extractor/__init__.py
deleted file mode 100644
index 016cd292..00000000
--- a/packages/abx-plugin-chrome-extractor/__init__.py
+++ /dev/null
@@ -1,54 +0,0 @@
-__package__ = 'plugins_extractor.chrome'
-__id__ = 'chrome'
-__label__ = 'Chrome'
-__version__ = '2024.10.14'
-__author__ = 'ArchiveBox'
-__homepage__ = 'https://github.com/ArchiveBox/ArchiveBox/tree/main/archivebox/plugins_extractor/chrome'
-__dependencies__ = []
-
-import abx
-
-
-@abx.hookimpl
-def get_PLUGIN():
- return {
- __id__: {
- 'id': __id__,
- 'package': __package__,
- 'label': __label__,
- 'version': __version__,
- 'author': __author__,
- 'homepage': __homepage__,
- 'dependencies': __dependencies__,
- }
- }
-
-@abx.hookimpl
-def get_CONFIG():
- from .config import CHROME_CONFIG
-
- return {
- __id__: CHROME_CONFIG
- }
-
-@abx.hookimpl
-def get_BINARIES():
- from .binaries import CHROME_BINARY
-
- return {
- 'chrome': CHROME_BINARY,
- }
-
-@abx.hookimpl
-def ready():
- from .config import CHROME_CONFIG
- CHROME_CONFIG.validate()
-
-
-# @abx.hookimpl
-# def get_EXTRACTORS():
-# return {
-# 'pdf': PDF_EXTRACTOR,
-# 'screenshot': SCREENSHOT_EXTRACTOR,
-# 'dom': DOM_EXTRACTOR,
-# }
diff --git a/packages/abx-plugin-chrome-extractor/pyproject.toml b/packages/abx-plugin-chrome-extractor/pyproject.toml
deleted file mode 100644
index 6676882c..00000000
--- a/packages/abx-plugin-chrome-extractor/pyproject.toml
+++ /dev/null
@@ -1,7 +0,0 @@
-[project]
-name = "abx-chrome-extractor"
-version = "0.1.0"
-description = "Add your description here"
-readme = "README.md"
-requires-python = ">=3.10"
-dependencies = []
diff --git a/packages/abx-plugin-curl-extractor/__init__.py b/packages/abx-plugin-curl-extractor/__init__.py
deleted file mode 100644
index 99af0107..00000000
--- a/packages/abx-plugin-curl-extractor/__init__.py
+++ /dev/null
@@ -1,38 +0,0 @@
-__package__ = 'plugins_extractor.curl'
-__label__ = 'curl'
-__version__ = '2024.10.14'
-__author__ = 'ArchiveBox'
-__homepage__ = 'https://github.com/curl/curl'
-__dependencies__ = []
-
-import abx
-
-
-@abx.hookimpl
-def get_PLUGIN():
- return {
- 'curl': {
- 'PACKAGE': __package__,
- 'LABEL': __label__,
- 'VERSION': __version__,
- 'AUTHOR': __author__,
- 'HOMEPAGE': __homepage__,
- 'DEPENDENCIES': __dependencies__,
- }
- }
-
-@abx.hookimpl
-def get_CONFIG():
- from .config import CURL_CONFIG
-
- return {
- 'curl': CURL_CONFIG
- }
-
-@abx.hookimpl
-def get_BINARIES():
- from .binaries import CURL_BINARY
-
- return {
- 'curl': CURL_BINARY,
- }
diff --git a/packages/abx-plugin-curl-extractor/pyproject.toml b/packages/abx-plugin-curl-extractor/pyproject.toml
deleted file mode 100644
index 9bd6f396..00000000
--- a/packages/abx-plugin-curl-extractor/pyproject.toml
+++ /dev/null
@@ -1,7 +0,0 @@
-[project]
-name = "abx-curl-extractor"
-version = "0.1.0"
-description = "Add your description here"
-readme = "README.md"
-requires-python = ">=3.10"
-dependencies = []
diff --git a/packages/abx-plugin-favicon-extractor/__init__.py b/packages/abx-plugin-favicon-extractor/__init__.py
deleted file mode 100644
index 3fa84560..00000000
--- a/packages/abx-plugin-favicon-extractor/__init__.py
+++ /dev/null
@@ -1,39 +0,0 @@
-__package__ = 'plugins_extractor.favicon'
-__label__ = 'favicon'
-__version__ = '2024.10.14'
-__author__ = 'ArchiveBox'
-__homepage__ = 'https://github.com/ArchiveBox/archivebox'
-__dependencies__ = []
-
-import abx
-
-
-@abx.hookimpl
-def get_PLUGIN():
- return {
- 'favicon': {
- 'PACKAGE': __package__,
- 'LABEL': __label__,
- 'VERSION': __version__,
- 'AUTHOR': __author__,
- 'HOMEPAGE': __homepage__,
- 'DEPENDENCIES': __dependencies__,
- }
- }
-
-@abx.hookimpl
-def get_CONFIG():
- from .config import FAVICON_CONFIG
-
- return {
- 'favicon': FAVICON_CONFIG
- }
-
-
-# @abx.hookimpl
-# def get_EXTRACTORS():
-# from .extractors import FAVICON_EXTRACTOR
-
-# return {
-# 'favicon': FAVICON_EXTRACTOR,
-# }
diff --git a/packages/abx-plugin-favicon-extractor/pyproject.toml b/packages/abx-plugin-favicon-extractor/pyproject.toml
deleted file mode 100644
index 96e62f6d..00000000
--- a/packages/abx-plugin-favicon-extractor/pyproject.toml
+++ /dev/null
@@ -1,7 +0,0 @@
-[project]
-name = "abx-favicon-extractor"
-version = "0.1.0"
-description = "Add your description here"
-readme = "README.md"
-requires-python = ">=3.10"
-dependencies = []
diff --git a/packages/abx-plugin-git-extractor/__init__.py b/packages/abx-plugin-git-extractor/__init__.py
deleted file mode 100644
index db18919f..00000000
--- a/packages/abx-plugin-git-extractor/__init__.py
+++ /dev/null
@@ -1,46 +0,0 @@
-__package__ = 'plugins_extractor.git'
-__label__ = 'git'
-__version__ = '2024.10.14'
-__author__ = 'ArchiveBox'
-__homepage__ = 'https://github.com/git/git'
-__dependencies__ = []
-
-import abx
-
-
-@abx.hookimpl
-def get_PLUGIN():
- return {
- 'git': {
- 'PACKAGE': __package__,
- 'LABEL': __label__,
- 'VERSION': __version__,
- 'AUTHOR': __author__,
- 'HOMEPAGE': __homepage__,
- 'DEPENDENCIES': __dependencies__,
- }
- }
-
-@abx.hookimpl
-def get_CONFIG():
- from .config import GIT_CONFIG
-
- return {
- 'git': GIT_CONFIG
- }
-
-@abx.hookimpl
-def get_BINARIES():
- from .binaries import GIT_BINARY
-
- return {
- 'git': GIT_BINARY,
- }
-
-@abx.hookimpl
-def get_EXTRACTORS():
- from .extractors import GIT_EXTRACTOR
-
- return {
- 'git': GIT_EXTRACTOR,
- }
diff --git a/packages/abx-plugin-git-extractor/extractors.py b/packages/abx-plugin-git-extractor/extractors.py
deleted file mode 100644
index 350f1b82..00000000
--- a/packages/abx-plugin-git-extractor/extractors.py
+++ /dev/null
@@ -1,17 +0,0 @@
-__package__ = 'plugins_extractor.git'
-
-from pathlib import Path
-
-from abx.archivebox.base_extractor import BaseExtractor, ExtractorName
-
-from .binaries import GIT_BINARY
-
-
-class GitExtractor(BaseExtractor):
- name: ExtractorName = 'git'
- binary: str = GIT_BINARY.name
-
- def get_output_path(self, snapshot) -> Path | None:
- return snapshot.as_link() / 'git'
-
-GIT_EXTRACTOR = GitExtractor()
diff --git a/packages/abx-plugin-git-extractor/pyproject.toml b/packages/abx-plugin-git-extractor/pyproject.toml
deleted file mode 100644
index 4a7b375e..00000000
--- a/packages/abx-plugin-git-extractor/pyproject.toml
+++ /dev/null
@@ -1,7 +0,0 @@
-[project]
-name = "abx-git-extractor"
-version = "0.1.0"
-description = "Add your description here"
-readme = "README.md"
-requires-python = ">=3.10"
-dependencies = []
diff --git a/packages/abx-plugin-htmltotext-extractor/__init__.py b/packages/abx-plugin-htmltotext-extractor/__init__.py
deleted file mode 100644
index 0f2b756c..00000000
--- a/packages/abx-plugin-htmltotext-extractor/__init__.py
+++ /dev/null
@@ -1,41 +0,0 @@
-__package__ = 'plugins_extractor.htmltotext'
-__id__ = 'htmltotext'
-__label__ = 'HTML-to-Text'
-__version__ = '2024.10.14'
-__author__ = 'ArchiveBox'
-__homepage__ = 'https://github.com/ArchiveBox/archivebox'
-__dependencies__ = []
-
-import abx
-
-
-@abx.hookimpl
-def get_PLUGIN():
- return {
- __id__: {
- 'id': __id__,
- 'package': __package__,
- 'label': __label__,
- 'version': __version__,
- 'author': __author__,
- 'homepage': __homepage__,
- 'dependencies': __dependencies__,
- }
- }
-
-@abx.hookimpl
-def get_CONFIG():
- from .config import HTMLTOTEXT_CONFIG
-
- return {
- __id__: HTMLTOTEXT_CONFIG
- }
-
-
-# @abx.hookimpl
-# def get_EXTRACTORS():
-# from .extractors import FAVICON_EXTRACTOR
-
-# return {
-# 'htmltotext': FAVICON_EXTRACTOR,
-# }
diff --git a/packages/abx-plugin-htmltotext-extractor/pyproject.toml b/packages/abx-plugin-htmltotext-extractor/pyproject.toml
deleted file mode 100644
index 2e26cb25..00000000
--- a/packages/abx-plugin-htmltotext-extractor/pyproject.toml
+++ /dev/null
@@ -1,7 +0,0 @@
-[project]
-name = "abx-htmltotext-extractor"
-version = "0.1.0"
-description = "Add your description here"
-readme = "README.md"
-requires-python = ">=3.10"
-dependencies = []
diff --git a/packages/abx-plugin-mercury-extractor/__init__.py b/packages/abx-plugin-mercury-extractor/__init__.py
deleted file mode 100644
index 10aca671..00000000
--- a/packages/abx-plugin-mercury-extractor/__init__.py
+++ /dev/null
@@ -1,46 +0,0 @@
-__package__ = 'plugins_extractor.mercury'
-__label__ = 'mercury'
-__version__ = '2024.10.14'
-__author__ = 'ArchiveBox'
-__homepage__ = 'https://github.com/postlight/mercury-parser'
-__dependencies__ = ['npm']
-
-import abx
-
-
-@abx.hookimpl
-def get_PLUGIN():
- return {
- 'mercury': {
- 'PACKAGE': __package__,
- 'LABEL': __label__,
- 'VERSION': __version__,
- 'AUTHOR': __author__,
- 'HOMEPAGE': __homepage__,
- 'DEPENDENCIES': __dependencies__,
- }
- }
-
-@abx.hookimpl
-def get_CONFIG():
- from .config import MERCURY_CONFIG
-
- return {
- 'mercury': MERCURY_CONFIG
- }
-
-@abx.hookimpl
-def get_BINARIES():
- from .binaries import MERCURY_BINARY
-
- return {
- 'mercury': MERCURY_BINARY,
- }
-
-@abx.hookimpl
-def get_EXTRACTORS():
- from .extractors import MERCURY_EXTRACTOR
-
- return {
- 'mercury': MERCURY_EXTRACTOR,
- }
diff --git a/packages/abx-plugin-mercury-extractor/extractors.py b/packages/abx-plugin-mercury-extractor/extractors.py
deleted file mode 100644
index 5d91b0e0..00000000
--- a/packages/abx-plugin-mercury-extractor/extractors.py
+++ /dev/null
@@ -1,19 +0,0 @@
-__package__ = 'plugins_extractor.mercury'
-
-from pathlib import Path
-
-from abx.archivebox.base_extractor import BaseExtractor, ExtractorName
-
-from .binaries import MERCURY_BINARY
-
-
-
-class MercuryExtractor(BaseExtractor):
- name: ExtractorName = 'mercury'
- binary: str = MERCURY_BINARY.name
-
- def get_output_path(self, snapshot) -> Path | None:
- return snapshot.link_dir / 'mercury' / 'content.html'
-
-
-MERCURY_EXTRACTOR = MercuryExtractor()
diff --git a/packages/abx-plugin-mercury-extractor/pyproject.toml b/packages/abx-plugin-mercury-extractor/pyproject.toml
deleted file mode 100644
index 35415a1d..00000000
--- a/packages/abx-plugin-mercury-extractor/pyproject.toml
+++ /dev/null
@@ -1,7 +0,0 @@
-[project]
-name = "abx-mercury-extractor"
-version = "0.1.0"
-description = "Add your description here"
-readme = "README.md"
-requires-python = ">=3.10"
-dependencies = []
diff --git a/packages/abx-plugin-pocket-extractor/__init__.py b/packages/abx-plugin-pocket-extractor/__init__.py
deleted file mode 100644
index bf09435f..00000000
--- a/packages/abx-plugin-pocket-extractor/__init__.py
+++ /dev/null
@@ -1,37 +0,0 @@
-__package__ = 'plugins_extractor.pocket'
-__id__ = 'pocket'
-__label__ = 'pocket'
-__version__ = '2024.10.21'
-__author__ = 'ArchiveBox'
-__homepage__ = 'https://github.com/ArchiveBox/ArchiveBox/tree/dev/archivebox/plugins_extractor/pocket'
-__dependencies__ = []
-
-import abx
-
-
-@abx.hookimpl
-def get_PLUGIN():
- return {
- __id__: {
- 'id': __id__,
- 'package': __package__,
- 'label': __label__,
- 'version': __version__,
- 'author': __author__,
- 'homepage': __homepage__,
- 'dependencies': __dependencies__,
- }
- }
-
-@abx.hookimpl
-def get_CONFIG():
- from .config import POCKET_CONFIG
-
- return {
- __id__: POCKET_CONFIG
- }
-
-@abx.hookimpl
-def ready():
- from .config import POCKET_CONFIG
- POCKET_CONFIG.validate()
diff --git a/packages/abx-plugin-pocket-extractor/pyproject.toml b/packages/abx-plugin-pocket-extractor/pyproject.toml
deleted file mode 100644
index c9af2450..00000000
--- a/packages/abx-plugin-pocket-extractor/pyproject.toml
+++ /dev/null
@@ -1,7 +0,0 @@
-[project]
-name = "abx-pocket-extractor"
-version = "0.1.0"
-description = "Add your description here"
-readme = "README.md"
-requires-python = ">=3.10"
-dependencies = []
diff --git a/packages/abx-plugin-puppeteer-binprovider/__init__.py b/packages/abx-plugin-puppeteer-binprovider/__init__.py
deleted file mode 100644
index 7acc5b1b..00000000
--- a/packages/abx-plugin-puppeteer-binprovider/__init__.py
+++ /dev/null
@@ -1,46 +0,0 @@
-__package__ = 'plugins_pkg.puppeteer'
-__label__ = 'puppeteer'
-__version__ = '2024.10.14'
-__author__ = 'ArchiveBox'
-__homepage__ = 'https://github.com/puppeteer/puppeteer'
-__dependencies__ = ['npm']
-
-import abx
-
-
-@abx.hookimpl
-def get_PLUGIN():
- return {
- 'puppeteer': {
- 'PACKAGE': __package__,
- 'LABEL': __label__,
- 'VERSION': __version__,
- 'AUTHOR': __author__,
- 'HOMEPAGE': __homepage__,
- 'DEPENDENCIES': __dependencies__,
- }
- }
-
-@abx.hookimpl
-def get_CONFIG():
- from .config import PUPPETEER_CONFIG
-
- return {
- 'puppeteer': PUPPETEER_CONFIG
- }
-
-@abx.hookimpl
-def get_BINARIES():
- from .binaries import PUPPETEER_BINARY
-
- return {
- 'puppeteer': PUPPETEER_BINARY,
- }
-
-@abx.hookimpl
-def get_BINPROVIDERS():
- from .binproviders import PUPPETEER_BINPROVIDER
-
- return {
- 'puppeteer': PUPPETEER_BINPROVIDER,
- }
diff --git a/packages/abx-plugin-puppeteer-binprovider/pyproject.toml b/packages/abx-plugin-puppeteer-binprovider/pyproject.toml
deleted file mode 100644
index e901ca88..00000000
--- a/packages/abx-plugin-puppeteer-binprovider/pyproject.toml
+++ /dev/null
@@ -1,7 +0,0 @@
-[project]
-name = "abx-puppeteer-binprovider"
-version = "0.1.0"
-description = "Add your description here"
-readme = "README.md"
-requires-python = ">=3.10"
-dependencies = []
diff --git a/packages/abx-plugin-readability-extractor/__init__.py b/packages/abx-plugin-readability-extractor/__init__.py
deleted file mode 100644
index 2ef1a1a8..00000000
--- a/packages/abx-plugin-readability-extractor/__init__.py
+++ /dev/null
@@ -1,46 +0,0 @@
-__package__ = 'plugins_extractor.readability'
-__label__ = 'readability'
-__version__ = '2024.10.14'
-__author__ = 'ArchiveBox'
-__homepage__ = 'https://github.com/ArchiveBox/readability-extractor'
-__dependencies__ = ['npm']
-
-import abx
-
-
-@abx.hookimpl
-def get_PLUGIN():
- return {
- 'readability': {
- 'PACKAGE': __package__,
- 'LABEL': __label__,
- 'VERSION': __version__,
- 'AUTHOR': __author__,
- 'HOMEPAGE': __homepage__,
- 'DEPENDENCIES': __dependencies__,
- }
- }
-
-@abx.hookimpl
-def get_CONFIG():
- from .config import READABILITY_CONFIG
-
- return {
- 'readability': READABILITY_CONFIG
- }
-
-@abx.hookimpl
-def get_BINARIES():
- from .binaries import READABILITY_BINARY
-
- return {
- 'readability': READABILITY_BINARY,
- }
-
-@abx.hookimpl
-def get_EXTRACTORS():
- from .extractors import READABILITY_EXTRACTOR
-
- return {
- 'readability': READABILITY_EXTRACTOR,
- }
diff --git a/packages/abx-plugin-readability-extractor/extractors.py b/packages/abx-plugin-readability-extractor/extractors.py
deleted file mode 100644
index eb8ea165..00000000
--- a/packages/abx-plugin-readability-extractor/extractors.py
+++ /dev/null
@@ -1,20 +0,0 @@
-__package__ = 'plugins_extractor.readability'
-
-from pathlib import Path
-
-from pydantic_pkgr import BinName
-
-from abx.archivebox.base_extractor import BaseExtractor
-
-from .binaries import READABILITY_BINARY
-
-
-class ReadabilityExtractor(BaseExtractor):
- name: str = 'readability'
- binary: BinName = READABILITY_BINARY.name
-
- def get_output_path(self, snapshot) -> Path:
- return Path(snapshot.link_dir) / 'readability' / 'content.html'
-
-
-READABILITY_EXTRACTOR = ReadabilityExtractor()
diff --git a/packages/abx-plugin-readability-extractor/pyproject.toml b/packages/abx-plugin-readability-extractor/pyproject.toml
deleted file mode 100644
index 5caa0adb..00000000
--- a/packages/abx-plugin-readability-extractor/pyproject.toml
+++ /dev/null
@@ -1,7 +0,0 @@
-[project]
-name = "abx-readability-extractor"
-version = "0.1.0"
-description = "Add your description here"
-readme = "README.md"
-requires-python = ">=3.10"
-dependencies = []
diff --git a/packages/abx-plugin-readwise-extractor/__init__.py b/packages/abx-plugin-readwise-extractor/__init__.py
deleted file mode 100644
index 002eb58b..00000000
--- a/packages/abx-plugin-readwise-extractor/__init__.py
+++ /dev/null
@@ -1,37 +0,0 @@
-__package__ = 'plugins_extractor.readwise'
-__id__ = 'readwise'
-__label__ = 'readwise'
-__version__ = '2024.10.21'
-__author__ = 'ArchiveBox'
-__homepage__ = 'https://github.com/ArchiveBox/ArchiveBox/tree/dev/archivebox/plugins_extractor/readwise'
-__dependencies__ = []
-
-import abx
-
-
-@abx.hookimpl
-def get_PLUGIN():
- return {
- __id__: {
- 'id': __id__,
- 'package': __package__,
- 'label': __label__,
- 'version': __version__,
- 'author': __author__,
- 'homepage': __homepage__,
- 'dependencies': __dependencies__,
- }
- }
-
-@abx.hookimpl
-def get_CONFIG():
- from .config import READWISE_CONFIG
-
- return {
- __id__: READWISE_CONFIG
- }
-
-@abx.hookimpl
-def ready():
- from .config import READWISE_CONFIG
- READWISE_CONFIG.validate()
diff --git a/packages/abx-plugin-readwise-extractor/config.py b/packages/abx-plugin-readwise-extractor/config.py
deleted file mode 100644
index 106aaf06..00000000
--- a/packages/abx-plugin-readwise-extractor/config.py
+++ /dev/null
@@ -1,17 +0,0 @@
-__package__ = 'plugins_extractor.readwise'
-
-from typing import Dict
-from pathlib import Path
-
-from pydantic import Field
-
-from abx.archivebox.base_configset import BaseConfigSet
-
-from archivebox.config import CONSTANTS
-
-
-class ReadwiseConfig(BaseConfigSet):
- READWISE_DB_PATH: Path = Field(default=CONSTANTS.SOURCES_DIR / "readwise_reader_api.db")
- READWISE_READER_TOKENS: Dict[str, str] = Field(default=lambda: {}) # {: , ...}
-
-READWISE_CONFIG = ReadwiseConfig()
diff --git a/packages/abx-plugin-readwise-extractor/pyproject.toml b/packages/abx-plugin-readwise-extractor/pyproject.toml
deleted file mode 100644
index 7df49b56..00000000
--- a/packages/abx-plugin-readwise-extractor/pyproject.toml
+++ /dev/null
@@ -1,7 +0,0 @@
-[project]
-name = "abx-readwise-extractor"
-version = "0.1.0"
-description = "Add your description here"
-readme = "README.md"
-requires-python = ">=3.10"
-dependencies = []
diff --git a/packages/abx-plugin-ripgrep-search/__init__.py b/packages/abx-plugin-ripgrep-search/__init__.py
deleted file mode 100644
index ac1e417c..00000000
--- a/packages/abx-plugin-ripgrep-search/__init__.py
+++ /dev/null
@@ -1,48 +0,0 @@
-__package__ = 'plugins_search.ripgrep'
-__label__ = 'ripgrep'
-__version__ = '2024.10.14'
-__author__ = 'ArchiveBox'
-__homepage__ = 'https://github.com/BurntSushi/ripgrep'
-__dependencies__ = []
-
-import abx
-
-
-@abx.hookimpl
-def get_PLUGIN():
- return {
- 'ripgrep': {
- 'PACKAGE': __package__,
- 'LABEL': __label__,
- 'VERSION': __version__,
- 'AUTHOR': __author__,
- 'HOMEPAGE': __homepage__,
- 'DEPENDENCIES': __dependencies__,
- }
- }
-
-@abx.hookimpl
-def get_CONFIG():
- from .config import RIPGREP_CONFIG
-
- return {
- 'ripgrep': RIPGREP_CONFIG
- }
-
-
-@abx.hookimpl
-def get_BINARIES():
- from .binaries import RIPGREP_BINARY
-
- return {
- 'ripgrep': RIPGREP_BINARY
- }
-
-
-@abx.hookimpl
-def get_SEARCHBACKENDS():
- from .searchbackend import RIPGREP_SEARCH_BACKEND
-
- return {
- 'ripgrep': RIPGREP_SEARCH_BACKEND,
- }
diff --git a/packages/abx-plugin-ripgrep-search/pyproject.toml b/packages/abx-plugin-ripgrep-search/pyproject.toml
deleted file mode 100644
index c79821d1..00000000
--- a/packages/abx-plugin-ripgrep-search/pyproject.toml
+++ /dev/null
@@ -1,7 +0,0 @@
-[project]
-name = "abx-ripgrep-search"
-version = "0.1.0"
-description = "Add your description here"
-readme = "README.md"
-requires-python = ">=3.10"
-dependencies = []
diff --git a/packages/abx-plugin-singlefile-extractor/extractors.py b/packages/abx-plugin-singlefile-extractor/extractors.py
deleted file mode 100644
index fedbe801..00000000
--- a/packages/abx-plugin-singlefile-extractor/extractors.py
+++ /dev/null
@@ -1,19 +0,0 @@
-__package__ = 'plugins_extractor.singlefile'
-
-from pathlib import Path
-
-from pydantic_pkgr import BinName
-from abx.archivebox.base_extractor import BaseExtractor
-
-from .binaries import SINGLEFILE_BINARY
-
-
-class SinglefileExtractor(BaseExtractor):
- name: str = 'singlefile'
- binary: BinName = SINGLEFILE_BINARY.name
-
- def get_output_path(self, snapshot) -> Path:
- return Path(snapshot.link_dir) / 'singlefile.html'
-
-
-SINGLEFILE_EXTRACTOR = SinglefileExtractor()
diff --git a/packages/abx-plugin-singlefile-extractor/pyproject.toml b/packages/abx-plugin-singlefile-extractor/pyproject.toml
deleted file mode 100644
index b0c9df1b..00000000
--- a/packages/abx-plugin-singlefile-extractor/pyproject.toml
+++ /dev/null
@@ -1,7 +0,0 @@
-[project]
-name = "abx-singlefile-extractor"
-version = "0.1.0"
-description = "Add your description here"
-readme = "README.md"
-requires-python = ">=3.10"
-dependencies = []
diff --git a/packages/abx-plugin-sonic-search/pyproject.toml b/packages/abx-plugin-sonic-search/pyproject.toml
deleted file mode 100644
index a61d17c7..00000000
--- a/packages/abx-plugin-sonic-search/pyproject.toml
+++ /dev/null
@@ -1,7 +0,0 @@
-[project]
-name = "abx-sonic-search"
-version = "0.1.0"
-description = "Add your description here"
-readme = "README.md"
-requires-python = ">=3.10"
-dependencies = []
diff --git a/packages/abx-plugin-sqlitefts-search/__init__.py b/packages/abx-plugin-sqlitefts-search/__init__.py
deleted file mode 100644
index 63fb1b12..00000000
--- a/packages/abx-plugin-sqlitefts-search/__init__.py
+++ /dev/null
@@ -1,39 +0,0 @@
-__package__ = 'plugins_search.sqlitefts'
-__label__ = 'sqlitefts'
-__version__ = '2024.10.14'
-__author__ = 'ArchiveBox'
-__homepage__ = 'https://github.com/ArchiveBox/archivebox'
-__dependencies__ = []
-
-import abx
-
-
-@abx.hookimpl
-def get_PLUGIN():
- return {
- 'sqlitefts': {
- 'PACKAGE': __package__,
- 'LABEL': __label__,
- 'VERSION': __version__,
- 'AUTHOR': __author__,
- 'HOMEPAGE': __homepage__,
- 'DEPENDENCIES': __dependencies__,
- }
- }
-
-@abx.hookimpl
-def get_CONFIG():
- from .config import SQLITEFTS_CONFIG
-
- return {
- 'sqlitefts': SQLITEFTS_CONFIG
- }
-
-
-@abx.hookimpl
-def get_SEARCHBACKENDS():
- from .searchbackend import SQLITEFTS_SEARCH_BACKEND
-
- return {
- 'sqlitefts': SQLITEFTS_SEARCH_BACKEND,
- }
diff --git a/packages/abx-plugin-sqlitefts-search/pyproject.toml b/packages/abx-plugin-sqlitefts-search/pyproject.toml
deleted file mode 100644
index f635fb16..00000000
--- a/packages/abx-plugin-sqlitefts-search/pyproject.toml
+++ /dev/null
@@ -1,7 +0,0 @@
-[project]
-name = "abx-sqlitefts-search"
-version = "0.1.0"
-description = "Add your description here"
-readme = "README.md"
-requires-python = ">=3.10"
-dependencies = []
diff --git a/packages/abx-plugin-wget-extractor/__init__.py b/packages/abx-plugin-wget-extractor/__init__.py
deleted file mode 100644
index e2a36aa4..00000000
--- a/packages/abx-plugin-wget-extractor/__init__.py
+++ /dev/null
@@ -1,54 +0,0 @@
-__package__ = 'plugins_extractor.wget'
-__id__ = 'wget'
-__label__ = 'WGET'
-__version__ = '2024.10.14'
-__author__ = 'ArchiveBox'
-__homepage__ = 'https://github.com/ArchiveBox/ArchiveBox/tree/dev/archivebox/plugins_extractor/wget'
-__dependencies__ = []
-
-import abx
-
-
-@abx.hookimpl
-def get_PLUGIN():
- return {
- __id__: {
- 'id': __id__,
- 'package': __package__,
- 'label': __label__,
- 'version': __version__,
- 'author': __author__,
- 'homepage': __homepage__,
- 'dependencies': __dependencies__,
- }
- }
-
-@abx.hookimpl
-def get_CONFIG():
- from .config import WGET_CONFIG
-
- return {
- __id__: WGET_CONFIG
- }
-
-@abx.hookimpl
-def get_BINARIES():
- from .binaries import WGET_BINARY
-
- return {
- 'wget': WGET_BINARY,
- }
-
-@abx.hookimpl
-def get_EXTRACTORS():
- from .extractors import WGET_EXTRACTOR, WARC_EXTRACTOR
-
- return {
- 'wget': WGET_EXTRACTOR,
- 'warc': WARC_EXTRACTOR,
- }
-
-@abx.hookimpl
-def ready():
- from .config import WGET_CONFIG
- WGET_CONFIG.validate()
diff --git a/packages/abx-plugin-wget-extractor/extractors.py b/packages/abx-plugin-wget-extractor/extractors.py
deleted file mode 100644
index 86fa3923..00000000
--- a/packages/abx-plugin-wget-extractor/extractors.py
+++ /dev/null
@@ -1,37 +0,0 @@
-__package__ = 'plugins_extractor.wget'
-
-from pathlib import Path
-
-from pydantic_pkgr import BinName
-
-from abx.archivebox.base_extractor import BaseExtractor, ExtractorName
-
-from .binaries import WGET_BINARY
-from .wget_util import wget_output_path
-
-class WgetExtractor(BaseExtractor):
- name: ExtractorName = 'wget'
- binary: BinName = WGET_BINARY.name
-
- def get_output_path(self, snapshot) -> Path | None:
- wget_index_path = wget_output_path(snapshot.as_link())
- if wget_index_path:
- return Path(wget_index_path)
- return None
-
-WGET_EXTRACTOR = WgetExtractor()
-
-
-class WarcExtractor(BaseExtractor):
- name: ExtractorName = 'warc'
- binary: BinName = WGET_BINARY.name
-
- def get_output_path(self, snapshot) -> Path | None:
- warc_files = list((Path(snapshot.link_dir) / 'warc').glob('*.warc.gz'))
- if warc_files:
- return sorted(warc_files, key=lambda x: x.stat().st_size, reverse=True)[0]
- return None
-
-
-WARC_EXTRACTOR = WarcExtractor()
-
diff --git a/packages/abx-plugin-wget-extractor/pyproject.toml b/packages/abx-plugin-wget-extractor/pyproject.toml
deleted file mode 100644
index 21445c18..00000000
--- a/packages/abx-plugin-wget-extractor/pyproject.toml
+++ /dev/null
@@ -1,7 +0,0 @@
-[project]
-name = "abx-wget-extractor"
-version = "0.1.0"
-description = "Add your description here"
-readme = "README.md"
-requires-python = ">=3.10"
-dependencies = []
diff --git a/packages/abx-plugin-ytdlp-extractor/pyproject.toml b/packages/abx-plugin-ytdlp-extractor/pyproject.toml
deleted file mode 100644
index 1b6b4e30..00000000
--- a/packages/abx-plugin-ytdlp-extractor/pyproject.toml
+++ /dev/null
@@ -1,7 +0,0 @@
-[project]
-name = "abx-ytdlp-extractor"
-version = "0.1.0"
-description = "Add your description here"
-readme = "README.md"
-requires-python = ">=3.10"
-dependencies = []
diff --git a/packages/abx-spec-archivebox/abx_spec_archivebox/__init__.py b/packages/abx-spec-archivebox/abx_spec_archivebox/__init__.py
deleted file mode 100644
index 5b646bf9..00000000
--- a/packages/abx-spec-archivebox/abx_spec_archivebox/__init__.py
+++ /dev/null
@@ -1,7 +0,0 @@
-__package__ = 'abx_spec_archivebox'
-
-# from .effects import *
-# from .events import *
-# from .reads import *
-# from .writes import *
-# from .states import *
diff --git a/packages/abx-spec-config/abx_spec_config/__init__.py b/packages/abx-spec-config/abx_spec_config/__init__.py
deleted file mode 100644
index cc840381..00000000
--- a/packages/abx-spec-config/abx_spec_config/__init__.py
+++ /dev/null
@@ -1,50 +0,0 @@
-import os
-from pathlib import Path
-from typing import Dict, Any
-
-from benedict import benedict
-
-
-import abx
-
-from .base_configset import BaseConfigSet, ConfigKeyStr
-
-
-@abx.hookspec(firstresult=True)
-@abx.hookimpl
-def get_collection_config_path() -> Path:
- return Path(os.getcwd()) / "ArchiveBox.conf"
-
-
-@abx.hookspec(firstresult=True)
-@abx.hookimpl
-def get_system_config_path() -> Path:
- return Path('~/.config/abx/abx.conf').expanduser()
-
-
-@abx.hookspec
-@abx.hookimpl
-def get_CONFIG() -> Dict[abx.PluginId, BaseConfigSet]:
- """Get the config for a single plugin -> {plugin_id: PluginConfigSet()}"""
- return {}
-
-
-@abx.hookspec(firstresult=True)
-@abx.hookimpl
-def get_CONFIGS() -> Dict[abx.PluginId, BaseConfigSet]:
- """Get the config for all plugins by plugin_id -> {plugin_abc: PluginABCConfigSet(), plugin_xyz: PluginXYZConfigSet(), ...}"""
- return abx.as_dict(abx.pm.hook.get_CONFIG())
-
-
-@abx.hookspec(firstresult=True)
-@abx.hookimpl
-def get_FLAT_CONFIG() -> Dict[ConfigKeyStr, Any]:
- """Get the flat config assembled from all plugins config -> {SOME_KEY: 'someval', 'OTHER_KEY': 'otherval', ...}"""
- return benedict({
- key: value
- for configset in get_CONFIGS().values()
- for key, value in benedict(configset).items()
- })
-
-
-# TODO: add read_config_file(), write_config_file() hooks
diff --git a/packages/abx-spec-django/abx_spec_django/__init__.py b/packages/abx-spec-django/abx_spec_django/__init__.py
deleted file mode 100644
index 20f62d2b..00000000
--- a/packages/abx-spec-django/abx_spec_django/__init__.py
+++ /dev/null
@@ -1,140 +0,0 @@
-import abx
-
-###########################################################################################
-
-@abx.hookspec
-@abx.hookimpl
-def get_INSTALLED_APPS():
- """Return a list of apps to add to INSTALLED_APPS"""
- # e.g. ['your_plugin_type.plugin_name']
- return ['abx_spec_django']
-
-# @abx.hookspec
-# @abx.hookimpl
-# def register_INSTALLED_APPS(INSTALLED_APPS):
-# """Mutate INSTALLED_APPS in place to add your app in a specific position"""
-# # idx_of_contrib = INSTALLED_APPS.index('django.contrib.auth')
-# # INSTALLED_APPS.insert(idx_of_contrib + 1, 'your_plugin_type.plugin_name')
-# pass
-
-
-@abx.hookspec
-@abx.hookimpl
-def get_TEMPLATE_DIRS():
- return [] # e.g. ['your_plugin_type/plugin_name/templates']
-
-# @abx.hookspec
-# @abx.hookimpl
-# def register_TEMPLATE_DIRS(TEMPLATE_DIRS):
-# """Install django settings"""
-# # e.g. TEMPLATE_DIRS.insert(0, 'your_plugin_type/plugin_name/templates')
-# pass
-
-
-@abx.hookspec
-@abx.hookimpl
-def get_STATICFILES_DIRS():
- return [] # e.g. ['your_plugin_type/plugin_name/static']
-
-# @abx.hookspec
-# @abx.hookimpl
-# def register_STATICFILES_DIRS(STATICFILES_DIRS):
-# """Mutate STATICFILES_DIRS in place to add your static dirs in a specific position"""
-# # e.g. STATICFILES_DIRS.insert(0, 'your_plugin_type/plugin_name/static')
-# pass
-
-
-@abx.hookspec
-@abx.hookimpl
-def get_MIDDLEWARES():
- return [] # e.g. ['your_plugin_type.plugin_name.middleware.YourMiddleware']
-
-# @abx.hookspec
-# @abx.hookimpl
-# def register_MIDDLEWARE(MIDDLEWARE):
-# """Mutate MIDDLEWARE in place to add your middleware in a specific position"""
-# # e.g. MIDDLEWARE.insert(0, 'your_plugin_type.plugin_name.middleware.YourMiddleware')
-# pass
-
-
-@abx.hookspec
-@abx.hookimpl
-def get_AUTHENTICATION_BACKENDS():
- return [] # e.g. ['django_auth_ldap.backend.LDAPBackend']
-
-# @abx.hookspec
-# @abx.hookimpl
-# def register_AUTHENTICATION_BACKENDS(AUTHENTICATION_BACKENDS):
-# """Mutate AUTHENTICATION_BACKENDS in place to add your auth backends in a specific position"""
-# # e.g. AUTHENTICATION_BACKENDS.insert(0, 'your_plugin_type.plugin_name.backend.YourBackend')
-# pass
-
-@abx.hookspec
-@abx.hookimpl
-def get_DJANGO_HUEY_QUEUES(QUEUE_DATABASE_NAME):
- return {} # e.g. {'some_queue_name': {'filename': 'some_queue_name.sqlite3', 'store_none': True, 'results': True, ...}}
-
-# @abx.hookspec
-# @abx.hookimpl
-# def register_DJANGO_HUEY(DJANGO_HUEY):
-# """Mutate DJANGO_HUEY in place to add your huey queues in a specific position"""
-# # e.g. DJANGO_HUEY['queues']['some_queue_name']['some_setting'] = 'some_value'
-# pass
-
-
-@abx.hookspec
-@abx.hookimpl
-def get_ADMIN_DATA_VIEWS_URLS():
- return []
-
-# @abx.hookspec
-# @abx.hookimpl
-# def register_ADMIN_DATA_VIEWS(ADMIN_DATA_VIEWS):
-# """Mutate ADMIN_DATA_VIEWS in place to add your admin data views in a specific position"""
-# # e.g. ADMIN_DATA_VIEWS['URLS'].insert(0, 'your_plugin_type/plugin_name/admin_data_views.py')
-# pass
-
-
-# @abx.hookspec
-# @abx.hookimpl
-# def register_settings(settings):
-# """Mutate settings in place to add your settings / modify existing settings"""
-# # settings.SOME_KEY = 'some_value'
-# pass
-
-
-###########################################################################################
-
-@abx.hookspec
-@abx.hookimpl
-def get_urlpatterns():
- return [] # e.g. [path('your_plugin_type/plugin_name/url.py', your_view)]
-
-# @abx.hookspec
-# @abx.hookimpl
-# def register_urlpatterns(urlpatterns):
-# """Mutate urlpatterns in place to add your urlpatterns in a specific position"""
-# # e.g. urlpatterns.insert(0, path('your_plugin_type/plugin_name/url.py', your_view))
-# pass
-
-###########################################################################################
-
-
-
-@abx.hookspec
-@abx.hookimpl
-def register_admin(admin_site):
- """Register django admin views/models with the main django admin site instance"""
- # e.g. admin_site.register(your_model, your_admin_class)
- pass
-
-
-###########################################################################################
-
-
-@abx.hookspec
-@abx.hookimpl
-def ready():
- """Called when Django apps app.ready() are triggered"""
- # e.g. abx.pm.hook.get_CONFIG().ytdlp.validate()
- pass
diff --git a/packages/abx-spec-django/abx_spec_django/apps.py b/packages/abx-spec-django/abx_spec_django/apps.py
deleted file mode 100644
index 667b74c0..00000000
--- a/packages/abx-spec-django/abx_spec_django/apps.py
+++ /dev/null
@@ -1,14 +0,0 @@
-__package__ = 'abx_spec_django'
-
-from django.apps import AppConfig
-
-import abx
-
-
-class ABXConfig(AppConfig):
- name = 'abx_spec_django'
-
- def ready(self):
- from django.conf import settings
-
- abx.pm.hook.ready(settings=settings)
diff --git a/packages/abx-spec-pydantic-pkgr/abx_spec_pydantic_pkgr.py b/packages/abx-spec-pydantic-pkgr/abx_spec_pydantic_pkgr.py
deleted file mode 100644
index 4665452a..00000000
--- a/packages/abx-spec-pydantic-pkgr/abx_spec_pydantic_pkgr.py
+++ /dev/null
@@ -1,72 +0,0 @@
-import os
-
-from typing import Dict
-from pathlib import Path
-
-import abx
-
-from pydantic_pkgr import Binary, BinProvider
-
-###########################################################################################
-
-@abx.hookspec
-@abx.hookimpl()
-def get_BINPROVIDERS() -> Dict[str, BinProvider]:
- return {}
-
-@abx.hookspec
-@abx.hookimpl()
-def get_BINARIES() -> Dict[str, Binary]:
- return {}
-
-@abx.hookspec(firstresult=True)
-@abx.hookimpl
-def get_BINPROVIDER(binprovider_name: str) -> BinProvider:
- return abx.as_dict(abx.pm.hook.get_BINPROVIDERS())[binprovider_name]
-
-@abx.hookspec(firstresult=True)
-@abx.hookimpl
-def get_BINARY(bin_name: str) -> BinProvider:
- return abx.as_dict(abx.pm.hook.get_BINARYS())[bin_name]
-
-
-@abx.hookspec(firstresult=True)
-@abx.hookimpl
-def binary_load(binary: Binary, **kwargs) -> Binary:
- loaded_binary = binary.load(**kwargs)
- abx.pm.hook.binary_symlink_to_bin_dir(binary=loaded_binary)
- return loaded_binary
-
-@abx.hookspec(firstresult=True)
-@abx.hookimpl
-def binary_install(binary: Binary, **kwargs) -> Binary:
- loaded_binary = binary.install(**kwargs)
- abx.pm.hook.binary_symlink_to_bin_dir(binary=loaded_binary)
- return loaded_binary
-
-@abx.hookspec(firstresult=True)
-@abx.hookimpl
-def binary_load_or_install(binary: Binary, **kwargs) -> Binary:
- loaded_binary = binary.load_or_install(**kwargs)
- abx.pm.hook.binary_symlink_to_bin_dir(binary=loaded_binary)
- return loaded_binary
-
-@abx.hookspec(firstresult=True)
-@abx.hookimpl
-def binary_symlink_to_bin_dir(binary: Binary, bin_dir: Path | None=None):
- LIB_DIR = Path(abx.pm.hook.get_CONFIG().get('LIB_DIR', '/usr/local/share/abx'))
- BIN_DIR = bin_dir or Path(abx.pm.hook.get_CONFIG().get('BIN_DIR', LIB_DIR / 'bin'))
-
- if not (binary.abspath and os.path.isfile(binary.abspath)):
- return
-
- try:
- BIN_DIR.mkdir(parents=True, exist_ok=True)
- symlink = BIN_DIR / binary.name
- symlink.unlink(missing_ok=True)
- symlink.symlink_to(binary.abspath)
- symlink.chmod(0o777) # make sure its executable by everyone
- except Exception:
- # print(f'[red]:warning: Failed to symlink {symlink} -> {binary.abspath}[/red] {err}')
- # not actually needed, we can just run without it
- pass
diff --git a/packages/abx-spec-searchbackend/abx_spec_searchbackend.py b/packages/abx-spec-searchbackend/abx_spec_searchbackend.py
deleted file mode 100644
index 66b34114..00000000
--- a/packages/abx-spec-searchbackend/abx_spec_searchbackend.py
+++ /dev/null
@@ -1,29 +0,0 @@
-import abc
-from typing import Iterable, List, Dict
-
-import abx
-
-@abx.hookspec
-@abx.hookimpl
-def get_SEARCHBACKENDS() -> Dict[abx.PluginId, 'BaseSearchBackend']:
- return {}
-
-
-class BaseSearchBackend(abc.ABC):
- name: str
-
- @staticmethod
- @abc.abstractmethod
- def index(snapshot_id: str, texts: List[str]):
- return
-
- @staticmethod
- @abc.abstractmethod
- def flush(snapshot_ids: Iterable[str]):
- return
-
- @staticmethod
- @abc.abstractmethod
- def search(text: str) -> List[str]:
- raise NotImplementedError("search method must be implemented by subclass")
-
diff --git a/packages/abx/abx.py b/packages/abx/abx.py
deleted file mode 100644
index 0ce28462..00000000
--- a/packages/abx/abx.py
+++ /dev/null
@@ -1,344 +0,0 @@
-__package__ = 'abx'
-__id__ = 'abx'
-__label__ = 'ABX'
-__author__ = 'Nick Sweeting'
-__homepage__ = 'https://github.com/ArchiveBox'
-__order__ = 0
-
-
-import sys
-import inspect
-import importlib
-import itertools
-from pathlib import Path
-from typing import Dict, Callable, List, Set, Tuple, Iterable, Any, TypedDict, Type, cast
-from types import ModuleType
-from typing_extensions import Annotated
-from functools import cache
-
-from benedict import benedict
-from pydantic import AfterValidator
-
-from pluggy import HookspecMarker, HookimplMarker, PluginManager, HookimplOpts
-
-spec = hookspec = HookspecMarker("abx")
-impl = hookimpl = HookimplMarker("abx")
-
-
-
-AttrName = Annotated[str, AfterValidator(lambda x: x.isidentifier() and not x.startswith('_'))]
-PluginId = Annotated[str, AfterValidator(lambda x: x.isidentifier() and not x.startswith('_') and x.islower())]
-
-class PluginInfo(TypedDict, total=False):
- id: PluginId
- package: AttrName
- label: str
- version: str
- author: str
- homepage: str
- dependencies: List[str]
-
- source_code: str
- hooks: Dict[AttrName, Callable]
- module: ModuleType
-
-
-
-class PatchedPluginManager(PluginManager):
- """
- Patch to fix pluggy's PluginManager to work with pydantic models.
- See: https://github.com/pytest-dev/pluggy/pull/536
- """
- def parse_hookimpl_opts(self, plugin, name: str) -> HookimplOpts | None:
- # IMPORTANT: @property methods can have side effects, and are never hookimpl
- # if attr is a property, skip it in advance
- plugin_class = plugin if inspect.isclass(plugin) else type(plugin)
- if isinstance(getattr(plugin_class, name, None), property):
- return None
-
- # pydantic model fields are like attrs and also can never be hookimpls
- plugin_is_pydantic_obj = hasattr(plugin, "__pydantic_core_schema__")
- if plugin_is_pydantic_obj and name in getattr(plugin, "model_fields", {}):
- # pydantic models mess with the class and attr __signature__
- # so inspect.isroutine(...) throws exceptions and cant be used
- return None
-
- try:
- return super().parse_hookimpl_opts(plugin, name)
- except AttributeError:
- return super().parse_hookimpl_opts(type(plugin), name)
-
-pm = PatchedPluginManager("abx")
-
-
-
-@hookspec(firstresult=True)
-@hookimpl
-@cache
-def get_PLUGIN_ORDER(plugin: PluginId | Path | ModuleType | Type) -> Tuple[int, Path]:
- plugin_dir = None
- plugin_module = None
-
- if isinstance(plugin, str) or isinstance(plugin, Path):
- if str(plugin).endswith('.py'):
- plugin_dir = Path(plugin).parent
- plugin_id = plugin_dir.name
- elif '/' in str(plugin):
- # assume it's a path to a plugin directory
- plugin_dir = Path(plugin)
- plugin_id = plugin_dir.name
- elif str(plugin).isidentifier():
- # assume it's a plugin_id
- plugin_id = str(plugin)
-
- elif inspect.ismodule(plugin) or inspect.isclass(plugin):
- plugin_module = plugin
- plugin_dir = Path(str(plugin_module.__file__)).parent
- plugin_id = plugin_dir.name
- else:
- raise ValueError(f'Invalid plugin, cannot get order: {plugin}')
-
- if plugin_dir:
- try:
- # if .plugin_order file exists, use it to set the load priority
- order = int((plugin_dir / '.plugin_order').read_text())
- return (order, plugin_dir)
- except FileNotFoundError:
- pass
-
- if not plugin_module:
- try:
- plugin_module = importlib.import_module(plugin_id)
- except ImportError:
- raise ValueError(f'Invalid plugin, cannot get order: {plugin}')
-
- if plugin_module and not plugin_dir:
- plugin_dir = Path(str(plugin_module.__file__)).parent
-
- assert plugin_dir
-
- return (getattr(plugin_module, '__order__', 999), plugin_dir)
-
-# @hookspec
-# @hookimpl
-# def get_PLUGIN() -> Dict[PluginId, PluginInfo]:
-# """Get the info for a single plugin, implemented by each plugin"""
-# return {
-# __id__: PluginInfo({
-# 'id': __id__,
-# 'package': str(__package__),
-# 'label': __id__,
-# 'version': __version__,
-# 'author': __author__,
-# 'homepage': __homepage__,
-# 'dependencies': __dependencies__,
-# }),
-# }
-
-@hookspec(firstresult=True)
-@hookimpl
-@cache
-def get_PLUGIN_METADATA(plugin: PluginId | ModuleType | Type) -> PluginInfo:
- # TODO: remove get_PLUGIN hook in favor of pyproject.toml and __attr__s metdata
- # having three methods to detect plugin metadata is overkill
-
- assert plugin
-
- # import the plugin module by its name
- if isinstance(plugin, str):
- module = importlib.import_module(plugin)
- plugin_id = plugin
- elif inspect.ismodule(plugin) or inspect.isclass(plugin):
- module = plugin
- plugin_id = plugin.__package__
- else:
- raise ValueError(f'Invalid plugin, must be a module, class, or plugin ID (package name): {plugin}')
-
- assert module.__file__
-
- # load the plugin info from the plugin/__init__.py __attr__s if they exist
- plugin_module_attrs = {
- 'id': getattr(module, '__id__', plugin_id),
- 'name': getattr(module, '__id__', plugin_id),
- 'label': getattr(module, '__label__', plugin_id),
- 'version': getattr(module, '__version__', '0.0.1'),
- 'author': getattr(module, '__author__', 'Unknown'),
- 'homepage': getattr(module, '__homepage__', 'https://github.com/ArchiveBox'),
- 'dependencies': getattr(module, '__dependencies__', []),
- }
-
- # load the plugin info from the plugin.get_PLUGIN() hook method if it has one
- plugin_info_dict = {}
- if hasattr(module, 'get_PLUGIN'):
- plugin_info_dict = {
- key.lower(): value
- for key, value in module.get_PLUGIN().items()
- }
-
- # load the plugin info from the plugin/pyproject.toml file if it has one
- plugin_toml_info = {}
- try:
- # try loading ./pyproject.toml first in case the plugin is a bare python file not inside a package dir
- plugin_toml_info = benedict.from_toml((Path(module.__file__).parent / 'pyproject.toml').read_text()).project
- except Exception:
- try:
- # try loading ../pyproject.toml next in case the plugin is in a packge dir
- plugin_toml_info = benedict.from_toml((Path(module.__file__).parent.parent / 'pyproject.toml').read_text()).project
- except Exception as e:
- print('WARNING: could not detect pyproject.toml for PLUGIN:', plugin_id, Path(module.__file__).parent, 'ERROR:', e)
-
- # merge the plugin info from all sources + add dyanmically calculated info
- return cast(PluginInfo, benedict(PluginInfo(**{
- 'id': plugin_id,
- **plugin_module_attrs,
- **plugin_info_dict,
- **plugin_toml_info,
- 'package': module.__package__,
- 'module': module,
- 'order': pm.hook.get_PLUGIN_ORDER(plugin=module),
- 'source_code': module.__file__,
- 'hooks': get_plugin_hooks(module),
- })))
-
-@hookspec(firstresult=True)
-@hookimpl
-def get_ALL_PLUGINS() -> Dict[PluginId, PluginInfo]:
- """Get a flat dictionary of all plugins {plugin_id: {...plugin_metadata}}"""
- return as_dict(pm.hook.get_PLUGIN())
-
-
-@hookspec(firstresult=True)
-@hookimpl
-def get_ALL_PLUGINS_METADATA() -> Dict[PluginId, PluginInfo]:
- """Get the metadata for all the plugins registered with Pluggy."""
- plugins = {}
- for plugin_module in pm.get_plugins():
- plugin_info = pm.hook.get_PLUGIN_METADATA(plugin=plugin_module)
- assert 'id' in plugin_info
- plugins[plugin_info['id']] = plugin_info
- return benedict(plugins)
-
-@hookspec(firstresult=True)
-@hookimpl
-def get_ALL_PLUGIN_HOOK_NAMES() -> Set[str]:
- """Get a set of all hook names across all plugins"""
- return {
- hook_name
- for plugin_module in pm.get_plugins()
- for hook_name in get_plugin_hooks(plugin_module)
- }
-
-pm.add_hookspecs(sys.modules[__name__])
-pm.register(sys.modules[__name__])
-
-
-###### PLUGIN DISCOVERY AND LOADING ########################################################
-
-
-
-def register_hookspecs(plugin_ids: Iterable[PluginId]):
- """
- Register all the hookspecs from a list of module names.
- """
- for plugin_id in plugin_ids:
- hookspec_module = importlib.import_module(plugin_id)
- pm.add_hookspecs(hookspec_module)
-
-
-def find_plugins_in_dir(plugins_dir: Path) -> Dict[PluginId, Path]:
- """
- Find all the plugins in a given directory. Just looks for an __init__.py file.
- """
- return {
- plugin_entrypoint.parent.name: plugin_entrypoint.parent
- for plugin_entrypoint in sorted(plugins_dir.glob("*/__init__.py"), key=pm.hook.get_PLUGIN_ORDER) # type:ignore
- if plugin_entrypoint.parent.name != 'abx'
- } # "plugins_pkg.pip": "/app/archivebox/plugins_pkg/pip"
-
-
-def get_pip_installed_plugins(group: PluginId='abx') -> Dict[PluginId, Path]:
- """replaces pm.load_setuptools_entrypoints("abx"), finds plugins that registered entrypoints via pip"""
- import importlib.metadata
-
- DETECTED_PLUGINS = {} # module_name: module_dir_path
- for dist in list(importlib.metadata.distributions()):
- for entrypoint in dist.entry_points:
- if entrypoint.group != group or pm.is_blocked(entrypoint.name):
- continue
- DETECTED_PLUGINS[entrypoint.name] = Path(entrypoint.load().__file__).parent
- # pm.register(plugin, name=ep.name)
- # pm._plugin_distinfo.append((plugin, DistFacade(dist)))
- return DETECTED_PLUGINS
-
-
-
-# Load all plugins from pip packages, archivebox built-ins, and user plugins
-def load_plugins(plugins: Iterable[PluginId | ModuleType | Type] | Dict[PluginId, Path]):
- """
- Load all the plugins from a dictionary of module names and directory paths.
- """
- LOADED_PLUGINS = {}
- for plugin in plugins:
- plugin_info = pm.hook.get_PLUGIN_METADATA(plugin=plugin)
- assert 'id' in plugin_info and 'module' in plugin_info
- if plugin_info['module'] in pm.get_plugins():
- LOADED_PLUGINS[plugin_info['id']] = plugin_info
- continue
- try:
- pm.add_hookspecs(plugin_info['module'])
- except ValueError:
- # not all plugins register new hookspecs, some only have hookimpls
- pass
- pm.register(plugin_info['module'])
- LOADED_PLUGINS[plugin_info['id']] = plugin_info
- # print(f' √ Loaded plugin: {plugin_id}')
- return benedict(LOADED_PLUGINS)
-
-@cache
-def get_plugin_hooks(plugin: PluginId | ModuleType | Type | None) -> Dict[AttrName, Callable]:
- """Get all the functions marked with @hookimpl on a module."""
- if not plugin:
- return {}
-
- hooks = {}
-
- if isinstance(plugin, str):
- plugin_module = importlib.import_module(plugin)
- elif inspect.ismodule(plugin) or inspect.isclass(plugin):
- plugin_module = plugin
- else:
- raise ValueError(f'Invalid plugin, cannot get hooks: {plugin}')
-
- for attr_name in dir(plugin_module):
- if attr_name.startswith('_'):
- continue
- try:
- attr = getattr(plugin_module, attr_name)
- if isinstance(attr, Callable):
- if pm.parse_hookimpl_opts(plugin_module, attr_name):
- hooks[attr_name] = attr
- except Exception as e:
- print(f'Error getting hookimpls for {plugin}: {e}')
-
- return hooks
-
-
-def as_list(results) -> List[Any]:
- """Flatten a list of lists returned by a pm.hook.call() into a single list"""
- return list(itertools.chain(*results))
-
-
-def as_dict(results: Dict[str, Dict[PluginId, Any]] | List[Dict[PluginId, Any]]) -> Dict[PluginId, Any]:
- """Flatten a list of dicts returned by a pm.hook.call() into a single dict"""
- if isinstance(results, (dict, benedict)):
- results_list = results.values()
- else:
- results_list = results
-
- return benedict({
- result_id: result
- for plugin_results in results_list
- for result_id, result in dict(plugin_results).items()
- })
-
-
diff --git a/packages/archivebox-pocket/.circleci/config.yml b/packages/archivebox-pocket/.circleci/config.yml
deleted file mode 100644
index a20a6aae..00000000
--- a/packages/archivebox-pocket/.circleci/config.yml
+++ /dev/null
@@ -1,61 +0,0 @@
-version: 2.1
-orbs:
- python: circleci/python@2.0.3
-
-jobs:
- build_and_test_3_7:
- docker:
- - image: circleci/python:3.7
- executor: python/default
- steps:
- - checkout
- - python/install-packages:
- pkg-manager: pip
- - run:
- name: Run tests
- command: nosetests
-
- build_and_test_3_8:
- docker:
- - image: circleci/python:3.8
- executor: python/default
- steps:
- - checkout
- - python/install-packages:
- pkg-manager: pip
- - run:
- name: Run tests
- command: nosetests
-
- build_and_test_3_9:
- docker:
- - image: circleci/python:3.9
- executor: python/default
- steps:
- - checkout
- - python/install-packages:
- pkg-manager: pip
- - run:
- name: Run tests
- command: nosetests
-
- build_and_test_3_10:
- docker:
- - image: circleci/python:3.10
- executor: python/default
- steps:
- - checkout
- - python/install-packages:
- pkg-manager: pip
- - run:
- name: Run tests
- command: nosetests
-
-
-workflows:
- test_pocket:
- jobs:
- - build_and_test_3_7
- - build_and_test_3_8
- - build_and_test_3_9
- - build_and_test_3_10
diff --git a/packages/archivebox-pocket/.gitignore b/packages/archivebox-pocket/.gitignore
deleted file mode 100644
index 8acafa3c..00000000
--- a/packages/archivebox-pocket/.gitignore
+++ /dev/null
@@ -1,43 +0,0 @@
-*.py[co]
-
-# Packages
-*.egg
-*.egg-info
-dist
-build
-eggs
-parts
-bin
-var
-sdist
-develop-eggs
-.installed.cfg
-.pypirc
-
-# Installer logs
-pip-log.txt
-
-# Unit test / coverage reports
-.coverage
-.tox
-
-#Translations
-*.mo
-
-#Mr Developer
-.mr.developer.cfg
-
-# Virtualenv
-include/
-lib/
-local/
-.Python
-
-# ViM files
-.*.swp
-.*.swo
-
-# Misc
-*.log
-*.pid
-*.sql
diff --git a/packages/archivebox-pocket/LICENSE.md b/packages/archivebox-pocket/LICENSE.md
deleted file mode 100644
index 3b145165..00000000
--- a/packages/archivebox-pocket/LICENSE.md
+++ /dev/null
@@ -1,27 +0,0 @@
-Copyright (c) 2014, Tapan Pandita
-All rights reserved.
-
-Redistribution and use in source and binary forms, with or without modification,
-are permitted provided that the following conditions are met:
-
-* Redistributions of source code must retain the above copyright notice, this
- list of conditions and the following disclaimer.
-
-* Redistributions in binary form must reproduce the above copyright notice, this
- list of conditions and the following disclaimer in the documentation and/or
- other materials provided with the distribution.
-
-* Neither the name of pocket nor the names of its
- contributors may be used to endorse or promote products derived from
- this software without specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
-ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
-WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
-ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
-(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
-LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
-ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
diff --git a/packages/archivebox-pocket/MANIFEST.in b/packages/archivebox-pocket/MANIFEST.in
deleted file mode 100644
index 7425f8e8..00000000
--- a/packages/archivebox-pocket/MANIFEST.in
+++ /dev/null
@@ -1,2 +0,0 @@
-include LICENSE.md
-include README.md
diff --git a/packages/archivebox-pocket/README.md b/packages/archivebox-pocket/README.md
deleted file mode 100644
index 6b2430be..00000000
--- a/packages/archivebox-pocket/README.md
+++ /dev/null
@@ -1,66 +0,0 @@
-Pocket
-======
-[](https://circleci.com/gh/tapanpandita/pocket)
-[](https://pypi.python.org/pypi/pocket)
-[](https://pypi.python.org/pypi/pocket)
-
-
-
-A python wrapper for the [pocket api](http://getpocket.com/api/docs).
-
-Installation
-------------
-```
-pip install pocket
-```
-
-Usage
-------
-
-You'll need your pocket consumer key. You can find this from your account page.
-You will also need the access token for the account you want to modify.
-Then, you need to create an instance of the pocket object
-
-```python
-import pocket
-
-pocket_instance = pocket.Pocket(consumer_key, access_token)
-```
-
-### Chaining Modify Methods
-
-All the modify methods can be chained together for creating one bulk query. If you don't wish to chain the methods, just pass `wait=False`.
-
-```python
-import pocket
-
-pocket_instance = pocket.Pocket(consumer_key, access_token)
-
-# perfoms all these actions in one request
-# NOTE: Each individual method returns the instance itself. The response
-# dictionary is not returned till commit is called on the instance.
-response, headers = pocket_instance.archive(item_id1).archive(item_id2).favorite(item_id3).delete(item_id4).commit()
-
-# performs action immediately and returns a dictionary
-pocket_instance.archive(item_id1, wait=False)
-```
-
-### OAUTH
-
-To get request token, use the get_request_token class method. To get the access token use the get_access_token method.
-
-```python
-from pocket import Pocket
-
-request_token = Pocket.get_request_token(consumer_key=consumer_key, redirect_uri=redirect_uri)
-
-# URL to redirect user to, to authorize your app
-auth_url = Pocket.get_auth_url(code=request_token, redirect_uri=redirect_uri)
-# e.g. import subprocess; subprocess.run(['xdg-open', auth_url])
-
-user_credentials = Pocket.get_credentials(consumer_key=consumer_key, code=request_token)
-
-access_token = user_credentials['access_token']
-```
-
-For detailed documentation of the methods available, please visit the official [pocket api documentation](http://getpocket.com/api/docs).
diff --git a/packages/archivebox-pocket/pocket.py b/packages/archivebox-pocket/pocket.py
deleted file mode 100644
index b5b8d2fa..00000000
--- a/packages/archivebox-pocket/pocket.py
+++ /dev/null
@@ -1,366 +0,0 @@
-import requests
-import json
-from functools import wraps
-
-
-class PocketException(Exception):
- '''
- Base class for all pocket exceptions
- http://getpocket.com/developer/docs/errors
-
- '''
- pass
-
-
-class InvalidQueryException(PocketException):
- pass
-
-
-class AuthException(PocketException):
- pass
-
-
-class RateLimitException(PocketException):
- '''
- http://getpocket.com/developer/docs/rate-limits
-
- '''
- pass
-
-
-class ServerMaintenanceException(PocketException):
- pass
-
-EXCEPTIONS = {
- 400: InvalidQueryException,
- 401: AuthException,
- 403: RateLimitException,
- 503: ServerMaintenanceException,
-}
-
-
-def method_wrapper(fn):
-
- @wraps(fn)
- def wrapped(self, *args, **kwargs):
- arg_names = list(fn.__code__.co_varnames)
- arg_names.remove('self')
- kwargs.update(dict(zip(arg_names, args)))
-
- url = self.api_endpoints[fn.__name__]
- payload = dict([
- (k, v) for k, v in kwargs.items()
- if v is not None
- ])
- payload.update(self.get_payload())
-
- return self.make_request(url, payload)
-
- return wrapped
-
-
-def bulk_wrapper(fn):
-
- @wraps(fn)
- def wrapped(self, *args, **kwargs):
- arg_names = list(fn.__code__.co_varnames)
- arg_names.remove('self')
- kwargs.update(dict(zip(arg_names, args)))
-
- wait = kwargs.get('wait', True)
- query = dict(
- [(k, v) for k, v in kwargs.items() if v is not None]
- )
- # TODO: Fix this hack
- query['action'] = 'add' if fn.__name__ == 'bulk_add' else fn.__name__
-
- if wait:
- self.add_bulk_query(query)
- return self
- else:
- url = self.api_endpoints['send']
- payload = {
- 'actions': [query],
- }
- payload.update(self.get_payload())
- return self.make_request(
- url,
- json.dumps(payload),
- headers={'content-type': 'application/json'},
- )
-
- return wrapped
-
-
-class Pocket(object):
- '''
- This class implements a basic python wrapper around the pocket api. For a
- detailed documentation of the methods and what they do please refer the
- official pocket api documentation at
- http://getpocket.com/developer/docs/overview
-
- '''
- api_endpoints = dict(
- (method, 'https://getpocket.com/v3/%s' % method)
- for method in "add,send,get".split(",")
- )
-
- statuses = {
- 200: 'Request was successful',
- 400: 'Invalid request, please make sure you follow the '
- 'documentation for proper syntax',
- 401: 'Problem authenticating the user',
- 403: 'User was authenticated, but access denied due to lack of '
- 'permission or rate limiting',
- 503: 'Pocket\'s sync server is down for scheduled maintenance.',
- }
-
- def __init__(self, consumer_key, access_token):
- self.consumer_key = consumer_key
- self.access_token = access_token
- self._bulk_query = []
-
- self._payload = {
- 'consumer_key': self.consumer_key,
- 'access_token': self.access_token,
- }
-
- def get_payload(self):
- return self._payload
-
- def add_bulk_query(self, query):
- self._bulk_query.append(query)
-
- @staticmethod
- def _post_request(url, payload, headers):
- r = requests.post(url, data=payload, headers=headers)
- return r
-
- @classmethod
- def _make_request(cls, url, payload, headers=None):
- r = cls._post_request(url, payload, headers)
-
- if r.status_code > 399:
- error_msg = cls.statuses.get(r.status_code)
- extra_info = r.headers.get('X-Error')
- raise EXCEPTIONS.get(r.status_code, PocketException)(
- '%s. %s' % (error_msg, extra_info)
- )
-
- return r.json() or r.text, r.headers
-
- @classmethod
- def make_request(cls, url, payload, headers=None):
- return cls._make_request(url, payload, headers)
-
- @method_wrapper
- def add(self, url, title=None, tags=None, tweet_id=None):
- '''
- This method allows you to add a page to a user's list.
- In order to use the /v3/add endpoint, your consumer key must have the
- "Add" permission.
- http://getpocket.com/developer/docs/v3/add
-
- '''
-
- @method_wrapper
- def get(
- self, state=None, favorite=None, tag=None, contentType=None,
- sort=None, detailType=None, search=None, domain=None, since=None,
- count=None, offset=None
- ):
- '''
- This method allows you to retrieve a user's list. It supports
- retrieving items changed since a specific time to allow for syncing.
- http://getpocket.com/developer/docs/v3/retrieve
-
- '''
-
- @method_wrapper
- def send(self, actions):
- '''
- This method allows you to make changes to a user's list. It supports
- adding new pages, marking pages as read, changing titles, or updating
- tags. Multiple changes to items can be made in one request.
- http://getpocket.com/developer/docs/v3/modify
-
- '''
-
- @bulk_wrapper
- def bulk_add(
- self, item_id, ref_id=None, tags=None, time=None, title=None,
- url=None, wait=True
- ):
- '''
- Add a new item to the user's list
- http://getpocket.com/developer/docs/v3/modify#action_add
-
- '''
-
- @bulk_wrapper
- def archive(self, item_id, time=None, wait=True):
- '''
- Move an item to the user's archive
- http://getpocket.com/developer/docs/v3/modify#action_archive
-
- '''
-
- @bulk_wrapper
- def readd(self, item_id, time=None, wait=True):
- '''
- Re-add (unarchive) an item to the user's list
- http://getpocket.com/developer/docs/v3/modify#action_readd
-
- '''
-
- @bulk_wrapper
- def favorite(self, item_id, time=None, wait=True):
- '''
- Mark an item as a favorite
- http://getpocket.com/developer/docs/v3/modify#action_favorite
-
- '''
-
- @bulk_wrapper
- def unfavorite(self, item_id, time=None, wait=True):
- '''
- Remove an item from the user's favorites
- http://getpocket.com/developer/docs/v3/modify#action_unfavorite
-
- '''
-
- @bulk_wrapper
- def delete(self, item_id, time=None, wait=True):
- '''
- Permanently remove an item from the user's account
- http://getpocket.com/developer/docs/v3/modify#action_delete
-
- '''
-
- @bulk_wrapper
- def tags_add(self, item_id, tags, time=None, wait=True):
- '''
- Add one or more tags to an item
- http://getpocket.com/developer/docs/v3/modify#action_tags_add
-
- '''
-
- @bulk_wrapper
- def tags_remove(self, item_id, tags, time=None, wait=True):
- '''
- Remove one or more tags from an item
- http://getpocket.com/developer/docs/v3/modify#action_tags_remove
-
- '''
-
- @bulk_wrapper
- def tags_replace(self, item_id, tags, time=None, wait=True):
- '''
- Replace all of the tags for an item with one or more provided tags
- http://getpocket.com/developer/docs/v3/modify#action_tags_replace
-
- '''
-
- @bulk_wrapper
- def tags_clear(self, item_id, time=None, wait=True):
- '''
- Remove all tags from an item.
- http://getpocket.com/developer/docs/v3/modify#action_tags_clear
-
- '''
-
- @bulk_wrapper
- def tag_rename(self, item_id, old_tag, new_tag, time=None, wait=True):
- '''
- Rename a tag. This affects all items with this tag.
- http://getpocket.com/developer/docs/v3/modify#action_tag_rename
-
- '''
-
- def commit(self):
- '''
- This method executes the bulk query, flushes stored queries and
- returns the response
-
- '''
- url = self.api_endpoints['send']
- payload = {
- 'actions': self._bulk_query,
- }
- payload.update(self._payload)
- self._bulk_query = []
-
- return self._make_request(
- url,
- json.dumps(payload),
- headers={'content-type': 'application/json'},
- )
-
- @classmethod
- def get_request_token(
- cls, consumer_key, redirect_uri='http://example.com/', state=None
- ):
- '''
- Returns the request token that can be used to fetch the access token
-
- '''
- headers = {
- 'X-Accept': 'application/json',
- }
- url = 'https://getpocket.com/v3/oauth/request'
- payload = {
- 'consumer_key': consumer_key,
- 'redirect_uri': redirect_uri,
- }
-
- if state:
- payload['state'] = state
-
- return cls._make_request(url, payload, headers)[0]['code']
-
- @classmethod
- def get_credentials(cls, consumer_key, code):
- '''
- Fetches access token from using the request token and consumer key
-
- '''
- headers = {
- 'X-Accept': 'application/json',
- }
- url = 'https://getpocket.com/v3/oauth/authorize'
- payload = {
- 'consumer_key': consumer_key,
- 'code': code,
- }
-
- return cls._make_request(url, payload, headers)[0]
-
- @classmethod
- def get_access_token(cls, consumer_key, code):
- return cls.get_credentials(consumer_key, code)['access_token']
-
- @classmethod
- def get_auth_url(cls, code, redirect_uri='http://example.com'):
- auth_url = ('https://getpocket.com/auth/authorize'
- '?request_token=%s&redirect_uri=%s' % (code, redirect_uri))
- return auth_url
-
- @classmethod
- def auth(
- cls, consumer_key, redirect_uri='http://example.com/', state=None,
- ):
- '''
- This is a test method for verifying if oauth worked
- http://getpocket.com/developer/docs/authentication
-
- '''
- code = cls.get_request_token(consumer_key, redirect_uri, state)
-
- auth_url = 'https://getpocket.com/auth/authorize?request_token='\
- '%s&redirect_uri=%s' % (code, redirect_uri)
- raw_input(
- 'Please open %s in your browser to authorize the app and '
- 'press enter:' % auth_url
- )
-
- return cls.get_access_token(consumer_key, code)
diff --git a/packages/archivebox-pocket/pyproject.toml b/packages/archivebox-pocket/pyproject.toml
deleted file mode 100644
index 6acf8a57..00000000
--- a/packages/archivebox-pocket/pyproject.toml
+++ /dev/null
@@ -1,19 +0,0 @@
-[project]
-name = "archivebox-pocket"
-version = "0.3.7"
-description = " api wrapper for getpocket.com"
-readme = "README.md"
-requires-python = ">=3.10"
-dependencies = [
- "requests>=2.32.3",
-]
-
-[build-system]
-requires = ["hatchling"]
-build-backend = "hatchling.build"
-
-[tool.hatch.build.targets.sdist]
-packages = ["."]
-
-[tool.hatch.build.targets.wheel]
-packages = ["."]
diff --git a/packages/archivebox-pocket/requirements.txt b/packages/archivebox-pocket/requirements.txt
deleted file mode 100644
index 9598beea..00000000
--- a/packages/archivebox-pocket/requirements.txt
+++ /dev/null
@@ -1,4 +0,0 @@
-coverage==3.7.1
-mock==1.0.1
-nose==1.3.0
-requests==2.20.0
diff --git a/packages/archivebox-pocket/setup.py b/packages/archivebox-pocket/setup.py
deleted file mode 100644
index 5a5baba0..00000000
--- a/packages/archivebox-pocket/setup.py
+++ /dev/null
@@ -1,41 +0,0 @@
-from setuptools import setup
-
-setup(
- name = "pocket", # pip install pocket
- description = "api wrapper for getpocket.com",
- #long_description=open('README.md', 'rt').read(),
-
- # version
- # third part for minor release
- # second when api changes
- # first when it becomes stable someday
- version = "0.3.7",
- author = 'Tapan Pandita',
- author_email = "tapan.pandita@gmail.com",
-
- url = 'http://github.com/tapanpandita/pocket/',
- license = 'BSD',
-
- # as a practice no need to hard code version unless you know program wont
- # work unless the specific versions are used
- install_requires = ["requests>=2.32.3"],
-
- py_modules = ["pocket"],
-
- zip_safe = True,
-)
-
-# TODO: Do all this and delete these lines
-# register: Create an accnt on pypi, store your credentials in ~/.pypirc:
-#
-# [pypirc]
-# servers =
-# pypi
-#
-# [server-login]
-# username:
-# password:
-#
-# $ python setup.py register # one time only, will create pypi page for pocket
-# $ python setup.py sdist --formats=gztar,zip upload # create a new release
-#
diff --git a/packages/archivebox-pocket/test_pocket.py b/packages/archivebox-pocket/test_pocket.py
deleted file mode 100644
index 14e67f53..00000000
--- a/packages/archivebox-pocket/test_pocket.py
+++ /dev/null
@@ -1,52 +0,0 @@
-import unittest
-import pocket
-from mock import patch
-
-
-class PocketTest(unittest.TestCase):
-
- def setUp(self):
- self.consumer_key = 'consumer_key'
- self.access_token = 'access_token'
-
- def tearDown(self):
- pass
-
- def test_pocket_init(self):
- pocket_instance = pocket.Pocket(
- self.consumer_key,
- self.access_token,
- )
-
- self.assertEqual(self.consumer_key, pocket_instance.consumer_key)
- self.assertEqual(self.access_token, pocket_instance.access_token)
-
- def test_pocket_init_payload(self):
- pocket_instance = pocket.Pocket(
- self.consumer_key,
- self.access_token,
- )
- expected_payload = {
- 'consumer_key': self.consumer_key,
- 'access_token': self.access_token,
- }
-
- self.assertEqual(expected_payload, pocket_instance._payload)
-
- def test_post_request(self):
- mock_payload = {
- 'consumer_key': self.consumer_key,
- 'access_token': self.access_token,
- }
- mock_url = 'https://getpocket.com/v3/'
- mock_headers = {
- 'content-type': 'application/json',
- }
-
- with patch('pocket.requests') as mock_requests:
- pocket.Pocket._post_request(mock_url, mock_payload, mock_headers)
- mock_requests.post.assert_called_once_with(
- mock_url,
- data=mock_payload,
- headers=mock_headers,
- )
diff --git a/pyproject.toml b/pyproject.toml
index de870ada..58e7d82b 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -69,19 +69,22 @@ dependencies = [
"typeid-python>=0.3.1",
"psutil>=6.0.0",
"supervisor>=4.2.5",
- "python-crontab>=3.2.0", # for: archivebox schedule
- "croniter>=3.0.3", # for: archivebox schedule
- "ipython>=8.27.0", # for: archivebox shell
- "py-machineid>=0.6.0", # for: machine/detect.py calculating machine guid
+ "python-crontab>=3.2.0", # for: archivebox schedule
+ "croniter>=3.0.3", # for: archivebox schedule
+ "ipython>=8.27.0", # for: archivebox shell
+ "py-machineid>=0.6.0", # for: machine/detect.py calculating machine guid
"python-benedict[io,parse]>=0.33.2",
"pydantic-settings>=2.5.2",
"atomicwrites==1.4.1",
"django-taggit==6.1.0",
"base32-crockford==0.3.0",
+ "platformdirs>=4.3.6",
+ ############# Plugin Dependencies ################
# "pocket@git+https://github.com/tapanpandita/pocket.git@v0.3.7",
"pydantic-pkgr>=0.5.4",
- ############# Plugin Dependencies ################
+
"abx>=0.1.0",
+
"abx-spec-pydantic-pkgr>=0.1.0",
"abx-spec-config>=0.1.0",
"abx-spec-archivebox>=0.1.0",
@@ -90,15 +93,34 @@ dependencies = [
"abx-spec-searchbackend>=0.1.0",
"abx-plugin-default-binproviders>=2024.10.24",
- "abx-plugin-pip-binprovider>=2024.10.24",
- "abx-plugin-npm-binprovider>=2024.10.24",
- "abx-plugin-playwright-binprovider>=2024.10.24",
+ "abx-plugin-pip>=2024.10.24",
+ "abx-plugin-npm>=2024.10.24",
+ "abx-plugin-playwright>=2024.10.24",
+ "abx-plugin-puppeteer>=2024.10.28",
+
+ "abx-plugin-ripgrep-search>=2024.10.28",
+ "abx-plugin-sqlitefts-search>=2024.10.28",
+ "abx-plugin-sonic-search>=2024.10.28",
+ "abx-plugin-ldap-auth>=2024.10.28",
+
+ "abx-plugin-curl>=2024.10.27",
+ "abx-plugin-wget>=2024.10.28",
+ "abx-plugin-git>=2024.10.28",
+ "abx-plugin-chrome>=2024.10.28",
+ "abx-plugin-ytdlp>=2024.10.28",
+
+ "abx-plugin-title>=2024.10.27",
+ "abx-plugin-favicon>=2024.10.27",
+ # "abx-plugin-headers>=2024.10.27",
+ "abx-plugin-archivedotorg>=2024.10.28",
+
+ "abx-plugin-singlefile>=2024.10.28",
+ "abx-plugin-readability>=2024.10.28",
+ "abx-plugin-mercury>=2024.10.28",
+ "abx-plugin-htmltotext>=2024.10.28",
- # "abx-plugin-pocket",
- # "abx-plugin-sonic",
- # "abx-plugin-yt-dlp",
"sonic-client>=1.0.0",
- "yt-dlp>=2024.8.6", # for: media"
+ "yt-dlp>=2024.8.6", # for: media
]
[project.optional-dependencies]
@@ -160,15 +182,38 @@ abx-spec-extractor = { workspace = true }
abx-spec-searchbackend = { workspace = true }
abx-plugin-default-binproviders = { workspace = true }
-abx-plugin-pip-binprovider = { workspace = true }
-abx-plugin-npm-binprovider = { workspace = true }
-abx-plugin-playwright-binprovider = { workspace = true }
+abx-plugin-pip = { workspace = true }
+abx-plugin-npm = { workspace = true }
+abx-plugin-playwright = { workspace = true }
+abx-plugin-puppeteer = { workspace = true }
+abx-plugin-ripgrep-search = { workspace = true }
+abx-plugin-sqlitefts-search = { workspace = true }
+abx-plugin-sonic-search = { workspace = true }
+abx-plugin-ldap-auth = { workspace = true }
+
+abx-plugin-curl = { workspace = true }
+abx-plugin-wget = { workspace = true }
+abx-plugin-git = { workspace = true }
+abx-plugin-chrome = { workspace = true }
+abx-plugin-ytdlp = { workspace = true }
+
+abx-plugin-title = { workspace = true }
+abx-plugin-favicon = { workspace = true }
+# abx-plugin-headers = { workspace = true }
+abx-plugin-archivedotorg = { workspace = true }
+
+abx-plugin-singlefile = { workspace = true }
+abx-plugin-readability = { workspace = true }
+abx-plugin-mercury = { workspace = true }
+abx-plugin-htmltotext = { workspace = true }
+
pydantic-pkgr = { workspace = true }
-archivebox-pocket = { workspace = true }
+pocket = { workspace = true }
[tool.uv.workspace]
-members = ["packages/*"]
+members = ["archivebox/vendor/*"]
+exclude = ["archivebox/vendor/__pycache__"]
[build-system]
requires = ["pdm-backend"]
@@ -183,7 +228,7 @@ package-dir = {"archivebox" = "archivebox"}
line-length = 140
target-version = "py310"
src = ["archivebox"]
-exclude = ["*.pyi", "typings/", "migrations/", "vendor/"]
+exclude = ["*.pyi", "typings/", "migrations/", "vendor/pocket"]
# https://docs.astral.sh/ruff/rules/
[tool.ruff.lint]
@@ -218,7 +263,7 @@ exclude = [
"**/node_modules",
"**/__pycache__",
"**/migrations",
- "archivebox/vendor",
+ "archivebox/vendor/pocket",
]
stubPath = "./archivebox/typings"
venvPath = "."