remove more hardcoded plugin names from codebase

This commit is contained in:
Nick Sweeting
2025-12-29 13:14:33 -08:00
parent 967c5d53e0
commit 92c26124a3
2 changed files with 3 additions and 55 deletions

View File

@@ -26,9 +26,7 @@ from archivebox.misc.system import get_dir_size, atomic_write
from archivebox.misc.util import parse_date, base_url, domain as url_domain, to_json, ts_to_date_str, urlencode, htmlencode, urldecode
from archivebox.misc.hashing import get_dir_info
from archivebox.hooks import (
EXTRACTOR_INDEXING_PRECEDENCE,
get_plugins, get_plugin_name, get_plugin_icon,
DEFAULT_PLUGIN_ICONS,
)
from archivebox.base_models.models import (
ModelWithUUID, ModelWithSerializers, ModelWithOutputDir,
@@ -1931,16 +1929,6 @@ class SnapshotMachine(BaseStateMachine, strict_states=True):
)
class ArchiveResultManager(models.Manager):
    """Manager that adds a search-indexing query helper."""

    def indexable(self, sorted: bool = True):
        """
        Return succeeded results whose plugin is listed in
        EXTRACTOR_INDEXING_PRECEDENCE.

        Args:
            sorted: When true, annotate each row with ``indexing_precedence``
                    and order ascending by it (lowest value first).

        Returns:
            The filtered (and optionally annotated + ordered) queryset.
        """
        plugin_names = [entry[0] for entry in EXTRACTOR_INDEXING_PRECEDENCE]
        results = self.get_queryset().filter(plugin__in=plugin_names, status='succeeded')
        if not sorted:
            return results

        # Map each listed plugin to its configured rank; anything that
        # matches no case falls through to the default of 1000.
        rank_cases = [
            When(plugin=method, then=Value(rank))
            for method, rank in EXTRACTOR_INDEXING_PRECEDENCE
        ]
        rank_expression = Case(*rank_cases, default=Value(1000), output_field=IntegerField())
        return results.annotate(indexing_precedence=rank_expression).order_by('indexing_precedence')
class ArchiveResult(ModelWithOutputDir, ModelWithConfig, ModelWithNotes, ModelWithHealthStats, ModelWithStateMachine):
class StatusChoices(models.TextChoices):
QUEUED = 'queued', 'Queued'
@@ -2000,8 +1988,6 @@ class ArchiveResult(ModelWithOutputDir, ModelWithConfig, ModelWithNotes, ModelWi
state_field_name = 'status'
active_state = StatusChoices.STARTED
objects = ArchiveResultManager()
class Meta(TypedModelMeta):
app_label = 'core'
verbose_name = 'Archive Result'

View File

@@ -619,20 +619,6 @@ def is_parser_plugin(plugin: str) -> bool:
return name.startswith('parse_') and name.endswith('_urls')
# Search-indexing precedence as (plugin_name, rank) pairs; a lower rank means
# a higher priority when choosing which plugin's output feeds full-text search.
# Names are the part of the plugin name after the numeric prefix,
# e.g. '31_readability' -> 'readability'.
# Ranks are derived from the order of the names below, starting at 1.
EXTRACTOR_INDEXING_PRECEDENCE = [
    (plugin_name, rank)
    for rank, plugin_name in enumerate(
        ('readability', 'mercury', 'htmltotext', 'singlefile', 'dom', 'wget'),
        start=1,
    )
]
def get_enabled_plugins(config: Optional[Dict[str, Any]] = None) -> List[str]:
"""
Get the list of enabled plugins based on config and available hooks.
@@ -960,25 +946,6 @@ DEFAULT_TEMPLATES = {
''',
}
# Default icons for known extractor plugins (emoji or short HTML).
# Maps a plugin name (str) to the icon string displayed for it.
DEFAULT_PLUGIN_ICONS = {
'screenshot': '📷',
'pdf': '📄',
'singlefile': '📦',
'dom': '🌐',
'wget': '📥',
'media': '🎬',
'git': '📂',
'readability': '📖',
'mercury': '☿️',
# NOTE(review): the favicon entry is an empty string, so no glyph is shown
# for it; confirm this is intentional and not a character lost in encoding.
'favicon': '',
'title': '📝',
'headers': '📋',
'archive_org': '🏛️',
'htmltotext': '📃',
'warc': '🗄️',
}
def get_plugin_template(plugin: str, template_name: str, fallback: bool = True) -> Optional[str]:
"""
@@ -1018,10 +985,7 @@ def get_plugin_template(plugin: str, template_name: str, fallback: bool = True)
def get_plugin_icon(plugin: str) -> str:
"""
Get the icon for a plugin.
First checks for plugin-provided icon.html template,
then falls back to DEFAULT_PLUGIN_ICONS.
Get the icon for a plugin from its icon.html template.
Args:
plugin: Plugin name (e.g., 'screenshot', '15_singlefile')
@@ -1029,15 +993,13 @@ def get_plugin_icon(plugin: str) -> str:
Returns:
Icon HTML/emoji string.
"""
base_name = get_plugin_name(plugin)
# Try plugin-provided icon template
icon_template = get_plugin_template(plugin, 'icon', fallback=False)
if icon_template:
return icon_template.strip()
# Fall back to default icon
return DEFAULT_PLUGIN_ICONS.get(base_name, '📁')
# Fall back to generic folder icon
return '📁'
def get_all_plugin_icons() -> Dict[str, str]: