continue renaming extractor to plugin, add plan for hook concurrency, add chrome kill helper script

2026-04-05 15:27:53 +10:00 · 2025-12-28 05:29:24 -08:00
parent d2e65cfd38
commit 4ccb0863bb
53 changed files with 456 additions and 493 deletions
--- a/archivebox/plugins/parse_rss_urls/on_Snapshot__61_parse_rss_urls.py
+++ b/archivebox/plugins/parse_rss_urls/on_Snapshot__61_parse_rss_urls.py
@@ -23,7 +23,7 @@ from urllib.parse import urlparse

 import rich_click as click

-EXTRACTOR_NAME = 'parse_rss_urls'
+PLUGIN_NAME = 'parse_rss_urls'

 try:
    import feedparser
@@ -107,7 +107,7 @@ def main(url: str, snapshot_id: str = None, crawl_id: str = None, depth: int = 0
        entry = {
            'type': 'Snapshot',
            'url': unescape(item_url),
-            'via_extractor': EXTRACTOR_NAME,
+            'plugin': PLUGIN_NAME,
            'depth': depth + 1,
        }
        if snapshot_id:
--- a/archivebox/plugins/parse_rss_urls/tests/test_parse_rss_urls_comprehensive.py
+++ b/archivebox/plugins/parse_rss_urls/tests/test_parse_rss_urls_comprehensive.py
@@ -47,7 +47,7 @@ class TestRssVariants:

        assert entry['url'] == 'https://example.com/article1'
        assert entry['title'] == 'RSS 0.91 Article'
-        assert entry['via_extractor'] == 'parse_rss_urls'
+        assert entry['plugin'] == 'parse_rss_urls'

    def test_rss_10_rdf(self, tmp_path):
        """Test RSS 1.0 (RDF) format."""