improve plugin tests and config

2026-04-06 07:47:53 +10:00 · 2025-12-29 00:45:23 -08:00
parent f0aa19fa7d
commit 1e4d3ffd11
126 changed files with 2286 additions and 1717 deletions
--- a/archivebox/plugins/parse_rss_urls/config.json
+++ b/archivebox/plugins/parse_rss_urls/config.json
@@ -0,0 +1,13 @@
+{
+  "$schema": "http://json-schema.org/draft-07/schema#",
+  "type": "object",
+  "additionalProperties": false,
+  "properties": {
+    "PARSE_RSS_URLS_ENABLED": {
+      "type": "boolean",
+      "default": true,
+      "x-aliases": ["USE_PARSE_RSS_URLS"],
+      "description": "Enable RSS/Atom feed URL parsing"
+    }
+  }
+}
--- a/archivebox/plugins/parse_rss_urls/tests/test_parse_rss_urls.py
+++ b/archivebox/plugins/parse_rss_urls/tests/test_parse_rss_urls.py
@@ -9,7 +9,7 @@ from pathlib import Path
 import pytest

 PLUGIN_DIR = Path(__file__).parent.parent
-SCRIPT_PATH = next(PLUGIN_DIR.glob('on_Snapshot__*_parse_rss_urls.py'), None)
+SCRIPT_PATH = next(PLUGIN_DIR.glob('on_Snapshot__*_parse_rss_urls.*'), None)


 class TestParseRssUrls:
--- a/archivebox/plugins/parse_rss_urls/tests/test_parse_rss_urls_comprehensive.py
+++ b/archivebox/plugins/parse_rss_urls/tests/test_parse_rss_urls_comprehensive.py
@@ -9,7 +9,7 @@ from pathlib import Path
 import pytest

 PLUGIN_DIR = Path(__file__).parent.parent
-SCRIPT_PATH = next(PLUGIN_DIR.glob('on_Snapshot__*_parse_rss_urls.py'), None)
+SCRIPT_PATH = next(PLUGIN_DIR.glob('on_Snapshot__*_parse_rss_urls.*'), None)


 class TestRssVariants:
@@ -172,14 +172,14 @@ class TestAtomVariants:

        assert result.returncode == 0
        # Output goes to stdout (JSONL)
-        lines = [line for line in result.stdout.strip().split('\n') if line.strip() and '\"type\": \"Snapshot\"' in line]
+        lines = [line for line in result.stdout.strip().split('\n') if line.strip()]

-        tags = [json.loads(line) for line in lines if json.loads(line)['type'] == 'Tag']
+        tags = [json.loads(line) for line in lines if json.loads(line).get('type') == 'Tag']
        tag_names = {t['name'] for t in tags}
        assert 'science' in tag_names
        assert 'research' in tag_names

-        snapshots = [json.loads(line) for line in lines if json.loads(line)['type'] == 'Snapshot']
+        snapshots = [json.loads(line) for line in lines if json.loads(line).get('type') == 'Snapshot']
        entry = snapshots[0]
        assert entry['url'] == 'https://atom.example.com/1'
        assert 'bookmarked_at' in entry
@@ -384,15 +384,15 @@ class TestTagsAndCategories:

        assert result.returncode == 0
        # Output goes to stdout (JSONL)
-        lines = [line for line in result.stdout.strip().split('\n') if line.strip() and '\"type\": \"Snapshot\"' in line]
+        lines = [line for line in result.stdout.strip().split('\n') if line.strip()]

-        tags = [json.loads(line) for line in lines if json.loads(line)['type'] == 'Tag']
+        tags = [json.loads(line) for line in lines if json.loads(line).get('type') == 'Tag']
        tag_names = {t['name'] for t in tags}
        assert 'Tech' in tag_names
        assert 'Web' in tag_names
        assert 'Programming' in tag_names

-        snapshots = [json.loads(line) for line in lines if json.loads(line)['type'] == 'Snapshot']
+        snapshots = [json.loads(line) for line in lines if json.loads(line).get('type') == 'Snapshot']
        entry = snapshots[0]
        tags_list = entry['tags'].split(',')
        assert len(tags_list) == 3
@@ -421,9 +421,9 @@ class TestTagsAndCategories:

        assert result.returncode == 0
        # Output goes to stdout (JSONL)
-        lines = [line for line in result.stdout.strip().split('\n') if line.strip() and '\"type\": \"Snapshot\"' in line]
+        lines = [line for line in result.stdout.strip().split('\n') if line.strip()]

-        tags = [json.loads(line) for line in lines if json.loads(line)['type'] == 'Tag']
+        tags = [json.loads(line) for line in lines if json.loads(line).get('type') == 'Tag']
        tag_names = {t['name'] for t in tags}
        # feedparser extracts the 'term' attribute
        assert 'python' in tag_names
@@ -482,8 +482,8 @@ class TestTagsAndCategories:

        assert result.returncode == 0
        # Output goes to stdout (JSONL)
-        lines = [line for line in result.stdout.strip().split('\n') if line.strip() and '\"type\": \"Snapshot\"' in line]
-        tags = [json.loads(line) for line in lines if json.loads(line)['type'] == 'Tag']
+        lines = [line for line in result.stdout.strip().split('\n') if line.strip()]
+        tags = [json.loads(line) for line in lines if json.loads(line).get('type') == 'Tag']
        # Tag records should be unique
        tag_names = [t['name'] for t in tags]
        assert tag_names.count('Python') == 1
@@ -720,9 +720,9 @@ class TestEdgeCases:

        assert result.returncode == 0
        # Output goes to stdout (JSONL)
-        lines = [line for line in result.stdout.strip().split('\n') if line.strip() and '\"type\": \"Snapshot\"' in line]
+        lines = [line for line in result.stdout.strip().split('\n') if line.strip()]

-        tags = [json.loads(line) for line in lines if json.loads(line)['type'] == 'Tag']
+        tags = [json.loads(line) for line in lines if json.loads(line).get('type') == 'Tag']
        tag_names = {t['name'] for t in tags}
        assert 'C++' in tag_names
        assert 'Node.js' in tag_names
@@ -814,7 +814,7 @@ class TestEdgeCases:

        assert result.returncode == 0
        # Output goes to stdout (JSONL)
-        lines = output_file.read_text(encoding='utf-8').strip().split('\n')
+        lines = [line for line in result.stdout.strip().split('\n') if line.strip()]

        snapshots = [json.loads(line) for line in lines if json.loads(line)['type'] == 'Snapshot']
        entry = snapshots[0]
@@ -885,11 +885,11 @@ class TestEdgeCases:
        assert 'Found 100 URLs' in result.stdout

        # Output goes to stdout (JSONL)
-        lines = [line for line in result.stdout.strip().split('\n') if line.strip() and '\"type\": \"Snapshot\"' in line]
+        lines = [line for line in result.stdout.strip().split('\n') if line.strip()]

        # Should have 10 unique tags (Tag0-Tag9) + 100 snapshots
-        tags = [json.loads(line) for line in lines if json.loads(line)['type'] == 'Tag']
-        snapshots = [json.loads(line) for line in lines if json.loads(line)['type'] == 'Snapshot']
+        tags = [json.loads(line) for line in lines if json.loads(line).get('type') == 'Tag']
+        snapshots = [json.loads(line) for line in lines if json.loads(line).get('type') == 'Snapshot']

        assert len(tags) == 10
        assert len(snapshots) == 100
--- a/archivebox/plugins/parse_rss_urls/tests/test_parse_rss_urls_comprehensive.py.bak
+++ b/archivebox/plugins/parse_rss_urls/tests/test_parse_rss_urls_comprehensive.py.bak