mirror of
https://github.com/ArchiveBox/ArchiveBox.git
synced 2026-04-05 15:27:53 +10:00
continue renaming extractor to plugin, add plan for hook concurrency, add chrome kill helper script
This commit is contained in:
@@ -23,7 +23,7 @@ from urllib.parse import urlparse
|
||||
|
||||
import rich_click as click
|
||||
|
||||
-EXTRACTOR_NAME = 'parse_rss_urls'
|
||||
+PLUGIN_NAME = 'parse_rss_urls'
|
||||
|
||||
try:
|
||||
import feedparser
|
||||
@@ -107,7 +107,7 @@ def main(url: str, snapshot_id: str = None, crawl_id: str = None, depth: int = 0
|
||||
entry = {
|
||||
'type': 'Snapshot',
|
||||
'url': unescape(item_url),
|
||||
-'via_extractor': EXTRACTOR_NAME,
|
||||
+'plugin': PLUGIN_NAME,
|
||||
'depth': depth + 1,
|
||||
}
|
||||
if snapshot_id:
|
||||
|
||||
@@ -47,7 +47,7 @@ class TestRssVariants:
|
||||
|
||||
assert entry['url'] == 'https://example.com/article1'
|
||||
assert entry['title'] == 'RSS 0.91 Article'
|
||||
-assert entry['via_extractor'] == 'parse_rss_urls'
|
||||
+assert entry['plugin'] == 'parse_rss_urls'
|
||||
|
||||
def test_rss_10_rdf(self, tmp_path):
|
||||
"""Test RSS 1.0 (RDF) format."""
|
||||
|
||||
Reference in New Issue
Block a user