mirror of
https://github.com/ArchiveBox/ArchiveBox.git
synced 2026-04-04 23:07:56 +10:00
continue renaming extractor to plugin, add plan for hook concurrency, add chrome kill helper script
This commit is contained in:
@@ -24,7 +24,7 @@ from urllib.parse import urlparse
|
||||
|
||||
import rich_click as click
|
||||
|
||||
-EXTRACTOR_NAME = 'parse_jsonl_urls'
|
||||
+PLUGIN_NAME = 'parse_jsonl_urls'
|
||||
|
||||
|
||||
def parse_bookmarked_at(link: dict) -> str | None:
|
||||
@@ -75,7 +75,7 @@ def json_object_to_entry(link: dict) -> dict | None:
|
||||
entry = {
|
||||
'type': 'Snapshot',
|
||||
'url': unescape(url),
|
||||
-'via_extractor': EXTRACTOR_NAME,
|
||||
+'plugin': PLUGIN_NAME,
|
||||
}
|
||||
|
||||
# Parse title
|
||||
|
||||
@@ -265,7 +265,7 @@ class TestParseJsonlUrls:
|
||||
entry = json.loads(output_file.read_text().strip())
|
||||
assert entry['url'] == 'https://example.com'
|
||||
assert 'type' in entry
|
||||
-assert 'via_extractor' in entry
|
||||
+assert 'plugin' in entry
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
|
||||
Reference in New Issue
Block a user