mirror of
https://github.com/ArchiveBox/ArchiveBox.git
synced 2026-04-04 23:07:56 +10:00
Remove extractor field from Crawl model and fix tests
- Remove extractor field from Crawl model (moved to config dict)
- Update migration 0002_drop_seed_model to not add extractor
- Update archivebox_add.py to use config['PARSER'] instead
- Update admin.py recrawl to not pass extractor
- Update jsonl.py serialization to not include extractor
- Update test schema SCHEMA_0_8 to not include extractor
- Set default timeout to 60s for test commands
This commit is contained in:
@@ -78,7 +78,6 @@ def add(urls: str | list[str],
|
||||
|
||||
crawl = Crawl.objects.create(
|
||||
urls=urls_content,
|
||||
extractor=parser,
|
||||
max_depth=depth,
|
||||
tags_str=tag,
|
||||
label=f'{USER}@{HOSTNAME} $ {cmd_str} [{timestamp}]',
|
||||
@@ -89,6 +88,7 @@ def add(urls: str | list[str],
|
||||
'OVERWRITE': overwrite,
|
||||
'EXTRACTORS': plugins,
|
||||
'DEFAULT_PERSONA': persona or 'Default',
|
||||
'PARSER': parser,
|
||||
}
|
||||
)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user