mirror of
https://github.com/ArchiveBox/ArchiveBox.git
synced 2026-04-05 15:27:53 +10:00
Remove extractor field from Crawl model and fix tests
- Remove extractor field from Crawl model (moved to config dict)
- Update migration 0002_drop_seed_model to not add extractor
- Update archivebox_add.py to use config['PARSER'] instead
- Update admin.py recrawl to not pass extractor
- Update jsonl.py serialization to not include extractor
- Update test schema SCHEMA_0_8 to not include extractor
- Set default timeout to 60s for test commands
This commit is contained in:
@@ -233,7 +233,6 @@ class CrawlAdmin(ConfigEditorMixin, BaseModelAdmin):
|
||||
|
||||
new_crawl = Crawl.objects.create(
|
||||
urls=obj.urls,
|
||||
extractor=obj.extractor,
|
||||
max_depth=obj.max_depth,
|
||||
tags_str=obj.tags_str,
|
||||
config=obj.config,
|
||||
|
||||
@@ -20,11 +20,6 @@ class Migration(migrations.Migration):
|
||||
model_name='crawl',
|
||||
name='seed',
|
||||
),
|
||||
migrations.AddField(
|
||||
model_name='crawl',
|
||||
name='extractor',
|
||||
field=models.CharField(default='auto', help_text='Parser for reading URLs (auto, html, json, rss, etc)', max_length=32),
|
||||
),
|
||||
migrations.AlterField(
|
||||
model_name='crawl',
|
||||
name='created_by',
|
||||
|
||||
@@ -61,7 +61,6 @@ class Crawl(ModelWithOutputDir, ModelWithConfig, ModelWithHealthStats, ModelWith
|
||||
modified_at = models.DateTimeField(auto_now=True)
|
||||
|
||||
urls = models.TextField(blank=False, null=False, help_text='Newline-separated list of URLs to crawl')
|
||||
extractor = models.CharField(default='auto', max_length=32, help_text='Parser for reading URLs (auto, html, json, rss, etc)')
|
||||
config = models.JSONField(default=dict)
|
||||
max_depth = models.PositiveSmallIntegerField(default=0, validators=[MinValueValidator(0), MaxValueValidator(4)])
|
||||
tags_str = models.CharField(max_length=1024, blank=True, null=False, default='')
|
||||
|
||||
Reference in New Issue
Block a user