Remove extractor field from Crawl model and fix tests

- Remove extractor field from Crawl model (moved to config dict)
- Update migration 0002_drop_seed_model to not add extractor
- Update archivebox_add.py to use config['PARSER'] instead
- Update admin.py recrawl to not pass extractor
- Update jsonl.py serialization to not include extractor
- Update test schema SCHEMA_0_8 to not include extractor
- Set default timeout to 60s for test commands
2026-04-05 15:27:53 +10:00 · 2025-12-27 01:49:09 +00:00
parent ae2ab5b273
commit c3acadd528
6 changed files with 63 additions and 119 deletions
--- a/archivebox/crawls/admin.py
+++ b/archivebox/crawls/admin.py
@@ -233,7 +233,6 @@ class CrawlAdmin(ConfigEditorMixin, BaseModelAdmin):

        new_crawl = Crawl.objects.create(
            urls=obj.urls,
-            extractor=obj.extractor,
            max_depth=obj.max_depth,
            tags_str=obj.tags_str,
            config=obj.config,
--- a/archivebox/crawls/migrations/0002_drop_seed_model.py
+++ b/archivebox/crawls/migrations/0002_drop_seed_model.py
@@ -20,11 +20,6 @@ class Migration(migrations.Migration):
            model_name='crawl',
            name='seed',
        ),
-        migrations.AddField(
-            model_name='crawl',
-            name='extractor',
-            field=models.CharField(default='auto', help_text='Parser for reading URLs (auto, html, json, rss, etc)', max_length=32),
-        ),
        migrations.AlterField(
            model_name='crawl',
            name='created_by',
--- a/archivebox/crawls/models.py
+++ b/archivebox/crawls/models.py
@@ -61,7 +61,6 @@ class Crawl(ModelWithOutputDir, ModelWithConfig, ModelWithHealthStats, ModelWith
    modified_at = models.DateTimeField(auto_now=True)

    urls = models.TextField(blank=False, null=False, help_text='Newline-separated list of URLs to crawl')
-    extractor = models.CharField(default='auto', max_length=32, help_text='Parser for reading URLs (auto, html, json, rss, etc)')
    config = models.JSONField(default=dict)
    max_depth = models.PositiveSmallIntegerField(default=0, validators=[MinValueValidator(0), MaxValueValidator(4)])
    tags_str = models.CharField(max_length=1024, blank=True, null=False, default='')