rename archive_org to archivedotorg, add BinaryWorker, fix config pass-through

This commit is contained in:
Nick Sweeting
2026-01-04 22:38:15 -08:00
parent 456aaee287
commit 7ceaeae2d9
32 changed files with 1111 additions and 110 deletions

View File

@@ -147,7 +147,7 @@ class AddLinkForm(forms.Form):
'screenshot', 'seo', 'singlefile', 'ssl', 'staticfile', 'title'
}
archiving = {
'archive_org', 'favicon', 'forumdl', 'gallerydl', 'git',
'archivedotorg', 'favicon', 'forumdl', 'gallerydl', 'git',
'htmltotext', 'media', 'mercury', 'papersdl', 'readability', 'wget'
}
parsing = {

View File

@@ -120,7 +120,7 @@ class Migration(migrations.Migration):
('output', models.CharField(max_length=512)),
('start_ts', models.DateTimeField()),
('end_ts', models.DateTimeField()),
('extractor', models.CharField(choices=[('title', 'title'), ('favicon', 'favicon'), ('wget', 'wget'), ('singlefile', 'singlefile'), ('pdf', 'pdf'), ('screenshot', 'screenshot'), ('dom', 'dom'), ('readability', 'readability'), ('mercury', 'mercury'), ('git', 'git'), ('media', 'media'), ('headers', 'headers'), ('archive_org', 'archive_org')], max_length=32)),
('extractor', models.CharField(choices=[('title', 'title'), ('favicon', 'favicon'), ('wget', 'wget'), ('singlefile', 'singlefile'), ('pdf', 'pdf'), ('screenshot', 'screenshot'), ('dom', 'dom'), ('readability', 'readability'), ('mercury', 'mercury'), ('git', 'git'), ('media', 'media'), ('headers', 'headers'), ('archivedotorg', 'archivedotorg')], max_length=32)),
('snapshot', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='core.Snapshot')),
],
),

View File

@@ -19,6 +19,6 @@ class Migration(migrations.Migration):
migrations.AlterField(
model_name='archiveresult',
name='extractor',
field=models.CharField(choices=[('title', 'title'), ('favicon', 'favicon'), ('headers', 'headers'), ('singlefile', 'singlefile'), ('pdf', 'pdf'), ('screenshot', 'screenshot'), ('dom', 'dom'), ('wget', 'wget'), ('readability', 'readability'), ('mercury', 'mercury'), ('git', 'git'), ('media', 'media'), ('archive_org', 'archive_org')], max_length=32),
field=models.CharField(choices=[('title', 'title'), ('favicon', 'favicon'), ('headers', 'headers'), ('singlefile', 'singlefile'), ('pdf', 'pdf'), ('screenshot', 'screenshot'), ('dom', 'dom'), ('wget', 'wget'), ('readability', 'readability'), ('mercury', 'mercury'), ('git', 'git'), ('media', 'media'), ('archivedotorg', 'archivedotorg')], max_length=32),
),
]

View File

@@ -13,6 +13,6 @@ class Migration(migrations.Migration):
migrations.AlterField(
model_name='archiveresult',
name='extractor',
field=models.CharField(choices=[('favicon', 'favicon'), ('headers', 'headers'), ('singlefile', 'singlefile'), ('pdf', 'pdf'), ('screenshot', 'screenshot'), ('dom', 'dom'), ('wget', 'wget'), ('title', 'title'), ('readability', 'readability'), ('mercury', 'mercury'), ('git', 'git'), ('media', 'media'), ('archive_org', 'archive_org')], max_length=32),
field=models.CharField(choices=[('favicon', 'favicon'), ('headers', 'headers'), ('singlefile', 'singlefile'), ('pdf', 'pdf'), ('screenshot', 'screenshot'), ('dom', 'dom'), ('wget', 'wget'), ('title', 'title'), ('readability', 'readability'), ('mercury', 'mercury'), ('git', 'git'), ('media', 'media'), ('archivedotorg', 'archivedotorg')], max_length=32),
),
]

View File

@@ -13,6 +13,6 @@ class Migration(migrations.Migration):
migrations.AlterField(
model_name='archiveresult',
name='extractor',
field=models.CharField(choices=[('favicon', 'favicon'), ('headers', 'headers'), ('singlefile', 'singlefile'), ('pdf', 'pdf'), ('screenshot', 'screenshot'), ('dom', 'dom'), ('wget', 'wget'), ('title', 'title'), ('readability', 'readability'), ('mercury', 'mercury'), ('htmltotext', 'htmltotext'), ('git', 'git'), ('media', 'media'), ('archive_org', 'archive_org')], max_length=32),
field=models.CharField(choices=[('favicon', 'favicon'), ('headers', 'headers'), ('singlefile', 'singlefile'), ('pdf', 'pdf'), ('screenshot', 'screenshot'), ('dom', 'dom'), ('wget', 'wget'), ('title', 'title'), ('readability', 'readability'), ('mercury', 'mercury'), ('htmltotext', 'htmltotext'), ('git', 'git'), ('media', 'media'), ('archivedotorg', 'archivedotorg')], max_length=32),
),
]

View File

@@ -1973,7 +1973,7 @@ class Snapshot(ModelWithOutputDir, ModelWithConfig, ModelWithNotes, ModelWithHea
canonical = {
'index_path': 'index.html',
'google_favicon_path': FAVICON_PROVIDER.format(self.domain),
'archive_org_path': f'https://web.archive.org/web/{self.base_url}',
'archivedotorg_path': f'https://web.archive.org/web/{self.base_url}',
}
# Scan each ArchiveResult's output directory for the best file

View File

@@ -206,7 +206,10 @@ DATABASES = {
}
MIGRATION_MODULES = {"signal_webhooks": None}
# as much as I'd love this to be a UUID or ULID field, it's not supported yet as of Django 5.0
# Django requires DEFAULT_AUTO_FIELD to subclass AutoField (BigAutoField, SmallAutoField, etc.)
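# Cannot use UUIDField here: Django has no setting for a non-AutoField default primary key (a DEFAULT_PK_FIELD-style setting may arrive in a future release; TODO confirm against the Django release notes)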
# For now: manually add `id = models.UUIDField(primary_key=True, default=uuid7, ...)` to all models
# OR inherit from the ModelWithUUID base class, which provides a UUID primary key
DEFAULT_AUTO_FIELD = "django.db.models.BigAutoField"