mirror of https://github.com/ArchiveBox/ArchiveBox.git
synced 2026-01-04 18:05:36 +10:00

Commit: much better tests and add page ui

archivebox/core/migrations/0023_new_schema.py (deleted, 494 lines)
@@ -1,494 +0,0 @@
# Generated by Django 5.0.6 on 2024-12-25
# Transforms schema from 0022 to new simplified schema (ABID system removed)

from uuid import uuid4
from django.conf import settings
from django.db import migrations, models
import django.db.models.deletion
import django.utils.timezone


def get_or_create_system_user_pk(apps, schema_editor):
    """Get or create system user for migrations."""
    User = apps.get_model('auth', 'User')
    user, _ = User.objects.get_or_create(
        username='system',
        defaults={'is_active': False, 'password': '!'}
    )
    return user.pk


def populate_created_by_snapshot(apps, schema_editor):
    """Populate created_by for existing snapshots."""
    User = apps.get_model('auth', 'User')
    Snapshot = apps.get_model('core', 'Snapshot')

    system_user, _ = User.objects.get_or_create(
        username='system',
        defaults={'is_active': False, 'password': '!'}
    )

    Snapshot.objects.filter(created_by__isnull=True).update(created_by=system_user)


def populate_created_by_archiveresult(apps, schema_editor):
    """Populate created_by for existing archive results."""
    User = apps.get_model('auth', 'User')
    ArchiveResult = apps.get_model('core', 'ArchiveResult')

    system_user, _ = User.objects.get_or_create(
        username='system',
        defaults={'is_active': False, 'password': '!'}
    )

    ArchiveResult.objects.filter(created_by__isnull=True).update(created_by=system_user)


def populate_created_by_tag(apps, schema_editor):
    """Populate created_by for existing tags."""
    User = apps.get_model('auth', 'User')
    Tag = apps.get_model('core', 'Tag')

    system_user, _ = User.objects.get_or_create(
        username='system',
        defaults={'is_active': False, 'password': '!'}
    )

    Tag.objects.filter(created_by__isnull=True).update(created_by=system_user)


def generate_uuid_for_archiveresults(apps, schema_editor):
    """Generate UUIDs for archive results that don't have them."""
    ArchiveResult = apps.get_model('core', 'ArchiveResult')
    for ar in ArchiveResult.objects.filter(uuid__isnull=True).iterator(chunk_size=500):
        ar.uuid = uuid4()
        ar.save(update_fields=['uuid'])


def generate_uuid_for_tags(apps, schema_editor):
    """Generate UUIDs for tags that don't have them."""
    Tag = apps.get_model('core', 'Tag')
    for tag in Tag.objects.filter(uuid__isnull=True).iterator(chunk_size=500):
        tag.uuid = uuid4()
        tag.save(update_fields=['uuid'])


def copy_bookmarked_at_from_added(apps, schema_editor):
    """Copy added timestamp to bookmarked_at."""
    Snapshot = apps.get_model('core', 'Snapshot')
    Snapshot.objects.filter(bookmarked_at__isnull=True).update(
        bookmarked_at=models.F('added')
    )


def copy_created_at_from_added(apps, schema_editor):
    """Copy added timestamp to created_at for snapshots."""
    Snapshot = apps.get_model('core', 'Snapshot')
    Snapshot.objects.filter(created_at__isnull=True).update(
        created_at=models.F('added')
    )


def copy_created_at_from_start_ts(apps, schema_editor):
    """Copy start_ts to created_at for archive results."""
    ArchiveResult = apps.get_model('core', 'ArchiveResult')
    ArchiveResult.objects.filter(created_at__isnull=True).update(
        created_at=models.F('start_ts')
    )


class Migration(migrations.Migration):
    """
    This migration transforms the schema from the main branch (0022) to the new
    simplified schema without the ABID system.

    For dev branch users who had ABID migrations (0023-0074), this replaces them
    with a clean transformation.
    """

    replaces = [
        ('core', '0023_alter_archiveresult_options_archiveresult_abid_and_more'),
        ('core', '0024_auto_20240513_1143'),
        ('core', '0025_alter_archiveresult_uuid'),
        ('core', '0026_archiveresult_created_archiveresult_created_by_and_more'),
        ('core', '0027_update_snapshot_ids'),
        ('core', '0028_alter_archiveresult_uuid'),
        ('core', '0029_alter_archiveresult_id'),
        ('core', '0030_alter_archiveresult_uuid'),
        ('core', '0031_alter_archiveresult_id_alter_archiveresult_uuid_and_more'),
        ('core', '0032_alter_archiveresult_id'),
        ('core', '0033_rename_id_archiveresult_old_id'),
        ('core', '0034_alter_archiveresult_old_id_alter_archiveresult_uuid'),
        ('core', '0035_remove_archiveresult_uuid_archiveresult_id'),
        ('core', '0036_alter_archiveresult_id_alter_archiveresult_old_id'),
        ('core', '0037_rename_id_snapshot_old_id'),
        ('core', '0038_rename_uuid_snapshot_id'),
        ('core', '0039_rename_snapshot_archiveresult_snapshot_old'),
        ('core', '0040_archiveresult_snapshot'),
        ('core', '0041_alter_archiveresult_snapshot_and_more'),
        ('core', '0042_remove_archiveresult_snapshot_old'),
        ('core', '0043_alter_archiveresult_snapshot_alter_snapshot_id_and_more'),
        ('core', '0044_alter_archiveresult_snapshot_alter_tag_uuid_and_more'),
        ('core', '0045_alter_snapshot_old_id'),
        ('core', '0046_alter_archiveresult_snapshot_alter_snapshot_id_and_more'),
        ('core', '0047_alter_snapshottag_unique_together_and_more'),
        ('core', '0048_alter_archiveresult_snapshot_and_more'),
        ('core', '0049_rename_snapshot_snapshottag_snapshot_old_and_more'),
        ('core', '0050_alter_snapshottag_snapshot_old'),
        ('core', '0051_snapshottag_snapshot_alter_snapshottag_snapshot_old'),
        ('core', '0052_alter_snapshottag_unique_together_and_more'),
        ('core', '0053_remove_snapshottag_snapshot_old'),
        ('core', '0054_alter_snapshot_timestamp'),
        ('core', '0055_alter_tag_slug'),
        ('core', '0056_remove_tag_uuid'),
        ('core', '0057_rename_id_tag_old_id'),
        ('core', '0058_alter_tag_old_id'),
        ('core', '0059_tag_id'),
        ('core', '0060_alter_tag_id'),
        ('core', '0061_rename_tag_snapshottag_old_tag_and_more'),
        ('core', '0062_alter_snapshottag_old_tag'),
        ('core', '0063_snapshottag_tag_alter_snapshottag_old_tag'),
        ('core', '0064_alter_snapshottag_unique_together_and_more'),
        ('core', '0065_remove_snapshottag_old_tag'),
        ('core', '0066_alter_snapshottag_tag_alter_tag_id_alter_tag_old_id'),
        ('core', '0067_alter_snapshottag_tag'),
        ('core', '0068_alter_archiveresult_options'),
        ('core', '0069_alter_archiveresult_created_alter_snapshot_added_and_more'),
        ('core', '0070_alter_archiveresult_created_by_alter_snapshot_added_and_more'),
        ('core', '0071_remove_archiveresult_old_id_remove_snapshot_old_id_and_more'),
        ('core', '0072_rename_added_snapshot_bookmarked_at_and_more'),
        ('core', '0073_rename_created_archiveresult_created_at_and_more'),
        ('core', '0074_alter_snapshot_downloaded_at'),
    ]

    dependencies = [
        ('core', '0022_auto_20231023_2008'),
        migrations.swappable_dependency(settings.AUTH_USER_MODEL),
    ]

    operations = [
        # === SNAPSHOT CHANGES ===

        # Add health stats fields to Snapshot
        migrations.AddField(
            model_name='snapshot',
            name='num_uses_failed',
            field=models.PositiveIntegerField(default=0),
        ),
        migrations.AddField(
            model_name='snapshot',
            name='num_uses_succeeded',
            field=models.PositiveIntegerField(default=0),
        ),

        # Add new fields to Snapshot
        migrations.AddField(
            model_name='snapshot',
            name='created_by',
            field=models.ForeignKey(
                default=None, null=True, blank=True,
                on_delete=django.db.models.deletion.CASCADE,
                related_name='snapshot_set',
                to=settings.AUTH_USER_MODEL,
            ),
        ),
        migrations.AddField(
            model_name='snapshot',
            name='created_at',
            field=models.DateTimeField(default=django.utils.timezone.now, db_index=True, null=True),
        ),
        migrations.AddField(
            model_name='snapshot',
            name='modified_at',
            field=models.DateTimeField(auto_now=True),
        ),
        migrations.AddField(
            model_name='snapshot',
            name='bookmarked_at',
            field=models.DateTimeField(default=django.utils.timezone.now, db_index=True, null=True),
        ),
        migrations.AddField(
            model_name='snapshot',
            name='downloaded_at',
            field=models.DateTimeField(default=None, null=True, blank=True, db_index=True),
        ),
        migrations.AddField(
            model_name='snapshot',
            name='depth',
            field=models.PositiveSmallIntegerField(default=0, db_index=True),
        ),
        migrations.AddField(
            model_name='snapshot',
            name='status',
            field=models.CharField(choices=[('queued', 'Queued'), ('started', 'Started'), ('sealed', 'Sealed')], default='queued', max_length=15, db_index=True),
        ),
        migrations.AddField(
            model_name='snapshot',
            name='retry_at',
            field=models.DateTimeField(default=django.utils.timezone.now, null=True, blank=True, db_index=True),
        ),
        migrations.AddField(
            model_name='snapshot',
            name='config',
            field=models.JSONField(default=dict, blank=False),
        ),
        migrations.AddField(
            model_name='snapshot',
            name='notes',
            field=models.TextField(blank=True, default=''),
        ),
        migrations.AddField(
            model_name='snapshot',
            name='output_dir',
            field=models.CharField(max_length=256, default=None, null=True, blank=True),
        ),

        # Copy data from old fields to new
        migrations.RunPython(copy_bookmarked_at_from_added, migrations.RunPython.noop),
        migrations.RunPython(copy_created_at_from_added, migrations.RunPython.noop),
        migrations.RunPython(populate_created_by_snapshot, migrations.RunPython.noop),

        # Make created_by non-nullable after population
        migrations.AlterField(
            model_name='snapshot',
            name='created_by',
            field=models.ForeignKey(
                on_delete=django.db.models.deletion.CASCADE,
                related_name='snapshot_set',
                to=settings.AUTH_USER_MODEL,
                db_index=True,
            ),
        ),

        # Update timestamp field constraints
        migrations.AlterField(
            model_name='snapshot',
            name='timestamp',
            field=models.CharField(max_length=32, unique=True, db_index=True, editable=False),
        ),

        # Update title field size
        migrations.AlterField(
            model_name='snapshot',
            name='title',
            field=models.CharField(max_length=512, null=True, blank=True, db_index=True),
        ),

        # Remove old 'added' and 'updated' fields
        migrations.RemoveField(model_name='snapshot', name='added'),
        migrations.RemoveField(model_name='snapshot', name='updated'),

        # Register SnapshotTag through model (table already exists from 0006's ManyToManyField)
        migrations.SeparateDatabaseAndState(
            state_operations=[
                migrations.CreateModel(
                    name='SnapshotTag',
                    fields=[
                        ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
                        ('snapshot', models.ForeignKey(db_column='snapshot_id', on_delete=django.db.models.deletion.CASCADE, to='core.snapshot')),
                        ('tag', models.ForeignKey(db_column='tag_id', on_delete=django.db.models.deletion.CASCADE, to='core.tag')),
                    ],
                    options={
                        'db_table': 'core_snapshot_tags',
                    },
                ),
            ],
            database_operations=[],  # Table already exists from 0006
        ),

        # === TAG CHANGES ===
        # Tag keeps AutoField (integer) id for migration compatibility

        # Add tracking fields to Tag
        migrations.AddField(
            model_name='tag',
            name='created_by',
            field=models.ForeignKey(
                default=None, null=True, blank=True,
                on_delete=django.db.models.deletion.CASCADE,
                related_name='tag_set',
                to=settings.AUTH_USER_MODEL,
            ),
        ),
        migrations.AddField(
            model_name='tag',
            name='created_at',
            field=models.DateTimeField(default=django.utils.timezone.now, db_index=True, null=True),
        ),
        migrations.AddField(
            model_name='tag',
            name='modified_at',
            field=models.DateTimeField(auto_now=True),
        ),

        # Populate created_by for tags
        migrations.RunPython(populate_created_by_tag, migrations.RunPython.noop),

        # Update slug field
        migrations.AlterField(
            model_name='tag',
            name='slug',
            field=models.SlugField(unique=True, max_length=100, editable=False),
        ),

        # === ARCHIVERESULT CHANGES ===

        # Add health stats fields to ArchiveResult
        migrations.AddField(
            model_name='archiveresult',
            name='num_uses_failed',
            field=models.PositiveIntegerField(default=0),
        ),
        migrations.AddField(
            model_name='archiveresult',
            name='num_uses_succeeded',
            field=models.PositiveIntegerField(default=0),
        ),

        # Add uuid field for new ID
        migrations.AddField(
            model_name='archiveresult',
            name='uuid',
            field=models.UUIDField(default=uuid4, null=True, blank=True),
        ),
        migrations.AddField(
            model_name='archiveresult',
            name='created_by',
            field=models.ForeignKey(
                default=None, null=True, blank=True,
                on_delete=django.db.models.deletion.CASCADE,
                related_name='archiveresult_set',
                to=settings.AUTH_USER_MODEL,
            ),
        ),
        migrations.AddField(
            model_name='archiveresult',
            name='created_at',
            field=models.DateTimeField(default=django.utils.timezone.now, db_index=True, null=True),
        ),
        migrations.AddField(
            model_name='archiveresult',
            name='modified_at',
            field=models.DateTimeField(auto_now=True),
        ),
        migrations.AddField(
            model_name='archiveresult',
            name='retry_at',
            field=models.DateTimeField(default=django.utils.timezone.now, null=True, blank=True, db_index=True),
        ),
        migrations.AddField(
            model_name='archiveresult',
            name='notes',
            field=models.TextField(blank=True, default=''),
        ),
        migrations.AddField(
            model_name='archiveresult',
            name='output_dir',
            field=models.CharField(max_length=256, default=None, null=True, blank=True),
        ),
        migrations.AddField(
            model_name='archiveresult',
            name='config',
            field=models.JSONField(default=dict, blank=False),
        ),

        # Populate UUIDs and data for archive results
        migrations.RunPython(generate_uuid_for_archiveresults, migrations.RunPython.noop),
        migrations.RunPython(copy_created_at_from_start_ts, migrations.RunPython.noop),
        migrations.RunPython(populate_created_by_archiveresult, migrations.RunPython.noop),

        # Make created_by non-nullable
        migrations.AlterField(
            model_name='archiveresult',
            name='created_by',
            field=models.ForeignKey(
                on_delete=django.db.models.deletion.CASCADE,
                related_name='archiveresult_set',
                to=settings.AUTH_USER_MODEL,
                db_index=True,
            ),
        ),

        # Update extractor choices
        migrations.AlterField(
            model_name='archiveresult',
            name='extractor',
            field=models.CharField(
                choices=[
                    ('htmltotext', 'htmltotext'), ('git', 'git'), ('singlefile', 'singlefile'),
                    ('media', 'media'), ('archive_org', 'archive_org'), ('readability', 'readability'),
                    ('mercury', 'mercury'), ('favicon', 'favicon'), ('pdf', 'pdf'),
                    ('headers', 'headers'), ('screenshot', 'screenshot'), ('dom', 'dom'),
                    ('title', 'title'), ('wget', 'wget'),
                ],
                max_length=32, db_index=True,
            ),
        ),

        # Update status field
        migrations.AlterField(
            model_name='archiveresult',
            name='status',
            field=models.CharField(
                choices=[
                    ('queued', 'Queued'), ('started', 'Started'), ('backoff', 'Waiting to retry'),
                    ('succeeded', 'Succeeded'), ('failed', 'Failed'), ('skipped', 'Skipped'),
                ],
                max_length=16, default='queued', db_index=True,
            ),
        ),

        # Update output field size
        migrations.AlterField(
            model_name='archiveresult',
            name='output',
            field=models.CharField(max_length=1024, default=None, null=True, blank=True),
        ),

        # Update cmd_version field size
        migrations.AlterField(
            model_name='archiveresult',
            name='cmd_version',
            field=models.CharField(max_length=128, default=None, null=True, blank=True),
        ),

        # Make start_ts and end_ts nullable
        migrations.AlterField(
            model_name='archiveresult',
            name='start_ts',
            field=models.DateTimeField(default=None, null=True, blank=True),
        ),
        migrations.AlterField(
            model_name='archiveresult',
            name='end_ts',
            field=models.DateTimeField(default=None, null=True, blank=True),
        ),

        # Make pwd nullable
        migrations.AlterField(
            model_name='archiveresult',
            name='pwd',
            field=models.CharField(max_length=256, default=None, null=True, blank=True),
        ),

        # Make cmd nullable
        migrations.AlterField(
            model_name='archiveresult',
            name='cmd',
            field=models.JSONField(default=None, null=True, blank=True),
        ),

        # Update model options
        migrations.AlterModelOptions(
            name='archiveresult',
            options={'verbose_name': 'Archive Result', 'verbose_name_plural': 'Archive Results Log'},
        ),
        migrations.AlterModelOptions(
            name='snapshot',
            options={'verbose_name': 'Snapshot', 'verbose_name_plural': 'Snapshots'},
        ),
        migrations.AlterModelOptions(
            name='tag',
            options={'verbose_name': 'Tag', 'verbose_name_plural': 'Tags'},
        ),
    ]
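Note: the deleted migration above relied on Django's squash mechanism via `replaces`. When a migration declares `replaces`, Django marks it as applied on databases where every listed migration is already recorded in django_migrations, and runs it as a single migration on fresh databases. A minimal sketch of the pattern (migration names here are illustrative, not from this repo):

# Sketch of Django's `replaces` squash mechanism (hypothetical names).
from django.db import migrations, models


class Migration(migrations.Migration):
    # Already-migrated DBs: marked applied without running.
    # Fresh DBs: this runs instead of the whole replaced chain.
    replaces = [
        ('core', '0023_old_step_a'),  # hypothetical
        ('core', '0024_old_step_b'),  # hypothetical
    ]

    dependencies = [('core', '0022_previous')]  # hypothetical

    operations = [
        migrations.AddField('snapshot', 'notes', models.TextField(blank=True, default='')),
    ]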
archivebox/core/migrations/0023_upgrade_to_0_9_0.py (new file, 190 lines)
@@ -0,0 +1,190 @@
# Generated by hand on 2025-12-29
# Upgrades core app from v0.7.2 (migration 0022) to v0.9.0 using raw SQL
# Handles both fresh installs and upgrades from v0.7.2

from django.db import migrations


class Migration(migrations.Migration):

    dependencies = [
        ('core', '0022_auto_20231023_2008'),
        ('crawls', '0001_initial'),
        ('machine', '0001_initial'),
        ('auth', '0012_alter_user_first_name_max_length'),
    ]

    operations = [
        migrations.RunSQL(
            # Forward SQL
            sql="""
            -- ============================================================================
            -- PART 1: Rename extractor → plugin in core_archiveresult
            -- ============================================================================
            -- Older SQLite builds can't rename a column in place (ALTER TABLE ...
            -- RENAME COLUMN requires SQLite 3.25+), so the rename is done as a full
            -- table rebuild: if 'extractor' exists and 'plugin' doesn't, create the
            -- new table, copy the data across, drop the old table, and rename.

            CREATE TABLE IF NOT EXISTS core_archiveresult_new (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                uuid TEXT,
                created_at DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP,
                modified_at DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP,

                snapshot_id TEXT NOT NULL,
                plugin VARCHAR(32) NOT NULL DEFAULT '',
                hook_name VARCHAR(255) NOT NULL DEFAULT '',

                cmd TEXT,
                pwd VARCHAR(256),
                cmd_version VARCHAR(128),

                start_ts DATETIME,
                end_ts DATETIME,
                status VARCHAR(15) NOT NULL DEFAULT 'queued',
                retry_at DATETIME,

                output_files TEXT NOT NULL DEFAULT '{}',
                output_json TEXT,
                output_str TEXT NOT NULL DEFAULT '',
                output_size INTEGER NOT NULL DEFAULT 0,
                output_mimetypes VARCHAR(512) NOT NULL DEFAULT '',

                config TEXT,
                notes TEXT NOT NULL DEFAULT '',
                num_uses_succeeded INTEGER NOT NULL DEFAULT 0,
                num_uses_failed INTEGER NOT NULL DEFAULT 0,

                binary_id TEXT,
                iface_id TEXT,
                process_id TEXT,

                FOREIGN KEY (snapshot_id) REFERENCES core_snapshot(id) ON DELETE CASCADE,
                FOREIGN KEY (binary_id) REFERENCES machine_binary(id) ON DELETE SET NULL,
                FOREIGN KEY (iface_id) REFERENCES machine_networkinterface(id) ON DELETE SET NULL,
                FOREIGN KEY (process_id) REFERENCES machine_process(id) ON DELETE RESTRICT
            );

            -- Only copy if old table exists
            INSERT OR IGNORE INTO core_archiveresult_new (
                id, uuid, created_at, modified_at, snapshot_id, plugin,
                cmd, pwd, cmd_version, start_ts, end_ts, status, output_str
            )
            SELECT
                id, uuid,
                COALESCE(start_ts, CURRENT_TIMESTAMP) as created_at,
                COALESCE(end_ts, start_ts, CURRENT_TIMESTAMP) as modified_at,
                snapshot_id,
                COALESCE(extractor, '') as plugin,
                cmd, pwd, cmd_version,
                start_ts, end_ts, status,
                COALESCE(output, '') as output_str
            FROM core_archiveresult
            WHERE EXISTS (SELECT 1 FROM sqlite_master WHERE type='table' AND name='core_archiveresult');

            DROP TABLE IF EXISTS core_archiveresult;
            ALTER TABLE core_archiveresult_new RENAME TO core_archiveresult;

            CREATE INDEX IF NOT EXISTS core_archiveresult_snapshot_id_idx ON core_archiveresult(snapshot_id);
            CREATE INDEX IF NOT EXISTS core_archiveresult_plugin_idx ON core_archiveresult(plugin);
            CREATE INDEX IF NOT EXISTS core_archiveresult_status_idx ON core_archiveresult(status);
            CREATE INDEX IF NOT EXISTS core_archiveresult_retry_at_idx ON core_archiveresult(retry_at);
            CREATE INDEX IF NOT EXISTS core_archiveresult_created_at_idx ON core_archiveresult(created_at);
            CREATE INDEX IF NOT EXISTS core_archiveresult_uuid_idx ON core_archiveresult(uuid);

            -- ============================================================================
            -- PART 2: Upgrade core_snapshot table
            -- ============================================================================

            CREATE TABLE IF NOT EXISTS core_snapshot_new (
                id TEXT PRIMARY KEY NOT NULL,
                created_at DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP,
                modified_at DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP,

                url TEXT NOT NULL,
                timestamp VARCHAR(32) NOT NULL UNIQUE,
                bookmarked_at DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP,

                crawl_id TEXT,
                parent_snapshot_id TEXT,

                title VARCHAR(512),
                downloaded_at DATETIME,
                depth INTEGER NOT NULL DEFAULT 0,
                fs_version VARCHAR(10) NOT NULL DEFAULT '0.9.0',

                config TEXT NOT NULL DEFAULT '{}',
                notes TEXT NOT NULL DEFAULT '',
                num_uses_succeeded INTEGER NOT NULL DEFAULT 0,
                num_uses_failed INTEGER NOT NULL DEFAULT 0,

                status VARCHAR(15) NOT NULL DEFAULT 'queued',
                retry_at DATETIME,
                current_step INTEGER NOT NULL DEFAULT 0,

                FOREIGN KEY (crawl_id) REFERENCES crawls_crawl(id) ON DELETE CASCADE,
                FOREIGN KEY (parent_snapshot_id) REFERENCES core_snapshot(id) ON DELETE SET NULL
            );

            -- Copy data from old table if it exists
            -- Map v0.7.2 fields: added → bookmarked_at/created_at, updated → modified_at
            INSERT OR IGNORE INTO core_snapshot_new (
                id, url, timestamp, title, bookmarked_at, created_at, modified_at
            )
            SELECT
                id, url, timestamp, title,
                COALESCE(added, CURRENT_TIMESTAMP) as bookmarked_at,
                COALESCE(added, CURRENT_TIMESTAMP) as created_at,
                COALESCE(updated, added, CURRENT_TIMESTAMP) as modified_at
            FROM core_snapshot
            WHERE EXISTS (SELECT 1 FROM sqlite_master WHERE type='table' AND name='core_snapshot');

            DROP TABLE IF EXISTS core_snapshot;
            ALTER TABLE core_snapshot_new RENAME TO core_snapshot;

            CREATE INDEX IF NOT EXISTS core_snapshot_url_idx ON core_snapshot(url);
            CREATE INDEX IF NOT EXISTS core_snapshot_timestamp_idx ON core_snapshot(timestamp);
            CREATE INDEX IF NOT EXISTS core_snapshot_bookmarked_at_idx ON core_snapshot(bookmarked_at);
            CREATE INDEX IF NOT EXISTS core_snapshot_crawl_id_idx ON core_snapshot(crawl_id);
            CREATE INDEX IF NOT EXISTS core_snapshot_status_idx ON core_snapshot(status);
            CREATE INDEX IF NOT EXISTS core_snapshot_retry_at_idx ON core_snapshot(retry_at);
            CREATE INDEX IF NOT EXISTS core_snapshot_created_at_idx ON core_snapshot(created_at);
            CREATE UNIQUE INDEX IF NOT EXISTS core_snapshot_url_crawl_unique ON core_snapshot(url, crawl_id);

            -- ============================================================================
            -- PART 3: Upgrade core_tag table
            -- ============================================================================

            CREATE TABLE IF NOT EXISTS core_tag_new (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                created_at DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP,
                modified_at DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP,

                name VARCHAR(100) NOT NULL UNIQUE,
                slug VARCHAR(100) NOT NULL UNIQUE,

                created_by_id INTEGER,

                FOREIGN KEY (created_by_id) REFERENCES auth_user(id) ON DELETE CASCADE
            );

            -- Copy data from old table if it exists
            INSERT OR IGNORE INTO core_tag_new (id, name, slug)
            SELECT id, name, slug
            FROM core_tag
            WHERE EXISTS (SELECT 1 FROM sqlite_master WHERE type='table' AND name='core_tag');

            DROP TABLE IF EXISTS core_tag;
            ALTER TABLE core_tag_new RENAME TO core_tag;

            CREATE INDEX IF NOT EXISTS core_tag_created_at_idx ON core_tag(created_at);
            CREATE INDEX IF NOT EXISTS core_tag_created_by_id_idx ON core_tag(created_by_id);

            -- core_snapshot_tags table already exists in v0.7.2, no changes needed
            """,
            # Reverse SQL (best effort - data loss may occur)
            reverse_sql="""
            -- This is a best-effort rollback - data in new fields will be lost
            SELECT 'Migration 0023 cannot be fully reversed - new fields will be lost';
            """
        ),
    ]
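Note: because RunSQL sidesteps Django's schema editor entirely, nothing checks that the rebuilt tables actually match what the models expect. A quick post-migration sanity check is possible with the sqlite3 stdlib module; a sketch (the index.sqlite3 filename is an assumption about the data dir, not something stated in this diff):

# Post-migration sanity check (sketch; DB path is an assumption).
import sqlite3

conn = sqlite3.connect('index.sqlite3')
cols = {row[1] for row in conn.execute('PRAGMA table_info(core_archiveresult)')}
assert 'plugin' in cols and 'extractor' not in cols      # rename happened
assert {'output_str', 'output_json', 'output_files'} <= cols  # hook fields exist
conn.close()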
archivebox/core/migrations/0024_assign_default_crawl.py (new file, 118 lines)
@@ -0,0 +1,118 @@
# Generated by hand on 2025-12-29
# Creates a default crawl for v0.7.2 migrated snapshots and makes crawl_id NOT NULL

from django.db import migrations


def create_default_crawl_and_assign_snapshots(apps, schema_editor):
    """
    Create a default crawl for migrated snapshots and assign all snapshots without a crawl to it.
    Uses raw SQL because the historical model state doesn't match the schema that 0023 built with raw SQL.
    """
    from django.db import connection
    import uuid as uuid_lib
    from datetime import datetime

    cursor = connection.cursor()

    # Check if there are any snapshots without a crawl
    cursor.execute("SELECT COUNT(*) FROM core_snapshot WHERE crawl_id IS NULL")
    snapshots_without_crawl = cursor.fetchone()[0]

    if snapshots_without_crawl == 0:
        print('✓ Fresh install or all snapshots already have crawls')
        return

    # Get or create system user (pk=1)
    cursor.execute("SELECT id FROM auth_user WHERE id = 1")
    if not cursor.fetchone():
        cursor.execute("""
            INSERT INTO auth_user (id, password, is_superuser, username, first_name, last_name, email, is_staff, is_active, date_joined)
            VALUES (1, '!', 1, 'system', '', '', '', 1, 1, ?)
        """, [datetime.now().isoformat()])

    # Create a default crawl for migrated snapshots
    crawl_id = str(uuid_lib.uuid4())
    now = datetime.now().isoformat()

    cursor.execute("""
        INSERT INTO crawls_crawl (
            id, created_at, modified_at, num_uses_succeeded, num_uses_failed,
            urls, max_depth, tags_str, label, notes, output_dir,
            status, retry_at, created_by_id, schedule_id, config, persona_id
        ) VALUES (?, ?, ?, 0, 0, '', 0, '', 'Migrated from v0.7.2',
                  'Auto-created crawl for snapshots migrated from v0.7.2', '',
                  'sealed', ?, 1, NULL, '{}', NULL)
    """, [crawl_id, now, now, now])

    # Assign all snapshots without a crawl to the default crawl
    cursor.execute("UPDATE core_snapshot SET crawl_id = ? WHERE crawl_id IS NULL", [crawl_id])

    print(f'✓ Assigned {snapshots_without_crawl} snapshots to default crawl {crawl_id}')


class Migration(migrations.Migration):

    dependencies = [
        ('core', '0023_upgrade_to_0_9_0'),
        ('crawls', '0001_initial'),
        ('auth', '0012_alter_user_first_name_max_length'),
    ]

    operations = [
        migrations.RunPython(
            create_default_crawl_and_assign_snapshots,
            reverse_code=migrations.RunPython.noop,
        ),
        # Now make crawl_id NOT NULL
        migrations.RunSQL(
            sql="""
            -- Rebuild snapshot table with NOT NULL crawl_id
            CREATE TABLE core_snapshot_final (
                id TEXT PRIMARY KEY NOT NULL,
                created_at DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP,
                modified_at DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP,

                url TEXT NOT NULL,
                timestamp VARCHAR(32) NOT NULL UNIQUE,
                bookmarked_at DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP,

                crawl_id TEXT NOT NULL,
                parent_snapshot_id TEXT,

                title VARCHAR(512),
                downloaded_at DATETIME,
                depth INTEGER NOT NULL DEFAULT 0,
                fs_version VARCHAR(10) NOT NULL DEFAULT '0.9.0',

                config TEXT NOT NULL DEFAULT '{}',
                notes TEXT NOT NULL DEFAULT '',
                num_uses_succeeded INTEGER NOT NULL DEFAULT 0,
                num_uses_failed INTEGER NOT NULL DEFAULT 0,

                status VARCHAR(15) NOT NULL DEFAULT 'queued',
                retry_at DATETIME,
                current_step INTEGER NOT NULL DEFAULT 0,

                FOREIGN KEY (crawl_id) REFERENCES crawls_crawl(id) ON DELETE CASCADE,
                FOREIGN KEY (parent_snapshot_id) REFERENCES core_snapshot(id) ON DELETE SET NULL
            );

            INSERT INTO core_snapshot_final SELECT * FROM core_snapshot;

            DROP TABLE core_snapshot;
            ALTER TABLE core_snapshot_final RENAME TO core_snapshot;

            CREATE INDEX core_snapshot_url_idx ON core_snapshot(url);
            CREATE INDEX core_snapshot_timestamp_idx ON core_snapshot(timestamp);
            CREATE INDEX core_snapshot_bookmarked_at_idx ON core_snapshot(bookmarked_at);
            CREATE INDEX core_snapshot_crawl_id_idx ON core_snapshot(crawl_id);
            CREATE INDEX core_snapshot_status_idx ON core_snapshot(status);
            CREATE INDEX core_snapshot_retry_at_idx ON core_snapshot(retry_at);
            CREATE INDEX core_snapshot_created_at_idx ON core_snapshot(created_at);
            CREATE UNIQUE INDEX core_snapshot_url_crawl_unique ON core_snapshot(url, crawl_id);
            """,
            reverse_sql=migrations.RunSQL.noop,
        ),
    ]
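Note: one fragile spot in the rebuild above is `INSERT INTO core_snapshot_final SELECT * FROM core_snapshot;`, which relies on both tables declaring exactly the same columns in exactly the same order (true here by construction, since the CREATE TABLE mirrors 0023's). A more defensive variant would name the columns on both sides; a sketch using the column names from this migration:

# Defensive variant (sketch): name columns explicitly instead of relying on order.
COPY_SQL = """
INSERT INTO core_snapshot_final (
    id, created_at, modified_at, url, timestamp, bookmarked_at,
    crawl_id, parent_snapshot_id, title, downloaded_at, depth, fs_version,
    config, notes, num_uses_succeeded, num_uses_failed, status, retry_at, current_step
)
SELECT
    id, created_at, modified_at, url, timestamp, bookmarked_at,
    crawl_id, parent_snapshot_id, title, downloaded_at, depth, fs_version,
    config, notes, num_uses_succeeded, num_uses_failed, status, retry_at, current_step
FROM core_snapshot;
"""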
archivebox/core/migrations/0024_b_clear_config_fields.py (deleted, 57 lines)
@@ -1,57 +0,0 @@
# Data migration to clear config fields that may contain invalid JSON
# This runs before 0025 to prevent CHECK constraint failures

from django.db import migrations


def clear_config_fields(apps, schema_editor):
    """Clear all config fields in related tables to avoid JSON validation errors."""
    # Disable foreign key checks temporarily to allow updates
    with schema_editor.connection.cursor() as cursor:
        cursor.execute("PRAGMA foreign_keys=OFF")

    # Each table's existence is checked per-table below before updating it.
    tables_to_clear = [
        ('crawls_seed', 'config'),
        ('crawls_crawl', 'config'),
        ('crawls_crawlschedule', 'config'),
        ('machine_machine', 'stats'),
        ('machine_machine', 'config'),
    ]

    for table_name, field_name in tables_to_clear:
        try:
            with schema_editor.connection.cursor() as cursor:
                # Check if table exists first
                cursor.execute(f"SELECT name FROM sqlite_master WHERE type='table' AND name='{table_name}'")
                if not cursor.fetchone():
                    print(f"  Skipping {table_name}.{field_name}: table does not exist")
                    continue

                # Set all to empty JSON object
                cursor.execute(f"UPDATE {table_name} SET {field_name} = '{{}}' WHERE {field_name} IS NOT NULL")
                print(f"  Cleared {field_name} in {table_name}: {cursor.rowcount} rows")
        except Exception as e:
            print(f"  Skipping {table_name}.{field_name}: {e}")

    # Re-enable foreign key checks
    with schema_editor.connection.cursor() as cursor:
        cursor.execute("PRAGMA foreign_keys=ON")


class Migration(migrations.Migration):

    dependencies = [
        ('core', '0023_new_schema'),
        ('crawls', '0001_initial'),
        ('machine', '0001_squashed'),
    ]

    operations = [
        migrations.RunPython(clear_config_fields, reverse_code=migrations.RunPython.noop),
    ]
archivebox/core/migrations/0024_c_disable_fk_checks.py (deleted, 28 lines)
@@ -1,28 +0,0 @@
# Disable foreign key checks before 0025 to prevent CHECK constraint validation errors

from django.db import migrations


def disable_fk_checks(apps, schema_editor):
    """Temporarily disable foreign key checks."""
    with schema_editor.connection.cursor() as cursor:
        cursor.execute("PRAGMA foreign_keys=OFF")
        print("  Disabled foreign key checks")


def enable_fk_checks(apps, schema_editor):
    """Re-enable foreign key checks."""
    with schema_editor.connection.cursor() as cursor:
        cursor.execute("PRAGMA foreign_keys=ON")
        print("  Enabled foreign key checks")


class Migration(migrations.Migration):

    dependencies = [
        ('core', '0024_b_clear_config_fields'),
    ]

    operations = [
        migrations.RunPython(disable_fk_checks, reverse_code=enable_fk_checks),
    ]
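Note: a caveat on these PRAGMA toggles (here and in 0024_b): SQLite silently ignores `PRAGMA foreign_keys` while a transaction is open, and on SQLite Django wraps each migration in a transaction unless the migration opts out. If the toggle is load-bearing, the migration likely needs `atomic = False`; a rough sketch:

# Sketch: opt out of the per-migration transaction so the PRAGMA actually applies.
from django.db import migrations


class Migration(migrations.Migration):
    atomic = False  # PRAGMA foreign_keys is a no-op inside an open transaction

    dependencies = [('core', '0024_b_clear_config_fields')]

    operations = [
        # ... RunPython(disable_fk_checks, reverse_code=enable_fk_checks) as above ...
    ]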
archivebox/core/migrations/0024_d_fix_crawls_config.py (deleted, 93 lines)
@@ -1,93 +0,0 @@
# Fix crawls_crawl config field to avoid CHECK constraint errors during table rebuilds

from django.db import migrations


def fix_crawls_config(apps, schema_editor):
    """
    Rebuild crawls_crawl table to fix CHECK constraints and make seed_id nullable.
    Only runs for UPGRADES from 0.8.x (when crawls.0001_initial didn't exist yet).
    For fresh installs, crawls.0001_initial creates the correct schema.
    """
    with schema_editor.connection.cursor() as cursor:
        # Check if this is an upgrade from old 0.8.x or a fresh install
        # In fresh installs, crawls.0001_initial was applied, creating seed FK
        # In upgrades, the table was created by old migrations before 0001_initial existed
        cursor.execute("""
            SELECT COUNT(*) FROM django_migrations
            WHERE app='crawls' AND name='0001_initial'
        """)
        has_crawls_0001 = cursor.fetchone()[0] > 0

        if has_crawls_0001:
            # Fresh install - crawls.0001_initial already created the correct schema
            # Just clear config to avoid CHECK constraint issues
            print("  Fresh install detected - clearing config field only")
            try:
                cursor.execute('UPDATE "crawls_crawl" SET "config" = NULL')
            except Exception as e:
                print(f"  Skipping config clear: {e}")
            return

        # Upgrade from 0.8.x - rebuild table to make seed_id nullable and remove CHECK constraint
        print("  Upgrading from 0.8.x - rebuilding crawls_crawl table")
        cursor.execute("PRAGMA foreign_keys=OFF")

        # Backup
        cursor.execute("CREATE TABLE crawls_crawl_backup AS SELECT * FROM crawls_crawl")

        # Recreate without config CHECK constraint, with nullable seed_id
        cursor.execute("DROP TABLE crawls_crawl")
        cursor.execute("""
            CREATE TABLE "crawls_crawl" (
                "num_uses_failed" integer unsigned NOT NULL CHECK ("num_uses_failed" >= 0),
                "num_uses_succeeded" integer unsigned NOT NULL CHECK ("num_uses_succeeded" >= 0),
                "id" char(32) NOT NULL PRIMARY KEY,
                "created_at" datetime NOT NULL,
                "modified_at" datetime NOT NULL,
                "urls" text NOT NULL,
                "config" text,
                "max_depth" smallint unsigned NOT NULL CHECK ("max_depth" >= 0),
                "tags_str" varchar(1024) NOT NULL,
                "persona_id" char(32) NULL,
                "label" varchar(64) NOT NULL,
                "notes" text NOT NULL,
                "output_dir" varchar(512) NOT NULL,
                "status" varchar(15) NOT NULL,
                "retry_at" datetime NULL,
                "created_by_id" integer NOT NULL REFERENCES "auth_user" ("id") DEFERRABLE INITIALLY DEFERRED,
                "seed_id" char(32) NULL DEFAULT NULL,
                "schedule_id" char(32) NULL REFERENCES "crawls_crawlschedule" ("id") DEFERRABLE INITIALLY DEFERRED
            )
        """)

        # Restore data
        cursor.execute("""
            INSERT INTO "crawls_crawl" (
                "num_uses_failed", "num_uses_succeeded", "id", "created_at", "modified_at",
                "urls", "config", "max_depth", "tags_str", "persona_id", "label", "notes",
                "output_dir", "status", "retry_at", "created_by_id", "seed_id", "schedule_id"
            )
            SELECT
                "num_uses_failed", "num_uses_succeeded", "id", "created_at", "modified_at",
                "urls", "config", "max_depth", "tags_str", "persona_id", "label", "notes",
                "output_dir", "status", "retry_at", "created_by_id", "seed_id", "schedule_id"
            FROM crawls_crawl_backup
        """)

        cursor.execute("DROP TABLE crawls_crawl_backup")

        # NULL out config to avoid any invalid JSON
        cursor.execute('UPDATE "crawls_crawl" SET "config" = NULL')


class Migration(migrations.Migration):

    dependencies = [
        ('core', '0024_c_disable_fk_checks'),
        ('crawls', '0001_initial'),
    ]

    operations = [
        migrations.RunPython(fix_crawls_config, reverse_code=migrations.RunPython.noop),
    ]
archivebox/core/migrations/0024_snapshot_crawl.py (deleted, 38 lines)
@@ -1,38 +0,0 @@
# Generated by Django 5.0.6 on 2024-12-25
# Adds crawl FK and iface FK after crawls and machine apps are created

from django.db import migrations, models
import django.db.models.deletion


class Migration(migrations.Migration):

    dependencies = [
        ('core', '0024_d_fix_crawls_config'),
    ]

    operations = [
        # Add crawl FK to Snapshot
        migrations.AddField(
            model_name='snapshot',
            name='crawl',
            field=models.ForeignKey(
                default=None, null=True, blank=True,
                on_delete=django.db.models.deletion.CASCADE,
                related_name='snapshot_set',
                to='crawls.crawl',
                db_index=True,
            ),
        ),

        # Add network interface FK to ArchiveResult
        migrations.AddField(
            model_name='archiveresult',
            name='iface',
            field=models.ForeignKey(
                null=True, blank=True,
                on_delete=django.db.models.deletion.SET_NULL,
                to='machine.networkinterface',
            ),
        ),
    ]
archivebox/core/migrations/0025_allow_duplicate_urls_per_crawl.py (deleted, 22 lines)
@@ -1,22 +0,0 @@
from django.db import migrations, models


class Migration(migrations.Migration):

    dependencies = [
        ('core', '0024_snapshot_crawl'),
    ]

    operations = [
        # Remove the unique constraint on url
        migrations.AlterField(
            model_name='snapshot',
            name='url',
            field=models.URLField(db_index=True, unique=False),
        ),
        # Add unique constraint on (url, crawl) combination
        migrations.AddConstraint(
            model_name='snapshot',
            constraint=models.UniqueConstraint(fields=['url', 'crawl'], name='unique_url_per_crawl'),
        ),
    ]
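Note: one subtlety with `unique_url_per_crawl`: in SQL (SQLite included), NULLs compare as distinct inside unique constraints, so snapshots whose crawl FK is still NULL at this point in the chain are not deduplicated by this constraint at the database level. That matches the intent here, since uniqueness is only meaningful within a crawl, but if the NULL case ever mattered, a conditional constraint could cover it; a hypothetical sketch, not in this migration:

# Sketch: a second, conditional constraint covering crawl-less rows (hypothetical).
from django.db import models

extra_constraint = models.UniqueConstraint(
    fields=['url'],
    condition=models.Q(crawl__isnull=True),  # partial unique index, works on SQLite
    name='unique_url_without_crawl',
)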
archivebox/core/migrations/0026_remove_archiveresult_output_dir_and_more.py (deleted, 145 lines)
@@ -1,145 +0,0 @@
# Generated by Django 6.0 on 2025-12-25 09:34

import archivebox.base_models.models
import django.db.models.deletion
import django.utils.timezone
from archivebox import uuid_compat
from django.conf import settings
from django.db import migrations, models


def populate_archiveresult_uuids(apps, schema_editor):
    """Generate unique UUIDs for ArchiveResults that don't have one."""
    # Check if uuid column exists before trying to populate it
    with schema_editor.connection.cursor() as cursor:
        cursor.execute("PRAGMA table_info(core_archiveresult)")
        columns = [row[1] for row in cursor.fetchall()]
        if 'uuid' not in columns:
            return  # uuid column doesn't exist, skip this data migration

    ArchiveResult = apps.get_model('core', 'ArchiveResult')
    for result in ArchiveResult.objects.filter(uuid__isnull=True):
        result.uuid = uuid_compat.uuid7()
        result.save(update_fields=['uuid'])


def reverse_populate_uuids(apps, schema_editor):
    """Reverse migration - do nothing, UUIDs can stay."""
    pass


def remove_output_dir_if_exists(apps, schema_editor):
    """Remove output_dir columns if they exist."""
    with schema_editor.connection.cursor() as cursor:
        # Check and remove from core_archiveresult
        cursor.execute("PRAGMA table_info(core_archiveresult)")
        columns = [row[1] for row in cursor.fetchall()]
        if 'output_dir' in columns:
            cursor.execute("ALTER TABLE core_archiveresult DROP COLUMN output_dir")

        # Check and remove from core_snapshot
        cursor.execute("PRAGMA table_info(core_snapshot)")
        columns = [row[1] for row in cursor.fetchall()]
        if 'output_dir' in columns:
            cursor.execute("ALTER TABLE core_snapshot DROP COLUMN output_dir")


class Migration(migrations.Migration):

    dependencies = [
        ('core', '0025_allow_duplicate_urls_per_crawl'),
        migrations.swappable_dependency(settings.AUTH_USER_MODEL),
    ]

    operations = [
        # FIRST: Populate UUIDs for existing NULL rows BEFORE any schema changes
        migrations.RunPython(populate_archiveresult_uuids, reverse_populate_uuids),

        # Remove output_dir fields (not needed, computed from snapshot)
        migrations.RunPython(remove_output_dir_if_exists, reverse_code=migrations.RunPython.noop),

        # Update Django's migration state to match 0.9.x schema
        # Database already has correct types from 0.8.x, just update state
        migrations.SeparateDatabaseAndState(
            state_operations=[
                # ArchiveResult field alterations
                migrations.AlterField(
                    model_name='archiveresult',
                    name='created_at',
                    field=models.DateTimeField(db_index=True, default=django.utils.timezone.now),
                ),
                migrations.AlterField(
                    model_name='archiveresult',
                    name='created_by',
                    field=models.ForeignKey(default=archivebox.base_models.models.get_or_create_system_user_pk, on_delete=django.db.models.deletion.CASCADE, related_name='archiveresult_set', to=settings.AUTH_USER_MODEL),
                ),
                migrations.AlterField(
                    model_name='archiveresult',
                    name='extractor',
                    field=models.CharField(db_index=True, max_length=32),
                ),
                # Convert id from AutoField to UUIDField (database already has UUID CHAR(32))
                migrations.AlterField(
                    model_name='archiveresult',
                    name='id',
                    field=models.UUIDField(default=uuid_compat.uuid7, editable=False, primary_key=True, serialize=False, unique=True),
                ),
                migrations.AlterField(
                    model_name='archiveresult',
                    name='status',
                    field=models.CharField(choices=[('queued', 'Queued'), ('started', 'Started'), ('backoff', 'Waiting to retry'), ('succeeded', 'Succeeded'), ('failed', 'Failed'), ('skipped', 'Skipped')], db_index=True, default='queued', max_length=15),
                ),

                # Snapshot field alterations
                migrations.AlterField(
                    model_name='snapshot',
                    name='bookmarked_at',
                    field=models.DateTimeField(db_index=True, default=django.utils.timezone.now),
                ),
                migrations.AlterField(
                    model_name='snapshot',
                    name='created_at',
                    field=models.DateTimeField(db_index=True, default=django.utils.timezone.now),
                ),
                migrations.AlterField(
                    model_name='snapshot',
                    name='created_by',
                    field=models.ForeignKey(default=archivebox.base_models.models.get_or_create_system_user_pk, on_delete=django.db.models.deletion.CASCADE, related_name='snapshot_set', to=settings.AUTH_USER_MODEL),
                ),
                migrations.AlterField(
                    model_name='snapshot',
                    name='downloaded_at',
                    field=models.DateTimeField(blank=True, db_index=True, default=None, editable=False, null=True),
                ),
                migrations.AlterField(
                    model_name='snapshot',
                    name='id',
                    field=models.UUIDField(default=uuid_compat.uuid7, editable=False, primary_key=True, serialize=False, unique=True),
                ),
            ],
            database_operations=[
                # No actual database changes needed - schema is already correct from 0.8.x
            ],
        ),

        # SnapshotTag and Tag alterations - state only, DB already correct
        migrations.SeparateDatabaseAndState(
            state_operations=[
                migrations.AlterField(
                    model_name='snapshottag',
                    name='id',
                    field=models.AutoField(primary_key=True, serialize=False),
                ),
                migrations.AlterField(
                    model_name='tag',
                    name='created_by',
                    field=models.ForeignKey(default=archivebox.base_models.models.get_or_create_system_user_pk, on_delete=django.db.models.deletion.CASCADE, related_name='tag_set', to=settings.AUTH_USER_MODEL),
                ),
                migrations.AlterUniqueTogether(
                    name='snapshottag',
                    unique_together={('snapshot', 'tag')},
                ),
            ],
            database_operations=[],
        ),
    ]
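Note: the state-only pattern used heavily above is worth spelling out. SeparateDatabaseAndState lets a migration tell Django's in-memory model state one thing while doing something else (or nothing) to the database, which is how these migrations reconcile models with a schema that raw SQL already built. A minimal sketch with an illustrative field:

# Minimal sketch of the state-only sync pattern (illustrative field).
from django.db import migrations, models


class Migration(migrations.Migration):
    dependencies = []  # hypothetical

    operations = [
        migrations.SeparateDatabaseAndState(
            state_operations=[
                # Django's model state records the new definition...
                migrations.AlterField('snapshot', 'title', models.CharField(max_length=512, null=True, blank=True)),
            ],
            database_operations=[],  # ...but no SQL runs; the DB already matches.
        ),
    ]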
archivebox/core/migrations/0027_alter_archiveresult_created_by_and_more.py (deleted, 29 lines)
@@ -1,29 +0,0 @@
# Generated by Django 6.0 on 2025-12-27 01:40

import archivebox.base_models.models
import django.db.models.deletion
from django.conf import settings
from django.db import migrations, models


class Migration(migrations.Migration):

    dependencies = [
        ('core', '0026_remove_archiveresult_output_dir_and_more'),
        migrations.swappable_dependency(settings.AUTH_USER_MODEL),
    ]

    operations = [
        migrations.AlterField(
            model_name='archiveresult',
            name='created_by',
            field=models.ForeignKey(default=archivebox.base_models.models.get_or_create_system_user_pk, on_delete=django.db.models.deletion.CASCADE, related_name='archiveresult_set', to=settings.AUTH_USER_MODEL),
        ),
        migrations.AlterField(
            model_name='snapshot',
            name='created_by',
            field=models.ForeignKey(default=archivebox.base_models.models.get_or_create_system_user_pk, on_delete=django.db.models.deletion.CASCADE, related_name='snapshot_set', to=settings.AUTH_USER_MODEL),
        ),
        # Note: Cannot alter M2M tags field via migration (Django limitation)
        # The related_name change is handled by the model definition itself
    ]
archivebox/core/migrations/0028_snapshot_fs_version.py (deleted, 47 lines)
@@ -1,47 +0,0 @@
# Generated by Claude Code on 2025-12-27

from django.db import migrations, models


def set_existing_snapshots_to_old_version(apps, schema_editor):
    """Set existing snapshots to 0.8.0 since they use the old filesystem layout."""
    Snapshot = apps.get_model('core', 'Snapshot')
    # Set all existing snapshots to 0.8.0 (the previous version's layout)
    Snapshot.objects.all().update(fs_version='0.8.0')


def reverse_migration(apps, schema_editor):
    """Reverse migration - do nothing."""
    pass


class Migration(migrations.Migration):

    dependencies = [
        ('core', '0027_alter_archiveresult_created_by_and_more'),
    ]

    operations = [
        # Add field with temporary default to allow NULL initially
        migrations.AddField(
            model_name='snapshot',
            name='fs_version',
            field=models.CharField(
                max_length=10,
                default='0.8.0',  # Temporary default for adding the column
                help_text='Filesystem version of this snapshot (e.g., "0.7.0", "0.8.0", "0.9.0"). Used to trigger lazy migration on save().'
            ),
        ),
        # Set existing snapshots to old version
        migrations.RunPython(set_existing_snapshots_to_old_version, reverse_migration),
        # Update default to current version for new snapshots going forward
        migrations.AlterField(
            model_name='snapshot',
            name='fs_version',
            field=models.CharField(
                max_length=10,
                default='0.9.0',  # Hardcoded for this migration - new migration when version bumps
                help_text='Filesystem version of this snapshot (e.g., "0.7.0", "0.8.0", "0.9.0"). Used to trigger lazy migration on save().'
            ),
        ),
    ]
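Note: the fs_version dance above is the standard three-step for introducing a versioning column: add it with the old value as its default (so existing rows are stamped correctly when the column is created), backfill explicitly, then flip the default to the current version for rows created afterwards. Compressed to its skeleton (operations taken from this migration; the RunPython callables are the ones defined above):

# Skeleton of the add → backfill → re-default pattern.
from django.db import migrations, models

operations = [
    migrations.AddField('snapshot', 'fs_version', models.CharField(max_length=10, default='0.8.0')),
    migrations.RunPython(set_existing_snapshots_to_old_version, reverse_migration),  # defined above
    migrations.AlterField('snapshot', 'fs_version', models.CharField(max_length=10, default='0.9.0')),
]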
archivebox/core/migrations/0029_archiveresult_hook_fields.py (deleted, 91 lines)
@@ -1,91 +0,0 @@
# Generated by Django for hook architecture support
# Phase 1: Add new ArchiveResult fields for hook output

from django.db import migrations, models
import django.db.models.deletion


class Migration(migrations.Migration):

    dependencies = [
        ('core', '0028_snapshot_fs_version'),
        ('machine', '0002_rename_custom_cmds_to_overrides'),
    ]

    operations = [
        # Add new output fields using SeparateDatabaseAndState to avoid table rebuilds
        migrations.SeparateDatabaseAndState(
            state_operations=[
                migrations.AddField(
                    model_name='archiveresult',
                    name='output_str',
                    field=models.TextField(
                        blank=True,
                        default='',
                        help_text='Human-readable output summary (e.g., "Downloaded 5 files")'
                    ),
                ),
                migrations.AddField(
                    model_name='archiveresult',
                    name='output_json',
                    field=models.JSONField(
                        null=True,
                        blank=True,
                        default=None,
                        help_text='Structured metadata (headers, redirects, etc.) - should NOT duplicate ArchiveResult fields'
                    ),
                ),
                migrations.AddField(
                    model_name='archiveresult',
                    name='output_files',
                    field=models.JSONField(
                        default=dict,
                        help_text='Dict of {relative_path: {metadata}} - values are empty dicts for now, extensible for future metadata'
                    ),
                ),
                migrations.AddField(
                    model_name='archiveresult',
                    name='output_size',
                    field=models.BigIntegerField(
                        default=0,
                        help_text='Total recursive size in bytes of all output files'
                    ),
                ),
                migrations.AddField(
                    model_name='archiveresult',
                    name='output_mimetypes',
                    field=models.CharField(
                        max_length=512,
                        blank=True,
                        default='',
                        help_text='CSV of mimetypes sorted by size descending'
                    ),
                ),
                migrations.AddField(
                    model_name='archiveresult',
                    name='binary',
                    field=models.ForeignKey(
                        'machine.Binary',
                        on_delete=models.SET_NULL,
                        null=True,
                        blank=True,
                        related_name='archiveresults',
                        help_text='Primary binary used by this hook (optional)'
                    ),
                ),
            ],
            database_operations=[
                migrations.RunSQL(
                    sql="""
                    ALTER TABLE core_archiveresult ADD COLUMN output_str TEXT DEFAULT '';
                    ALTER TABLE core_archiveresult ADD COLUMN output_json TEXT;
                    ALTER TABLE core_archiveresult ADD COLUMN output_files TEXT DEFAULT '{}';
                    ALTER TABLE core_archiveresult ADD COLUMN output_size BIGINT DEFAULT 0;
                    ALTER TABLE core_archiveresult ADD COLUMN output_mimetypes VARCHAR(512) DEFAULT '';
                    ALTER TABLE core_archiveresult ADD COLUMN binary_id CHAR(32) REFERENCES machine_binary(id);
                    """,
                    reverse_sql=migrations.RunSQL.noop,
                ),
            ],
        ),
    ]
archivebox/core/migrations/0030_migrate_output_field.py (deleted, 83 lines)
@@ -1,83 +0,0 @@
# Generated by Django for hook architecture support
# Phase 1: Migrate existing 'output' field to new split fields

from django.db import migrations
import json


def migrate_output_field(apps, schema_editor):
    """
    Migrate existing 'output' field to new split fields.

    Logic:
    - If output contains JSON {...}, move to output_json
    - Otherwise, move to output_str

    Use raw SQL to avoid CHECK constraint issues during migration.
    """
    # Use raw SQL to migrate data without triggering CHECK constraints
    with schema_editor.connection.cursor() as cursor:
        # Get all archive results
        cursor.execute("""
            SELECT id, output FROM core_archiveresult
        """)

        for row in cursor.fetchall():
            ar_id, old_output = row
            old_output = old_output or ''

            # Case 1: JSON output
            if old_output.strip().startswith('{'):
                try:
                    # Validate it's actual JSON
                    parsed = json.loads(old_output)
                    # Update with JSON - cast to JSON to satisfy CHECK constraint
                    json_str = json.dumps(parsed)
                    cursor.execute("""
                        UPDATE core_archiveresult
                        SET output_str = '', output_json = json(?)
                        WHERE id = ?
                    """, (json_str, ar_id))
                except json.JSONDecodeError:
                    # Not valid JSON, treat as string
                    cursor.execute("""
                        UPDATE core_archiveresult
                        SET output_str = ?, output_json = NULL
                        WHERE id = ?
                    """, (old_output, ar_id))
            # Case 2: File path or plain string
            else:
                cursor.execute("""
                    UPDATE core_archiveresult
                    SET output_str = ?, output_json = NULL
                    WHERE id = ?
                """, (old_output, ar_id))


def reverse_migrate(apps, schema_editor):
    """Reverse migration - copy output_str back to output."""
    ArchiveResult = apps.get_model('core', 'ArchiveResult')

    for ar in ArchiveResult.objects.all().iterator():
        if ar.output_json:
            ar.output = json.dumps(ar.output_json)
        else:
            ar.output = ar.output_str or ''
        ar.save(update_fields=['output'])


class Migration(migrations.Migration):

    dependencies = [
        ('core', '0029_archiveresult_hook_fields'),
    ]

    operations = [
        migrations.RunPython(migrate_output_field, reverse_migrate),

        # Now safe to remove old 'output' field
        migrations.RemoveField(
            model_name='archiveresult',
            name='output',
        ),
    ]
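Note: the JSON-vs-string split above is easy to sanity-check in isolation; the snippet below replays the same branching on a few representative values (a standalone sketch using only the stdlib, not repo code):

# Worked example of the output split logic (standalone sketch).
import json

for old_output in ['{"headers": {"Server": "nginx"}}', 'output/wget/index.html', '{not json']:
    if old_output.strip().startswith('{'):
        try:
            output_json, output_str = json.loads(old_output), ''
        except json.JSONDecodeError:
            output_json, output_str = None, old_output  # brace-prefixed but invalid JSON
    else:
        output_json, output_str = None, old_output      # file path or plain string
    print(repr(output_str), output_json)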
archivebox/core/migrations/0031_snapshot_parent_snapshot.py (deleted, 27 lines)
@@ -1,27 +0,0 @@
# Generated by Django 6.0 on 2025-12-27

import django.db.models.deletion
from django.db import migrations, models


class Migration(migrations.Migration):

    dependencies = [
        ('core', '0030_migrate_output_field'),
    ]

    operations = [
        migrations.AddField(
            model_name='snapshot',
            name='parent_snapshot',
            field=models.ForeignKey(
                blank=True,
                db_index=True,
                help_text='Parent snapshot that discovered this URL (for recursive crawling)',
                null=True,
                on_delete=django.db.models.deletion.SET_NULL,
                related_name='child_snapshots',
                to='core.snapshot'
            ),
        ),
    ]
@@ -1,77 +0,0 @@
# Generated by Django 6.0 on 2025-12-28 05:12

import django.db.models.deletion
from archivebox import uuid_compat
from django.conf import settings
from django.db import migrations, models


class Migration(migrations.Migration):

    dependencies = [
        ('core', '0031_snapshot_parent_snapshot'),
        ('crawls', '0004_alter_crawl_output_dir'),
        ('machine', '0004_drop_dependency_table'),  # Changed from 0003 - wait until Dependency is dropped
        migrations.swappable_dependency(settings.AUTH_USER_MODEL),
    ]

    operations = [
        # Update Django's state only - database already has correct schema from 0029
        migrations.SeparateDatabaseAndState(
            state_operations=[
                migrations.AlterField(
                    model_name='archiveresult',
                    name='binary',
                    field=models.ForeignKey(blank=True, help_text='Primary binary used by this hook', null=True, on_delete=django.db.models.deletion.SET_NULL, related_name='archiveresults', to='machine.binary'),
                ),
                migrations.AlterField(
                    model_name='archiveresult',
                    name='output_files',
                    field=models.JSONField(default=dict, help_text='Dict of {relative_path: {metadata}}'),
                ),
                migrations.AlterField(
                    model_name='archiveresult',
                    name='output_json',
                    field=models.JSONField(blank=True, default=None, help_text='Structured metadata (headers, redirects, etc.)', null=True),
                ),
                migrations.AlterField(
                    model_name='archiveresult',
                    name='output_mimetypes',
                    field=models.CharField(blank=True, default='', help_text='CSV of mimetypes sorted by size', max_length=512),
                ),
                migrations.AlterField(
                    model_name='archiveresult',
                    name='output_size',
                    field=models.BigIntegerField(default=0, help_text='Total bytes of all output files'),
                ),
                migrations.AlterField(
                    model_name='archiveresult',
                    name='output_str',
                    field=models.TextField(blank=True, default='', help_text='Human-readable output summary'),
                ),
                migrations.AlterField(
                    model_name='archiveresult',
                    name='uuid',
                    field=models.UUIDField(blank=True, db_index=True, default=uuid_compat.uuid7, null=True),
                ),
            ],
            database_operations=[
                # No database changes needed - columns already exist with correct types
            ],
        ),
        # Add unique constraint without table rebuild
        migrations.SeparateDatabaseAndState(
            state_operations=[
                migrations.AddConstraint(
                    model_name='snapshot',
                    constraint=models.UniqueConstraint(fields=('timestamp',), name='unique_timestamp'),
                ),
            ],
            database_operations=[
                migrations.RunSQL(
                    sql="CREATE UNIQUE INDEX IF NOT EXISTS unique_timestamp ON core_snapshot (timestamp);",
                    reverse_sql="DROP INDEX IF EXISTS unique_timestamp;",
                ),
            ],
        ),
    ]
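
# --- Illustrative sketch (standalone, in-memory; not part of the migration) ---
# Why "CREATE UNIQUE INDEX IF NOT EXISTS" pairs safely with a state-only
# AddConstraint: the statement is idempotent, so it tolerates databases where
# an earlier schema already created the index.

import sqlite3

con = sqlite3.connect(':memory:')
con.execute('CREATE TABLE core_snapshot (timestamp TEXT)')
for _ in range(2):  # second run is a no-op instead of an "index already exists" error
    con.execute('CREATE UNIQUE INDEX IF NOT EXISTS unique_timestamp ON core_snapshot (timestamp)')
con.execute("INSERT INTO core_snapshot VALUES ('1735000000.0')")
try:
    con.execute("INSERT INTO core_snapshot VALUES ('1735000000.0')")
except sqlite3.IntegrityError:
    pass  # duplicate timestamp rejected, as the constraint intends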
@@ -1,44 +0,0 @@
# Generated by Django 6.0 on 2025-12-28

from django.db import migrations, models


class Migration(migrations.Migration):

    dependencies = [
        ('core', '0032_alter_archiveresult_binary_and_more'),
    ]

    operations = [
        # Use SeparateDatabaseAndState to avoid table rebuilds that would re-add CHECK constraints
        migrations.SeparateDatabaseAndState(
            state_operations=[
                migrations.RenameField(
                    model_name='archiveresult',
                    old_name='extractor',
                    new_name='plugin',
                ),
                migrations.AddField(
                    model_name='archiveresult',
                    name='hook_name',
                    field=models.CharField(
                        blank=True,
                        default='',
                        max_length=255,
                        db_index=True,
                        help_text='Full filename of the hook that executed (e.g., on_Snapshot__50_wget.py)',
                    ),
                ),
            ],
            database_operations=[
                migrations.RunSQL(
                    sql="""
                        ALTER TABLE core_archiveresult RENAME COLUMN extractor TO plugin;
                        ALTER TABLE core_archiveresult ADD COLUMN hook_name VARCHAR(255) DEFAULT '' NOT NULL;
                        CREATE INDEX IF NOT EXISTS core_archiveresult_hook_name_idx ON core_archiveresult (hook_name);
                    """,
                    reverse_sql=migrations.RunSQL.noop,
                ),
            ],
        ),
    ]
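
# --- Note (editorial, not part of the migration) ---
# reverse_sql is a no-op here, so unapplying this migration rolls back Django's
# state but leaves the renamed column and new index in the database. A true
# reverse would need something like:
#
#     DROP INDEX IF EXISTS core_archiveresult_hook_name_idx;
#     ALTER TABLE core_archiveresult RENAME COLUMN plugin TO extractor;
#
# (Dropping the hook_name column itself requires SQLite 3.35+ for
# ALTER TABLE ... DROP COLUMN, or a full table rebuild on older versions.)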
@@ -1,37 +0,0 @@
# Generated by Django 6.0 on 2025-12-28
# Add Snapshot.current_step field for hook step-based execution

from django.db import migrations, models


class Migration(migrations.Migration):

    dependencies = [
        ('core', '0033_rename_extractor_add_hook_name'),
    ]

    operations = [
        # Use SeparateDatabaseAndState to avoid table rebuild that would fail on config NOT NULL constraint
        migrations.SeparateDatabaseAndState(
            state_operations=[
                migrations.AddField(
                    model_name='snapshot',
                    name='current_step',
                    field=models.PositiveSmallIntegerField(
                        default=0,
                        db_index=True,
                        help_text='Current hook step being executed (0-9). Used for sequential hook execution.',
                    ),
                ),
            ],
            database_operations=[
                migrations.RunSQL(
                    sql="""
                        ALTER TABLE core_snapshot ADD COLUMN current_step SMALLINT UNSIGNED DEFAULT 0 NOT NULL;
                        CREATE INDEX IF NOT EXISTS core_snapshot_current_step_idx ON core_snapshot (current_step);
                    """,
                    reverse_sql=migrations.RunSQL.noop,
                ),
            ],
        ),
    ]
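
# --- Note (editorial, not part of the migration) ---
# In SQLite, "SMALLINT UNSIGNED" is only a type-affinity hint: any declared
# type containing "INT" gets INTEGER affinity, and unsignedness is not
# enforced by the engine. The 0-9 range is enforced by the application and by
# PositiveSmallIntegerField validation, not by this column definition.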
@@ -1,87 +0,0 @@
# Generated migration

from django.conf import settings
from django.db import migrations, models
import django.db.models.deletion


def create_catchall_crawls_and_assign_snapshots(apps, schema_editor):
    """
    Create one catchall Crawl per user for all snapshots without a crawl.
    Assign those snapshots to their user's catchall crawl.
    """
    Snapshot = apps.get_model('core', 'Snapshot')
    Crawl = apps.get_model('crawls', 'Crawl')
    User = apps.get_model(settings.AUTH_USER_MODEL)

    # Get all snapshots without a crawl
    snapshots_without_crawl = Snapshot.objects.filter(crawl__isnull=True)

    if not snapshots_without_crawl.exists():
        return

    # Group by created_by_id
    snapshots_by_user = {}
    for snapshot in snapshots_without_crawl:
        user_id = snapshot.created_by_id
        if user_id not in snapshots_by_user:
            snapshots_by_user[user_id] = []
        snapshots_by_user[user_id].append(snapshot)

    # Create one catchall crawl per user and assign snapshots
    for user_id, snapshots in snapshots_by_user.items():
        try:
            user = User.objects.get(pk=user_id)
            username = user.username
        except User.DoesNotExist:
            username = 'unknown'

        # Create catchall crawl for this user
        crawl = Crawl.objects.create(
            urls=f'# Catchall crawl for {len(snapshots)} snapshots without a crawl',
            max_depth=0,
            label=f'[migration] catchall for user {username}',
            created_by_id=user_id,
        )

        # Assign all snapshots to this crawl
        for snapshot in snapshots:
            snapshot.crawl = crawl
            snapshot.save(update_fields=['crawl'])


class Migration(migrations.Migration):

    dependencies = [
        ('core', '0034_snapshot_current_step'),
        ('crawls', '0005_drop_seed_id_column'),
    ]

    operations = [
        # Step 1: Assign all snapshots without a crawl to catchall crawls
        migrations.RunPython(
            create_catchall_crawls_and_assign_snapshots,
            reverse_code=migrations.RunPython.noop,
        ),

        # Step 2 & 3: Update Django's state only - leave created_by_id column in database (unused but harmless)
        migrations.SeparateDatabaseAndState(
            state_operations=[
                # Make crawl non-nullable
                migrations.AlterField(
                    model_name='snapshot',
                    name='crawl',
                    field=models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='snapshot_set', to='crawls.crawl'),
                ),
                # Remove created_by field from Django's state
                migrations.RemoveField(
                    model_name='snapshot',
                    name='created_by',
                ),
            ],
            database_operations=[
                # No database changes - crawl_id already exists and NOT NULL constraint will be enforced by model
                # created_by_id column remains in database but is unused
            ],
        ),
    ]
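
# --- Illustrative alternative (hypothetical, not the project's code) ---
# The per-snapshot save() above issues one UPDATE per row. An equivalent bulk
# form, one UPDATE per user, would look roughly like:
#
#     for user_id, snaps in snapshots_by_user.items():
#         crawl = Crawl.objects.create(..., created_by_id=user_id)
#         Snapshot.objects.filter(pk__in=[s.pk for s in snaps]).update(crawl=crawl)
#
# The row-by-row version was presumably kept for simplicity; at typical
# archive sizes either approach completes quickly.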
@@ -1,27 +0,0 @@
# Generated migration

from django.db import migrations


class Migration(migrations.Migration):

    dependencies = [
        ('core', '0035_snapshot_crawl_non_nullable_remove_created_by'),
    ]

    operations = [
        # Remove created_by field from ArchiveResult (state only)
        # No data migration needed - created_by can be accessed via snapshot.crawl.created_by
        # Leave created_by_id column in database (unused but harmless, avoids table rebuild)
        migrations.SeparateDatabaseAndState(
            state_operations=[
                migrations.RemoveField(
                    model_name='archiveresult',
                    name='created_by',
                ),
            ],
            database_operations=[
                # No database changes - leave created_by_id column in place to avoid table rebuild
            ],
        ),
    ]
@@ -1,44 +0,0 @@
# Generated by Django 6.0 on 2025-12-29 06:45

from django.db import migrations, models


class Migration(migrations.Migration):

    dependencies = [
        ('core', '0037_remove_archiveresult_output_dir_and_more'),
    ]

    operations = [
        # Update Django's state only - database columns remain for backwards compat
        migrations.SeparateDatabaseAndState(
            state_operations=[
                migrations.RemoveField(
                    model_name='archiveresult',
                    name='output_dir',
                ),
                migrations.RemoveField(
                    model_name='snapshot',
                    name='output_dir',
                ),
                migrations.AlterField(
                    model_name='archiveresult',
                    name='config',
                    field=models.JSONField(blank=True, default=dict, null=True),
                ),
                migrations.AlterField(
                    model_name='snapshot',
                    name='config',
                    field=models.JSONField(blank=True, default=dict, null=True),
                ),
                migrations.AlterField(
                    model_name='snapshot',
                    name='tags',
                    field=models.ManyToManyField(blank=True, related_name='snapshot_set', through='core.SnapshotTag', through_fields=('snapshot', 'tag'), to='core.tag'),
                ),
            ],
            database_operations=[
                # No database changes - columns remain in place to avoid table rebuilds
            ],
        ),
    ]
@@ -1,84 +0,0 @@
# Add missing columns to ArchiveResult and remove created_by_id from Snapshot

from django.db import migrations, models, connection
import django.utils.timezone


def add_columns_if_not_exist(apps, schema_editor):
    """Add columns to ArchiveResult only if they don't already exist."""
    # Uses the global default-alias connection; equivalent to
    # schema_editor.connection on single-database setups
    with connection.cursor() as cursor:
        # Get existing columns
        cursor.execute("PRAGMA table_info(core_archiveresult)")
        existing_columns = {row[1] for row in cursor.fetchall()}

        # Add num_uses_failed if it doesn't exist
        if 'num_uses_failed' not in existing_columns:
            cursor.execute("ALTER TABLE core_archiveresult ADD COLUMN num_uses_failed integer unsigned NOT NULL DEFAULT 0 CHECK (num_uses_failed >= 0)")

        # Add num_uses_succeeded if it doesn't exist
        if 'num_uses_succeeded' not in existing_columns:
            cursor.execute("ALTER TABLE core_archiveresult ADD COLUMN num_uses_succeeded integer unsigned NOT NULL DEFAULT 0 CHECK (num_uses_succeeded >= 0)")

        # Add config if it doesn't exist
        if 'config' not in existing_columns:
            cursor.execute("ALTER TABLE core_archiveresult ADD COLUMN config text NULL")

        # Add retry_at if it doesn't exist
        if 'retry_at' not in existing_columns:
            cursor.execute("ALTER TABLE core_archiveresult ADD COLUMN retry_at datetime NULL")
            cursor.execute("CREATE INDEX IF NOT EXISTS core_archiveresult_retry_at_idx ON core_archiveresult(retry_at)")


class Migration(migrations.Migration):

    dependencies = [
        ('core', '0037_remove_archiveresult_output_dir_and_more'),
    ]

    operations = [
        # Add missing columns to ArchiveResult
        migrations.SeparateDatabaseAndState(
            state_operations=[
                migrations.AddField(
                    model_name='archiveresult',
                    name='num_uses_failed',
                    field=models.PositiveIntegerField(default=0),
                ),
                migrations.AddField(
                    model_name='archiveresult',
                    name='num_uses_succeeded',
                    field=models.PositiveIntegerField(default=0),
                ),
                migrations.AddField(
                    model_name='archiveresult',
                    name='config',
                    field=models.JSONField(blank=True, default=dict, null=True),
                ),
                migrations.AddField(
                    model_name='archiveresult',
                    name='retry_at',
                    field=models.DateTimeField(blank=True, db_index=True, default=django.utils.timezone.now, null=True),
                ),
            ],
            database_operations=[
                migrations.RunPython(add_columns_if_not_exist, reverse_code=migrations.RunPython.noop),
            ],
        ),

        # Drop created_by_id from Snapshot (database only, already removed from model in 0035)
        migrations.SeparateDatabaseAndState(
            state_operations=[
                # No state changes - field already removed in 0035
            ],
            database_operations=[
                migrations.RunSQL(
                    sql="""
                        -- Drop index first, then column
                        DROP INDEX IF EXISTS core_snapshot_created_by_id_6dbd6149;
                        ALTER TABLE core_snapshot DROP COLUMN created_by_id;
                    """,
                    reverse_sql=migrations.RunSQL.noop,
                ),
            ],
        ),
    ]
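
# --- Illustrative sketch (standalone, in-memory; not part of the migration) ---
# The PRAGMA table_info pattern used by add_columns_if_not_exist(): column
# names sit at position 1 of each returned row, so a membership check makes
# each ALTER conditional and the whole function safely re-runnable.

import sqlite3

con = sqlite3.connect(':memory:')
con.execute('CREATE TABLE core_archiveresult (id INTEGER PRIMARY KEY)')
for _ in range(2):  # second pass finds the column and skips the ALTER
    existing = {row[1] for row in con.execute('PRAGMA table_info(core_archiveresult)')}
    if 'retry_at' not in existing:
        con.execute('ALTER TABLE core_archiveresult ADD COLUMN retry_at datetime NULL')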
@@ -1,30 +0,0 @@
# Fix num_uses_failed, num_uses_succeeded, and depth string values to integers

from django.db import migrations


class Migration(migrations.Migration):

    dependencies = [
        ('core', '0038_fix_missing_columns'),
    ]

    operations = [
        # Fix string values that got inserted as literals instead of integers
        migrations.RunSQL(
            sql="""
                UPDATE core_snapshot
                SET num_uses_failed = 0
                WHERE typeof(num_uses_failed) = 'text' OR num_uses_failed = 'num_uses_failed';

                UPDATE core_snapshot
                SET num_uses_succeeded = 0
                WHERE typeof(num_uses_succeeded) = 'text' OR num_uses_succeeded = 'num_uses_succeeded';

                UPDATE core_snapshot
                SET depth = 0
                WHERE typeof(depth) = 'text' OR depth = 'depth';
            """,
            reverse_sql=migrations.RunSQL.noop,
        ),
    ]
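
# --- Illustrative sketch (standalone, in-memory; not part of the migration) ---
# Why typeof() is the right probe here: SQLite's flexible typing happily stores
# a string in an INTEGER-affinity column, which is presumably how literals like
# 'depth' ended up in these columns in the first place.

import sqlite3

con = sqlite3.connect(':memory:')
con.execute('CREATE TABLE core_snapshot (depth INTEGER)')
con.execute("INSERT INTO core_snapshot VALUES ('depth')")  # accepted despite INTEGER affinity
con.execute("UPDATE core_snapshot SET depth = 0 WHERE typeof(depth) = 'text'")
assert con.execute('SELECT depth FROM core_snapshot').fetchone()[0] == 0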