# Source: ArchiveBox/archivebox/core/migrations/0023_new_schema.py
# Commit: 6c769d831c "wip 2" (Nick Sweeting, 2025-12-24 21:46:14 -08:00)
# Original file: 495 lines, 19 KiB, Python

# Generated by Django 5.0.6 on 2024-12-25
# Transforms schema from 0022 to new simplified schema (ABID system removed)
from uuid import uuid4
from django.conf import settings
from django.db import migrations, models
import django.db.models.deletion
import django.utils.timezone
def get_or_create_system_user_pk(apps, schema_editor):
    """Return the primary key of the ``system`` user, creating it if missing.

    The user model is resolved through ``settings.AUTH_USER_MODEL`` so the row
    is created in the same table that the ``created_by`` foreign keys added by
    this migration point at. Under default settings this is ``auth.User``,
    i.e. exactly the model that was looked up before.

    Args:
        apps: App registry exposing ``get_model`` (same signature as the
            ``RunPython`` callables in this file).
        schema_editor: Unused; accepted to match that signature.

    Returns:
        The ``pk`` of the ``system`` user.
    """
    User = apps.get_model(settings.AUTH_USER_MODEL)
    user, _ = User.objects.get_or_create(
        username='system',
        # Only applied when the row is created: the account is inactive and
        # gets a bare '!' password (presumably an unusable-password marker --
        # confirm against Django's password hashers).
        defaults={'is_active': False, 'password': '!'},
    )
    return user.pk
def populate_created_by_snapshot(apps, schema_editor):
    """Assign the ``system`` user to every snapshot that has no ``created_by``.

    Runs between adding ``Snapshot.created_by`` as a nullable column and
    tightening it to non-nullable, so no row may be left NULL afterwards.

    Args:
        apps: Historical app registry supplied by ``RunPython``.
        schema_editor: Schema editor supplied by ``RunPython``; only forwarded.
    """
    Snapshot = apps.get_model('core', 'Snapshot')
    # Reuse the shared helper instead of repeating the get_or_create() call.
    system_user_pk = get_or_create_system_user_pk(apps, schema_editor)
    Snapshot.objects.filter(created_by__isnull=True).update(created_by_id=system_user_pk)
def populate_created_by_archiveresult(apps, schema_editor):
    """Assign the ``system`` user to every archive result that has no ``created_by``.

    Runs between adding ``ArchiveResult.created_by`` as a nullable column and
    tightening it to non-nullable, so no row may be left NULL afterwards.

    Args:
        apps: Historical app registry supplied by ``RunPython``.
        schema_editor: Schema editor supplied by ``RunPython``; only forwarded.
    """
    ArchiveResult = apps.get_model('core', 'ArchiveResult')
    # Reuse the shared helper instead of repeating the get_or_create() call.
    system_user_pk = get_or_create_system_user_pk(apps, schema_editor)
    ArchiveResult.objects.filter(created_by__isnull=True).update(created_by_id=system_user_pk)
def populate_created_by_tag(apps, schema_editor):
    """Assign the ``system`` user to every tag that has no ``created_by``.

    Args:
        apps: Historical app registry supplied by ``RunPython``.
        schema_editor: Schema editor supplied by ``RunPython``; only forwarded.
    """
    Tag = apps.get_model('core', 'Tag')
    # Reuse the shared helper instead of repeating the get_or_create() call.
    system_user_pk = get_or_create_system_user_pk(apps, schema_editor)
    Tag.objects.filter(created_by__isnull=True).update(created_by_id=system_user_pk)
def generate_uuid_for_archiveresults(apps, schema_editor):
    """Give every archive result its own UUID.

    ``ArchiveResult.uuid`` is added with ``default=uuid4``. Django evaluates a
    callable default once when adding a column and writes that single value
    into all pre-existing rows, so after ``AddField`` the rows are not NULL --
    they all share one UUID. Filtering on ``uuid IS NULL`` alone therefore
    matches nothing. This backfill refreshes rows whose UUID is NULL *or* is
    shared with another row, and leaves already-unique values untouched.

    Args:
        apps: Historical app registry supplied by ``RunPython``.
        schema_editor: Unused; required by the ``RunPython`` signature.
    """
    ArchiveResult = apps.get_model('core', 'ArchiveResult')
    results = ArchiveResult.objects

    # UUID values carried by more than one row (normally just the one default
    # value stamped onto all rows by AddField). order_by() clears any ordering
    # so the GROUP BY is on ``uuid`` only.
    shared_uuids = list(
        results.exclude(uuid__isnull=True)
        .order_by()
        .values('uuid')
        .annotate(row_count=models.Count('pk'))
        .filter(row_count__gt=1)
        .values_list('uuid', flat=True)
    )

    needs_uuid = models.Q(uuid__isnull=True)
    if shared_uuids:
        needs_uuid |= models.Q(uuid__in=shared_uuids)

    # Materialise the primary keys before writing: the table is not modified
    # while a cursor is still iterating over it (unsafe on SQLite), and the
    # selection cannot shift as rows receive their new values.
    stale_pks = list(results.filter(needs_uuid).values_list('pk', flat=True))
    for pk in stale_pks:
        results.filter(pk=pk).update(uuid=uuid4())
def generate_uuid_for_tags(apps, schema_editor):
    """Assign a fresh UUID to each tag whose ``uuid`` is NULL.

    NOTE(review): this function is not referenced by the operations visible
    in this file, and no ``uuid`` field is added to ``Tag`` here -- confirm
    whether it is still needed.

    Args:
        apps: App registry exposing ``get_model``.
        schema_editor: Unused; kept for the ``RunPython`` callable signature.
    """
    tag_model = apps.get_model('core', 'Tag')
    tags_missing_uuid = tag_model.objects.filter(uuid__isnull=True)
    # Stream the rows in chunks of 500 and write back only the uuid column.
    for row in tags_missing_uuid.iterator(chunk_size=500):
        row.uuid = uuid4()
        row.save(update_fields=['uuid'])
def copy_bookmarked_at_from_added(apps, schema_editor):
    """Carry each snapshot's legacy ``added`` timestamp over to ``bookmarked_at``.

    ``bookmarked_at`` is added by this migration with
    ``default=django.utils.timezone.now``. Django evaluates that default once
    and writes it into every pre-existing row, so no row is NULL when this
    runs. Filtering on ``bookmarked_at IS NULL`` would copy nothing, and the
    real timestamps would be lost when ``added`` is removed later in the
    migration. The copy is therefore applied to every row that has an
    ``added`` value.

    Args:
        apps: Historical app registry supplied by ``RunPython``.
        schema_editor: Unused; required by the ``RunPython`` signature.
    """
    Snapshot = apps.get_model('core', 'Snapshot')
    # Rows without an ``added`` value keep the migration-time default rather
    # than being overwritten with NULL.
    Snapshot.objects.filter(added__isnull=False).update(
        bookmarked_at=models.F('added')
    )
def copy_created_at_from_added(apps, schema_editor):
    """Carry each snapshot's legacy ``added`` timestamp over to ``created_at``.

    ``created_at`` is added by this migration with
    ``default=django.utils.timezone.now``. Django evaluates that default once
    and writes it into every pre-existing row, so no row is NULL when this
    runs. Filtering on ``created_at IS NULL`` would copy nothing and leave
    every existing snapshot stamped with the migration time. The copy is
    therefore applied to every row that has an ``added`` value.

    Args:
        apps: Historical app registry supplied by ``RunPython``.
        schema_editor: Unused; required by the ``RunPython`` signature.
    """
    Snapshot = apps.get_model('core', 'Snapshot')
    # Rows without an ``added`` value keep the migration-time default rather
    # than being overwritten with NULL.
    Snapshot.objects.filter(added__isnull=False).update(
        created_at=models.F('added')
    )
def copy_created_at_from_start_ts(apps, schema_editor):
    """Carry each archive result's ``start_ts`` over to ``created_at``.

    ``created_at`` is added by this migration with
    ``default=django.utils.timezone.now``. Django evaluates that default once
    and writes it into every pre-existing row, so no row is NULL when this
    runs. Filtering on ``created_at IS NULL`` would copy nothing and leave
    every existing result stamped with the migration time. The copy is
    therefore applied to every row that has a ``start_ts`` value.

    Args:
        apps: Historical app registry supplied by ``RunPython``.
        schema_editor: Unused; required by the ``RunPython`` signature.
    """
    ArchiveResult = apps.get_model('core', 'ArchiveResult')
    # Rows without a ``start_ts`` keep the migration-time default rather than
    # being overwritten with NULL.
    ArchiveResult.objects.filter(start_ts__isnull=False).update(
        created_at=models.F('start_ts')
    )
class Migration(migrations.Migration):
    """
    Transform the ``core`` schema left by migration 0022 (main branch) into the
    new simplified schema without the ABID system.

    For dev branch users who had ABID migrations (0023-0074), this replaces them
    with a clean transformation (see ``replaces`` below).

    The operations are grouped by model (Snapshot, Tag, ArchiveResult). Within
    each group the order matters: columns are added first, then the RunPython
    steps fill them from existing data, and only afterwards are constraints
    tightened and legacy columns removed.
    """

    # Dev-branch migrations that this single migration stands in for.
    replaces = [
        ('core', '0023_alter_archiveresult_options_archiveresult_abid_and_more'),
        ('core', '0024_auto_20240513_1143'),
        ('core', '0025_alter_archiveresult_uuid'),
        ('core', '0026_archiveresult_created_archiveresult_created_by_and_more'),
        ('core', '0027_update_snapshot_ids'),
        ('core', '0028_alter_archiveresult_uuid'),
        ('core', '0029_alter_archiveresult_id'),
        ('core', '0030_alter_archiveresult_uuid'),
        ('core', '0031_alter_archiveresult_id_alter_archiveresult_uuid_and_more'),
        ('core', '0032_alter_archiveresult_id'),
        ('core', '0033_rename_id_archiveresult_old_id'),
        ('core', '0034_alter_archiveresult_old_id_alter_archiveresult_uuid'),
        ('core', '0035_remove_archiveresult_uuid_archiveresult_id'),
        ('core', '0036_alter_archiveresult_id_alter_archiveresult_old_id'),
        ('core', '0037_rename_id_snapshot_old_id'),
        ('core', '0038_rename_uuid_snapshot_id'),
        ('core', '0039_rename_snapshot_archiveresult_snapshot_old'),
        ('core', '0040_archiveresult_snapshot'),
        ('core', '0041_alter_archiveresult_snapshot_and_more'),
        ('core', '0042_remove_archiveresult_snapshot_old'),
        ('core', '0043_alter_archiveresult_snapshot_alter_snapshot_id_and_more'),
        ('core', '0044_alter_archiveresult_snapshot_alter_tag_uuid_and_more'),
        ('core', '0045_alter_snapshot_old_id'),
        ('core', '0046_alter_archiveresult_snapshot_alter_snapshot_id_and_more'),
        ('core', '0047_alter_snapshottag_unique_together_and_more'),
        ('core', '0048_alter_archiveresult_snapshot_and_more'),
        ('core', '0049_rename_snapshot_snapshottag_snapshot_old_and_more'),
        ('core', '0050_alter_snapshottag_snapshot_old'),
        ('core', '0051_snapshottag_snapshot_alter_snapshottag_snapshot_old'),
        ('core', '0052_alter_snapshottag_unique_together_and_more'),
        ('core', '0053_remove_snapshottag_snapshot_old'),
        ('core', '0054_alter_snapshot_timestamp'),
        ('core', '0055_alter_tag_slug'),
        ('core', '0056_remove_tag_uuid'),
        ('core', '0057_rename_id_tag_old_id'),
        ('core', '0058_alter_tag_old_id'),
        ('core', '0059_tag_id'),
        ('core', '0060_alter_tag_id'),
        ('core', '0061_rename_tag_snapshottag_old_tag_and_more'),
        ('core', '0062_alter_snapshottag_old_tag'),
        ('core', '0063_snapshottag_tag_alter_snapshottag_old_tag'),
        ('core', '0064_alter_snapshottag_unique_together_and_more'),
        ('core', '0065_remove_snapshottag_old_tag'),
        ('core', '0066_alter_snapshottag_tag_alter_tag_id_alter_tag_old_id'),
        ('core', '0067_alter_snapshottag_tag'),
        ('core', '0068_alter_archiveresult_options'),
        ('core', '0069_alter_archiveresult_created_alter_snapshot_added_and_more'),
        ('core', '0070_alter_archiveresult_created_by_alter_snapshot_added_and_more'),
        ('core', '0071_remove_archiveresult_old_id_remove_snapshot_old_id_and_more'),
        ('core', '0072_rename_added_snapshot_bookmarked_at_and_more'),
        ('core', '0073_rename_created_archiveresult_created_at_and_more'),
        ('core', '0074_alter_snapshot_downloaded_at'),
    ]

    # Builds on the last main-branch migration and on whichever app provides
    # the configured user model (target of the created_by foreign keys).
    dependencies = [
        ('core', '0022_auto_20231023_2008'),
        migrations.swappable_dependency(settings.AUTH_USER_MODEL),
    ]

    operations = [
        # === SNAPSHOT CHANGES ===
        # Add health stats fields to Snapshot
        migrations.AddField(
            model_name='snapshot',
            name='num_uses_failed',
            field=models.PositiveIntegerField(default=0),
        ),
        migrations.AddField(
            model_name='snapshot',
            name='num_uses_succeeded',
            field=models.PositiveIntegerField(default=0),
        ),
        # Add new fields to Snapshot
        # created_by starts out nullable so existing rows can be backfilled
        # before the column is made required further down.
        migrations.AddField(
            model_name='snapshot',
            name='created_by',
            field=models.ForeignKey(
                default=None, null=True, blank=True,
                on_delete=django.db.models.deletion.CASCADE,
                related_name='snapshot_set',
                to=settings.AUTH_USER_MODEL,
            ),
        ),
        # NOTE(review): per Django's AddField semantics a callable default is
        # evaluated once and that one value is written into every pre-existing
        # row, so existing rows are not NULL in created_at / bookmarked_at
        # after these steps. The real historical values are expected to come
        # from the RunPython copy steps below -- confirm they overwrite rather
        # than only fill NULLs.
        migrations.AddField(
            model_name='snapshot',
            name='created_at',
            field=models.DateTimeField(default=django.utils.timezone.now, db_index=True, null=True),
        ),
        migrations.AddField(
            model_name='snapshot',
            name='modified_at',
            field=models.DateTimeField(auto_now=True),
        ),
        migrations.AddField(
            model_name='snapshot',
            name='bookmarked_at',
            field=models.DateTimeField(default=django.utils.timezone.now, db_index=True, null=True),
        ),
        migrations.AddField(
            model_name='snapshot',
            name='downloaded_at',
            field=models.DateTimeField(default=None, null=True, blank=True, db_index=True),
        ),
        migrations.AddField(
            model_name='snapshot',
            name='depth',
            field=models.PositiveSmallIntegerField(default=0, db_index=True),
        ),
        migrations.AddField(
            model_name='snapshot',
            name='status',
            field=models.CharField(choices=[('queued', 'Queued'), ('started', 'Started'), ('sealed', 'Sealed')], default='queued', max_length=15, db_index=True),
        ),
        migrations.AddField(
            model_name='snapshot',
            name='retry_at',
            field=models.DateTimeField(default=django.utils.timezone.now, null=True, blank=True, db_index=True),
        ),
        migrations.AddField(
            model_name='snapshot',
            name='config',
            field=models.JSONField(default=dict, blank=False),
        ),
        migrations.AddField(
            model_name='snapshot',
            name='notes',
            field=models.TextField(blank=True, default=''),
        ),
        migrations.AddField(
            model_name='snapshot',
            name='output_dir',
            field=models.CharField(max_length=256, default=None, null=True, blank=True),
        ),
        # Copy data from old fields to new
        # (must run before 'added' is removed below; reverse is a no-op)
        migrations.RunPython(copy_bookmarked_at_from_added, migrations.RunPython.noop),
        migrations.RunPython(copy_created_at_from_added, migrations.RunPython.noop),
        migrations.RunPython(populate_created_by_snapshot, migrations.RunPython.noop),
        # Make created_by non-nullable after population
        migrations.AlterField(
            model_name='snapshot',
            name='created_by',
            field=models.ForeignKey(
                on_delete=django.db.models.deletion.CASCADE,
                related_name='snapshot_set',
                to=settings.AUTH_USER_MODEL,
                db_index=True,
            ),
        ),
        # Update timestamp field constraints
        migrations.AlterField(
            model_name='snapshot',
            name='timestamp',
            field=models.CharField(max_length=32, unique=True, db_index=True, editable=False),
        ),
        # Update title field size
        migrations.AlterField(
            model_name='snapshot',
            name='title',
            field=models.CharField(max_length=512, null=True, blank=True, db_index=True),
        ),
        # Remove old 'added' and 'updated' fields
        # (their values are only preserved via the copy steps above)
        migrations.RemoveField(model_name='snapshot', name='added'),
        migrations.RemoveField(model_name='snapshot', name='updated'),
        # Register SnapshotTag through model (table already exists from 0006's ManyToManyField)
        # State-only: the model is added to Django's migration state while no
        # SQL is run against the database.
        migrations.SeparateDatabaseAndState(
            state_operations=[
                migrations.CreateModel(
                    name='SnapshotTag',
                    fields=[
                        ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
                        ('snapshot', models.ForeignKey(db_column='snapshot_id', on_delete=django.db.models.deletion.CASCADE, to='core.snapshot')),
                        ('tag', models.ForeignKey(db_column='tag_id', on_delete=django.db.models.deletion.CASCADE, to='core.tag')),
                    ],
                    options={
                        'db_table': 'core_snapshot_tags',
                    },
                ),
            ],
            database_operations=[],  # Table already exists from 0006
        ),
        # === TAG CHANGES ===
        # Tag keeps AutoField (integer) id for migration compatibility
        # Add tracking fields to Tag
        migrations.AddField(
            model_name='tag',
            name='created_by',
            field=models.ForeignKey(
                default=None, null=True, blank=True,
                on_delete=django.db.models.deletion.CASCADE,
                related_name='tag_set',
                to=settings.AUTH_USER_MODEL,
            ),
        ),
        migrations.AddField(
            model_name='tag',
            name='created_at',
            field=models.DateTimeField(default=django.utils.timezone.now, db_index=True, null=True),
        ),
        migrations.AddField(
            model_name='tag',
            name='modified_at',
            field=models.DateTimeField(auto_now=True),
        ),
        # Populate created_by for tags
        # (unlike Snapshot/ArchiveResult, no AlterField follows here, so
        # Tag.created_by stays nullable in this migration)
        migrations.RunPython(populate_created_by_tag, migrations.RunPython.noop),
        # Update slug field
        migrations.AlterField(
            model_name='tag',
            name='slug',
            field=models.SlugField(unique=True, max_length=100, editable=False),
        ),
        # === ARCHIVERESULT CHANGES ===
        # Add health stats fields to ArchiveResult
        migrations.AddField(
            model_name='archiveresult',
            name='num_uses_failed',
            field=models.PositiveIntegerField(default=0),
        ),
        migrations.AddField(
            model_name='archiveresult',
            name='num_uses_succeeded',
            field=models.PositiveIntegerField(default=0),
        ),
        # Add uuid field for new ID
        # NOTE(review): the callable default is evaluated once for existing
        # rows, so they all start out with the same UUID until the RunPython
        # backfill below assigns distinct ones -- confirm the backfill covers
        # non-NULL duplicates.
        migrations.AddField(
            model_name='archiveresult',
            name='uuid',
            field=models.UUIDField(default=uuid4, null=True, blank=True),
        ),
        # created_by starts out nullable so existing rows can be backfilled
        # before the column is made required further down.
        migrations.AddField(
            model_name='archiveresult',
            name='created_by',
            field=models.ForeignKey(
                default=None, null=True, blank=True,
                on_delete=django.db.models.deletion.CASCADE,
                related_name='archiveresult_set',
                to=settings.AUTH_USER_MODEL,
            ),
        ),
        migrations.AddField(
            model_name='archiveresult',
            name='created_at',
            field=models.DateTimeField(default=django.utils.timezone.now, db_index=True, null=True),
        ),
        migrations.AddField(
            model_name='archiveresult',
            name='modified_at',
            field=models.DateTimeField(auto_now=True),
        ),
        migrations.AddField(
            model_name='archiveresult',
            name='retry_at',
            field=models.DateTimeField(default=django.utils.timezone.now, null=True, blank=True, db_index=True),
        ),
        migrations.AddField(
            model_name='archiveresult',
            name='notes',
            field=models.TextField(blank=True, default=''),
        ),
        migrations.AddField(
            model_name='archiveresult',
            name='output_dir',
            field=models.CharField(max_length=256, default=None, null=True, blank=True),
        ),
        migrations.AddField(
            model_name='archiveresult',
            name='config',
            field=models.JSONField(default=dict, blank=False),
        ),
        # Populate UUIDs and data for archive results
        # (reverse is a no-op for each step)
        migrations.RunPython(generate_uuid_for_archiveresults, migrations.RunPython.noop),
        migrations.RunPython(copy_created_at_from_start_ts, migrations.RunPython.noop),
        migrations.RunPython(populate_created_by_archiveresult, migrations.RunPython.noop),
        # Make created_by non-nullable
        migrations.AlterField(
            model_name='archiveresult',
            name='created_by',
            field=models.ForeignKey(
                on_delete=django.db.models.deletion.CASCADE,
                related_name='archiveresult_set',
                to=settings.AUTH_USER_MODEL,
                db_index=True,
            ),
        ),
        # Update extractor choices
        migrations.AlterField(
            model_name='archiveresult',
            name='extractor',
            field=models.CharField(
                choices=[
                    ('htmltotext', 'htmltotext'), ('git', 'git'), ('singlefile', 'singlefile'),
                    ('media', 'media'), ('archive_org', 'archive_org'), ('readability', 'readability'),
                    ('mercury', 'mercury'), ('favicon', 'favicon'), ('pdf', 'pdf'),
                    ('headers', 'headers'), ('screenshot', 'screenshot'), ('dom', 'dom'),
                    ('title', 'title'), ('wget', 'wget'),
                ],
                max_length=32, db_index=True,
            ),
        ),
        # Update status field
        migrations.AlterField(
            model_name='archiveresult',
            name='status',
            field=models.CharField(
                choices=[
                    ('queued', 'Queued'), ('started', 'Started'), ('backoff', 'Waiting to retry'),
                    ('succeeded', 'Succeeded'), ('failed', 'Failed'), ('skipped', 'Skipped'),
                ],
                max_length=16, default='queued', db_index=True,
            ),
        ),
        # Update output field size
        migrations.AlterField(
            model_name='archiveresult',
            name='output',
            field=models.CharField(max_length=1024, default=None, null=True, blank=True),
        ),
        # Update cmd_version field size
        migrations.AlterField(
            model_name='archiveresult',
            name='cmd_version',
            field=models.CharField(max_length=128, default=None, null=True, blank=True),
        ),
        # Make start_ts and end_ts nullable
        # (after copy_created_at_from_start_ts has already read start_ts)
        migrations.AlterField(
            model_name='archiveresult',
            name='start_ts',
            field=models.DateTimeField(default=None, null=True, blank=True),
        ),
        migrations.AlterField(
            model_name='archiveresult',
            name='end_ts',
            field=models.DateTimeField(default=None, null=True, blank=True),
        ),
        # Make pwd nullable
        migrations.AlterField(
            model_name='archiveresult',
            name='pwd',
            field=models.CharField(max_length=256, default=None, null=True, blank=True),
        ),
        # Make cmd nullable
        migrations.AlterField(
            model_name='archiveresult',
            name='cmd',
            field=models.JSONField(default=None, null=True, blank=True),
        ),
        # Update model options
        # (human-readable names only)
        migrations.AlterModelOptions(
            name='archiveresult',
            options={'verbose_name': 'Archive Result', 'verbose_name_plural': 'Archive Results Log'},
        ),
        migrations.AlterModelOptions(
            name='snapshot',
            options={'verbose_name': 'Snapshot', 'verbose_name_plural': 'Snapshots'},
        ),
        migrations.AlterModelOptions(
            name='tag',
            options={'verbose_name': 'Tag', 'verbose_name_plural': 'Tags'},
        ),
    ]