mirror of
https://github.com/ArchiveBox/ArchiveBox.git
synced 2026-01-04 09:55:33 +10:00
495 lines
19 KiB
Python
495 lines
19 KiB
Python
# Generated by Django 5.0.6 on 2024-12-25
|
|
# Transforms schema from 0022 to new simplified schema (ABID system removed)
|
|
|
|
from uuid import uuid4
|
|
from django.conf import settings
|
|
from django.db import migrations, models
|
|
import django.db.models.deletion
|
|
import django.utils.timezone
|
|
|
|
|
|
def get_or_create_system_user_pk(apps, schema_editor):
    """Return the primary key of the ``system`` user, creating the row if needed.

    The user model is resolved from ``settings.AUTH_USER_MODEL`` so that it is
    the same model this migration's ``created_by`` foreign keys and its
    ``swappable_dependency`` point at. Looking up ``auth.User`` by name would
    target the wrong model in a project that swaps the user model.

    Args:
        apps: Historical app registry passed in by the migration framework.
        schema_editor: Unused; accepted so the function matches the
            ``RunPython`` callable signature.

    Returns:
        The ``pk`` of the user whose ``username`` is ``'system'``.
    """
    # get_model() accepts a single "app_label.ModelName" string, which is the
    # format AUTH_USER_MODEL uses; for the default 'auth.User' this resolves
    # to the same model the previous two-argument call did.
    User = apps.get_model(settings.AUTH_USER_MODEL)
    user, _ = User.objects.get_or_create(
        username='system',
        # Only applied when the row is created: the account is inactive and
        # '!' is stored as-is (presumably intended as an unusable password).
        defaults={'is_active': False, 'password': '!'},
    )
    return user.pk
|
|
|
|
|
|
def populate_created_by_snapshot(apps, schema_editor):
    """Assign the ``system`` user to every snapshot that has no ``created_by``.

    The system account is obtained through ``get_or_create_system_user_pk``
    instead of repeating the same ``get_or_create`` call here, so the
    definition of that account lives in one place.

    Args:
        apps: Historical app registry passed in by the migration framework.
        schema_editor: Forwarded to the helper; not otherwise used.
    """
    Snapshot = apps.get_model('core', 'Snapshot')
    system_user_pk = get_or_create_system_user_pk(apps, schema_editor)

    # Only rows still lacking an owner are touched; the pk is written straight
    # to the foreign key's ``_id`` attribute, so no user instance is required.
    Snapshot.objects.filter(created_by__isnull=True).update(
        created_by_id=system_user_pk,
    )
|
|
|
|
|
|
def populate_created_by_archiveresult(apps, schema_editor):
    """Assign the ``system`` user to every archive result with no ``created_by``.

    The system account is obtained through ``get_or_create_system_user_pk``
    instead of repeating the same ``get_or_create`` call here, so the
    definition of that account lives in one place.

    Args:
        apps: Historical app registry passed in by the migration framework.
        schema_editor: Forwarded to the helper; not otherwise used.
    """
    ArchiveResult = apps.get_model('core', 'ArchiveResult')
    system_user_pk = get_or_create_system_user_pk(apps, schema_editor)

    # Only rows still lacking an owner are touched; the pk is written straight
    # to the foreign key's ``_id`` attribute, so no user instance is required.
    ArchiveResult.objects.filter(created_by__isnull=True).update(
        created_by_id=system_user_pk,
    )
|
|
|
|
|
|
def populate_created_by_tag(apps, schema_editor):
    """Assign the ``system`` user to every tag that has no ``created_by``.

    The system account is obtained through ``get_or_create_system_user_pk``
    instead of repeating the same ``get_or_create`` call here, so the
    definition of that account lives in one place.

    Args:
        apps: Historical app registry passed in by the migration framework.
        schema_editor: Forwarded to the helper; not otherwise used.
    """
    Tag = apps.get_model('core', 'Tag')
    system_user_pk = get_or_create_system_user_pk(apps, schema_editor)

    # Only rows still lacking an owner are touched; the pk is written straight
    # to the foreign key's ``_id`` attribute, so no user instance is required.
    Tag.objects.filter(created_by__isnull=True).update(
        created_by_id=system_user_pk,
    )
|
|
|
|
|
|
def generate_uuid_for_archiveresults(apps, schema_editor):
    """Make sure every archive result carries its own UUID.

    A row is given a fresh ``uuid4()`` when its ``uuid`` is NULL *or* when the
    same value appears on more than one row. The second case matters because
    adding a column with a callable default (``default=uuid4``) makes Django
    call it once and write that single value to all rows that already exist;
    filtering on NULL alone would then match nothing and leave every old row
    with an identical UUID. Rows that already hold a distinct UUID are left
    untouched.

    Args:
        apps: Historical app registry passed in by the migration framework.
        schema_editor: Unused; accepted so the function matches the
            ``RunPython`` callable signature.
    """
    ArchiveResult = apps.get_model('core', 'ArchiveResult')

    # UUID values present on more than one row. order_by() clears any ordering
    # so the grouping is on ``uuid`` alone. The result is materialised up
    # front so it is not re-evaluated while rows are being rewritten.
    shared_uuids = list(
        ArchiveResult.objects.filter(uuid__isnull=False)
        .order_by()
        .values('uuid')
        .annotate(row_count=models.Count('pk'))
        .filter(row_count__gt=1)
        .values_list('uuid', flat=True)
    )

    needs_uuid = models.Q(uuid__isnull=True)
    if shared_uuids:
        needs_uuid |= models.Q(uuid__in=shared_uuids)

    for ar in ArchiveResult.objects.filter(needs_uuid).iterator(chunk_size=500):
        ar.uuid = uuid4()
        # Restrict the UPDATE to the one column being backfilled.
        ar.save(update_fields=['uuid'])
|
|
|
|
|
|
def generate_uuid_for_tags(apps, schema_editor):
    """Backfill a fresh ``uuid4()`` on each tag whose ``uuid`` is NULL.

    Args:
        apps: Historical app registry passed in by the migration framework.
        schema_editor: Unused; accepted so the function matches the
            ``RunPython`` callable signature.

    NOTE(review): no operation visible in this file adds a ``uuid`` field to
    Tag or calls this function - confirm whether it is still needed.
    """
    tag_model = apps.get_model('core', 'Tag')
    tags_missing_uuid = tag_model.objects.filter(uuid__isnull=True)

    # Stream the rows in chunks and persist only the column that changed.
    for row in tags_missing_uuid.iterator(chunk_size=500):
        row.uuid = uuid4()
        row.save(update_fields=['uuid'])
|
|
|
|
|
|
def copy_bookmarked_at_from_added(apps, schema_editor):
    """Copy each snapshot's ``added`` timestamp into ``bookmarked_at``.

    The copy is applied to every row rather than only to rows where
    ``bookmarked_at`` is NULL. When a column is added with a default
    (here ``default=timezone.now``), Django writes that default into all rows
    that already exist, so none of them are NULL afterwards; a NULL-only
    filter would skip them all and the original ``added`` values would be
    lost once that field is removed. If the rows are NULL instead, the
    unconditional copy produces the same result the filtered one would.

    Args:
        apps: Historical app registry passed in by the migration framework.
        schema_editor: Unused; accepted so the function matches the
            ``RunPython`` callable signature.
    """
    Snapshot = apps.get_model('core', 'Snapshot')
    # F('added') makes the database copy the column value row by row in a
    # single UPDATE, without loading snapshots into Python.
    Snapshot.objects.all().update(bookmarked_at=models.F('added'))
|
|
|
|
|
|
def copy_created_at_from_added(apps, schema_editor):
    """Copy each snapshot's ``added`` timestamp into ``created_at``.

    The copy is applied to every row rather than only to rows where
    ``created_at`` is NULL. When a column is added with a default
    (here ``default=timezone.now``), Django writes that default into all rows
    that already exist, so none of them are NULL afterwards; a NULL-only
    filter would skip them all and the original ``added`` values would be
    lost once that field is removed. If the rows are NULL instead, the
    unconditional copy produces the same result the filtered one would.

    Args:
        apps: Historical app registry passed in by the migration framework.
        schema_editor: Unused; accepted so the function matches the
            ``RunPython`` callable signature.
    """
    Snapshot = apps.get_model('core', 'Snapshot')
    # F('added') makes the database copy the column value row by row in a
    # single UPDATE, without loading snapshots into Python.
    Snapshot.objects.all().update(created_at=models.F('added'))
|
|
|
|
|
|
def copy_created_at_from_start_ts(apps, schema_editor):
    """Copy each archive result's ``start_ts`` into ``created_at``.

    The copy is applied to every row rather than only to rows where
    ``created_at`` is NULL. When a column is added with a default
    (here ``default=timezone.now``), Django writes that default into all rows
    that already exist, so none of them are NULL afterwards; a NULL-only
    filter would skip them all and every old result would keep the time the
    migration ran. If the rows are NULL instead, the unconditional copy
    produces the same result the filtered one would.

    Args:
        apps: Historical app registry passed in by the migration framework.
        schema_editor: Unused; accepted so the function matches the
            ``RunPython`` callable signature.
    """
    ArchiveResult = apps.get_model('core', 'ArchiveResult')
    # F('start_ts') makes the database copy the column value row by row in a
    # single UPDATE, without loading archive results into Python.
    ArchiveResult.objects.all().update(created_at=models.F('start_ts'))
|
|
|
|
|
|
class Migration(migrations.Migration):
    """
    This migration transforms the schema from the main branch (0022) to the new
    simplified schema without the ABID system.

    For dev branch users who had ABID migrations (0023-0074), this replaces them
    with a clean transformation.

    The operations run in three groups - Snapshot, Tag, ArchiveResult. Within
    a group the order is: add the new columns, backfill them with RunPython,
    then tighten constraints and drop the superseded columns. Every RunPython
    step uses ``RunPython.noop`` as its reverse, so reversing this migration
    does not undo the data backfills.
    """

    # Dev-branch migrations that this single migration stands in for.
    replaces = [
        ('core', '0023_alter_archiveresult_options_archiveresult_abid_and_more'),
        ('core', '0024_auto_20240513_1143'),
        ('core', '0025_alter_archiveresult_uuid'),
        ('core', '0026_archiveresult_created_archiveresult_created_by_and_more'),
        ('core', '0027_update_snapshot_ids'),
        ('core', '0028_alter_archiveresult_uuid'),
        ('core', '0029_alter_archiveresult_id'),
        ('core', '0030_alter_archiveresult_uuid'),
        ('core', '0031_alter_archiveresult_id_alter_archiveresult_uuid_and_more'),
        ('core', '0032_alter_archiveresult_id'),
        ('core', '0033_rename_id_archiveresult_old_id'),
        ('core', '0034_alter_archiveresult_old_id_alter_archiveresult_uuid'),
        ('core', '0035_remove_archiveresult_uuid_archiveresult_id'),
        ('core', '0036_alter_archiveresult_id_alter_archiveresult_old_id'),
        ('core', '0037_rename_id_snapshot_old_id'),
        ('core', '0038_rename_uuid_snapshot_id'),
        ('core', '0039_rename_snapshot_archiveresult_snapshot_old'),
        ('core', '0040_archiveresult_snapshot'),
        ('core', '0041_alter_archiveresult_snapshot_and_more'),
        ('core', '0042_remove_archiveresult_snapshot_old'),
        ('core', '0043_alter_archiveresult_snapshot_alter_snapshot_id_and_more'),
        ('core', '0044_alter_archiveresult_snapshot_alter_tag_uuid_and_more'),
        ('core', '0045_alter_snapshot_old_id'),
        ('core', '0046_alter_archiveresult_snapshot_alter_snapshot_id_and_more'),
        ('core', '0047_alter_snapshottag_unique_together_and_more'),
        ('core', '0048_alter_archiveresult_snapshot_and_more'),
        ('core', '0049_rename_snapshot_snapshottag_snapshot_old_and_more'),
        ('core', '0050_alter_snapshottag_snapshot_old'),
        ('core', '0051_snapshottag_snapshot_alter_snapshottag_snapshot_old'),
        ('core', '0052_alter_snapshottag_unique_together_and_more'),
        ('core', '0053_remove_snapshottag_snapshot_old'),
        ('core', '0054_alter_snapshot_timestamp'),
        ('core', '0055_alter_tag_slug'),
        ('core', '0056_remove_tag_uuid'),
        ('core', '0057_rename_id_tag_old_id'),
        ('core', '0058_alter_tag_old_id'),
        ('core', '0059_tag_id'),
        ('core', '0060_alter_tag_id'),
        ('core', '0061_rename_tag_snapshottag_old_tag_and_more'),
        ('core', '0062_alter_snapshottag_old_tag'),
        ('core', '0063_snapshottag_tag_alter_snapshottag_old_tag'),
        ('core', '0064_alter_snapshottag_unique_together_and_more'),
        ('core', '0065_remove_snapshottag_old_tag'),
        ('core', '0066_alter_snapshottag_tag_alter_tag_id_alter_tag_old_id'),
        ('core', '0067_alter_snapshottag_tag'),
        ('core', '0068_alter_archiveresult_options'),
        ('core', '0069_alter_archiveresult_created_alter_snapshot_added_and_more'),
        ('core', '0070_alter_archiveresult_created_by_alter_snapshot_added_and_more'),
        ('core', '0071_remove_archiveresult_old_id_remove_snapshot_old_id_and_more'),
        ('core', '0072_rename_added_snapshot_bookmarked_at_and_more'),
        ('core', '0073_rename_created_archiveresult_created_at_and_more'),
        ('core', '0074_alter_snapshot_downloaded_at'),
    ]

    # Builds on the last main-branch migration and on whichever user model the
    # project configures, since the created_by foreign keys below point at it.
    dependencies = [
        ('core', '0022_auto_20231023_2008'),
        migrations.swappable_dependency(settings.AUTH_USER_MODEL),
    ]

    operations = [
        # === SNAPSHOT CHANGES ===

        # Add health stats fields to Snapshot
        migrations.AddField(
            model_name='snapshot',
            name='num_uses_failed',
            field=models.PositiveIntegerField(default=0),
        ),
        migrations.AddField(
            model_name='snapshot',
            name='num_uses_succeeded',
            field=models.PositiveIntegerField(default=0),
        ),

        # Add new fields to Snapshot
        # created_by starts out nullable so existing rows can be backfilled
        # before the NOT NULL constraint is applied further down.
        migrations.AddField(
            model_name='snapshot',
            name='created_by',
            field=models.ForeignKey(
                default=None, null=True, blank=True,
                on_delete=django.db.models.deletion.CASCADE,
                related_name='snapshot_set',
                to=settings.AUTH_USER_MODEL,
            ),
        ),
        # NOTE(review): Django fills pre-existing rows with the field default
        # when a column is added, so existing rows presumably start out with
        # the migration-time timestamp rather than NULL for created_at and
        # bookmarked_at - verify the backfill step accounts for that.
        migrations.AddField(
            model_name='snapshot',
            name='created_at',
            field=models.DateTimeField(default=django.utils.timezone.now, db_index=True, null=True),
        ),
        migrations.AddField(
            model_name='snapshot',
            name='modified_at',
            field=models.DateTimeField(auto_now=True),
        ),
        migrations.AddField(
            model_name='snapshot',
            name='bookmarked_at',
            field=models.DateTimeField(default=django.utils.timezone.now, db_index=True, null=True),
        ),
        migrations.AddField(
            model_name='snapshot',
            name='downloaded_at',
            field=models.DateTimeField(default=None, null=True, blank=True, db_index=True),
        ),
        migrations.AddField(
            model_name='snapshot',
            name='depth',
            field=models.PositiveSmallIntegerField(default=0, db_index=True),
        ),
        migrations.AddField(
            model_name='snapshot',
            name='status',
            field=models.CharField(choices=[('queued', 'Queued'), ('started', 'Started'), ('sealed', 'Sealed')], default='queued', max_length=15, db_index=True),
        ),
        migrations.AddField(
            model_name='snapshot',
            name='retry_at',
            field=models.DateTimeField(default=django.utils.timezone.now, null=True, blank=True, db_index=True),
        ),
        migrations.AddField(
            model_name='snapshot',
            name='config',
            field=models.JSONField(default=dict, blank=False),
        ),
        migrations.AddField(
            model_name='snapshot',
            name='notes',
            field=models.TextField(blank=True, default=''),
        ),
        migrations.AddField(
            model_name='snapshot',
            name='output_dir',
            field=models.CharField(max_length=256, default=None, null=True, blank=True),
        ),

        # Copy data from old fields to new
        # These must run before 'added' is removed below, because both copy
        # steps read from it.
        migrations.RunPython(copy_bookmarked_at_from_added, migrations.RunPython.noop),
        migrations.RunPython(copy_created_at_from_added, migrations.RunPython.noop),
        migrations.RunPython(populate_created_by_snapshot, migrations.RunPython.noop),

        # Make created_by non-nullable after population
        migrations.AlterField(
            model_name='snapshot',
            name='created_by',
            field=models.ForeignKey(
                on_delete=django.db.models.deletion.CASCADE,
                related_name='snapshot_set',
                to=settings.AUTH_USER_MODEL,
                db_index=True,
            ),
        ),

        # Update timestamp field constraints
        migrations.AlterField(
            model_name='snapshot',
            name='timestamp',
            field=models.CharField(max_length=32, unique=True, db_index=True, editable=False),
        ),

        # Update title field size
        migrations.AlterField(
            model_name='snapshot',
            name='title',
            field=models.CharField(max_length=512, null=True, blank=True, db_index=True),
        ),

        # Remove old 'added' and 'updated' fields
        migrations.RemoveField(model_name='snapshot', name='added'),
        migrations.RemoveField(model_name='snapshot', name='updated'),

        # Register SnapshotTag through model (table already exists from 0006's ManyToManyField)
        # Only the migration state changes here: database_operations is empty,
        # so no SQL is issued, and db_table / db_column name the existing
        # table and its columns.
        migrations.SeparateDatabaseAndState(
            state_operations=[
                migrations.CreateModel(
                    name='SnapshotTag',
                    fields=[
                        ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
                        ('snapshot', models.ForeignKey(db_column='snapshot_id', on_delete=django.db.models.deletion.CASCADE, to='core.snapshot')),
                        ('tag', models.ForeignKey(db_column='tag_id', on_delete=django.db.models.deletion.CASCADE, to='core.tag')),
                    ],
                    options={
                        'db_table': 'core_snapshot_tags',
                    },
                ),
            ],
            database_operations=[],  # Table already exists from 0006
        ),

        # === TAG CHANGES ===
        # Tag keeps AutoField (integer) id for migration compatibility

        # Add tracking fields to Tag
        # Unlike Snapshot and ArchiveResult, Tag.created_by is left nullable:
        # there is no follow-up AlterField for it in this migration.
        migrations.AddField(
            model_name='tag',
            name='created_by',
            field=models.ForeignKey(
                default=None, null=True, blank=True,
                on_delete=django.db.models.deletion.CASCADE,
                related_name='tag_set',
                to=settings.AUTH_USER_MODEL,
            ),
        ),
        migrations.AddField(
            model_name='tag',
            name='created_at',
            field=models.DateTimeField(default=django.utils.timezone.now, db_index=True, null=True),
        ),
        migrations.AddField(
            model_name='tag',
            name='modified_at',
            field=models.DateTimeField(auto_now=True),
        ),

        # Populate created_by for tags
        migrations.RunPython(populate_created_by_tag, migrations.RunPython.noop),

        # Update slug field
        migrations.AlterField(
            model_name='tag',
            name='slug',
            field=models.SlugField(unique=True, max_length=100, editable=False),
        ),

        # === ARCHIVERESULT CHANGES ===

        # Add health stats fields to ArchiveResult
        migrations.AddField(
            model_name='archiveresult',
            name='num_uses_failed',
            field=models.PositiveIntegerField(default=0),
        ),
        migrations.AddField(
            model_name='archiveresult',
            name='num_uses_succeeded',
            field=models.PositiveIntegerField(default=0),
        ),

        # Add uuid field for new ID
        # NOTE(review): a callable default is evaluated once when the column
        # is added, so rows that already exist presumably all receive the same
        # UUID rather than NULL - verify the UUID backfill step accounts for
        # that.
        migrations.AddField(
            model_name='archiveresult',
            name='uuid',
            field=models.UUIDField(default=uuid4, null=True, blank=True),
        ),
        # created_by starts out nullable so existing rows can be backfilled
        # before the NOT NULL constraint is applied further down.
        migrations.AddField(
            model_name='archiveresult',
            name='created_by',
            field=models.ForeignKey(
                default=None, null=True, blank=True,
                on_delete=django.db.models.deletion.CASCADE,
                related_name='archiveresult_set',
                to=settings.AUTH_USER_MODEL,
            ),
        ),
        migrations.AddField(
            model_name='archiveresult',
            name='created_at',
            field=models.DateTimeField(default=django.utils.timezone.now, db_index=True, null=True),
        ),
        migrations.AddField(
            model_name='archiveresult',
            name='modified_at',
            field=models.DateTimeField(auto_now=True),
        ),
        migrations.AddField(
            model_name='archiveresult',
            name='retry_at',
            field=models.DateTimeField(default=django.utils.timezone.now, null=True, blank=True, db_index=True),
        ),
        migrations.AddField(
            model_name='archiveresult',
            name='notes',
            field=models.TextField(blank=True, default=''),
        ),
        migrations.AddField(
            model_name='archiveresult',
            name='output_dir',
            field=models.CharField(max_length=256, default=None, null=True, blank=True),
        ),
        migrations.AddField(
            model_name='archiveresult',
            name='config',
            field=models.JSONField(default=dict, blank=False),
        ),

        # Populate UUIDs and data for archive results
        migrations.RunPython(generate_uuid_for_archiveresults, migrations.RunPython.noop),
        migrations.RunPython(copy_created_at_from_start_ts, migrations.RunPython.noop),
        migrations.RunPython(populate_created_by_archiveresult, migrations.RunPython.noop),

        # Make created_by non-nullable
        migrations.AlterField(
            model_name='archiveresult',
            name='created_by',
            field=models.ForeignKey(
                on_delete=django.db.models.deletion.CASCADE,
                related_name='archiveresult_set',
                to=settings.AUTH_USER_MODEL,
                db_index=True,
            ),
        ),

        # Update extractor choices
        migrations.AlterField(
            model_name='archiveresult',
            name='extractor',
            field=models.CharField(
                choices=[
                    ('htmltotext', 'htmltotext'), ('git', 'git'), ('singlefile', 'singlefile'),
                    ('media', 'media'), ('archive_org', 'archive_org'), ('readability', 'readability'),
                    ('mercury', 'mercury'), ('favicon', 'favicon'), ('pdf', 'pdf'),
                    ('headers', 'headers'), ('screenshot', 'screenshot'), ('dom', 'dom'),
                    ('title', 'title'), ('wget', 'wget'),
                ],
                max_length=32, db_index=True,
            ),
        ),

        # Update status field
        migrations.AlterField(
            model_name='archiveresult',
            name='status',
            field=models.CharField(
                choices=[
                    ('queued', 'Queued'), ('started', 'Started'), ('backoff', 'Waiting to retry'),
                    ('succeeded', 'Succeeded'), ('failed', 'Failed'), ('skipped', 'Skipped'),
                ],
                max_length=16, default='queued', db_index=True,
            ),
        ),

        # Update output field size
        migrations.AlterField(
            model_name='archiveresult',
            name='output',
            field=models.CharField(max_length=1024, default=None, null=True, blank=True),
        ),

        # Update cmd_version field size
        migrations.AlterField(
            model_name='archiveresult',
            name='cmd_version',
            field=models.CharField(max_length=128, default=None, null=True, blank=True),
        ),

        # Make start_ts and end_ts nullable
        # This comes after the created_at backfill above, which reads start_ts.
        migrations.AlterField(
            model_name='archiveresult',
            name='start_ts',
            field=models.DateTimeField(default=None, null=True, blank=True),
        ),
        migrations.AlterField(
            model_name='archiveresult',
            name='end_ts',
            field=models.DateTimeField(default=None, null=True, blank=True),
        ),

        # Make pwd nullable
        migrations.AlterField(
            model_name='archiveresult',
            name='pwd',
            field=models.CharField(max_length=256, default=None, null=True, blank=True),
        ),

        # Make cmd nullable
        migrations.AlterField(
            model_name='archiveresult',
            name='cmd',
            field=models.JSONField(default=None, null=True, blank=True),
        ),

        # Update model options
        migrations.AlterModelOptions(
            name='archiveresult',
            options={'verbose_name': 'Archive Result', 'verbose_name_plural': 'Archive Results Log'},
        ),
        migrations.AlterModelOptions(
            name='snapshot',
            options={'verbose_name': 'Snapshot', 'verbose_name_plural': 'Snapshots'},
        ),
        migrations.AlterModelOptions(
            name='tag',
            options={'verbose_name': 'Tag', 'verbose_name_plural': 'Tags'},
        ),
    ]
|