mirror of
https://github.com/ArchiveBox/ArchiveBox.git
synced 2026-01-03 01:15:57 +10:00
actually working migration path from 0.7.2 and 0.8.6 + renames and test coverage
This commit is contained in:
@@ -117,7 +117,7 @@ class SnapshotAdminForm(forms.ModelForm):
|
||||
|
||||
class SnapshotAdmin(SearchResultsAdminMixin, ConfigEditorMixin, BaseModelAdmin):
|
||||
form = SnapshotAdminForm
|
||||
list_display = ('created_at', 'title_str', 'status_with_progress', 'files', 'size_with_stats', 'url_str')
|
||||
list_display = ('created_at', 'title_str', 'status_with_progress', 'files', 'size_with_stats', 'health_display', 'url_str')
|
||||
sort_fields = ('title_str', 'url_str', 'created_at', 'status', 'crawl')
|
||||
readonly_fields = ('admin_actions', 'status_info', 'imported_timestamp', 'created_at', 'modified_at', 'downloaded_at', 'output_dir', 'archiveresults_list')
|
||||
search_fields = ('id', 'url', 'timestamp', 'title', 'tags__name')
|
||||
@@ -488,6 +488,12 @@ class SnapshotAdmin(SearchResultsAdminMixin, ConfigEditorMixin, BaseModelAdmin):
|
||||
obj.url[:128],
|
||||
)
|
||||
|
||||
@admin.display(description='Health', ordering='health')
def health_display(self, obj):
    """Render ``obj.health`` as a color-coded HTML ``<span>``.

    The value is shown in green when it is at least 80, orange when it is
    at least 50, and red otherwise.
    """
    score = obj.health
    if score >= 80:
        shade = 'green'
    elif score >= 50:
        shade = 'orange'
    else:
        shade = 'red'
    # format_html receives the color and the value as separate arguments.
    return format_html('<span style="color: {};">{}</span>', shade, score)
|
||||
|
||||
def grid_view(self, request, extra_context=None):
|
||||
|
||||
# cl = self.get_changelist_instance(request)
|
||||
|
||||
@@ -3,6 +3,7 @@
|
||||
# Handles both fresh installs and upgrades from v0.7.2/v0.8.6rc0
|
||||
|
||||
from django.db import migrations, models, connection
|
||||
import django.utils.timezone
|
||||
|
||||
|
||||
def get_table_columns(table_name):
|
||||
@@ -95,31 +96,31 @@ def upgrade_core_tables(apps, schema_editor):
|
||||
# ============================================================================
|
||||
# PART 2: Upgrade core_snapshot table
|
||||
# ============================================================================
|
||||
# Create table with NEW field names for timestamps (bookmarked_at, created_at, modified_at)
|
||||
# and all other fields needed by later migrations
|
||||
cursor.execute("""
|
||||
CREATE TABLE IF NOT EXISTS core_snapshot_new (
|
||||
id TEXT PRIMARY KEY NOT NULL,
|
||||
created_at DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP,
|
||||
modified_at DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP,
|
||||
|
||||
url TEXT NOT NULL,
|
||||
timestamp VARCHAR(32) NOT NULL UNIQUE,
|
||||
bookmarked_at DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP,
|
||||
|
||||
title VARCHAR(512),
|
||||
crawl_id TEXT,
|
||||
parent_snapshot_id TEXT,
|
||||
|
||||
title VARCHAR(512),
|
||||
bookmarked_at DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP,
|
||||
created_at DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP,
|
||||
modified_at DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP,
|
||||
|
||||
downloaded_at DATETIME,
|
||||
status VARCHAR(15) NOT NULL DEFAULT 'queued',
|
||||
retry_at DATETIME,
|
||||
|
||||
depth INTEGER NOT NULL DEFAULT 0,
|
||||
fs_version VARCHAR(10) NOT NULL DEFAULT '0.9.0',
|
||||
|
||||
config TEXT NOT NULL DEFAULT '{}',
|
||||
notes TEXT NOT NULL DEFAULT '',
|
||||
num_uses_succeeded INTEGER NOT NULL DEFAULT 0,
|
||||
num_uses_failed INTEGER NOT NULL DEFAULT 0,
|
||||
|
||||
status VARCHAR(15) NOT NULL DEFAULT 'queued',
|
||||
retry_at DATETIME,
|
||||
current_step INTEGER NOT NULL DEFAULT 0,
|
||||
|
||||
FOREIGN KEY (crawl_id) REFERENCES crawls_crawl(id) ON DELETE CASCADE,
|
||||
@@ -141,29 +142,23 @@ def upgrade_core_tables(apps, schema_editor):
|
||||
has_bookmarked_at = 'bookmarked_at' in snapshot_cols
|
||||
|
||||
if has_added and not has_bookmarked_at:
|
||||
# Migrating from v0.7.2 (has added/updated, no bookmarked_at/created_at/modified_at)
|
||||
# Migrating from v0.7.2 (has added/updated fields)
|
||||
print('Migrating Snapshot from v0.7.2 schema...')
|
||||
# Debug: Check what data we're about to copy
|
||||
cursor.execute("SELECT id, added, updated FROM core_snapshot LIMIT 3")
|
||||
sample_data = cursor.fetchall()
|
||||
print(f'DEBUG 0023: Sample Snapshot data before migration: {sample_data}')
|
||||
|
||||
# Transform added→bookmarked_at/created_at and updated→modified_at
|
||||
cursor.execute("""
|
||||
INSERT OR IGNORE INTO core_snapshot_new (
|
||||
id, url, timestamp, title, bookmarked_at, created_at, modified_at
|
||||
id, url, timestamp, title,
|
||||
bookmarked_at, created_at, modified_at,
|
||||
status
|
||||
)
|
||||
SELECT
|
||||
id, url, timestamp, title,
|
||||
COALESCE(added, CURRENT_TIMESTAMP) as bookmarked_at,
|
||||
COALESCE(added, CURRENT_TIMESTAMP) as created_at,
|
||||
COALESCE(updated, added, CURRENT_TIMESTAMP) as modified_at
|
||||
COALESCE(updated, added, CURRENT_TIMESTAMP) as modified_at,
|
||||
'queued' as status
|
||||
FROM core_snapshot;
|
||||
""")
|
||||
|
||||
# Debug: Check what was inserted
|
||||
cursor.execute("SELECT id, bookmarked_at, modified_at FROM core_snapshot_new LIMIT 3")
|
||||
inserted_data = cursor.fetchall()
|
||||
print(f'DEBUG 0023: Sample Snapshot data after INSERT: {inserted_data}')
|
||||
elif has_bookmarked_at and not has_added:
|
||||
# Migrating from v0.8.6rc0 (already has bookmarked_at/created_at/modified_at)
|
||||
print('Migrating Snapshot from v0.8.6rc0 schema...')
|
||||
@@ -308,14 +303,29 @@ class Migration(migrations.Migration):
|
||||
),
|
||||
],
|
||||
state_operations=[
|
||||
# NOTE: We do NOT remove extractor/output here for ArchiveResult!
|
||||
# NOTE: We do NOT remove extractor/output for ArchiveResult!
|
||||
# They are still in the database and will be removed by migration 0025
|
||||
# after copying their data to the new field names (plugin, output_str).
|
||||
# after copying their data to plugin/output_str.
|
||||
|
||||
# However, for Snapshot, we DO remove added/updated here because
|
||||
# the database operations above already renamed them to bookmarked_at/created_at/modified_at.
|
||||
# However, for Snapshot, we DO remove added/updated and ADD the new timestamp fields
|
||||
# because the SQL above already transformed them.
|
||||
migrations.RemoveField(model_name='snapshot', name='added'),
|
||||
migrations.RemoveField(model_name='snapshot', name='updated'),
|
||||
migrations.AddField(
|
||||
model_name='snapshot',
|
||||
name='bookmarked_at',
|
||||
field=models.DateTimeField(db_index=True, default=django.utils.timezone.now),
|
||||
),
|
||||
migrations.AddField(
|
||||
model_name='snapshot',
|
||||
name='created_at',
|
||||
field=models.DateTimeField(db_index=True, default=django.utils.timezone.now),
|
||||
),
|
||||
migrations.AddField(
|
||||
model_name='snapshot',
|
||||
name='modified_at',
|
||||
field=models.DateTimeField(auto_now=True),
|
||||
),
|
||||
|
||||
# SnapshotTag table already exists from v0.7.2, just declare it in state
|
||||
migrations.CreateModel(
|
||||
|
||||
@@ -103,15 +103,21 @@ class Migration(migrations.Migration):
|
||||
);
|
||||
|
||||
INSERT INTO core_snapshot_final (
|
||||
id, created_at, modified_at, url, timestamp, bookmarked_at,
|
||||
crawl_id, parent_snapshot_id, title, downloaded_at, depth, fs_version,
|
||||
config, notes, num_uses_succeeded, num_uses_failed,
|
||||
id, url, timestamp, title,
|
||||
bookmarked_at, created_at, modified_at,
|
||||
crawl_id, parent_snapshot_id,
|
||||
downloaded_at, depth, fs_version,
|
||||
config, notes,
|
||||
num_uses_succeeded, num_uses_failed,
|
||||
status, retry_at, current_step
|
||||
)
|
||||
SELECT
|
||||
id, created_at, modified_at, url, timestamp, bookmarked_at,
|
||||
crawl_id, parent_snapshot_id, title, downloaded_at, depth, fs_version,
|
||||
COALESCE(config, '{}'), COALESCE(notes, ''), num_uses_succeeded, num_uses_failed,
|
||||
id, url, timestamp, title,
|
||||
bookmarked_at, created_at, modified_at,
|
||||
crawl_id, parent_snapshot_id,
|
||||
downloaded_at, depth, fs_version,
|
||||
COALESCE(config, '{}'), COALESCE(notes, ''),
|
||||
num_uses_succeeded, num_uses_failed,
|
||||
status, retry_at, current_step
|
||||
FROM core_snapshot;
|
||||
|
||||
|
||||
@@ -9,23 +9,16 @@ from django.db import migrations, models, connection
|
||||
|
||||
|
||||
def copy_old_fields_to_new(apps, schema_editor):
|
||||
"""Copy data from old field names to new field names before AddField operations."""
|
||||
"""Copy data from old field names to new field names after AddField operations."""
|
||||
cursor = connection.cursor()
|
||||
|
||||
# Check if old fields still exist
|
||||
cursor.execute("PRAGMA table_info(core_archiveresult)")
|
||||
cols = {row[1] for row in cursor.fetchall()}
|
||||
print(f'DEBUG 0025: ArchiveResult columns: {sorted(cols)}')
|
||||
|
||||
if 'extractor' in cols and 'plugin' in cols:
|
||||
# Copy extractor -> plugin
|
||||
print('DEBUG 0025: Copying extractor -> plugin')
|
||||
cursor.execute("UPDATE core_archiveresult SET plugin = COALESCE(extractor, '') WHERE plugin = '' OR plugin IS NULL")
|
||||
cursor.execute("SELECT COUNT(*) FROM core_archiveresult WHERE plugin != ''")
|
||||
count = cursor.fetchone()[0]
|
||||
print(f'DEBUG 0025: Updated {count} rows with plugin data')
|
||||
else:
|
||||
print(f'DEBUG 0025: NOT copying - extractor in cols: {"extractor" in cols}, plugin in cols: {"plugin" in cols}')
|
||||
|
||||
if 'output' in cols and 'output_str' in cols:
|
||||
# Copy output -> output_str
|
||||
@@ -38,16 +31,13 @@ def copy_old_fields_to_new(apps, schema_editor):
|
||||
if 'end_ts' in cols and 'modified_at' in cols:
|
||||
cursor.execute("UPDATE core_archiveresult SET modified_at = COALESCE(end_ts, start_ts, CURRENT_TIMESTAMP) WHERE modified_at IS NULL OR modified_at = ''")
|
||||
|
||||
# Same for Snapshot table
|
||||
cursor.execute("PRAGMA table_info(core_snapshot)")
|
||||
snap_cols = {row[1] for row in cursor.fetchall()}
|
||||
# NOTE: Snapshot timestamps (added→bookmarked_at, updated→modified_at) were already
|
||||
# transformed by migration 0023, so we don't need to copy them here.
|
||||
|
||||
if 'added' in snap_cols and 'bookmarked_at' in snap_cols:
|
||||
cursor.execute("UPDATE core_snapshot SET bookmarked_at = COALESCE(added, CURRENT_TIMESTAMP) WHERE bookmarked_at IS NULL OR bookmarked_at = ''")
|
||||
cursor.execute("UPDATE core_snapshot SET created_at = COALESCE(added, CURRENT_TIMESTAMP) WHERE created_at IS NULL OR created_at = ''")
|
||||
|
||||
if 'updated' in snap_cols and 'modified_at' in snap_cols:
|
||||
cursor.execute("UPDATE core_snapshot SET modified_at = COALESCE(updated, added, CURRENT_TIMESTAMP) WHERE modified_at IS NULL OR modified_at = ''")
|
||||
# Debug: Check Snapshot timestamps at end of RunPython
|
||||
cursor.execute("SELECT id, bookmarked_at, modified_at FROM core_snapshot LIMIT 2")
|
||||
snap_after = cursor.fetchall()
|
||||
print(f'DEBUG 0025: Snapshot timestamps at END of RunPython: {snap_after}')
|
||||
|
||||
|
||||
class Migration(migrations.Migration):
|
||||
@@ -149,21 +139,12 @@ class Migration(migrations.Migration):
|
||||
name='retry_at',
|
||||
field=models.DateTimeField(blank=True, db_index=True, default=django.utils.timezone.now, null=True),
|
||||
),
|
||||
migrations.AddField(
|
||||
model_name='snapshot',
|
||||
name='bookmarked_at',
|
||||
field=models.DateTimeField(db_index=True, default=django.utils.timezone.now),
|
||||
),
|
||||
# NOTE: bookmarked_at and created_at already added by migration 0023
|
||||
migrations.AddField(
|
||||
model_name='snapshot',
|
||||
name='config',
|
||||
field=models.JSONField(default=dict),
|
||||
),
|
||||
migrations.AddField(
|
||||
model_name='snapshot',
|
||||
name='created_at',
|
||||
field=models.DateTimeField(db_index=True, default=django.utils.timezone.now),
|
||||
),
|
||||
migrations.AddField(
|
||||
model_name='snapshot',
|
||||
name='current_step',
|
||||
@@ -184,11 +165,7 @@ class Migration(migrations.Migration):
|
||||
name='fs_version',
|
||||
field=models.CharField(default='0.9.0', help_text='Filesystem version of this snapshot (e.g., "0.7.0", "0.8.0", "0.9.0"). Used to trigger lazy migration on save().', max_length=10),
|
||||
),
|
||||
migrations.AddField(
|
||||
model_name='snapshot',
|
||||
name='modified_at',
|
||||
field=models.DateTimeField(auto_now=True),
|
||||
),
|
||||
# NOTE: modified_at already added by migration 0023
|
||||
migrations.AddField(
|
||||
model_name='snapshot',
|
||||
name='notes',
|
||||
@@ -248,7 +225,7 @@ class Migration(migrations.Migration):
|
||||
model_name='archiveresult',
|
||||
name='output',
|
||||
),
|
||||
# NOTE: Snapshot's added/updated fields were already removed by migration 0023
|
||||
# NOTE: Snapshot's added/updated were already removed by migration 0023
|
||||
migrations.AlterField(
|
||||
model_name='archiveresult',
|
||||
name='end_ts',
|
||||
|
||||
@@ -0,0 +1,28 @@
|
||||
# Generated by Django 6.0 on 2026-01-01 23:28
|
||||
|
||||
import django.db.models.deletion
|
||||
from django.db import migrations, models
|
||||
|
||||
|
||||
class Migration(migrations.Migration):
    """Remove ArchiveResult's num_uses_* fields and add its ``process`` link."""

    # Must run after the core 0025 migration and the machine 0003 migration.
    dependencies = [
        ('core', '0025_alter_archiveresult_options_alter_snapshot_options_and_more'),
        ('machine', '0003_add_process_type_and_parent'),
    ]

    operations = [
        # Drop the two usage counters from ArchiveResult.
        migrations.RemoveField(model_name='archiveresult', name='num_uses_failed'),
        migrations.RemoveField(model_name='archiveresult', name='num_uses_succeeded'),
        # Add a one-to-one link to machine.process; null/blank are allowed, so
        # an ArchiveResult may have no process attached.
        migrations.AddField(
            model_name='archiveresult',
            name='process',
            field=models.OneToOneField(
                to='machine.process',
                on_delete=django.db.models.deletion.PROTECT,
                related_name='archiveresult',
                null=True,
                blank=True,
                help_text='Process execution details for this archive result',
            ),
        ),
    ]
|
||||
@@ -2285,13 +2285,14 @@ class ArchiveResult(ModelWithOutputDir, ModelWithConfig, ModelWithNotes, ModelWi
|
||||
|
||||
# Process FK - tracks execution details (cmd, pwd, stdout, stderr, etc.)
|
||||
# Added POST-v0.9.0, will be added in a separate migration
|
||||
# process = models.OneToOneField(
|
||||
# 'machine.Process',
|
||||
# on_delete=models.PROTECT,
|
||||
# null=False,
|
||||
# related_name='archiveresult',
|
||||
# help_text='Process execution details for this archive result'
|
||||
# )
|
||||
process = models.OneToOneField(
|
||||
'machine.Process',
|
||||
on_delete=models.PROTECT,
|
||||
null=True,
|
||||
blank=True,
|
||||
related_name='archiveresult',
|
||||
help_text='Process execution details for this archive result'
|
||||
)
|
||||
|
||||
# New output fields (replacing old 'output' field)
|
||||
output_str = models.TextField(blank=True, default='', help_text='Human-readable output summary')
|
||||
|
||||
Reference in New Issue
Block a user