mirror of
https://github.com/ArchiveBox/ArchiveBox.git
synced 2026-01-03 01:15:57 +10:00
remove model health stats from models that don't need them
This commit is contained in:
@@ -143,6 +143,11 @@ def upgrade_core_tables(apps, schema_editor):
|
||||
if has_added and not has_bookmarked_at:
|
||||
# Migrating from v0.7.2 (has added/updated, no bookmarked_at/created_at/modified_at)
|
||||
print('Migrating Snapshot from v0.7.2 schema...')
|
||||
# Debug: Check what data we're about to copy
|
||||
cursor.execute("SELECT id, added, updated FROM core_snapshot LIMIT 3")
|
||||
sample_data = cursor.fetchall()
|
||||
print(f'DEBUG 0023: Sample Snapshot data before migration: {sample_data}')
|
||||
|
||||
cursor.execute("""
|
||||
INSERT OR IGNORE INTO core_snapshot_new (
|
||||
id, url, timestamp, title, bookmarked_at, created_at, modified_at
|
||||
@@ -154,6 +159,11 @@ def upgrade_core_tables(apps, schema_editor):
|
||||
COALESCE(updated, added, CURRENT_TIMESTAMP) as modified_at
|
||||
FROM core_snapshot;
|
||||
""")
|
||||
|
||||
# Debug: Check what was inserted
|
||||
cursor.execute("SELECT id, bookmarked_at, modified_at FROM core_snapshot_new LIMIT 3")
|
||||
inserted_data = cursor.fetchall()
|
||||
print(f'DEBUG 0023: Sample Snapshot data after INSERT: {inserted_data}')
|
||||
elif has_bookmarked_at and not has_added:
|
||||
# Migrating from v0.8.6rc0 (already has bookmarked_at/created_at/modified_at)
|
||||
print('Migrating Snapshot from v0.8.6rc0 schema...')
|
||||
@@ -298,12 +308,15 @@ class Migration(migrations.Migration):
|
||||
),
|
||||
],
|
||||
state_operations=[
|
||||
# Remove old ArchiveResult fields
|
||||
migrations.RemoveField(model_name='archiveresult', name='extractor'),
|
||||
migrations.RemoveField(model_name='archiveresult', name='output'),
|
||||
# Remove old Snapshot fields
|
||||
# NOTE: We do NOT remove extractor/output here for ArchiveResult!
|
||||
# They are still in the database and will be removed by migration 0025
|
||||
# after copying their data to the new field names (plugin, output_str).
|
||||
|
||||
# However, for Snapshot, we DO remove added/updated here because
|
||||
# the database operations above already renamed them to bookmarked_at/created_at/modified_at.
|
||||
migrations.RemoveField(model_name='snapshot', name='added'),
|
||||
migrations.RemoveField(model_name='snapshot', name='updated'),
|
||||
|
||||
# SnapshotTag table already exists from v0.7.2, just declare it in state
|
||||
migrations.CreateModel(
|
||||
name='SnapshotTag',
|
||||
|
||||
@@ -25,7 +25,7 @@ def copy_old_fields_to_new(apps, schema_editor):
|
||||
count = cursor.fetchone()[0]
|
||||
print(f'DEBUG 0025: Updated {count} rows with plugin data')
|
||||
else:
|
||||
print(f'DEBUG 0025: NOT copying - extractor in cols: {extractor" in cols}, plugin in cols: {"plugin" in cols}')
|
||||
print(f'DEBUG 0025: NOT copying - extractor in cols: {"extractor" in cols}, plugin in cols: {"plugin" in cols}')
|
||||
|
||||
if 'output' in cols and 'output_str' in cols:
|
||||
# Copy output -> output_str
|
||||
@@ -239,6 +239,16 @@ class Migration(migrations.Migration):
|
||||
copy_old_fields_to_new,
|
||||
reverse_code=migrations.RunPython.noop,
|
||||
),
|
||||
# Now remove the old ArchiveResult fields after data has been copied
|
||||
migrations.RemoveField(
|
||||
model_name='archiveresult',
|
||||
name='extractor',
|
||||
),
|
||||
migrations.RemoveField(
|
||||
model_name='archiveresult',
|
||||
name='output',
|
||||
),
|
||||
# NOTE: Snapshot's added/updated fields were already removed by migration 0023
|
||||
migrations.AlterField(
|
||||
model_name='archiveresult',
|
||||
name='end_ts',
|
||||
|
||||
@@ -29,7 +29,7 @@ from archivebox.hooks import (
|
||||
get_plugins, get_plugin_name, get_plugin_icon,
|
||||
)
|
||||
from archivebox.base_models.models import (
|
||||
ModelWithUUID, ModelWithSerializers, ModelWithOutputDir,
|
||||
ModelWithUUID, ModelWithOutputDir,
|
||||
ModelWithConfig, ModelWithNotes, ModelWithHealthStats,
|
||||
get_or_create_system_user_pk,
|
||||
)
|
||||
@@ -40,7 +40,7 @@ from archivebox.machine.models import NetworkInterface, Binary
|
||||
|
||||
|
||||
|
||||
class Tag(ModelWithSerializers):
|
||||
class Tag(ModelWithUUID):
|
||||
# Keep AutoField for compatibility with main branch migrations
|
||||
# Don't use UUIDField here - requires complex FK transformation
|
||||
id = models.AutoField(primary_key=True, serialize=False, verbose_name='ID')
|
||||
@@ -2254,7 +2254,7 @@ class SnapshotMachine(BaseStateMachine, strict_states=True):
|
||||
)
|
||||
|
||||
|
||||
class ArchiveResult(ModelWithOutputDir, ModelWithConfig, ModelWithNotes, ModelWithHealthStats, ModelWithStateMachine):
|
||||
class ArchiveResult(ModelWithOutputDir, ModelWithConfig, ModelWithNotes, ModelWithStateMachine):
|
||||
class StatusChoices(models.TextChoices):
|
||||
QUEUED = 'queued', 'Queued'
|
||||
STARTED = 'started', 'Started'
|
||||
@@ -2551,11 +2551,20 @@ class ArchiveResult(ModelWithOutputDir, ModelWithConfig, ModelWithNotes, ModelWi
|
||||
pass
|
||||
|
||||
def cascade_health_update(self, success: bool):
|
||||
"""Update health stats for self, parent Snapshot, and grandparent Crawl."""
|
||||
self.increment_health_stats(success)
|
||||
"""Update health stats for parent Snapshot, Crawl, and execution infrastructure (Binary, Machine, NetworkInterface)."""
|
||||
# Update archival hierarchy
|
||||
self.snapshot.increment_health_stats(success)
|
||||
self.snapshot.crawl.increment_health_stats(success)
|
||||
|
||||
# Update execution infrastructure
|
||||
if self.binary:
|
||||
self.binary.increment_health_stats(success)
|
||||
if self.binary.machine:
|
||||
self.binary.machine.increment_health_stats(success)
|
||||
|
||||
if self.iface:
|
||||
self.iface.increment_health_stats(success)
|
||||
|
||||
def run(self):
|
||||
"""
|
||||
Execute this ArchiveResult's hook and update status.
|
||||
|
||||
@@ -16,14 +16,14 @@ from statemachine import State, registry
|
||||
from rich import print
|
||||
|
||||
from archivebox.config import CONSTANTS
|
||||
from archivebox.base_models.models import ModelWithSerializers, ModelWithOutputDir, ModelWithConfig, ModelWithNotes, ModelWithHealthStats, get_or_create_system_user_pk
|
||||
from archivebox.base_models.models import ModelWithUUID, ModelWithOutputDir, ModelWithConfig, ModelWithNotes, ModelWithHealthStats, get_or_create_system_user_pk
|
||||
from archivebox.workers.models import ModelWithStateMachine, BaseStateMachine
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from archivebox.core.models import Snapshot, ArchiveResult
|
||||
|
||||
|
||||
class CrawlSchedule(ModelWithSerializers, ModelWithNotes, ModelWithHealthStats):
|
||||
class CrawlSchedule(ModelWithUUID, ModelWithNotes):
|
||||
id = models.UUIDField(primary_key=True, default=uuid7, editable=False, unique=True)
|
||||
created_at = models.DateTimeField(default=timezone.now, db_index=True)
|
||||
created_by = models.ForeignKey(settings.AUTH_USER_MODEL, on_delete=models.CASCADE, default=get_or_create_system_user_pk, null=False)
|
||||
@@ -197,9 +197,9 @@ class Crawl(ModelWithOutputDir, ModelWithConfig, ModelWithHealthStats, ModelWith
|
||||
|
||||
@property
|
||||
def output_dir_parent(self) -> str:
|
||||
"""Construct parent directory: users/{user_id}/crawls/{YYYYMMDD}"""
|
||||
"""Construct parent directory: users/{username}/crawls/{YYYYMMDD}"""
|
||||
date_str = self.created_at.strftime('%Y%m%d')
|
||||
return f'users/{self.created_by_id}/crawls/{date_str}'
|
||||
return f'users/{self.created_by.username}/crawls/{date_str}'
|
||||
|
||||
@property
|
||||
def output_dir_name(self) -> str:
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
Reference in New Issue
Block a user