mirror of
https://github.com/ArchiveBox/ArchiveBox.git
synced 2026-04-03 14:27:55 +10:00
use full dotted paths for all archivebox imports, add migrations and more fixes
This commit is contained in:
@@ -763,7 +763,7 @@ The configuration is documented here: **[Configuration Wiki](https://github.com/
|
||||
<br/>
|
||||
TIMEOUT=240 # default: 60 add more seconds on slower networks
|
||||
CHECK_SSL_VALIDITY=False # default: True False = allow saving URLs w/ bad SSL
|
||||
SAVE_ARCHIVE_DOT_ORG=False # default: True False = disable Archive.org saving
|
||||
SAVE_ARCHIVEDOTORG=False # default: True False = disable Archive.org saving
|
||||
MAX_MEDIA_SIZE=1500m # default: 750m raise/lower youtubedl output size
|
||||
<br/>
|
||||
PUBLIC_INDEX=True # default: True whether anon users can view index
|
||||
@@ -959,7 +959,7 @@ archivebox add 'https://docs.google.com/document/d/12345somePrivateDocument'
|
||||
archivebox add 'https://vimeo.com/somePrivateVideo'
|
||||
|
||||
# without first disabling saving to Archive.org:
|
||||
archivebox config --set SAVE_ARCHIVE_DOT_ORG=False # disable saving all URLs in Archive.org
|
||||
archivebox config --set SAVE_ARCHIVEDOTORG=False # disable saving all URLs in Archive.org
|
||||
|
||||
# restrict the main index, Snapshot content, and Add Page to authenticated users as-needed:
|
||||
archivebox config --set PUBLIC_INDEX=False
|
||||
|
||||
@@ -26,10 +26,10 @@ ASCII_LOGO = """
|
||||
|
||||
PACKAGE_DIR = Path(__file__).resolve().parent
|
||||
|
||||
# Add PACKAGE_DIR to sys.path - required for Django migrations to import models
|
||||
# Migrations reference models like 'machine.Binary' which need to be importable
|
||||
if str(PACKAGE_DIR) not in sys.path:
|
||||
sys.path.append(str(PACKAGE_DIR))
|
||||
# # Add PACKAGE_DIR to sys.path - required for Django migrations to import models
|
||||
# # Migrations reference models like 'machine.Binary' which need to be importable
|
||||
# if str(PACKAGE_DIR) not in sys.path:
|
||||
# sys.path.append(str(PACKAGE_DIR))
|
||||
|
||||
os.environ['DJANGO_SETTINGS_MODULE'] = 'archivebox.core.settings'
|
||||
os.environ['TZ'] = 'UTC'
|
||||
|
||||
@@ -5,6 +5,7 @@ from django.apps import AppConfig
|
||||
|
||||
class APIConfig(AppConfig):
    """Django AppConfig for the archivebox.api app.

    ``name`` is the full dotted import path, while ``label`` stays the short
    'api' so existing migrations and app-label references remain valid.
    """
    name = 'archivebox.api'
    label = 'api'
|
||||
|
||||
|
||||
def register_admin(admin_site):
|
||||
|
||||
@@ -94,7 +94,7 @@ class OrchestratorSchema(Schema):
|
||||
@router.get("/orchestrator", response=OrchestratorSchema, url_name="get_orchestrator")
|
||||
def get_orchestrator(request):
|
||||
"""Get the orchestrator status and all worker queues."""
|
||||
from workers.orchestrator import Orchestrator
|
||||
from archivebox.workers.orchestrator import Orchestrator
|
||||
from workers.worker import CrawlWorker, SnapshotWorker, ArchiveResultWorker
|
||||
|
||||
orchestrator = Orchestrator()
|
||||
|
||||
@@ -73,7 +73,7 @@ class ModelWithUUID(models.Model):
|
||||
return f'/api/v1/docs#/{self._meta.app_label.title()}%20Models/api_v1_{self._meta.app_label}_get_{self._meta.db_table}'
|
||||
|
||||
def as_json(self, keys: Iterable[str] = ()) -> dict:
|
||||
default_keys = ('id', 'created_at', 'modified_at', 'created_by_id')
|
||||
default_keys = ('id', 'created_at', 'modified_at')
|
||||
return {key: getattr(self, key) for key in (keys or default_keys) if hasattr(self, key)}
|
||||
|
||||
|
||||
@@ -119,7 +119,7 @@ class ModelWithHealthStats(models.Model):
|
||||
|
||||
class ModelWithConfig(models.Model):
|
||||
"""Mixin for models with a JSON config field."""
|
||||
config = models.JSONField(default=dict, null=False, blank=False, editable=True)
|
||||
config = models.JSONField(default=dict, null=True, blank=True, editable=True)
|
||||
|
||||
class Meta:
|
||||
abstract = True
|
||||
|
||||
@@ -56,7 +56,7 @@ def add(urls: str | list[str],
|
||||
from archivebox.core.models import Snapshot
|
||||
from archivebox.crawls.models import Crawl
|
||||
from archivebox.base_models.models import get_or_create_system_user_pk
|
||||
from workers.orchestrator import Orchestrator
|
||||
from archivebox.workers.orchestrator import Orchestrator
|
||||
|
||||
created_by_id = created_by_id or get_or_create_system_user_pk()
|
||||
|
||||
|
||||
@@ -78,7 +78,7 @@ def discover_outlinks(
|
||||
from archivebox.core.models import Snapshot, ArchiveResult
|
||||
from archivebox.crawls.models import Crawl
|
||||
from archivebox.config import CONSTANTS
|
||||
from workers.orchestrator import Orchestrator
|
||||
from archivebox.workers.orchestrator import Orchestrator
|
||||
|
||||
created_by_id = get_or_create_system_user_pk()
|
||||
is_tty = sys.stdout.isatty()
|
||||
|
||||
@@ -96,7 +96,7 @@ def run_plugins(
|
||||
TYPE_SNAPSHOT, TYPE_ARCHIVERESULT
|
||||
)
|
||||
from archivebox.core.models import Snapshot, ArchiveResult
|
||||
from workers.orchestrator import Orchestrator
|
||||
from archivebox.workers.orchestrator import Orchestrator
|
||||
|
||||
is_tty = sys.stdout.isatty()
|
||||
|
||||
|
||||
@@ -13,11 +13,9 @@ from archivebox.misc.util import docstring, enforce_types
|
||||
|
||||
|
||||
@enforce_types
|
||||
def init(force: bool=False, quick: bool=False, install: bool=False, setup: bool=False) -> None:
|
||||
def init(force: bool=False, quick: bool=False, install: bool=False) -> None:
|
||||
"""Initialize a new ArchiveBox collection in the current directory"""
|
||||
|
||||
install = install or setup
|
||||
|
||||
from archivebox.config import CONSTANTS, VERSION, DATA_DIR
|
||||
from archivebox.config.common import SERVER_CONFIG
|
||||
from archivebox.config.collection import write_config_file
|
||||
@@ -128,7 +126,8 @@ def init(force: bool=False, quick: bool=False, install: bool=False, setup: bool=
|
||||
print(f' [yellow]√ Added {len(orphaned_data_dir_links)} orphaned links from existing archive directories.[/yellow]')
|
||||
|
||||
if pending_links:
|
||||
Snapshot.objects.create_from_dicts(list(pending_links.values()))
|
||||
for link_dict in pending_links.values():
|
||||
Snapshot.from_jsonl(link_dict)
|
||||
|
||||
# Hint for orphaned snapshot directories
|
||||
print()
|
||||
@@ -187,7 +186,6 @@ def init(force: bool=False, quick: bool=False, install: bool=False, setup: bool=
|
||||
@click.option('--force', '-f', is_flag=True, help='Ignore unrecognized files in current directory and initialize anyway')
|
||||
@click.option('--quick', '-q', is_flag=True, help='Run any updates or migrations without rechecking all snapshot dirs')
|
||||
@click.option('--install', '-s', is_flag=True, help='Automatically install dependencies and extras used for archiving')
|
||||
@click.option('--setup', '-s', is_flag=True, help='DEPRECATED: equivalent to --install')
|
||||
@docstring(init.__doc__)
|
||||
def main(**kwargs) -> None:
|
||||
init(**kwargs)
|
||||
|
||||
@@ -85,7 +85,7 @@ def install(dry_run: bool=False) -> None:
|
||||
print()
|
||||
|
||||
# Run the crawl synchronously (this triggers on_Crawl hooks)
|
||||
from workers.orchestrator import Orchestrator
|
||||
from archivebox.workers.orchestrator import Orchestrator
|
||||
orchestrator = Orchestrator(exit_on_idle=True)
|
||||
orchestrator.runloop()
|
||||
|
||||
|
||||
@@ -37,7 +37,7 @@ def orchestrator(daemon: bool = False, watch: bool = False) -> int:
|
||||
0: All work completed successfully
|
||||
1: Error occurred
|
||||
"""
|
||||
from workers.orchestrator import Orchestrator
|
||||
from archivebox.workers.orchestrator import Orchestrator
|
||||
|
||||
if Orchestrator.is_running():
|
||||
print('[yellow]Orchestrator is already running[/yellow]')
|
||||
|
||||
@@ -74,7 +74,7 @@ def server(runserver_args: Iterable[str]=(SERVER_CONFIG.BIND_ADDR,),
|
||||
tail_multiple_worker_logs,
|
||||
is_port_in_use,
|
||||
)
|
||||
from workers.orchestrator import Orchestrator
|
||||
from archivebox.workers.orchestrator import Orchestrator
|
||||
import sys
|
||||
|
||||
# Check if port is already in use
|
||||
|
||||
@@ -163,7 +163,7 @@ def create_snapshots(
|
||||
|
||||
# If --plugins is passed, run the orchestrator for those plugins
|
||||
if plugins:
|
||||
from workers.orchestrator import Orchestrator
|
||||
from archivebox.workers.orchestrator import Orchestrator
|
||||
rprint(f'[blue]Running plugins: {plugins or "all"}...[/blue]', file=sys.stderr)
|
||||
orchestrator = Orchestrator(exit_on_idle=True)
|
||||
orchestrator.runloop()
|
||||
|
||||
@@ -160,7 +160,7 @@ def process_all_db_snapshots(batch_size: int = 100) -> dict:
|
||||
total = Snapshot.objects.count()
|
||||
print(f'[*] Processing {total} snapshots from database...')
|
||||
|
||||
for snapshot in Snapshot.objects.iterator():
|
||||
for snapshot in Snapshot.objects.iterator(chunk_size=batch_size):
|
||||
# Reconcile index.json with DB
|
||||
snapshot.reconcile_with_index_json()
|
||||
|
||||
@@ -209,7 +209,7 @@ def process_filtered_snapshots(
|
||||
total = snapshots.count()
|
||||
print(f'[*] Found {total} matching snapshots')
|
||||
|
||||
for snapshot in snapshots.iterator():
|
||||
for snapshot in snapshots.iterator(chunk_size=batch_size):
|
||||
# Reconcile index.json with DB
|
||||
snapshot.reconcile_with_index_json()
|
||||
|
||||
|
||||
@@ -17,7 +17,7 @@ TEST_CONFIG = {
|
||||
|
||||
'DATA_DIR': 'data.tests',
|
||||
|
||||
'SAVE_ARCHIVE_DOT_ORG': 'False',
|
||||
'SAVE_ARCHIVEDOTORG': 'False',
|
||||
'SAVE_TITLE': 'False',
|
||||
|
||||
'USE_CURL': 'False',
|
||||
|
||||
@@ -32,7 +32,7 @@ from unittest.mock import patch, MagicMock
|
||||
TEST_CONFIG = {
|
||||
'USE_COLOR': 'False',
|
||||
'SHOW_PROGRESS': 'False',
|
||||
'SAVE_ARCHIVE_DOT_ORG': 'False',
|
||||
'SAVE_ARCHIVEDOTORG': 'False',
|
||||
'SAVE_TITLE': 'True', # Fast extractor
|
||||
'SAVE_FAVICON': 'False',
|
||||
'SAVE_WGET': 'False',
|
||||
|
||||
@@ -216,6 +216,29 @@ def get_config(
|
||||
if snapshot and hasattr(snapshot, "config") and snapshot.config:
|
||||
config.update(snapshot.config)
|
||||
|
||||
# Normalize all aliases to canonical names (after all sources merged)
|
||||
# This handles aliases that came from user/crawl/snapshot configs, not just env
|
||||
try:
|
||||
from archivebox.hooks import discover_plugin_configs
|
||||
plugin_configs = discover_plugin_configs()
|
||||
aliases_to_normalize = {} # {alias_key: canonical_key}
|
||||
|
||||
# Build alias mapping from all plugin schemas
|
||||
for plugin_name, schema in plugin_configs.items():
|
||||
for canonical_key, prop_schema in schema.get('properties', {}).items():
|
||||
for alias in prop_schema.get('x-aliases', []):
|
||||
aliases_to_normalize[alias] = canonical_key
|
||||
|
||||
# Normalize: copy alias values to canonical keys (aliases take precedence)
|
||||
for alias_key, canonical_key in aliases_to_normalize.items():
|
||||
if alias_key in config:
|
||||
# Alias exists - copy to canonical key (overwriting any default)
|
||||
config[canonical_key] = config[alias_key]
|
||||
# Remove alias from config to keep it clean
|
||||
del config[alias_key]
|
||||
except ImportError:
|
||||
pass
|
||||
|
||||
return config
|
||||
|
||||
|
||||
|
||||
@@ -5,8 +5,12 @@ from django.apps import AppConfig
|
||||
|
||||
class CoreConfig(AppConfig):
    """Django AppConfig for the archivebox.core app.

    ``name`` is the full dotted import path; ``label`` stays 'core' so that
    migrations and FK references keyed on the app label keep working.
    """
    name = 'archivebox.core'
    label = 'core'

    def ready(self):
        """Register the archivebox.core.admin_site as the main django admin site"""
        # Deferred imports: app registry is only fully loaded once ready() runs.
        from archivebox.core.admin_site import register_admin_site
        register_admin_site()

        # Import models to register state machines with the registry
        from archivebox.core import models # noqa: F401
|
||||
|
||||
57
archivebox/core/migrations/0024_b_clear_config_fields.py
Normal file
57
archivebox/core/migrations/0024_b_clear_config_fields.py
Normal file
@@ -0,0 +1,57 @@
|
||||
# Data migration to clear config fields that may contain invalid JSON
|
||||
# This runs before 0025 to prevent CHECK constraint failures
|
||||
|
||||
from django.db import migrations
|
||||
|
||||
|
||||
def clear_config_fields(apps, schema_editor):
|
||||
"""Clear all config fields in related tables to avoid JSON validation errors."""
|
||||
db_alias = schema_editor.connection.alias
|
||||
|
||||
# Disable foreign key checks temporarily to allow updates
|
||||
with schema_editor.connection.cursor() as cursor:
|
||||
cursor.execute("PRAGMA foreign_keys=OFF")
|
||||
|
||||
tables_to_clear = [
|
||||
('crawls_seed', 'config'),
|
||||
('crawls_crawl', 'config'),
|
||||
('crawls_crawlschedule', 'config') if 'crawlschedule' in dir() else None,
|
||||
('machine_machine', 'stats'),
|
||||
('machine_machine', 'config'),
|
||||
]
|
||||
|
||||
for table_info in tables_to_clear:
|
||||
if table_info is None:
|
||||
continue
|
||||
table_name, field_name = table_info
|
||||
|
||||
try:
|
||||
with schema_editor.connection.cursor() as cursor:
|
||||
# Check if table exists first
|
||||
cursor.execute(f"SELECT name FROM sqlite_master WHERE type='table' AND name='{table_name}'")
|
||||
if not cursor.fetchone():
|
||||
print(f" Skipping {table_name}.{field_name}: table does not exist")
|
||||
continue
|
||||
|
||||
# Set all to empty JSON object
|
||||
cursor.execute(f"UPDATE {table_name} SET {field_name} = '{{}}' WHERE {field_name} IS NOT NULL")
|
||||
print(f" Cleared {field_name} in {table_name}: {cursor.rowcount} rows")
|
||||
except Exception as e:
|
||||
print(f" Skipping {table_name}.{field_name}: {e}")
|
||||
|
||||
# Re-enable foreign key checks
|
||||
with schema_editor.connection.cursor() as cursor:
|
||||
cursor.execute("PRAGMA foreign_keys=ON")
|
||||
|
||||
|
||||
class Migration(migrations.Migration):
    # Data migration: runs clear_config_fields() forward. Reverse is a no-op
    # because the cleared JSON values are not recoverable.
    # Depends on crawls/machine initial migrations so their tables exist
    # (when present) before we try to clear them.

    dependencies = [
        ('core', '0023_new_schema'),
        ('crawls', '0001_initial'),
        ('machine', '0001_squashed'),
    ]

    operations = [
        migrations.RunPython(clear_config_fields, reverse_code=migrations.RunPython.noop),
    ]
|
||||
28
archivebox/core/migrations/0024_c_disable_fk_checks.py
Normal file
28
archivebox/core/migrations/0024_c_disable_fk_checks.py
Normal file
@@ -0,0 +1,28 @@
|
||||
# Disable foreign key checks before 0025 to prevent CHECK constraint validation errors
|
||||
|
||||
from django.db import migrations
|
||||
|
||||
|
||||
def disable_fk_checks(apps, schema_editor):
    """Temporarily disable foreign key checks.

    Forward half of a RunPython pair: switches SQLite FK enforcement off so
    subsequent table rebuilds don't trip constraint validation.
    """
    connection = schema_editor.connection
    with connection.cursor() as cursor:
        cursor.execute("PRAGMA foreign_keys=OFF")
        print(" Disabled foreign key checks")
|
||||
|
||||
|
||||
def enable_fk_checks(apps, schema_editor):
    """Re-enable foreign key checks.

    Reverse half of the RunPython pair: restores SQLite FK enforcement when
    the migration is unapplied.
    """
    connection = schema_editor.connection
    with connection.cursor() as cursor:
        cursor.execute("PRAGMA foreign_keys=ON")
        print(" Enabled foreign key checks")
|
||||
|
||||
|
||||
class Migration(migrations.Migration):
    # Brackets the 0025 schema changes: disables SQLite FK enforcement going
    # forward and re-enables it on reverse, so table rebuilds in later
    # migrations don't fail CHECK/FK validation mid-rebuild.

    dependencies = [
        ('core', '0024_b_clear_config_fields'),
    ]

    operations = [
        migrations.RunPython(disable_fk_checks, reverse_code=enable_fk_checks),
    ]
|
||||
93
archivebox/core/migrations/0024_d_fix_crawls_config.py
Normal file
93
archivebox/core/migrations/0024_d_fix_crawls_config.py
Normal file
@@ -0,0 +1,93 @@
|
||||
# Fix crawls_crawl config field to avoid CHECK constraint errors during table rebuilds
|
||||
|
||||
from django.db import migrations
|
||||
|
||||
|
||||
def fix_crawls_config(apps, schema_editor):
    """
    Rebuild crawls_crawl table to fix CHECK constraints and make seed_id nullable.
    Only runs for UPGRADES from 0.8.x (when crawls.0001_initial didn't exist yet).
    For fresh installs, crawls.0001_initial creates the correct schema.

    Args:
        apps: historical app registry (unused - raw SQL only).
        schema_editor: provides the DB connection used for raw cursor access.
    """
    with schema_editor.connection.cursor() as cursor:
        # Check if this is an upgrade from old 0.8.x or a fresh install.
        # In fresh installs, crawls.0001_initial was applied, creating seed FK.
        # In upgrades, the table was created by old migrations before 0001_initial existed.
        cursor.execute("""
            SELECT COUNT(*) FROM django_migrations
            WHERE app='crawls' AND name='0001_initial'
        """)
        has_crawls_0001 = cursor.fetchone()[0] > 0

        if has_crawls_0001:
            # Fresh install - crawls.0001_initial already created the correct schema.
            # Just clear config to avoid CHECK constraint issues.
            print(" Fresh install detected - clearing config field only")
            try:
                cursor.execute('UPDATE "crawls_crawl" SET "config" = NULL')
            except Exception as e:
                # Best-effort: a missing table/column must not abort the migration.
                print(f" Skipping config clear: {e}")
            return

        # Upgrade from 0.8.x - rebuild table to make seed_id nullable and remove CHECK constraint
        print(" Upgrading from 0.8.x - rebuilding crawls_crawl table")
        # FK enforcement must be off while the table is dropped and recreated,
        # otherwise rows referencing crawls_crawl would fail validation mid-rebuild.
        cursor.execute("PRAGMA foreign_keys=OFF")

        # Backup: snapshot all existing rows before dropping the table.
        cursor.execute("CREATE TABLE crawls_crawl_backup AS SELECT * FROM crawls_crawl")

        # Recreate without config CHECK constraint, with nullable seed_id
        cursor.execute("DROP TABLE crawls_crawl")
        cursor.execute("""
            CREATE TABLE "crawls_crawl" (
                "num_uses_failed" integer unsigned NOT NULL CHECK ("num_uses_failed" >= 0),
                "num_uses_succeeded" integer unsigned NOT NULL CHECK ("num_uses_succeeded" >= 0),
                "id" char(32) NOT NULL PRIMARY KEY,
                "created_at" datetime NOT NULL,
                "modified_at" datetime NOT NULL,
                "urls" text NOT NULL,
                "config" text,
                "max_depth" smallint unsigned NOT NULL CHECK ("max_depth" >= 0),
                "tags_str" varchar(1024) NOT NULL,
                "persona_id" char(32) NULL,
                "label" varchar(64) NOT NULL,
                "notes" text NOT NULL,
                "output_dir" varchar(512) NOT NULL,
                "status" varchar(15) NOT NULL,
                "retry_at" datetime NULL,
                "created_by_id" integer NOT NULL REFERENCES "auth_user" ("id") DEFERRABLE INITIALLY DEFERRED,
                "seed_id" char(32) NULL DEFAULT NULL,
                "schedule_id" char(32) NULL REFERENCES "crawls_crawlschedule" ("id") DEFERRABLE INITIALLY DEFERRED
            )
        """)

        # Restore data. Columns are named explicitly on both sides of the
        # INSERT...SELECT, so the backup table's column order doesn't matter.
        cursor.execute("""
            INSERT INTO "crawls_crawl" (
                "num_uses_failed", "num_uses_succeeded", "id", "created_at", "modified_at",
                "urls", "config", "max_depth", "tags_str", "persona_id", "label", "notes",
                "output_dir", "status", "retry_at", "created_by_id", "seed_id", "schedule_id"
            )
            SELECT
                "num_uses_failed", "num_uses_succeeded", "id", "created_at", "modified_at",
                "urls", "config", "max_depth", "tags_str", "persona_id", "label", "notes",
                "output_dir", "status", "retry_at", "created_by_id", "seed_id", "schedule_id"
            FROM crawls_crawl_backup
        """)

        cursor.execute("DROP TABLE crawls_crawl_backup")

        # NULL out config to avoid any invalid JSON
        cursor.execute('UPDATE "crawls_crawl" SET "config" = NULL')
        # NOTE(review): foreign_keys is left OFF on this code path; presumably
        # re-enabled by a later migration step - confirm against 0024_c/0025.
|
||||
|
||||
|
||||
class Migration(migrations.Migration):
    # Runs fix_crawls_config() forward; the raw-SQL table rebuild is not
    # reversible, so reverse is a no-op.
    # Depends on crawls.0001_initial so fix_crawls_config() can distinguish
    # fresh installs (where that migration created the schema) from upgrades.

    dependencies = [
        ('core', '0024_c_disable_fk_checks'),
        ('crawls', '0001_initial'),
    ]

    operations = [
        migrations.RunPython(fix_crawls_config, reverse_code=migrations.RunPython.noop),
    ]
|
||||
@@ -8,9 +8,7 @@ import django.db.models.deletion
|
||||
class Migration(migrations.Migration):
|
||||
|
||||
dependencies = [
|
||||
('core', '0023_new_schema'),
|
||||
('crawls', '0001_initial'),
|
||||
('machine', '0001_squashed'),
|
||||
('core', '0024_d_fix_crawls_config'),
|
||||
]
|
||||
|
||||
operations = [
|
||||
|
||||
@@ -10,6 +10,13 @@ from django.db import migrations, models
|
||||
|
||||
def populate_archiveresult_uuids(apps, schema_editor):
|
||||
"""Generate unique UUIDs for ArchiveResults that don't have one."""
|
||||
# Check if uuid column exists before trying to populate it
|
||||
with schema_editor.connection.cursor() as cursor:
|
||||
cursor.execute("PRAGMA table_info(core_archiveresult)")
|
||||
columns = [row[1] for row in cursor.fetchall()]
|
||||
if 'uuid' not in columns:
|
||||
return # uuid column doesn't exist, skip this data migration
|
||||
|
||||
ArchiveResult = apps.get_model('core', 'ArchiveResult')
|
||||
for result in ArchiveResult.objects.filter(uuid__isnull=True):
|
||||
result.uuid = uuid_compat.uuid7()
|
||||
@@ -21,6 +28,22 @@ def reverse_populate_uuids(apps, schema_editor):
|
||||
pass
|
||||
|
||||
|
||||
def remove_output_dir_if_exists(apps, schema_editor):
    """Remove output_dir columns if they exist.

    Checks each table's actual columns via PRAGMA table_info first, so the
    migration is safe to re-run on databases where the column is already gone.
    """
    with schema_editor.connection.cursor() as cursor:
        for table in ("core_archiveresult", "core_snapshot"):
            cursor.execute(f"PRAGMA table_info({table})")
            existing_columns = [row[1] for row in cursor.fetchall()]
            if 'output_dir' in existing_columns:
                cursor.execute(f"ALTER TABLE {table} DROP COLUMN output_dir")
|
||||
|
||||
|
||||
class Migration(migrations.Migration):
|
||||
|
||||
dependencies = [
|
||||
@@ -33,82 +56,90 @@ class Migration(migrations.Migration):
|
||||
migrations.RunPython(populate_archiveresult_uuids, reverse_populate_uuids),
|
||||
|
||||
# Remove output_dir fields (not needed, computed from snapshot)
|
||||
migrations.RemoveField(
|
||||
model_name='archiveresult',
|
||||
name='output_dir',
|
||||
),
|
||||
migrations.RemoveField(
|
||||
model_name='snapshot',
|
||||
name='output_dir',
|
||||
migrations.RunPython(remove_output_dir_if_exists, reverse_code=migrations.RunPython.noop),
|
||||
|
||||
# Update Django's migration state to match 0.9.x schema
|
||||
# Database already has correct types from 0.8.x, just update state
|
||||
migrations.SeparateDatabaseAndState(
|
||||
state_operations=[
|
||||
# Archiveresult field alterations
|
||||
migrations.AlterField(
|
||||
model_name='archiveresult',
|
||||
name='created_at',
|
||||
field=models.DateTimeField(db_index=True, default=django.utils.timezone.now),
|
||||
),
|
||||
migrations.AlterField(
|
||||
model_name='archiveresult',
|
||||
name='created_by',
|
||||
field=models.ForeignKey(default=archivebox.base_models.models.get_or_create_system_user_pk, on_delete=django.db.models.deletion.CASCADE, related_name='archiveresult_set', to=settings.AUTH_USER_MODEL),
|
||||
),
|
||||
migrations.AlterField(
|
||||
model_name='archiveresult',
|
||||
name='extractor',
|
||||
field=models.CharField(db_index=True, max_length=32),
|
||||
),
|
||||
# Convert id from AutoField to UUIDField (database already has UUID CHAR(32))
|
||||
migrations.AlterField(
|
||||
model_name='archiveresult',
|
||||
name='id',
|
||||
field=models.UUIDField(default=uuid_compat.uuid7, editable=False, primary_key=True, serialize=False, unique=True),
|
||||
),
|
||||
migrations.AlterField(
|
||||
model_name='archiveresult',
|
||||
name='status',
|
||||
field=models.CharField(choices=[('queued', 'Queued'), ('started', 'Started'), ('backoff', 'Waiting to retry'), ('succeeded', 'Succeeded'), ('failed', 'Failed'), ('skipped', 'Skipped')], db_index=True, default='queued', max_length=15),
|
||||
),
|
||||
|
||||
# Snapshot field alterations
|
||||
migrations.AlterField(
|
||||
model_name='snapshot',
|
||||
name='bookmarked_at',
|
||||
field=models.DateTimeField(db_index=True, default=django.utils.timezone.now),
|
||||
),
|
||||
migrations.AlterField(
|
||||
model_name='snapshot',
|
||||
name='created_at',
|
||||
field=models.DateTimeField(db_index=True, default=django.utils.timezone.now),
|
||||
),
|
||||
migrations.AlterField(
|
||||
model_name='snapshot',
|
||||
name='created_by',
|
||||
field=models.ForeignKey(default=archivebox.base_models.models.get_or_create_system_user_pk, on_delete=django.db.models.deletion.CASCADE, related_name='snapshot_set', to=settings.AUTH_USER_MODEL),
|
||||
),
|
||||
migrations.AlterField(
|
||||
model_name='snapshot',
|
||||
name='downloaded_at',
|
||||
field=models.DateTimeField(blank=True, db_index=True, default=None, editable=False, null=True),
|
||||
),
|
||||
migrations.AlterField(
|
||||
model_name='snapshot',
|
||||
name='id',
|
||||
field=models.UUIDField(default=uuid_compat.uuid7, editable=False, primary_key=True, serialize=False, unique=True),
|
||||
),
|
||||
],
|
||||
database_operations=[
|
||||
# No actual database changes needed - schema is already correct from 0.8.x
|
||||
],
|
||||
),
|
||||
|
||||
# Archiveresult field alterations
|
||||
migrations.AlterField(
|
||||
model_name='archiveresult',
|
||||
name='created_at',
|
||||
field=models.DateTimeField(db_index=True, default=django.utils.timezone.now),
|
||||
),
|
||||
migrations.AlterField(
|
||||
model_name='archiveresult',
|
||||
name='created_by',
|
||||
field=models.ForeignKey(default=None, on_delete=django.db.models.deletion.CASCADE, related_name='archiveresult_set', to=settings.AUTH_USER_MODEL),
|
||||
),
|
||||
migrations.AlterField(
|
||||
model_name='archiveresult',
|
||||
name='extractor',
|
||||
field=models.CharField(db_index=True, max_length=32),
|
||||
),
|
||||
migrations.AlterField(
|
||||
model_name='archiveresult',
|
||||
name='id',
|
||||
field=models.AutoField(editable=False, primary_key=True, serialize=False),
|
||||
),
|
||||
migrations.AlterField(
|
||||
model_name='archiveresult',
|
||||
name='status',
|
||||
field=models.CharField(choices=[('queued', 'Queued'), ('started', 'Started'), ('backoff', 'Waiting to retry'), ('succeeded', 'Succeeded'), ('failed', 'Failed'), ('skipped', 'Skipped')], db_index=True, default='queued', max_length=15),
|
||||
),
|
||||
|
||||
# Snapshot field alterations
|
||||
migrations.AlterField(
|
||||
model_name='snapshot',
|
||||
name='bookmarked_at',
|
||||
field=models.DateTimeField(db_index=True, default=django.utils.timezone.now),
|
||||
),
|
||||
migrations.AlterField(
|
||||
model_name='snapshot',
|
||||
name='created_at',
|
||||
field=models.DateTimeField(db_index=True, default=django.utils.timezone.now),
|
||||
),
|
||||
migrations.AlterField(
|
||||
model_name='snapshot',
|
||||
name='created_by',
|
||||
field=models.ForeignKey(default=None, on_delete=django.db.models.deletion.CASCADE, related_name='snapshot_set', to=settings.AUTH_USER_MODEL),
|
||||
),
|
||||
migrations.AlterField(
|
||||
model_name='snapshot',
|
||||
name='downloaded_at',
|
||||
field=models.DateTimeField(blank=True, db_index=True, default=None, editable=False, null=True),
|
||||
),
|
||||
migrations.AlterField(
|
||||
model_name='snapshot',
|
||||
name='id',
|
||||
field=models.UUIDField(default=uuid_compat.uuid7, editable=False, primary_key=True, serialize=False, unique=True),
|
||||
),
|
||||
|
||||
# SnapshotTag and Tag alterations
|
||||
migrations.AlterField(
|
||||
model_name='snapshottag',
|
||||
name='id',
|
||||
field=models.AutoField(primary_key=True, serialize=False),
|
||||
),
|
||||
migrations.AlterField(
|
||||
model_name='tag',
|
||||
name='created_by',
|
||||
field=models.ForeignKey(default=archivebox.base_models.models.get_or_create_system_user_pk, on_delete=django.db.models.deletion.CASCADE, related_name='tag_set', to=settings.AUTH_USER_MODEL),
|
||||
),
|
||||
migrations.AlterUniqueTogether(
|
||||
name='snapshottag',
|
||||
unique_together={('snapshot', 'tag')},
|
||||
# SnapshotTag and Tag alterations - state only, DB already correct
|
||||
migrations.SeparateDatabaseAndState(
|
||||
state_operations=[
|
||||
migrations.AlterField(
|
||||
model_name='snapshottag',
|
||||
name='id',
|
||||
field=models.AutoField(primary_key=True, serialize=False),
|
||||
),
|
||||
migrations.AlterField(
|
||||
model_name='tag',
|
||||
name='created_by',
|
||||
field=models.ForeignKey(default=archivebox.base_models.models.get_or_create_system_user_pk, on_delete=django.db.models.deletion.CASCADE, related_name='tag_set', to=settings.AUTH_USER_MODEL),
|
||||
),
|
||||
migrations.AlterUniqueTogether(
|
||||
name='snapshottag',
|
||||
unique_together={('snapshot', 'tag')},
|
||||
),
|
||||
],
|
||||
database_operations=[],
|
||||
),
|
||||
]
|
||||
|
||||
@@ -13,68 +13,79 @@ class Migration(migrations.Migration):
|
||||
]
|
||||
|
||||
operations = [
|
||||
# Add new output fields (keep old 'output' temporarily for migration)
|
||||
migrations.AddField(
|
||||
model_name='archiveresult',
|
||||
name='output_str',
|
||||
field=models.TextField(
|
||||
blank=True,
|
||||
default='',
|
||||
help_text='Human-readable output summary (e.g., "Downloaded 5 files")'
|
||||
),
|
||||
),
|
||||
|
||||
migrations.AddField(
|
||||
model_name='archiveresult',
|
||||
name='output_json',
|
||||
field=models.JSONField(
|
||||
null=True,
|
||||
blank=True,
|
||||
default=None,
|
||||
help_text='Structured metadata (headers, redirects, etc.) - should NOT duplicate ArchiveResult fields'
|
||||
),
|
||||
),
|
||||
|
||||
migrations.AddField(
|
||||
model_name='archiveresult',
|
||||
name='output_files',
|
||||
field=models.JSONField(
|
||||
default=dict,
|
||||
help_text='Dict of {relative_path: {metadata}} - values are empty dicts for now, extensible for future metadata'
|
||||
),
|
||||
),
|
||||
|
||||
migrations.AddField(
|
||||
model_name='archiveresult',
|
||||
name='output_size',
|
||||
field=models.BigIntegerField(
|
||||
default=0,
|
||||
help_text='Total recursive size in bytes of all output files'
|
||||
),
|
||||
),
|
||||
|
||||
migrations.AddField(
|
||||
model_name='archiveresult',
|
||||
name='output_mimetypes',
|
||||
field=models.CharField(
|
||||
max_length=512,
|
||||
blank=True,
|
||||
default='',
|
||||
help_text='CSV of mimetypes sorted by size descending'
|
||||
),
|
||||
),
|
||||
|
||||
# Add binary FK (optional)
|
||||
migrations.AddField(
|
||||
model_name='archiveresult',
|
||||
name='binary',
|
||||
field=models.ForeignKey(
|
||||
'machine.Binary',
|
||||
on_delete=models.SET_NULL,
|
||||
null=True,
|
||||
blank=True,
|
||||
related_name='archiveresults',
|
||||
help_text='Primary binary used by this hook (optional)'
|
||||
),
|
||||
# Add new output fields using SeparateDatabaseAndState to avoid table rebuilds
|
||||
migrations.SeparateDatabaseAndState(
|
||||
state_operations=[
|
||||
migrations.AddField(
|
||||
model_name='archiveresult',
|
||||
name='output_str',
|
||||
field=models.TextField(
|
||||
blank=True,
|
||||
default='',
|
||||
help_text='Human-readable output summary (e.g., "Downloaded 5 files")'
|
||||
),
|
||||
),
|
||||
migrations.AddField(
|
||||
model_name='archiveresult',
|
||||
name='output_json',
|
||||
field=models.JSONField(
|
||||
null=True,
|
||||
blank=True,
|
||||
default=None,
|
||||
help_text='Structured metadata (headers, redirects, etc.) - should NOT duplicate ArchiveResult fields'
|
||||
),
|
||||
),
|
||||
migrations.AddField(
|
||||
model_name='archiveresult',
|
||||
name='output_files',
|
||||
field=models.JSONField(
|
||||
default=dict,
|
||||
help_text='Dict of {relative_path: {metadata}} - values are empty dicts for now, extensible for future metadata'
|
||||
),
|
||||
),
|
||||
migrations.AddField(
|
||||
model_name='archiveresult',
|
||||
name='output_size',
|
||||
field=models.BigIntegerField(
|
||||
default=0,
|
||||
help_text='Total recursive size in bytes of all output files'
|
||||
),
|
||||
),
|
||||
migrations.AddField(
|
||||
model_name='archiveresult',
|
||||
name='output_mimetypes',
|
||||
field=models.CharField(
|
||||
max_length=512,
|
||||
blank=True,
|
||||
default='',
|
||||
help_text='CSV of mimetypes sorted by size descending'
|
||||
),
|
||||
),
|
||||
migrations.AddField(
|
||||
model_name='archiveresult',
|
||||
name='binary',
|
||||
field=models.ForeignKey(
|
||||
'machine.Binary',
|
||||
on_delete=models.SET_NULL,
|
||||
null=True,
|
||||
blank=True,
|
||||
related_name='archiveresults',
|
||||
help_text='Primary binary used by this hook (optional)'
|
||||
),
|
||||
),
|
||||
],
|
||||
database_operations=[
|
||||
migrations.RunSQL(
|
||||
sql="""
|
||||
ALTER TABLE core_archiveresult ADD COLUMN output_str TEXT DEFAULT '';
|
||||
ALTER TABLE core_archiveresult ADD COLUMN output_json TEXT;
|
||||
ALTER TABLE core_archiveresult ADD COLUMN output_files TEXT DEFAULT '{}';
|
||||
ALTER TABLE core_archiveresult ADD COLUMN output_size BIGINT DEFAULT 0;
|
||||
ALTER TABLE core_archiveresult ADD COLUMN output_mimetypes VARCHAR(512) DEFAULT '';
|
||||
ALTER TABLE core_archiveresult ADD COLUMN binary_id CHAR(32) REFERENCES machine_binary(id);
|
||||
""",
|
||||
reverse_sql=migrations.RunSQL.noop,
|
||||
),
|
||||
],
|
||||
),
|
||||
]
|
||||
|
||||
@@ -12,27 +12,46 @@ def migrate_output_field(apps, schema_editor):
|
||||
Logic:
|
||||
- If output contains JSON {...}, move to output_json
|
||||
- Otherwise, move to output_str
|
||||
|
||||
Use raw SQL to avoid CHECK constraint issues during migration.
|
||||
"""
|
||||
ArchiveResult = apps.get_model('core', 'ArchiveResult')
|
||||
# Use raw SQL to migrate data without triggering CHECK constraints
|
||||
with schema_editor.connection.cursor() as cursor:
|
||||
# Get all archive results
|
||||
cursor.execute("""
|
||||
SELECT id, output FROM core_archiveresult
|
||||
""")
|
||||
|
||||
for ar in ArchiveResult.objects.all().iterator():
|
||||
old_output = ar.output or ''
|
||||
for row in cursor.fetchall():
|
||||
ar_id, old_output = row
|
||||
old_output = old_output or ''
|
||||
|
||||
# Case 1: JSON output
|
||||
if old_output.strip().startswith('{'):
|
||||
try:
|
||||
parsed = json.loads(old_output)
|
||||
ar.output_json = parsed
|
||||
ar.output_str = ''
|
||||
except json.JSONDecodeError:
|
||||
# Not valid JSON, treat as string
|
||||
ar.output_str = old_output
|
||||
|
||||
# Case 2: File path or plain string
|
||||
else:
|
||||
ar.output_str = old_output
|
||||
|
||||
ar.save(update_fields=['output_str', 'output_json'])
|
||||
# Case 1: JSON output
|
||||
if old_output.strip().startswith('{'):
|
||||
try:
|
||||
# Validate it's actual JSON
|
||||
parsed = json.loads(old_output)
|
||||
# Update with JSON - cast to JSON to satisfy CHECK constraint
|
||||
json_str = json.dumps(parsed)
|
||||
cursor.execute("""
|
||||
UPDATE core_archiveresult
|
||||
SET output_str = '', output_json = json(?)
|
||||
WHERE id = ?
|
||||
""", (json_str, ar_id))
|
||||
except json.JSONDecodeError:
|
||||
# Not valid JSON, treat as string
|
||||
cursor.execute("""
|
||||
UPDATE core_archiveresult
|
||||
SET output_str = ?, output_json = NULL
|
||||
WHERE id = ?
|
||||
""", (old_output, ar_id))
|
||||
# Case 2: File path or plain string
|
||||
else:
|
||||
cursor.execute("""
|
||||
UPDATE core_archiveresult
|
||||
SET output_str = ?, output_json = NULL
|
||||
WHERE id = ?
|
||||
""", (old_output, ar_id))
|
||||
|
||||
|
||||
def reverse_migrate(apps, schema_editor):
|
||||
|
||||
@@ -16,43 +16,62 @@ class Migration(migrations.Migration):
|
||||
]
|
||||
|
||||
operations = [
|
||||
migrations.AlterField(
|
||||
model_name='archiveresult',
|
||||
name='binary',
|
||||
field=models.ForeignKey(blank=True, help_text='Primary binary used by this hook', null=True, on_delete=django.db.models.deletion.SET_NULL, related_name='archiveresults', to='machine.binary'),
|
||||
# Update Django's state only - database already has correct schema from 0029
|
||||
migrations.SeparateDatabaseAndState(
|
||||
state_operations=[
|
||||
migrations.AlterField(
|
||||
model_name='archiveresult',
|
||||
name='binary',
|
||||
field=models.ForeignKey(blank=True, help_text='Primary binary used by this hook', null=True, on_delete=django.db.models.deletion.SET_NULL, related_name='archiveresults', to='machine.binary'),
|
||||
),
|
||||
migrations.AlterField(
|
||||
model_name='archiveresult',
|
||||
name='output_files',
|
||||
field=models.JSONField(default=dict, help_text='Dict of {relative_path: {metadata}}'),
|
||||
),
|
||||
migrations.AlterField(
|
||||
model_name='archiveresult',
|
||||
name='output_json',
|
||||
field=models.JSONField(blank=True, default=None, help_text='Structured metadata (headers, redirects, etc.)', null=True),
|
||||
),
|
||||
migrations.AlterField(
|
||||
model_name='archiveresult',
|
||||
name='output_mimetypes',
|
||||
field=models.CharField(blank=True, default='', help_text='CSV of mimetypes sorted by size', max_length=512),
|
||||
),
|
||||
migrations.AlterField(
|
||||
model_name='archiveresult',
|
||||
name='output_size',
|
||||
field=models.BigIntegerField(default=0, help_text='Total bytes of all output files'),
|
||||
),
|
||||
migrations.AlterField(
|
||||
model_name='archiveresult',
|
||||
name='output_str',
|
||||
field=models.TextField(blank=True, default='', help_text='Human-readable output summary'),
|
||||
),
|
||||
migrations.AlterField(
|
||||
model_name='archiveresult',
|
||||
name='uuid',
|
||||
field=models.UUIDField(blank=True, db_index=True, default=uuid_compat.uuid7, null=True),
|
||||
),
|
||||
],
|
||||
database_operations=[
|
||||
# No database changes needed - columns already exist with correct types
|
||||
],
|
||||
),
|
||||
migrations.AlterField(
|
||||
model_name='archiveresult',
|
||||
name='output_files',
|
||||
field=models.JSONField(default=dict, help_text='Dict of {relative_path: {metadata}}'),
|
||||
),
|
||||
migrations.AlterField(
|
||||
model_name='archiveresult',
|
||||
name='output_json',
|
||||
field=models.JSONField(blank=True, default=None, help_text='Structured metadata (headers, redirects, etc.)', null=True),
|
||||
),
|
||||
migrations.AlterField(
|
||||
model_name='archiveresult',
|
||||
name='output_mimetypes',
|
||||
field=models.CharField(blank=True, default='', help_text='CSV of mimetypes sorted by size', max_length=512),
|
||||
),
|
||||
migrations.AlterField(
|
||||
model_name='archiveresult',
|
||||
name='output_size',
|
||||
field=models.BigIntegerField(default=0, help_text='Total bytes of all output files'),
|
||||
),
|
||||
migrations.AlterField(
|
||||
model_name='archiveresult',
|
||||
name='output_str',
|
||||
field=models.TextField(blank=True, default='', help_text='Human-readable output summary'),
|
||||
),
|
||||
migrations.AlterField(
|
||||
model_name='archiveresult',
|
||||
name='uuid',
|
||||
field=models.UUIDField(blank=True, db_index=True, default=uuid_compat.uuid7, null=True),
|
||||
),
|
||||
migrations.AddConstraint(
|
||||
model_name='snapshot',
|
||||
constraint=models.UniqueConstraint(fields=('timestamp',), name='unique_timestamp'),
|
||||
# Add unique constraint without table rebuild
|
||||
migrations.SeparateDatabaseAndState(
|
||||
state_operations=[
|
||||
migrations.AddConstraint(
|
||||
model_name='snapshot',
|
||||
constraint=models.UniqueConstraint(fields=('timestamp',), name='unique_timestamp'),
|
||||
),
|
||||
],
|
||||
database_operations=[
|
||||
migrations.RunSQL(
|
||||
sql="CREATE UNIQUE INDEX IF NOT EXISTS unique_timestamp ON core_snapshot (timestamp);",
|
||||
reverse_sql="DROP INDEX IF EXISTS unique_timestamp;",
|
||||
),
|
||||
],
|
||||
),
|
||||
]
|
||||
|
||||
@@ -10,20 +10,35 @@ class Migration(migrations.Migration):
|
||||
]
|
||||
|
||||
operations = [
|
||||
migrations.RenameField(
|
||||
model_name='archiveresult',
|
||||
old_name='extractor',
|
||||
new_name='plugin',
|
||||
),
|
||||
migrations.AddField(
|
||||
model_name='archiveresult',
|
||||
name='hook_name',
|
||||
field=models.CharField(
|
||||
blank=True,
|
||||
default='',
|
||||
max_length=255,
|
||||
db_index=True,
|
||||
help_text='Full filename of the hook that executed (e.g., on_Snapshot__50_wget.py)'
|
||||
),
|
||||
# Use SeparateDatabaseAndState to avoid table rebuilds that would re-add CHECK constraints
|
||||
migrations.SeparateDatabaseAndState(
|
||||
state_operations=[
|
||||
migrations.RenameField(
|
||||
model_name='archiveresult',
|
||||
old_name='extractor',
|
||||
new_name='plugin',
|
||||
),
|
||||
migrations.AddField(
|
||||
model_name='archiveresult',
|
||||
name='hook_name',
|
||||
field=models.CharField(
|
||||
blank=True,
|
||||
default='',
|
||||
max_length=255,
|
||||
db_index=True,
|
||||
help_text='Full filename of the hook that executed (e.g., on_Snapshot__50_wget.py)'
|
||||
),
|
||||
),
|
||||
],
|
||||
database_operations=[
|
||||
migrations.RunSQL(
|
||||
sql="""
|
||||
ALTER TABLE core_archiveresult RENAME COLUMN extractor TO plugin;
|
||||
ALTER TABLE core_archiveresult ADD COLUMN hook_name VARCHAR(255) DEFAULT '' NOT NULL;
|
||||
CREATE INDEX IF NOT EXISTS core_archiveresult_hook_name_idx ON core_archiveresult (hook_name);
|
||||
""",
|
||||
reverse_sql=migrations.RunSQL.noop,
|
||||
),
|
||||
],
|
||||
),
|
||||
]
|
||||
|
||||
@@ -11,13 +11,27 @@ class Migration(migrations.Migration):
|
||||
]
|
||||
|
||||
operations = [
|
||||
migrations.AddField(
|
||||
model_name='snapshot',
|
||||
name='current_step',
|
||||
field=models.PositiveSmallIntegerField(
|
||||
default=0,
|
||||
db_index=True,
|
||||
help_text='Current hook step being executed (0-9). Used for sequential hook execution.'
|
||||
),
|
||||
# Use SeparateDatabaseAndState to avoid table rebuild that would fail on config NOT NULL constraint
|
||||
migrations.SeparateDatabaseAndState(
|
||||
state_operations=[
|
||||
migrations.AddField(
|
||||
model_name='snapshot',
|
||||
name='current_step',
|
||||
field=models.PositiveSmallIntegerField(
|
||||
default=0,
|
||||
db_index=True,
|
||||
help_text='Current hook step being executed (0-9). Used for sequential hook execution.'
|
||||
),
|
||||
),
|
||||
],
|
||||
database_operations=[
|
||||
migrations.RunSQL(
|
||||
sql="""
|
||||
ALTER TABLE core_snapshot ADD COLUMN current_step SMALLINT UNSIGNED DEFAULT 0 NOT NULL;
|
||||
CREATE INDEX IF NOT EXISTS core_snapshot_current_step_idx ON core_snapshot (current_step);
|
||||
""",
|
||||
reverse_sql=migrations.RunSQL.noop,
|
||||
),
|
||||
],
|
||||
),
|
||||
]
|
||||
|
||||
@@ -54,7 +54,7 @@ class Migration(migrations.Migration):
|
||||
|
||||
dependencies = [
|
||||
('core', '0034_snapshot_current_step'),
|
||||
('crawls', '0004_alter_crawl_output_dir'),
|
||||
('crawls', '0005_drop_seed_id_column'),
|
||||
]
|
||||
|
||||
operations = [
|
||||
@@ -64,16 +64,24 @@ class Migration(migrations.Migration):
|
||||
reverse_code=migrations.RunPython.noop,
|
||||
),
|
||||
|
||||
# Step 2: Make crawl non-nullable
|
||||
migrations.AlterField(
|
||||
model_name='snapshot',
|
||||
name='crawl',
|
||||
field=models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='snapshot_set', to='crawls.crawl'),
|
||||
),
|
||||
|
||||
# Step 3: Remove created_by field
|
||||
migrations.RemoveField(
|
||||
model_name='snapshot',
|
||||
name='created_by',
|
||||
# Step 2 & 3: Update Django's state only - leave created_by_id column in database (unused but harmless)
|
||||
migrations.SeparateDatabaseAndState(
|
||||
state_operations=[
|
||||
# Make crawl non-nullable
|
||||
migrations.AlterField(
|
||||
model_name='snapshot',
|
||||
name='crawl',
|
||||
field=models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='snapshot_set', to='crawls.crawl'),
|
||||
),
|
||||
# Remove created_by field from Django's state
|
||||
migrations.RemoveField(
|
||||
model_name='snapshot',
|
||||
name='created_by',
|
||||
),
|
||||
],
|
||||
database_operations=[
|
||||
# No database changes - crawl_id already exists and NOT NULL constraint will be enforced by model
|
||||
# created_by_id column remains in database but is unused
|
||||
],
|
||||
),
|
||||
]
|
||||
|
||||
@@ -10,10 +10,18 @@ class Migration(migrations.Migration):
|
||||
]
|
||||
|
||||
operations = [
|
||||
# Remove created_by field from ArchiveResult
|
||||
# Remove created_by field from ArchiveResult (state only)
|
||||
# No data migration needed - created_by can be accessed via snapshot.crawl.created_by
|
||||
migrations.RemoveField(
|
||||
model_name='archiveresult',
|
||||
name='created_by',
|
||||
# Leave created_by_id column in database (unused but harmless, avoids table rebuild)
|
||||
migrations.SeparateDatabaseAndState(
|
||||
state_operations=[
|
||||
migrations.RemoveField(
|
||||
model_name='archiveresult',
|
||||
name='created_by',
|
||||
),
|
||||
],
|
||||
database_operations=[
|
||||
# No database changes - leave created_by_id column in place to avoid table rebuild
|
||||
],
|
||||
),
|
||||
]
|
||||
|
||||
@@ -0,0 +1,44 @@
|
||||
# Generated by Django 6.0 on 2025-12-29 06:45

from django.db import migrations, models


class Migration(migrations.Migration):
    """State-only cleanup of ArchiveResult/Snapshot fields.

    Wraps every operation in SeparateDatabaseAndState so Django's model state
    is brought in line with the current models WITHOUT rebuilding the SQLite
    tables: the dropped columns stay in the database (unused but harmless).
    """

    dependencies = [
        ('core', '0036_remove_archiveresult_created_by'),
    ]

    operations = [
        # Update Django's state only - database columns remain for backwards compat
        migrations.SeparateDatabaseAndState(
            state_operations=[
                # output_dir removed from both models (now derived, not stored)
                migrations.RemoveField(
                    model_name='archiveresult',
                    name='output_dir',
                ),
                migrations.RemoveField(
                    model_name='snapshot',
                    name='output_dir',
                ),
                # config relaxed to allow NULL/blank
                migrations.AlterField(
                    model_name='archiveresult',
                    name='config',
                    field=models.JSONField(blank=True, default=dict, null=True),
                ),
                migrations.AlterField(
                    model_name='snapshot',
                    name='config',
                    field=models.JSONField(blank=True, default=dict, null=True),
                ),
                # tags M2M re-declared through the explicit SnapshotTag table
                migrations.AlterField(
                    model_name='snapshot',
                    name='tags',
                    field=models.ManyToManyField(blank=True, related_name='snapshot_set', through='core.SnapshotTag', through_fields=('snapshot', 'tag'), to='core.tag'),
                ),
            ],
            database_operations=[
                # No database changes - columns remain in place to avoid table rebuilds
            ],
        ),
    ]
|
||||
84
archivebox/core/migrations/0038_fix_missing_columns.py
Normal file
84
archivebox/core/migrations/0038_fix_missing_columns.py
Normal file
@@ -0,0 +1,84 @@
|
||||
# Add missing columns to ArchiveResult and remove created_by_id from Snapshot

from django.db import migrations, models
import django.utils.timezone


def add_columns_if_not_exist(apps, schema_editor):
    """Add columns to ArchiveResult only if they don't already exist.

    Idempotent: inspects the live table via PRAGMA table_info first, so
    re-running against an already-patched database is a no-op.

    Fix: use ``schema_editor.connection`` rather than the global
    ``django.db.connection`` so the statements run against whichever
    database alias this migration was invoked for (matches the style of
    the other raw-SQL data migrations in this app).
    """
    with schema_editor.connection.cursor() as cursor:
        # Get existing columns (row[1] is the column name in PRAGMA output)
        cursor.execute("PRAGMA table_info(core_archiveresult)")
        existing_columns = {row[1] for row in cursor.fetchall()}

        # Add num_uses_failed if it doesn't exist
        if 'num_uses_failed' not in existing_columns:
            cursor.execute("ALTER TABLE core_archiveresult ADD COLUMN num_uses_failed integer unsigned NOT NULL DEFAULT 0 CHECK (num_uses_failed >= 0)")

        # Add num_uses_succeeded if it doesn't exist
        if 'num_uses_succeeded' not in existing_columns:
            cursor.execute("ALTER TABLE core_archiveresult ADD COLUMN num_uses_succeeded integer unsigned NOT NULL DEFAULT 0 CHECK (num_uses_succeeded >= 0)")

        # Add config if it doesn't exist
        if 'config' not in existing_columns:
            cursor.execute("ALTER TABLE core_archiveresult ADD COLUMN config text NULL")

        # Add retry_at (plus its index) if it doesn't exist
        if 'retry_at' not in existing_columns:
            cursor.execute("ALTER TABLE core_archiveresult ADD COLUMN retry_at datetime NULL")
            cursor.execute("CREATE INDEX IF NOT EXISTS core_archiveresult_retry_at_idx ON core_archiveresult(retry_at)")


class Migration(migrations.Migration):
    """Backfill missing ArchiveResult columns; drop Snapshot.created_by_id.

    The column additions are done via RunPython (idempotent raw SQL above)
    while Django's state gets the matching AddField operations.
    """

    dependencies = [
        ('core', '0037_remove_archiveresult_output_dir_and_more'),
    ]

    operations = [
        # Add missing columns to ArchiveResult
        migrations.SeparateDatabaseAndState(
            state_operations=[
                migrations.AddField(
                    model_name='archiveresult',
                    name='num_uses_failed',
                    field=models.PositiveIntegerField(default=0),
                ),
                migrations.AddField(
                    model_name='archiveresult',
                    name='num_uses_succeeded',
                    field=models.PositiveIntegerField(default=0),
                ),
                migrations.AddField(
                    model_name='archiveresult',
                    name='config',
                    field=models.JSONField(blank=True, default=dict, null=True),
                ),
                migrations.AddField(
                    model_name='archiveresult',
                    name='retry_at',
                    field=models.DateTimeField(blank=True, db_index=True, default=django.utils.timezone.now, null=True),
                ),
            ],
            database_operations=[
                migrations.RunPython(add_columns_if_not_exist, reverse_code=migrations.RunPython.noop),
            ],
        ),

        # Drop created_by_id from Snapshot (database only, already removed from model in 0035)
        migrations.SeparateDatabaseAndState(
            state_operations=[
                # No state changes - field already removed in 0035
            ],
            database_operations=[
                migrations.RunSQL(
                    sql="""
                    -- Drop index first, then column
                    DROP INDEX IF EXISTS core_snapshot_created_by_id_6dbd6149;
                    ALTER TABLE core_snapshot DROP COLUMN created_by_id;
                    """,
                    reverse_sql=migrations.RunSQL.noop,
                ),
            ],
        ),
    ]
|
||||
30
archivebox/core/migrations/0039_fix_num_uses_values.py
Normal file
30
archivebox/core/migrations/0039_fix_num_uses_values.py
Normal file
@@ -0,0 +1,30 @@
|
||||
# Fix num_uses_failed and num_uses_succeeded string values to integers

from django.db import migrations

# Reset counter/depth fields that ended up holding literal text values
# (including their own column names) back to 0. typeof() catches any
# text-typed value; the name-equality check is kept as a defensive extra.
FIX_NUM_USES_SQL = """
            UPDATE core_snapshot
            SET num_uses_failed = 0
            WHERE typeof(num_uses_failed) = 'text' OR num_uses_failed = 'num_uses_failed';

            UPDATE core_snapshot
            SET num_uses_succeeded = 0
            WHERE typeof(num_uses_succeeded) = 'text' OR num_uses_succeeded = 'num_uses_succeeded';

            UPDATE core_snapshot
            SET depth = 0
            WHERE typeof(depth) = 'text' OR depth = 'depth';
            """


class Migration(migrations.Migration):
    """Data repair: coerce bad text values in Snapshot counters back to 0."""

    dependencies = [
        ('core', '0038_fix_missing_columns'),
    ]

    operations = [
        # Fix string values that got inserted as literals instead of integers
        migrations.RunSQL(
            sql=FIX_NUM_USES_SQL,
            reverse_sql=migrations.RunSQL.noop,
        ),
    ]
|
||||
@@ -911,7 +911,7 @@ class Snapshot(ModelWithOutputDir, ModelWithConfig, ModelWithNotes, ModelWithHea
|
||||
)
|
||||
|
||||
merged = 0
|
||||
for dup in duplicates.iterator():
|
||||
for dup in duplicates.iterator(chunk_size=500):
|
||||
snapshots = list(
|
||||
cls.objects
|
||||
.filter(url=dup['url'], timestamp=dup['timestamp'])
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -91,7 +91,11 @@ def plugin_thumbnail(context, result) -> str:
|
||||
'output_path': output_path,
|
||||
'plugin': plugin,
|
||||
})
|
||||
return mark_safe(tpl.render(ctx))
|
||||
rendered = tpl.render(ctx)
|
||||
# Only return non-empty content (strip whitespace to check)
|
||||
if rendered.strip():
|
||||
return mark_safe(rendered)
|
||||
return ''
|
||||
except Exception:
|
||||
return ''
|
||||
|
||||
@@ -119,7 +123,11 @@ def plugin_embed(context, result) -> str:
|
||||
'output_path': output_path,
|
||||
'plugin': plugin,
|
||||
})
|
||||
return mark_safe(tpl.render(ctx))
|
||||
rendered = tpl.render(ctx)
|
||||
# Only return non-empty content (strip whitespace to check)
|
||||
if rendered.strip():
|
||||
return mark_safe(rendered)
|
||||
return ''
|
||||
except Exception:
|
||||
return ''
|
||||
|
||||
@@ -147,7 +155,11 @@ def plugin_fullscreen(context, result) -> str:
|
||||
'output_path': output_path,
|
||||
'plugin': plugin,
|
||||
})
|
||||
return mark_safe(tpl.render(ctx))
|
||||
rendered = tpl.render(ctx)
|
||||
# Only return non-empty content (strip whitespace to check)
|
||||
if rendered.strip():
|
||||
return mark_safe(rendered)
|
||||
return ''
|
||||
except Exception:
|
||||
return ''
|
||||
|
||||
|
||||
@@ -539,7 +539,7 @@ from django.http import JsonResponse
|
||||
def live_progress_view(request):
|
||||
"""Simple JSON endpoint for live progress status - used by admin progress monitor."""
|
||||
try:
|
||||
from workers.orchestrator import Orchestrator
|
||||
from archivebox.workers.orchestrator import Orchestrator
|
||||
from archivebox.crawls.models import Crawl
|
||||
from archivebox.core.models import Snapshot, ArchiveResult
|
||||
from django.db.models import Case, When, Value, IntegerField
|
||||
|
||||
@@ -4,3 +4,8 @@ from django.apps import AppConfig
|
||||
class CrawlsConfig(AppConfig):
|
||||
default_auto_field = "django.db.models.BigAutoField"
|
||||
name = "archivebox.crawls"
|
||||
label = "crawls"
|
||||
|
||||
def ready(self):
|
||||
"""Import models to register state machines with the registry"""
|
||||
from archivebox.crawls.models import CrawlMachine # noqa: F401
|
||||
|
||||
@@ -17,39 +17,62 @@ class Migration(migrations.Migration):
|
||||
]
|
||||
|
||||
operations = [
|
||||
# Remove the seed foreign key from Crawl
|
||||
migrations.RemoveField(
|
||||
model_name='crawl',
|
||||
name='seed',
|
||||
# Remove the seed foreign key from Crawl (no-op if already removed by core/0024_d)
|
||||
migrations.RunPython(
|
||||
code=lambda apps, schema_editor: None,
|
||||
reverse_code=migrations.RunPython.noop,
|
||||
),
|
||||
# Delete the Seed model entirely
|
||||
migrations.DeleteModel(
|
||||
name='Seed',
|
||||
# Delete the Seed model entirely (already done)
|
||||
migrations.RunPython(
|
||||
code=lambda apps, schema_editor: None,
|
||||
reverse_code=migrations.RunPython.noop,
|
||||
),
|
||||
# Update fields to new schema
|
||||
migrations.AlterField(
|
||||
model_name='crawl',
|
||||
name='created_by',
|
||||
field=models.ForeignKey(default=archivebox.base_models.models.get_or_create_system_user_pk, on_delete=django.db.models.deletion.CASCADE, to=settings.AUTH_USER_MODEL),
|
||||
),
|
||||
migrations.AlterField(
|
||||
model_name='crawl',
|
||||
name='id',
|
||||
field=models.UUIDField(default=uuid_compat.uuid7, editable=False, primary_key=True, serialize=False, unique=True),
|
||||
),
|
||||
migrations.AlterField(
|
||||
model_name='crawl',
|
||||
name='urls',
|
||||
field=models.TextField(help_text='Newline-separated list of URLs to crawl'),
|
||||
),
|
||||
migrations.AlterField(
|
||||
model_name='crawlschedule',
|
||||
name='created_by',
|
||||
field=models.ForeignKey(default=archivebox.base_models.models.get_or_create_system_user_pk, on_delete=django.db.models.deletion.CASCADE, to=settings.AUTH_USER_MODEL),
|
||||
),
|
||||
migrations.AlterField(
|
||||
model_name='crawlschedule',
|
||||
name='id',
|
||||
field=models.UUIDField(default=uuid_compat.uuid7, editable=False, primary_key=True, serialize=False, unique=True),
|
||||
# Drop seed_id column if it exists, then update Django's migration state
|
||||
migrations.SeparateDatabaseAndState(
|
||||
state_operations=[
|
||||
# Update fields to new schema
|
||||
migrations.AlterField(
|
||||
model_name='crawl',
|
||||
name='created_by',
|
||||
field=models.ForeignKey(default=archivebox.base_models.models.get_or_create_system_user_pk, on_delete=django.db.models.deletion.CASCADE, to=settings.AUTH_USER_MODEL),
|
||||
),
|
||||
migrations.AlterField(
|
||||
model_name='crawl',
|
||||
name='id',
|
||||
field=models.UUIDField(default=uuid_compat.uuid7, editable=False, primary_key=True, serialize=False, unique=True),
|
||||
),
|
||||
migrations.AlterField(
|
||||
model_name='crawl',
|
||||
name='urls',
|
||||
field=models.TextField(help_text='Newline-separated list of URLs to crawl'),
|
||||
),
|
||||
migrations.AlterField(
|
||||
model_name='crawlschedule',
|
||||
name='created_by',
|
||||
field=models.ForeignKey(default=archivebox.base_models.models.get_or_create_system_user_pk, on_delete=django.db.models.deletion.CASCADE, to=settings.AUTH_USER_MODEL),
|
||||
),
|
||||
migrations.AlterField(
|
||||
model_name='crawlschedule',
|
||||
name='id',
|
||||
field=models.UUIDField(default=uuid_compat.uuid7, editable=False, primary_key=True, serialize=False, unique=True),
|
||||
),
|
||||
],
|
||||
database_operations=[
|
||||
# Drop seed table and NULL out seed_id FK values
|
||||
migrations.RunSQL(
|
||||
sql="""
|
||||
PRAGMA foreign_keys=OFF;
|
||||
|
||||
-- NULL out seed_id values in crawls_crawl
|
||||
UPDATE crawls_crawl SET seed_id = NULL;
|
||||
|
||||
-- Drop seed table if it exists
|
||||
DROP TABLE IF EXISTS crawls_seed;
|
||||
|
||||
PRAGMA foreign_keys=ON;
|
||||
""",
|
||||
reverse_sql=migrations.RunSQL.noop,
|
||||
),
|
||||
],
|
||||
),
|
||||
]
|
||||
|
||||
@@ -8,12 +8,21 @@ class Migration(migrations.Migration):
|
||||
|
||||
dependencies = [
|
||||
('crawls', '0002_drop_seed_model'),
|
||||
('core', '0024_d_fix_crawls_config'), # Depends on config fix
|
||||
]
|
||||
|
||||
operations = [
|
||||
migrations.AlterField(
|
||||
model_name='crawl',
|
||||
name='output_dir',
|
||||
field=models.FilePathField(blank=True, default='', path=pathlib.PurePosixPath('/private/tmp/test_archivebox_migrations/archive')),
|
||||
# Update Django's state only to avoid table rebuild that would re-apply old constraints
|
||||
migrations.SeparateDatabaseAndState(
|
||||
state_operations=[
|
||||
migrations.AlterField(
|
||||
model_name='crawl',
|
||||
name='output_dir',
|
||||
field=models.FilePathField(blank=True, default='', path=pathlib.PurePosixPath('/private/tmp/test_archivebox_migrations/archive')),
|
||||
),
|
||||
],
|
||||
database_operations=[
|
||||
# No database changes - output_dir type change is cosmetic for Django admin
|
||||
],
|
||||
),
|
||||
]
|
||||
|
||||
@@ -11,9 +11,17 @@ class Migration(migrations.Migration):
|
||||
]
|
||||
|
||||
operations = [
|
||||
migrations.AlterField(
|
||||
model_name='crawl',
|
||||
name='output_dir',
|
||||
field=models.FilePathField(blank=True, default='', path=pathlib.PurePosixPath('/private/tmp/archivebox-makemigrations/archive')),
|
||||
# Update Django's state only to avoid table rebuild that would re-apply old constraints
|
||||
migrations.SeparateDatabaseAndState(
|
||||
state_operations=[
|
||||
migrations.AlterField(
|
||||
model_name='crawl',
|
||||
name='output_dir',
|
||||
field=models.FilePathField(blank=True, default='', path=pathlib.PurePosixPath('/private/tmp/archivebox-makemigrations/archive')),
|
||||
),
|
||||
],
|
||||
database_operations=[
|
||||
# No database changes - output_dir type change is cosmetic for Django admin
|
||||
],
|
||||
),
|
||||
]
|
||||
|
||||
28
archivebox/crawls/migrations/0005_drop_seed_id_column.py
Normal file
28
archivebox/crawls/migrations/0005_drop_seed_id_column.py
Normal file
@@ -0,0 +1,28 @@
|
||||
# Drop seed_id column from Django's state (leave in database to avoid FK issues)

from django.db import migrations


class Migration(migrations.Migration):
    """Remove Crawl.seed from Django's model state only.

    The seed_id column is deliberately left in the database: dropping it
    would trigger a SQLite table rebuild and risk FK mismatch errors
    against crawls_crawlschedule. The orphaned crawls_seed table can be
    dropped manually by a DBA if desired.
    """

    dependencies = [
        ('crawls', '0004_alter_crawl_output_dir'),
    ]

    operations = [
        # Update Django's state only - leave seed_id column in database (unused but harmless)
        # This avoids FK mismatch errors with crawls_crawlschedule
        migrations.SeparateDatabaseAndState(
            state_operations=[
                # Remove seed field from Django's migration state
                migrations.RemoveField(
                    model_name='crawl',
                    name='seed',
                ),
            ],
            database_operations=[
                # No database changes - seed_id column remains to avoid FK rebuild issues
                # crawls_seed table can be manually dropped by DBA if needed
            ],
        ),
    ]
|
||||
@@ -0,0 +1,35 @@
|
||||
# Generated by Django 6.0 on 2025-12-29 06:45

import pathlib
from django.db import migrations, models


class Migration(migrations.Migration):
    """State-only sync for the crawls app: relax config, refresh output_dir,
    and delete the Seed model from Django's state (its table was already
    dropped in an earlier migration)."""

    dependencies = [
        ('crawls', '0005_drop_seed_id_column'),
    ]

    operations = [
        # Update Django's state only - database already correct
        migrations.SeparateDatabaseAndState(
            state_operations=[
                migrations.AlterField(
                    model_name='crawl',
                    name='config',
                    field=models.JSONField(blank=True, default=dict, null=True),
                ),
                # NOTE(review): this path was baked in by makemigrations on the
                # author's machine; harmless for a state-only op, but confirm it
                # is never used to touch the database
                migrations.AlterField(
                    model_name='crawl',
                    name='output_dir',
                    field=models.FilePathField(blank=True, default='', path=pathlib.PurePosixPath('/Users/squash/Local/Code/archiveboxes/archivebox-nue/data/archive')),
                ),
                migrations.DeleteModel(
                    name='Seed',
                ),
            ],
            database_operations=[
                # No database changes - Seed table already dropped in 0005
            ],
        ),
    ]
|
||||
@@ -65,7 +65,7 @@ class Crawl(ModelWithOutputDir, ModelWithConfig, ModelWithHealthStats, ModelWith
|
||||
modified_at = models.DateTimeField(auto_now=True)
|
||||
|
||||
urls = models.TextField(blank=False, null=False, help_text='Newline-separated list of URLs to crawl')
|
||||
config = models.JSONField(default=dict)
|
||||
config = models.JSONField(default=dict, null=True, blank=True)
|
||||
max_depth = models.PositiveSmallIntegerField(default=0, validators=[MinValueValidator(0), MaxValueValidator(4)])
|
||||
tags_str = models.CharField(max_length=1024, blank=True, null=False, default='')
|
||||
persona_id = models.UUIDField(null=True, blank=True)
|
||||
@@ -77,7 +77,7 @@ class Crawl(ModelWithOutputDir, ModelWithConfig, ModelWithHealthStats, ModelWith
|
||||
status = ModelWithStateMachine.StatusField(choices=ModelWithStateMachine.StatusChoices, default=ModelWithStateMachine.StatusChoices.QUEUED)
|
||||
retry_at = ModelWithStateMachine.RetryAtField(default=timezone.now)
|
||||
|
||||
state_machine_name = 'crawls.models.CrawlMachine'
|
||||
state_machine_name = 'archivebox.crawls.models.CrawlMachine'
|
||||
retry_at_field_name = 'retry_at'
|
||||
state_field_name = 'status'
|
||||
StatusChoices = ModelWithStateMachine.StatusChoices
|
||||
@@ -190,7 +190,6 @@ class Crawl(ModelWithOutputDir, ModelWithConfig, ModelWithHealthStats, ModelWith
|
||||
'status': Snapshot.INITIAL_STATE,
|
||||
'retry_at': timezone.now(),
|
||||
'timestamp': str(timezone.now().timestamp()),
|
||||
'created_by_id': self.created_by_id,
|
||||
'depth': 0,
|
||||
},
|
||||
)
|
||||
@@ -290,7 +289,7 @@ class Crawl(ModelWithOutputDir, ModelWithConfig, ModelWithHealthStats, ModelWith
|
||||
'timestamp': timestamp or str(timezone.now().timestamp()),
|
||||
'status': Snapshot.INITIAL_STATE,
|
||||
'retry_at': timezone.now(),
|
||||
'created_by_id': self.created_by_id,
|
||||
# Note: created_by removed in 0.9.0 - Snapshot inherits from Crawl
|
||||
}
|
||||
)
|
||||
|
||||
|
||||
@@ -7,8 +7,13 @@ class MachineConfig(AppConfig):
|
||||
default_auto_field = 'django.db.models.BigAutoField'
|
||||
|
||||
name = 'archivebox.machine'
|
||||
label = 'machine' # Explicit label for migrations
|
||||
verbose_name = 'Machine Info'
|
||||
|
||||
def ready(self):
|
||||
"""Import models to register state machines with the registry"""
|
||||
from archivebox.machine import models # noqa: F401
|
||||
|
||||
|
||||
def register_admin(admin_site):
|
||||
from archivebox.machine.admin import register_admin
|
||||
|
||||
@@ -85,6 +85,12 @@ class Migration(migrations.Migration):
|
||||
('version', models.CharField(blank=True, default=None, max_length=32)),
|
||||
('sha256', models.CharField(blank=True, default=None, max_length=64)),
|
||||
('machine', models.ForeignKey(blank=True, default=None, on_delete=django.db.models.deletion.CASCADE, to='machine.machine')),
|
||||
# Fields added in migration 0005 (included here for fresh installs)
|
||||
('binproviders', models.CharField(blank=True, default='env', max_length=127)),
|
||||
('output_dir', models.CharField(blank=True, default='', max_length=255)),
|
||||
('overrides', models.JSONField(blank=True, default=dict)),
|
||||
('retry_at', models.DateTimeField(blank=True, db_index=True, default=django.utils.timezone.now, null=True)),
|
||||
('status', models.CharField(choices=[('queued', 'Queued'), ('started', 'Started'), ('succeeded', 'Succeeded'), ('failed', 'Failed')], db_index=True, default='queued', max_length=16)),
|
||||
# dependency FK removed - Dependency model deleted
|
||||
],
|
||||
options={
|
||||
|
||||
@@ -0,0 +1,104 @@
|
||||
# Generated by Django 6.0 on 2025-12-29 06:45
|
||||
|
||||
import django.db.models.deletion
|
||||
import django.utils.timezone
|
||||
from archivebox.uuid_compat import uuid7
|
||||
from django.db import migrations, models
|
||||
|
||||
|
||||
class Migration(migrations.Migration):
|
||||
|
||||
dependencies = [
|
||||
('machine', '0004_drop_dependency_table'),
|
||||
]
|
||||
|
||||
operations = [
|
||||
# Update Django's state only - database already has correct schema
|
||||
migrations.SeparateDatabaseAndState(
|
||||
state_operations=[
|
||||
migrations.AddField(
|
||||
model_name='binary',
|
||||
name='binproviders',
|
||||
field=models.CharField(blank=True, default='env', help_text='Comma-separated list of allowed providers: apt,brew,pip,npm,env', max_length=127),
|
||||
),
|
||||
migrations.AddField(
|
||||
model_name='binary',
|
||||
name='output_dir',
|
||||
field=models.CharField(blank=True, default='', help_text='Directory where installation hook logs are stored', max_length=255),
|
||||
),
|
||||
migrations.AddField(
|
||||
model_name='binary',
|
||||
name='overrides',
|
||||
field=models.JSONField(blank=True, default=dict, help_text="Provider-specific overrides: {'apt': {'packages': ['pkg']}, ...}"),
|
||||
),
|
||||
migrations.AddField(
|
||||
model_name='binary',
|
||||
name='retry_at',
|
||||
field=models.DateTimeField(blank=True, db_index=True, default=django.utils.timezone.now, help_text='When to retry this binary installation', null=True),
|
||||
),
|
||||
migrations.AddField(
|
||||
model_name='binary',
|
||||
name='status',
|
||||
field=models.CharField(choices=[('queued', 'Queued'), ('started', 'Started'), ('succeeded', 'Succeeded'), ('failed', 'Failed')], db_index=True, default='queued', max_length=16),
|
||||
),
|
||||
migrations.AlterField(
|
||||
model_name='binary',
|
||||
name='abspath',
|
||||
field=models.CharField(blank=True, default='', max_length=255),
|
||||
),
|
||||
migrations.AlterField(
|
||||
model_name='binary',
|
||||
name='binprovider',
|
||||
field=models.CharField(blank=True, default='', help_text='Provider that successfully installed this binary', max_length=31),
|
||||
),
|
||||
migrations.AlterField(
|
||||
model_name='binary',
|
||||
name='id',
|
||||
field=models.UUIDField(default=uuid7, editable=False, primary_key=True, serialize=False, unique=True),
|
||||
),
|
||||
migrations.AlterField(
|
||||
model_name='binary',
|
||||
name='machine',
|
||||
field=models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='machine.machine'),
|
||||
),
|
||||
migrations.AlterField(
|
||||
model_name='binary',
|
||||
name='name',
|
||||
field=models.CharField(blank=True, db_index=True, default='', max_length=63),
|
||||
),
|
||||
migrations.AlterField(
|
||||
model_name='binary',
|
||||
name='sha256',
|
||||
field=models.CharField(blank=True, default='', max_length=64),
|
||||
),
|
||||
migrations.AlterField(
|
||||
model_name='binary',
|
||||
name='version',
|
||||
field=models.CharField(blank=True, default='', max_length=32),
|
||||
),
|
||||
migrations.AlterField(
|
||||
model_name='machine',
|
||||
name='config',
|
||||
field=models.JSONField(blank=True, default=dict, help_text='Machine-specific config overrides (e.g., resolved binary paths like WGET_BINARY)', null=True),
|
||||
),
|
||||
migrations.AlterField(
|
||||
model_name='machine',
|
||||
name='id',
|
||||
field=models.UUIDField(default=uuid7, editable=False, primary_key=True, serialize=False, unique=True),
|
||||
),
|
||||
migrations.AlterField(
|
||||
model_name='machine',
|
||||
name='stats',
|
||||
field=models.JSONField(blank=True, default=dict, null=True),
|
||||
),
|
||||
migrations.AlterField(
|
||||
model_name='networkinterface',
|
||||
name='id',
|
||||
field=models.UUIDField(default=uuid7, editable=False, primary_key=True, serialize=False, unique=True),
|
||||
),
|
||||
],
|
||||
database_operations=[
|
||||
# No database changes - schema already correct from previous migrations
|
||||
],
|
||||
),
|
||||
]
|
||||
@@ -44,8 +44,8 @@ class Machine(ModelWithHealthStats):
|
||||
os_platform = models.CharField(max_length=63, default=None, null=False)
|
||||
os_release = models.CharField(max_length=63, default=None, null=False)
|
||||
os_kernel = models.CharField(max_length=255, default=None, null=False)
|
||||
stats = models.JSONField(default=dict, null=False)
|
||||
config = models.JSONField(default=dict, null=False, blank=True,
|
||||
stats = models.JSONField(default=dict, null=True, blank=True)
|
||||
config = models.JSONField(default=dict, null=True, blank=True,
|
||||
help_text="Machine-specific config overrides (e.g., resolved binary paths like WGET_BINARY)")
|
||||
num_uses_failed = models.PositiveIntegerField(default=0)
|
||||
num_uses_succeeded = models.PositiveIntegerField(default=0)
|
||||
@@ -213,7 +213,7 @@ class Binary(ModelWithHealthStats):
|
||||
num_uses_failed = models.PositiveIntegerField(default=0)
|
||||
num_uses_succeeded = models.PositiveIntegerField(default=0)
|
||||
|
||||
state_machine_name: str = 'machine.models.BinaryMachine'
|
||||
state_machine_name: str = 'archivebox.machine.models.BinaryMachine'
|
||||
|
||||
objects: BinaryManager = BinaryManager()
|
||||
|
||||
|
||||
@@ -4,3 +4,4 @@ from django.apps import AppConfig
|
||||
class SessionsConfig(AppConfig):
|
||||
default_auto_field = "django.db.models.BigAutoField"
|
||||
name = "archivebox.personas"
|
||||
label = "personas"
|
||||
|
||||
@@ -21,7 +21,7 @@
|
||||
# # COOKIES_TXT_FILE: '/path/to/cookies.txt',
|
||||
# # CHROME_USER_DATA_DIR: '/path/to/chrome/user/data/dir',
|
||||
# # CHECK_SSL_VALIDITY: False,
|
||||
# # SAVE_ARCHIVE_DOT_ORG: True,
|
||||
# # SAVE_ARCHIVEDOTORG: True,
|
||||
# # CHROME_BINARY: 'chromium'
|
||||
# # ...
|
||||
# # }
|
||||
|
||||
@@ -63,7 +63,7 @@ def test_ripgrep_hook_detects_binary_from_path():
|
||||
|
||||
def test_ripgrep_hook_skips_when_backend_not_ripgrep():
|
||||
"""Test that ripgrep hook exits silently when search backend is not ripgrep."""
|
||||
hook_path = Path(__file__).parent.parent / 'on_Crawl__00_validate_ripgrep.py'
|
||||
hook_path = Path(__file__).parent.parent / 'on_Crawl__00_install_ripgrep.py'
|
||||
|
||||
env = os.environ.copy()
|
||||
env['SEARCH_BACKEND_ENGINE'] = 'sqlite' # Different backend
|
||||
@@ -82,7 +82,7 @@ def test_ripgrep_hook_skips_when_backend_not_ripgrep():
|
||||
|
||||
def test_ripgrep_hook_handles_absolute_path():
|
||||
"""Test that ripgrep hook works when RIPGREP_BINARY is an absolute path."""
|
||||
hook_path = Path(__file__).parent.parent / 'on_Crawl__00_validate_ripgrep.py'
|
||||
hook_path = Path(__file__).parent.parent / 'on_Crawl__00_install_ripgrep.py'
|
||||
|
||||
rg_path = shutil.which('rg')
|
||||
if not rg_path:
|
||||
@@ -222,7 +222,7 @@ def test_ripgrep_only_detected_when_backend_enabled():
|
||||
if not shutil.which('rg'):
|
||||
pytest.skip("ripgrep not installed")
|
||||
|
||||
hook_path = Path(__file__).parent.parent / 'on_Crawl__00_validate_ripgrep.py'
|
||||
hook_path = Path(__file__).parent.parent / 'on_Crawl__00_install_ripgrep.py'
|
||||
|
||||
# Test 1: With ripgrep backend - should output Binary record
|
||||
env1 = os.environ.copy()
|
||||
|
||||
@@ -360,9 +360,11 @@
|
||||
<div class="row header-bottom-frames">
|
||||
{% for result_info in archiveresults %}
|
||||
{% if result_info.result %}
|
||||
{% plugin_thumbnail result_info.result as thumbnail_html %}
|
||||
{% if thumbnail_html %}
|
||||
<div class="col-lg-2">
|
||||
<div class="card{% if forloop.first %} selected-card{% endif %}">
|
||||
{% plugin_thumbnail result_info.result %}
|
||||
{{ thumbnail_html }}
|
||||
<div class="card-body">
|
||||
<a href="{{ result_info.path }}" title="Open in new tab..." target="_blank" rel="noopener">
|
||||
<p class="card-text"><code>{{ result_info.path }}</code></p>
|
||||
@@ -373,6 +375,7 @@
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
{% endif %}
|
||||
{% endif %}
|
||||
{% endfor %}
|
||||
|
||||
@@ -395,7 +398,7 @@
|
||||
</div>
|
||||
</div>
|
||||
</header>
|
||||
<iframe sandbox="allow-same-origin allow-top-navigation-by-user-activation allow-scripts allow-forms" class="full-page-iframe" src="{{singlefile_path}}" name="preview"></iframe>
|
||||
<iframe sandbox="allow-same-origin allow-top-navigation-by-user-activation allow-scripts allow-forms" class="full-page-iframe" src="{{best_preview_path}}" name="preview"></iframe>
|
||||
|
||||
<script>
|
||||
/*! jQuery v3.2.1 -ajax,-ajax/jsonp,-ajax/load,-ajax/parseXML,-ajax/script,-ajax/var/location,-ajax/var/nonce,-ajax/var/rquery,-ajax/xhr,-manipulation/_evalUrl,-event/ajax,-effects,-effects/Tween,-effects/animatedSelector | (c) JS Foundation and other contributors | jquery.org/license */
|
||||
|
||||
@@ -429,19 +429,6 @@ class TestInstallHookOutput(unittest.TestCase):
|
||||
self.assertEqual(data['name'], 'wget')
|
||||
self.assertTrue(data['abspath'].startswith('/'))
|
||||
|
||||
def test_install_hook_outputs_dependency(self):
|
||||
"""Install hook should output Dependency JSONL when binary not found."""
|
||||
hook_output = json.dumps({
|
||||
'type': 'Dependency',
|
||||
'bin_name': 'wget',
|
||||
'bin_providers': 'apt,brew,env',
|
||||
})
|
||||
|
||||
data = json.loads(hook_output)
|
||||
self.assertEqual(data['type'], 'Dependency')
|
||||
self.assertEqual(data['bin_name'], 'wget')
|
||||
self.assertIn('apt', data['bin_providers'])
|
||||
|
||||
def test_install_hook_outputs_machine_config(self):
|
||||
"""Install hook should output Machine config update JSONL."""
|
||||
hook_output = json.dumps({
|
||||
|
||||
@@ -459,7 +459,7 @@ class TestFilesystemMigration08to09(unittest.TestCase):
|
||||
'SAVE_MERCURY': 'True',
|
||||
'SAVE_PDF': 'True',
|
||||
'SAVE_MEDIA': 'True',
|
||||
'SAVE_ARCHIVE_DOT_ORG': 'True',
|
||||
'SAVE_ARCHIVEDOTORG': 'True',
|
||||
'SAVE_HEADERS': 'True',
|
||||
'SAVE_HTMLTOTEXT': 'True',
|
||||
'SAVE_GIT': 'True',
|
||||
|
||||
@@ -949,19 +949,30 @@ def seed_0_8_data(db_path: Path) -> Dict[str, List[Dict]]:
|
||||
('core', '0072_rename_added_snapshot_bookmarked_at_and_more'),
|
||||
('core', '0073_rename_created_archiveresult_created_at_and_more'),
|
||||
('core', '0074_alter_snapshot_downloaded_at'),
|
||||
('core', '0023_new_schema'),
|
||||
# For 0.8.x: DO NOT record 0023_new_schema - it replaces 0023-0074 for fresh installs
|
||||
# We already recorded 0023-0074 above, so Django will know the state
|
||||
# For 0.8.x: Record original machine migrations (before squashing)
|
||||
# DO NOT record 0001_squashed here - it replaces 0001-0004 for fresh installs
|
||||
('machine', '0001_initial'),
|
||||
('machine', '0002_alter_machine_stats_installedbinary'),
|
||||
('machine', '0003_alter_installedbinary_options_and_more'),
|
||||
('machine', '0004_alter_installedbinary_abspath_and_more'),
|
||||
('machine', '0001_squashed'),
|
||||
# Then the new migrations after squashing
|
||||
('machine', '0002_rename_custom_cmds_to_overrides'),
|
||||
('machine', '0003_alter_dependency_id_alter_installedbinary_dependency_and_more'),
|
||||
('machine', '0004_drop_dependency_table'),
|
||||
# Crawls must come before core.0024 because 0024_b depends on it
|
||||
('crawls', '0001_initial'),
|
||||
# Core 0024 migrations chain (in dependency order)
|
||||
('core', '0024_b_clear_config_fields'),
|
||||
('core', '0024_c_disable_fk_checks'),
|
||||
('core', '0024_d_fix_crawls_config'),
|
||||
('core', '0024_snapshot_crawl'),
|
||||
('core', '0024_f_add_snapshot_config'),
|
||||
('core', '0025_allow_duplicate_urls_per_crawl'),
|
||||
# For 0.8.x: Record original api migration (before squashing)
|
||||
# DO NOT record 0001_squashed here - it replaces 0001 for fresh installs
|
||||
('api', '0001_initial'),
|
||||
('api', '0001_squashed'),
|
||||
('api', '0002_alter_apitoken_options'),
|
||||
('api', '0003_rename_user_apitoken_created_by_apitoken_abid_and_more'),
|
||||
('api', '0004_alter_apitoken_id_alter_apitoken_uuid'),
|
||||
@@ -970,11 +981,9 @@ def seed_0_8_data(db_path: Path) -> Dict[str, List[Dict]]:
|
||||
('api', '0007_alter_apitoken_created_by'),
|
||||
('api', '0008_alter_apitoken_created_alter_apitoken_created_by_and_more'),
|
||||
('api', '0009_rename_created_apitoken_created_at_and_more'),
|
||||
('crawls', '0001_initial'),
|
||||
('crawls', '0002_drop_seed_model'),
|
||||
('crawls', '0003_alter_crawl_output_dir'),
|
||||
('crawls', '0004_alter_crawl_output_dir'),
|
||||
('core', '0035_snapshot_crawl_non_nullable_remove_created_by'),
|
||||
# Note: crawls.0001_initial moved earlier (before core.0024) due to dependencies
|
||||
# Stop here - 0.8.x ends at core.0025, crawls.0001, and we want to TEST the later migrations
|
||||
# Do NOT record 0026+ as they need to be tested during migration
|
||||
]
|
||||
|
||||
for app, name in migrations:
|
||||
@@ -1000,7 +1009,7 @@ def run_archivebox(data_dir: Path, args: list, timeout: int = 60, env: dict = No
|
||||
base_env['USE_COLOR'] = 'False'
|
||||
base_env['SHOW_PROGRESS'] = 'False'
|
||||
# Disable ALL extractors for faster tests (can be overridden by env parameter)
|
||||
base_env['SAVE_ARCHIVE_DOT_ORG'] = 'False'
|
||||
base_env['SAVE_ARCHIVEDOTORG'] = 'False'
|
||||
base_env['SAVE_TITLE'] = 'False'
|
||||
base_env['SAVE_FAVICON'] = 'False'
|
||||
base_env['SAVE_WGET'] = 'False'
|
||||
|
||||
@@ -4,4 +4,5 @@ from django.apps import AppConfig
|
||||
class WorkersConfig(AppConfig):
|
||||
default_auto_field = 'django.db.models.BigAutoField'
|
||||
name = 'archivebox.workers'
|
||||
label = 'workers'
|
||||
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
# mkdir -p ~/archivebox/data && cd ~/archivebox
|
||||
# curl -fsSL 'https://docker-compose.archivebox.io' > docker-compose.yml
|
||||
# docker compose run archivebox version
|
||||
# docker compose run archivebox config --set SAVE_ARCHIVE_DOT_ORG=False
|
||||
# docker compose run archivebox config --set SAVE_ARCHIVEDOTORG=False
|
||||
# docker compose run archivebox add --depth=1 'https://news.ycombinator.com'
|
||||
# docker compose run -T archivebox add < bookmarks.txt
|
||||
# docker compose up -d && open 'https://localhost:8000'
|
||||
@@ -35,7 +35,7 @@ services:
|
||||
# - MEDIA_MAX_SIZE=750m # increase this filesize limit to allow archiving larger audio/video files
|
||||
# - TIMEOUT=60 # increase this number to 120+ seconds if you see many slow downloads timing out
|
||||
# - CHECK_SSL_VALIDITY=True # set to False to disable strict SSL checking (allows saving URLs w/ broken certs)
|
||||
# - SAVE_ARCHIVE_DOT_ORG=True # set to False to disable submitting all URLs to Archive.org when archiving
|
||||
# - SAVE_ARCHIVEDOTORG=True # set to False to disable submitting all URLs to Archive.org when archiving
|
||||
# - USER_AGENT="..." # set a custom USER_AGENT to avoid being blocked as a bot
|
||||
# ...
|
||||
# For more info, see: https://github.com/ArchiveBox/ArchiveBox/wiki/Docker#configuration
|
||||
|
||||
@@ -85,9 +85,9 @@ dependencies = [
|
||||
### Binary/Package Management
|
||||
"abx-pkg>=0.1.0", # for: detecting, versioning, and installing binaries via apt/brew/pip/npm
|
||||
"gallery-dl>=1.31.1",
|
||||
|
||||
### UUID7 backport for Python <3.14
|
||||
"uuid7>=0.1.0; python_version < '3.14'", # for: uuid7 support on Python 3.13 (provides uuid_extensions module)
|
||||
"pytest-django>=4.11.1",
|
||||
]
|
||||
|
||||
[project.optional-dependencies]
|
||||
@@ -183,6 +183,7 @@ ignore = ["E731", "E303", "E266", "E241", "E222"]
|
||||
|
||||
[tool.pytest.ini_options]
|
||||
testpaths = [ "tests" ]
|
||||
DJANGO_SETTINGS_MODULE = "archivebox.core.settings"
|
||||
|
||||
[tool.mypy]
|
||||
mypy_path = "archivebox,archivebox/typings"
|
||||
|
||||
@@ -24,7 +24,7 @@ def disable_extractors_dict():
|
||||
"SAVE_HEADERS": "false",
|
||||
"USE_GIT": "false",
|
||||
"SAVE_MEDIA": "false",
|
||||
"SAVE_ARCHIVE_DOT_ORG": "false",
|
||||
"SAVE_ARCHIVEDOTORG": "false",
|
||||
"SAVE_TITLE": "false",
|
||||
"SAVE_FAVICON": "false",
|
||||
})
|
||||
|
||||
@@ -33,7 +33,7 @@ def test_background_hooks_dont_block_parser_extractors(tmp_path, process):
|
||||
"SAVE_HEADERS": "false",
|
||||
"USE_GIT": "false",
|
||||
"SAVE_MEDIA": "false",
|
||||
"SAVE_ARCHIVE_DOT_ORG": "false",
|
||||
"SAVE_ARCHIVEDOTORG": "false",
|
||||
"SAVE_TITLE": "false",
|
||||
"SAVE_FAVICON": "false",
|
||||
# Enable chrome session (required for background hooks to start)
|
||||
@@ -133,7 +133,7 @@ def test_parser_extractors_emit_snapshot_jsonl(tmp_path, process):
|
||||
"SAVE_HEADERS": "false",
|
||||
"USE_GIT": "false",
|
||||
"SAVE_MEDIA": "false",
|
||||
"SAVE_ARCHIVE_DOT_ORG": "false",
|
||||
"SAVE_ARCHIVEDOTORG": "false",
|
||||
"SAVE_TITLE": "false",
|
||||
"SAVE_FAVICON": "false",
|
||||
"USE_CHROME": "false",
|
||||
|
||||
14
uv.lock
generated
14
uv.lock
generated
@@ -88,6 +88,7 @@ dependencies = [
|
||||
{ name = "py-machineid", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
|
||||
{ name = "pydantic", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
|
||||
{ name = "pydantic-settings", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
|
||||
{ name = "pytest-django", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
|
||||
{ name = "python-benedict", extra = ["io", "parse"], marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
|
||||
{ name = "python-crontab", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
|
||||
{ name = "python-statemachine", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
|
||||
@@ -186,6 +187,7 @@ requires-dist = [
|
||||
{ name = "py-machineid", specifier = ">=0.6.0" },
|
||||
{ name = "pydantic", specifier = ">=2.8.0" },
|
||||
{ name = "pydantic-settings", specifier = ">=2.5.2" },
|
||||
{ name = "pytest-django", specifier = ">=4.11.1" },
|
||||
{ name = "python-benedict", extras = ["io", "parse"], specifier = ">=0.33.2" },
|
||||
{ name = "python-crontab", specifier = ">=3.2.0" },
|
||||
{ name = "python-ldap", marker = "extra == 'ldap'", specifier = ">=3.4.3" },
|
||||
@@ -1848,6 +1850,18 @@ wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/3b/ab/b3226f0bd7cdcf710fbede2b3548584366da3b19b5021e74f5bde2a8fa3f/pytest-9.0.2-py3-none-any.whl", hash = "sha256:711ffd45bf766d5264d487b917733b453d917afd2b0ad65223959f59089f875b", size = 374801, upload-time = "2025-12-06T21:30:49.154Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "pytest-django"
|
||||
version = "4.11.1"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
dependencies = [
|
||||
{ name = "pytest", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
|
||||
]
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/b1/fb/55d580352db26eb3d59ad50c64321ddfe228d3d8ac107db05387a2fadf3a/pytest_django-4.11.1.tar.gz", hash = "sha256:a949141a1ee103cb0e7a20f1451d355f83f5e4a5d07bdd4dcfdd1fd0ff227991", size = 86202, upload-time = "2025-04-03T18:56:09.338Z" }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/be/ac/bd0608d229ec808e51a21044f3f2f27b9a37e7a0ebaca7247882e67876af/pytest_django-4.11.1-py3-none-any.whl", hash = "sha256:1b63773f648aa3d8541000c26929c1ea63934be1cfa674c76436966d73fe6a10", size = 25281, upload-time = "2025-04-03T18:56:07.678Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "python-benedict"
|
||||
version = "0.35.0"
|
||||
|
||||
Reference in New Issue
Block a user