use full dotted paths for all archivebox imports, add migrations and more fixes

This commit is contained in:
Nick Sweeting
2025-12-29 00:47:08 -08:00
parent 1e4d3ffd11
commit f4e7820533
61 changed files with 1082 additions and 2985 deletions

View File

@@ -763,7 +763,7 @@ The configuration is documented here: **[Configuration Wiki](https://github.com/
<br/>
TIMEOUT=240 # default: 60 add more seconds on slower networks
CHECK_SSL_VALIDITY=False # default: True False = allow saving URLs w/ bad SSL
SAVE_ARCHIVE_DOT_ORG=False # default: True False = disable Archive.org saving
SAVE_ARCHIVEDOTORG=False # default: True False = disable Archive.org saving
MAX_MEDIA_SIZE=1500m # default: 750m raise/lower youtubedl output size
<br/>
PUBLIC_INDEX=True # default: True whether anon users can view index
@@ -959,7 +959,7 @@ archivebox add 'https://docs.google.com/document/d/12345somePrivateDocument'
archivebox add 'https://vimeo.com/somePrivateVideo'
# without first disabling saving to Archive.org:
archivebox config --set SAVE_ARCHIVE_DOT_ORG=False # disable saving all URLs in Archive.org
archivebox config --set SAVE_ARCHIVEDOTORG=False # disable saving all URLs in Archive.org
# restrict the main index, Snapshot content, and Add Page to authenticated users as-needed:
archivebox config --set PUBLIC_INDEX=False

View File

@@ -26,10 +26,10 @@ ASCII_LOGO = """
PACKAGE_DIR = Path(__file__).resolve().parent
# Add PACKAGE_DIR to sys.path - required for Django migrations to import models
# Migrations reference models like 'machine.Binary' which need to be importable
if str(PACKAGE_DIR) not in sys.path:
sys.path.append(str(PACKAGE_DIR))
# # Add PACKAGE_DIR to sys.path - required for Django migrations to import models
# # Migrations reference models like 'machine.Binary' which need to be importable
# if str(PACKAGE_DIR) not in sys.path:
# sys.path.append(str(PACKAGE_DIR))
os.environ['DJANGO_SETTINGS_MODULE'] = 'archivebox.core.settings'
os.environ['TZ'] = 'UTC'

View File

@@ -5,6 +5,7 @@ from django.apps import AppConfig
class APIConfig(AppConfig):
name = 'archivebox.api'
label = 'api'
def register_admin(admin_site):

View File

@@ -94,7 +94,7 @@ class OrchestratorSchema(Schema):
@router.get("/orchestrator", response=OrchestratorSchema, url_name="get_orchestrator")
def get_orchestrator(request):
"""Get the orchestrator status and all worker queues."""
from workers.orchestrator import Orchestrator
from archivebox.workers.orchestrator import Orchestrator
from workers.worker import CrawlWorker, SnapshotWorker, ArchiveResultWorker
orchestrator = Orchestrator()

View File

@@ -73,7 +73,7 @@ class ModelWithUUID(models.Model):
return f'/api/v1/docs#/{self._meta.app_label.title()}%20Models/api_v1_{self._meta.app_label}_get_{self._meta.db_table}'
def as_json(self, keys: Iterable[str] = ()) -> dict:
default_keys = ('id', 'created_at', 'modified_at', 'created_by_id')
default_keys = ('id', 'created_at', 'modified_at')
return {key: getattr(self, key) for key in (keys or default_keys) if hasattr(self, key)}
@@ -119,7 +119,7 @@ class ModelWithHealthStats(models.Model):
class ModelWithConfig(models.Model):
"""Mixin for models with a JSON config field."""
config = models.JSONField(default=dict, null=False, blank=False, editable=True)
config = models.JSONField(default=dict, null=True, blank=True, editable=True)
class Meta:
abstract = True

View File

@@ -56,7 +56,7 @@ def add(urls: str | list[str],
from archivebox.core.models import Snapshot
from archivebox.crawls.models import Crawl
from archivebox.base_models.models import get_or_create_system_user_pk
from workers.orchestrator import Orchestrator
from archivebox.workers.orchestrator import Orchestrator
created_by_id = created_by_id or get_or_create_system_user_pk()

View File

@@ -78,7 +78,7 @@ def discover_outlinks(
from archivebox.core.models import Snapshot, ArchiveResult
from archivebox.crawls.models import Crawl
from archivebox.config import CONSTANTS
from workers.orchestrator import Orchestrator
from archivebox.workers.orchestrator import Orchestrator
created_by_id = get_or_create_system_user_pk()
is_tty = sys.stdout.isatty()

View File

@@ -96,7 +96,7 @@ def run_plugins(
TYPE_SNAPSHOT, TYPE_ARCHIVERESULT
)
from archivebox.core.models import Snapshot, ArchiveResult
from workers.orchestrator import Orchestrator
from archivebox.workers.orchestrator import Orchestrator
is_tty = sys.stdout.isatty()

View File

@@ -13,11 +13,9 @@ from archivebox.misc.util import docstring, enforce_types
@enforce_types
def init(force: bool=False, quick: bool=False, install: bool=False, setup: bool=False) -> None:
def init(force: bool=False, quick: bool=False, install: bool=False) -> None:
"""Initialize a new ArchiveBox collection in the current directory"""
install = install or setup
from archivebox.config import CONSTANTS, VERSION, DATA_DIR
from archivebox.config.common import SERVER_CONFIG
from archivebox.config.collection import write_config_file
@@ -128,7 +126,8 @@ def init(force: bool=False, quick: bool=False, install: bool=False, setup: bool=
print(f' [yellow]√ Added {len(orphaned_data_dir_links)} orphaned links from existing archive directories.[/yellow]')
if pending_links:
Snapshot.objects.create_from_dicts(list(pending_links.values()))
for link_dict in pending_links.values():
Snapshot.from_jsonl(link_dict)
# Hint for orphaned snapshot directories
print()
@@ -187,7 +186,6 @@ def init(force: bool=False, quick: bool=False, install: bool=False, setup: bool=
@click.option('--force', '-f', is_flag=True, help='Ignore unrecognized files in current directory and initialize anyway')
@click.option('--quick', '-q', is_flag=True, help='Run any updates or migrations without rechecking all snapshot dirs')
@click.option('--install', '-s', is_flag=True, help='Automatically install dependencies and extras used for archiving')
@click.option('--setup', '-s', is_flag=True, help='DEPRECATED: equivalent to --install')
@docstring(init.__doc__)
def main(**kwargs) -> None:
init(**kwargs)

View File

@@ -85,7 +85,7 @@ def install(dry_run: bool=False) -> None:
print()
# Run the crawl synchronously (this triggers on_Crawl hooks)
from workers.orchestrator import Orchestrator
from archivebox.workers.orchestrator import Orchestrator
orchestrator = Orchestrator(exit_on_idle=True)
orchestrator.runloop()

View File

@@ -37,7 +37,7 @@ def orchestrator(daemon: bool = False, watch: bool = False) -> int:
0: All work completed successfully
1: Error occurred
"""
from workers.orchestrator import Orchestrator
from archivebox.workers.orchestrator import Orchestrator
if Orchestrator.is_running():
print('[yellow]Orchestrator is already running[/yellow]')

View File

@@ -74,7 +74,7 @@ def server(runserver_args: Iterable[str]=(SERVER_CONFIG.BIND_ADDR,),
tail_multiple_worker_logs,
is_port_in_use,
)
from workers.orchestrator import Orchestrator
from archivebox.workers.orchestrator import Orchestrator
import sys
# Check if port is already in use

View File

@@ -163,7 +163,7 @@ def create_snapshots(
# If --plugins is passed, run the orchestrator for those plugins
if plugins:
from workers.orchestrator import Orchestrator
from archivebox.workers.orchestrator import Orchestrator
rprint(f'[blue]Running plugins: {plugins or "all"}...[/blue]', file=sys.stderr)
orchestrator = Orchestrator(exit_on_idle=True)
orchestrator.runloop()

View File

@@ -160,7 +160,7 @@ def process_all_db_snapshots(batch_size: int = 100) -> dict:
total = Snapshot.objects.count()
print(f'[*] Processing {total} snapshots from database...')
for snapshot in Snapshot.objects.iterator():
for snapshot in Snapshot.objects.iterator(chunk_size=batch_size):
# Reconcile index.json with DB
snapshot.reconcile_with_index_json()
@@ -209,7 +209,7 @@ def process_filtered_snapshots(
total = snapshots.count()
print(f'[*] Found {total} matching snapshots')
for snapshot in snapshots.iterator():
for snapshot in snapshots.iterator(chunk_size=batch_size):
# Reconcile index.json with DB
snapshot.reconcile_with_index_json()

View File

@@ -17,7 +17,7 @@ TEST_CONFIG = {
'DATA_DIR': 'data.tests',
'SAVE_ARCHIVE_DOT_ORG': 'False',
'SAVE_ARCHIVEDOTORG': 'False',
'SAVE_TITLE': 'False',
'USE_CURL': 'False',

View File

@@ -32,7 +32,7 @@ from unittest.mock import patch, MagicMock
TEST_CONFIG = {
'USE_COLOR': 'False',
'SHOW_PROGRESS': 'False',
'SAVE_ARCHIVE_DOT_ORG': 'False',
'SAVE_ARCHIVEDOTORG': 'False',
'SAVE_TITLE': 'True', # Fast extractor
'SAVE_FAVICON': 'False',
'SAVE_WGET': 'False',

View File

@@ -216,6 +216,29 @@ def get_config(
if snapshot and hasattr(snapshot, "config") and snapshot.config:
config.update(snapshot.config)
# Normalize all aliases to canonical names (after all sources merged)
# This handles aliases that came from user/crawl/snapshot configs, not just env
try:
from archivebox.hooks import discover_plugin_configs
plugin_configs = discover_plugin_configs()
aliases_to_normalize = {} # {alias_key: canonical_key}
# Build alias mapping from all plugin schemas
for plugin_name, schema in plugin_configs.items():
for canonical_key, prop_schema in schema.get('properties', {}).items():
for alias in prop_schema.get('x-aliases', []):
aliases_to_normalize[alias] = canonical_key
# Normalize: copy alias values to canonical keys (aliases take precedence)
for alias_key, canonical_key in aliases_to_normalize.items():
if alias_key in config:
# Alias exists - copy to canonical key (overwriting any default)
config[canonical_key] = config[alias_key]
# Remove alias from config to keep it clean
del config[alias_key]
except ImportError:
pass
return config

View File

@@ -5,8 +5,12 @@ from django.apps import AppConfig
class CoreConfig(AppConfig):
    """Django AppConfig for the core app (full dotted path, short label)."""
    name = 'archivebox.core'
    label = 'core'

    def ready(self):
        """Register the archivebox.core.admin_site as the main django admin site"""
        # Deferred import: admin_site pulls in Django admin machinery that is
        # only safe to import once the app registry is ready.
        from archivebox.core.admin_site import register_admin_site
        register_admin_site()

        # Import models to register state machines with the registry
        from archivebox.core import models  # noqa: F401

View File

@@ -0,0 +1,57 @@
# Data migration to clear config fields that may contain invalid JSON
# This runs before 0025 to prevent CHECK constraint failures
from django.db import migrations
def clear_config_fields(apps, schema_editor):
    """Clear all config/stats JSON fields in related tables to avoid JSON
    CHECK-constraint validation errors during later schema migrations.

    Uses raw SQL (not historical models) because the tables may predate the
    current model definitions. Tables that don't exist yet (fresh installs or
    partial upgrades) are detected via sqlite_master and skipped, so every
    entry in the table list can be listed unconditionally.
    """
    # Disable foreign key checks temporarily to allow updates
    with schema_editor.connection.cursor() as cursor:
        cursor.execute("PRAGMA foreign_keys=OFF")

    # BUGFIX: the crawls_crawlschedule entry was previously guarded with
    # `if 'crawlschedule' in dir() else None`, which is always False at module
    # scope, so that table's config was silently never cleared. The existence
    # check below already handles missing tables, so list it unconditionally.
    tables_to_clear = [
        ('crawls_seed', 'config'),
        ('crawls_crawl', 'config'),
        ('crawls_crawlschedule', 'config'),
        ('machine_machine', 'stats'),
        ('machine_machine', 'config'),
    ]

    for table_name, field_name in tables_to_clear:
        try:
            with schema_editor.connection.cursor() as cursor:
                # Check if table exists first
                cursor.execute(f"SELECT name FROM sqlite_master WHERE type='table' AND name='{table_name}'")
                if not cursor.fetchone():
                    print(f" Skipping {table_name}.{field_name}: table does not exist")
                    continue

                # Set all to empty JSON object
                cursor.execute(f"UPDATE {table_name} SET {field_name} = '{{}}' WHERE {field_name} IS NOT NULL")
                print(f" Cleared {field_name} in {table_name}: {cursor.rowcount} rows")
        except Exception as e:
            # Best-effort: a failure on one table must not abort the migration
            print(f" Skipping {table_name}.{field_name}: {e}")

    # Re-enable foreign key checks
    with schema_editor.connection.cursor() as cursor:
        cursor.execute("PRAGMA foreign_keys=ON")
class Migration(migrations.Migration):
    """Data migration: clears config/stats JSON fields before core.0025 so
    table rebuilds don't trip JSON CHECK-constraint failures."""

    dependencies = [
        ('core', '0023_new_schema'),
        ('crawls', '0001_initial'),
        ('machine', '0001_squashed'),
    ]

    operations = [
        # Destructive data cleanup; reverse is a no-op so rollbacks don't fail.
        migrations.RunPython(clear_config_fields, reverse_code=migrations.RunPython.noop),
    ]

View File

@@ -0,0 +1,28 @@
# Disable foreign key checks before 0025 to prevent CHECK constraint validation errors
from django.db import migrations
def disable_fk_checks(apps, schema_editor):
    """Temporarily disable SQLite foreign key enforcement for later migrations."""
    connection = schema_editor.connection
    with connection.cursor() as cursor:
        cursor.execute("PRAGMA foreign_keys=OFF")
        print(" Disabled foreign key checks")


def enable_fk_checks(apps, schema_editor):
    """Re-enable SQLite foreign key enforcement (reverse of disable_fk_checks)."""
    connection = schema_editor.connection
    with connection.cursor() as cursor:
        cursor.execute("PRAGMA foreign_keys=ON")
        print(" Enabled foreign key checks")
class Migration(migrations.Migration):
    """Disables FK checks before core.0025 to prevent CHECK-constraint
    validation errors during the subsequent table rebuilds.

    NOTE(review): the forward operation only turns FK checks OFF; they are
    re-enabled only on rollback (reverse_code). Confirm a later migration or
    connection teardown restores PRAGMA foreign_keys=ON on the forward path.
    """

    dependencies = [
        ('core', '0024_b_clear_config_fields'),
    ]

    operations = [
        migrations.RunPython(disable_fk_checks, reverse_code=enable_fk_checks),
    ]

View File

@@ -0,0 +1,93 @@
# Fix crawls_crawl config field to avoid CHECK constraint errors during table rebuilds
from django.db import migrations
def fix_crawls_config(apps, schema_editor):
    """
    Rebuild crawls_crawl table to fix CHECK constraints and make seed_id nullable.

    Only runs for UPGRADES from 0.8.x (when crawls.0001_initial didn't exist yet).
    For fresh installs, crawls.0001_initial creates the correct schema.

    NOTE(review): uses raw SQL throughout because historical model state does
    not match the physical 0.8.x table shape — confirm against an actual
    0.8.x database before release.
    """
    with schema_editor.connection.cursor() as cursor:
        # Check if this is an upgrade from old 0.8.x or a fresh install
        # In fresh installs, crawls.0001_initial was applied, creating seed FK
        # In upgrades, the table was created by old migrations before 0001_initial existed
        cursor.execute("""
            SELECT COUNT(*) FROM django_migrations
            WHERE app='crawls' AND name='0001_initial'
        """)
        has_crawls_0001 = cursor.fetchone()[0] > 0

        if has_crawls_0001:
            # Fresh install - crawls.0001_initial already created the correct schema
            # Just clear config to avoid CHECK constraint issues
            print(" Fresh install detected - clearing config field only")
            try:
                cursor.execute('UPDATE "crawls_crawl" SET "config" = NULL')
            except Exception as e:
                # best-effort: a missing table/column shouldn't abort the migration
                print(f" Skipping config clear: {e}")
            return

        # Upgrade from 0.8.x - rebuild table to make seed_id nullable and remove CHECK constraint
        print(" Upgrading from 0.8.x - rebuilding crawls_crawl table")
        # NOTE(review): SQLite ignores PRAGMA foreign_keys inside an open
        # transaction; if this migration runs atomically the pragma may be a
        # no-op — verify, or rely on the earlier 0024_c migration's pragma.
        cursor.execute("PRAGMA foreign_keys=OFF")

        # Backup the existing rows before dropping the table
        cursor.execute("CREATE TABLE crawls_crawl_backup AS SELECT * FROM crawls_crawl")

        # Recreate without config CHECK constraint, with nullable seed_id.
        # NOTE(review): seed_id intentionally has no REFERENCES clause here
        # (unlike created_by_id/schedule_id) — confirm that is deliberate.
        cursor.execute("DROP TABLE crawls_crawl")
        cursor.execute("""
            CREATE TABLE "crawls_crawl" (
                "num_uses_failed" integer unsigned NOT NULL CHECK ("num_uses_failed" >= 0),
                "num_uses_succeeded" integer unsigned NOT NULL CHECK ("num_uses_succeeded" >= 0),
                "id" char(32) NOT NULL PRIMARY KEY,
                "created_at" datetime NOT NULL,
                "modified_at" datetime NOT NULL,
                "urls" text NOT NULL,
                "config" text,
                "max_depth" smallint unsigned NOT NULL CHECK ("max_depth" >= 0),
                "tags_str" varchar(1024) NOT NULL,
                "persona_id" char(32) NULL,
                "label" varchar(64) NOT NULL,
                "notes" text NOT NULL,
                "output_dir" varchar(512) NOT NULL,
                "status" varchar(15) NOT NULL,
                "retry_at" datetime NULL,
                "created_by_id" integer NOT NULL REFERENCES "auth_user" ("id") DEFERRABLE INITIALLY DEFERRED,
                "seed_id" char(32) NULL DEFAULT NULL,
                "schedule_id" char(32) NULL REFERENCES "crawls_crawlschedule" ("id") DEFERRABLE INITIALLY DEFERRED
            )
        """)

        # Restore data (explicit column list so column order changes are safe)
        cursor.execute("""
            INSERT INTO "crawls_crawl" (
                "num_uses_failed", "num_uses_succeeded", "id", "created_at", "modified_at",
                "urls", "config", "max_depth", "tags_str", "persona_id", "label", "notes",
                "output_dir", "status", "retry_at", "created_by_id", "seed_id", "schedule_id"
            )
            SELECT
                "num_uses_failed", "num_uses_succeeded", "id", "created_at", "modified_at",
                "urls", "config", "max_depth", "tags_str", "persona_id", "label", "notes",
                "output_dir", "status", "retry_at", "created_by_id", "seed_id", "schedule_id"
            FROM crawls_crawl_backup
        """)
        cursor.execute("DROP TABLE crawls_crawl_backup")

        # NULL out config to avoid any invalid JSON
        cursor.execute('UPDATE "crawls_crawl" SET "config" = NULL')
class Migration(migrations.Migration):
    """Rebuilds crawls_crawl (upgrades only) to drop the config CHECK
    constraint and make seed_id nullable; see fix_crawls_config."""

    dependencies = [
        ('core', '0024_c_disable_fk_checks'),
        ('crawls', '0001_initial'),
    ]

    operations = [
        # Schema repair is one-way; reverse is a no-op so rollbacks don't fail.
        migrations.RunPython(fix_crawls_config, reverse_code=migrations.RunPython.noop),
    ]

View File

@@ -8,9 +8,7 @@ import django.db.models.deletion
class Migration(migrations.Migration):
dependencies = [
('core', '0023_new_schema'),
('crawls', '0001_initial'),
('machine', '0001_squashed'),
('core', '0024_d_fix_crawls_config'),
]
operations = [

View File

@@ -10,6 +10,13 @@ from django.db import migrations, models
def populate_archiveresult_uuids(apps, schema_editor):
"""Generate unique UUIDs for ArchiveResults that don't have one."""
# Check if uuid column exists before trying to populate it
with schema_editor.connection.cursor() as cursor:
cursor.execute("PRAGMA table_info(core_archiveresult)")
columns = [row[1] for row in cursor.fetchall()]
if 'uuid' not in columns:
return # uuid column doesn't exist, skip this data migration
ArchiveResult = apps.get_model('core', 'ArchiveResult')
for result in ArchiveResult.objects.filter(uuid__isnull=True):
result.uuid = uuid_compat.uuid7()
@@ -21,6 +28,22 @@ def reverse_populate_uuids(apps, schema_editor):
pass
def remove_output_dir_if_exists(apps, schema_editor):
"""Remove output_dir columns if they exist."""
with schema_editor.connection.cursor() as cursor:
# Check and remove from core_archiveresult
cursor.execute("PRAGMA table_info(core_archiveresult)")
columns = [row[1] for row in cursor.fetchall()]
if 'output_dir' in columns:
cursor.execute("ALTER TABLE core_archiveresult DROP COLUMN output_dir")
# Check and remove from core_snapshot
cursor.execute("PRAGMA table_info(core_snapshot)")
columns = [row[1] for row in cursor.fetchall()]
if 'output_dir' in columns:
cursor.execute("ALTER TABLE core_snapshot DROP COLUMN output_dir")
class Migration(migrations.Migration):
dependencies = [
@@ -33,82 +56,90 @@ class Migration(migrations.Migration):
migrations.RunPython(populate_archiveresult_uuids, reverse_populate_uuids),
# Remove output_dir fields (not needed, computed from snapshot)
migrations.RemoveField(
model_name='archiveresult',
name='output_dir',
),
migrations.RemoveField(
model_name='snapshot',
name='output_dir',
migrations.RunPython(remove_output_dir_if_exists, reverse_code=migrations.RunPython.noop),
# Update Django's migration state to match 0.9.x schema
# Database already has correct types from 0.8.x, just update state
migrations.SeparateDatabaseAndState(
state_operations=[
# Archiveresult field alterations
migrations.AlterField(
model_name='archiveresult',
name='created_at',
field=models.DateTimeField(db_index=True, default=django.utils.timezone.now),
),
migrations.AlterField(
model_name='archiveresult',
name='created_by',
field=models.ForeignKey(default=archivebox.base_models.models.get_or_create_system_user_pk, on_delete=django.db.models.deletion.CASCADE, related_name='archiveresult_set', to=settings.AUTH_USER_MODEL),
),
migrations.AlterField(
model_name='archiveresult',
name='extractor',
field=models.CharField(db_index=True, max_length=32),
),
# Convert id from AutoField to UUIDField (database already has UUID CHAR(32))
migrations.AlterField(
model_name='archiveresult',
name='id',
field=models.UUIDField(default=uuid_compat.uuid7, editable=False, primary_key=True, serialize=False, unique=True),
),
migrations.AlterField(
model_name='archiveresult',
name='status',
field=models.CharField(choices=[('queued', 'Queued'), ('started', 'Started'), ('backoff', 'Waiting to retry'), ('succeeded', 'Succeeded'), ('failed', 'Failed'), ('skipped', 'Skipped')], db_index=True, default='queued', max_length=15),
),
# Snapshot field alterations
migrations.AlterField(
model_name='snapshot',
name='bookmarked_at',
field=models.DateTimeField(db_index=True, default=django.utils.timezone.now),
),
migrations.AlterField(
model_name='snapshot',
name='created_at',
field=models.DateTimeField(db_index=True, default=django.utils.timezone.now),
),
migrations.AlterField(
model_name='snapshot',
name='created_by',
field=models.ForeignKey(default=archivebox.base_models.models.get_or_create_system_user_pk, on_delete=django.db.models.deletion.CASCADE, related_name='snapshot_set', to=settings.AUTH_USER_MODEL),
),
migrations.AlterField(
model_name='snapshot',
name='downloaded_at',
field=models.DateTimeField(blank=True, db_index=True, default=None, editable=False, null=True),
),
migrations.AlterField(
model_name='snapshot',
name='id',
field=models.UUIDField(default=uuid_compat.uuid7, editable=False, primary_key=True, serialize=False, unique=True),
),
],
database_operations=[
# No actual database changes needed - schema is already correct from 0.8.x
],
),
# Archiveresult field alterations
migrations.AlterField(
model_name='archiveresult',
name='created_at',
field=models.DateTimeField(db_index=True, default=django.utils.timezone.now),
),
migrations.AlterField(
model_name='archiveresult',
name='created_by',
field=models.ForeignKey(default=None, on_delete=django.db.models.deletion.CASCADE, related_name='archiveresult_set', to=settings.AUTH_USER_MODEL),
),
migrations.AlterField(
model_name='archiveresult',
name='extractor',
field=models.CharField(db_index=True, max_length=32),
),
migrations.AlterField(
model_name='archiveresult',
name='id',
field=models.AutoField(editable=False, primary_key=True, serialize=False),
),
migrations.AlterField(
model_name='archiveresult',
name='status',
field=models.CharField(choices=[('queued', 'Queued'), ('started', 'Started'), ('backoff', 'Waiting to retry'), ('succeeded', 'Succeeded'), ('failed', 'Failed'), ('skipped', 'Skipped')], db_index=True, default='queued', max_length=15),
),
# Snapshot field alterations
migrations.AlterField(
model_name='snapshot',
name='bookmarked_at',
field=models.DateTimeField(db_index=True, default=django.utils.timezone.now),
),
migrations.AlterField(
model_name='snapshot',
name='created_at',
field=models.DateTimeField(db_index=True, default=django.utils.timezone.now),
),
migrations.AlterField(
model_name='snapshot',
name='created_by',
field=models.ForeignKey(default=None, on_delete=django.db.models.deletion.CASCADE, related_name='snapshot_set', to=settings.AUTH_USER_MODEL),
),
migrations.AlterField(
model_name='snapshot',
name='downloaded_at',
field=models.DateTimeField(blank=True, db_index=True, default=None, editable=False, null=True),
),
migrations.AlterField(
model_name='snapshot',
name='id',
field=models.UUIDField(default=uuid_compat.uuid7, editable=False, primary_key=True, serialize=False, unique=True),
),
# SnapshotTag and Tag alterations
migrations.AlterField(
model_name='snapshottag',
name='id',
field=models.AutoField(primary_key=True, serialize=False),
),
migrations.AlterField(
model_name='tag',
name='created_by',
field=models.ForeignKey(default=archivebox.base_models.models.get_or_create_system_user_pk, on_delete=django.db.models.deletion.CASCADE, related_name='tag_set', to=settings.AUTH_USER_MODEL),
),
migrations.AlterUniqueTogether(
name='snapshottag',
unique_together={('snapshot', 'tag')},
# SnapshotTag and Tag alterations - state only, DB already correct
migrations.SeparateDatabaseAndState(
state_operations=[
migrations.AlterField(
model_name='snapshottag',
name='id',
field=models.AutoField(primary_key=True, serialize=False),
),
migrations.AlterField(
model_name='tag',
name='created_by',
field=models.ForeignKey(default=archivebox.base_models.models.get_or_create_system_user_pk, on_delete=django.db.models.deletion.CASCADE, related_name='tag_set', to=settings.AUTH_USER_MODEL),
),
migrations.AlterUniqueTogether(
name='snapshottag',
unique_together={('snapshot', 'tag')},
),
],
database_operations=[],
),
]

View File

@@ -13,68 +13,79 @@ class Migration(migrations.Migration):
]
operations = [
# Add new output fields (keep old 'output' temporarily for migration)
migrations.AddField(
model_name='archiveresult',
name='output_str',
field=models.TextField(
blank=True,
default='',
help_text='Human-readable output summary (e.g., "Downloaded 5 files")'
),
),
migrations.AddField(
model_name='archiveresult',
name='output_json',
field=models.JSONField(
null=True,
blank=True,
default=None,
help_text='Structured metadata (headers, redirects, etc.) - should NOT duplicate ArchiveResult fields'
),
),
migrations.AddField(
model_name='archiveresult',
name='output_files',
field=models.JSONField(
default=dict,
help_text='Dict of {relative_path: {metadata}} - values are empty dicts for now, extensible for future metadata'
),
),
migrations.AddField(
model_name='archiveresult',
name='output_size',
field=models.BigIntegerField(
default=0,
help_text='Total recursive size in bytes of all output files'
),
),
migrations.AddField(
model_name='archiveresult',
name='output_mimetypes',
field=models.CharField(
max_length=512,
blank=True,
default='',
help_text='CSV of mimetypes sorted by size descending'
),
),
# Add binary FK (optional)
migrations.AddField(
model_name='archiveresult',
name='binary',
field=models.ForeignKey(
'machine.Binary',
on_delete=models.SET_NULL,
null=True,
blank=True,
related_name='archiveresults',
help_text='Primary binary used by this hook (optional)'
),
# Add new output fields using SeparateDatabaseAndState to avoid table rebuilds
migrations.SeparateDatabaseAndState(
state_operations=[
migrations.AddField(
model_name='archiveresult',
name='output_str',
field=models.TextField(
blank=True,
default='',
help_text='Human-readable output summary (e.g., "Downloaded 5 files")'
),
),
migrations.AddField(
model_name='archiveresult',
name='output_json',
field=models.JSONField(
null=True,
blank=True,
default=None,
help_text='Structured metadata (headers, redirects, etc.) - should NOT duplicate ArchiveResult fields'
),
),
migrations.AddField(
model_name='archiveresult',
name='output_files',
field=models.JSONField(
default=dict,
help_text='Dict of {relative_path: {metadata}} - values are empty dicts for now, extensible for future metadata'
),
),
migrations.AddField(
model_name='archiveresult',
name='output_size',
field=models.BigIntegerField(
default=0,
help_text='Total recursive size in bytes of all output files'
),
),
migrations.AddField(
model_name='archiveresult',
name='output_mimetypes',
field=models.CharField(
max_length=512,
blank=True,
default='',
help_text='CSV of mimetypes sorted by size descending'
),
),
migrations.AddField(
model_name='archiveresult',
name='binary',
field=models.ForeignKey(
'machine.Binary',
on_delete=models.SET_NULL,
null=True,
blank=True,
related_name='archiveresults',
help_text='Primary binary used by this hook (optional)'
),
),
],
database_operations=[
migrations.RunSQL(
sql="""
ALTER TABLE core_archiveresult ADD COLUMN output_str TEXT DEFAULT '';
ALTER TABLE core_archiveresult ADD COLUMN output_json TEXT;
ALTER TABLE core_archiveresult ADD COLUMN output_files TEXT DEFAULT '{}';
ALTER TABLE core_archiveresult ADD COLUMN output_size BIGINT DEFAULT 0;
ALTER TABLE core_archiveresult ADD COLUMN output_mimetypes VARCHAR(512) DEFAULT '';
ALTER TABLE core_archiveresult ADD COLUMN binary_id CHAR(32) REFERENCES machine_binary(id);
""",
reverse_sql=migrations.RunSQL.noop,
),
],
),
]

View File

@@ -12,27 +12,46 @@ def migrate_output_field(apps, schema_editor):
Logic:
- If output contains JSON {...}, move to output_json
- Otherwise, move to output_str
Use raw SQL to avoid CHECK constraint issues during migration.
"""
ArchiveResult = apps.get_model('core', 'ArchiveResult')
# Use raw SQL to migrate data without triggering CHECK constraints
with schema_editor.connection.cursor() as cursor:
# Get all archive results
cursor.execute("""
SELECT id, output FROM core_archiveresult
""")
for ar in ArchiveResult.objects.all().iterator():
old_output = ar.output or ''
for row in cursor.fetchall():
ar_id, old_output = row
old_output = old_output or ''
# Case 1: JSON output
if old_output.strip().startswith('{'):
try:
parsed = json.loads(old_output)
ar.output_json = parsed
ar.output_str = ''
except json.JSONDecodeError:
# Not valid JSON, treat as string
ar.output_str = old_output
# Case 2: File path or plain string
else:
ar.output_str = old_output
ar.save(update_fields=['output_str', 'output_json'])
# Case 1: JSON output
if old_output.strip().startswith('{'):
try:
# Validate it's actual JSON
parsed = json.loads(old_output)
# Update with JSON - cast to JSON to satisfy CHECK constraint
json_str = json.dumps(parsed)
cursor.execute("""
UPDATE core_archiveresult
SET output_str = '', output_json = json(?)
WHERE id = ?
""", (json_str, ar_id))
except json.JSONDecodeError:
# Not valid JSON, treat as string
cursor.execute("""
UPDATE core_archiveresult
SET output_str = ?, output_json = NULL
WHERE id = ?
""", (old_output, ar_id))
# Case 2: File path or plain string
else:
cursor.execute("""
UPDATE core_archiveresult
SET output_str = ?, output_json = NULL
WHERE id = ?
""", (old_output, ar_id))
def reverse_migrate(apps, schema_editor):

View File

@@ -16,43 +16,62 @@ class Migration(migrations.Migration):
]
operations = [
migrations.AlterField(
model_name='archiveresult',
name='binary',
field=models.ForeignKey(blank=True, help_text='Primary binary used by this hook', null=True, on_delete=django.db.models.deletion.SET_NULL, related_name='archiveresults', to='machine.binary'),
# Update Django's state only - database already has correct schema from 0029
migrations.SeparateDatabaseAndState(
state_operations=[
migrations.AlterField(
model_name='archiveresult',
name='binary',
field=models.ForeignKey(blank=True, help_text='Primary binary used by this hook', null=True, on_delete=django.db.models.deletion.SET_NULL, related_name='archiveresults', to='machine.binary'),
),
migrations.AlterField(
model_name='archiveresult',
name='output_files',
field=models.JSONField(default=dict, help_text='Dict of {relative_path: {metadata}}'),
),
migrations.AlterField(
model_name='archiveresult',
name='output_json',
field=models.JSONField(blank=True, default=None, help_text='Structured metadata (headers, redirects, etc.)', null=True),
),
migrations.AlterField(
model_name='archiveresult',
name='output_mimetypes',
field=models.CharField(blank=True, default='', help_text='CSV of mimetypes sorted by size', max_length=512),
),
migrations.AlterField(
model_name='archiveresult',
name='output_size',
field=models.BigIntegerField(default=0, help_text='Total bytes of all output files'),
),
migrations.AlterField(
model_name='archiveresult',
name='output_str',
field=models.TextField(blank=True, default='', help_text='Human-readable output summary'),
),
migrations.AlterField(
model_name='archiveresult',
name='uuid',
field=models.UUIDField(blank=True, db_index=True, default=uuid_compat.uuid7, null=True),
),
],
database_operations=[
# No database changes needed - columns already exist with correct types
],
),
migrations.AlterField(
model_name='archiveresult',
name='output_files',
field=models.JSONField(default=dict, help_text='Dict of {relative_path: {metadata}}'),
),
migrations.AlterField(
model_name='archiveresult',
name='output_json',
field=models.JSONField(blank=True, default=None, help_text='Structured metadata (headers, redirects, etc.)', null=True),
),
migrations.AlterField(
model_name='archiveresult',
name='output_mimetypes',
field=models.CharField(blank=True, default='', help_text='CSV of mimetypes sorted by size', max_length=512),
),
migrations.AlterField(
model_name='archiveresult',
name='output_size',
field=models.BigIntegerField(default=0, help_text='Total bytes of all output files'),
),
migrations.AlterField(
model_name='archiveresult',
name='output_str',
field=models.TextField(blank=True, default='', help_text='Human-readable output summary'),
),
migrations.AlterField(
model_name='archiveresult',
name='uuid',
field=models.UUIDField(blank=True, db_index=True, default=uuid_compat.uuid7, null=True),
),
migrations.AddConstraint(
model_name='snapshot',
constraint=models.UniqueConstraint(fields=('timestamp',), name='unique_timestamp'),
# Add unique constraint without table rebuild
migrations.SeparateDatabaseAndState(
state_operations=[
migrations.AddConstraint(
model_name='snapshot',
constraint=models.UniqueConstraint(fields=('timestamp',), name='unique_timestamp'),
),
],
database_operations=[
migrations.RunSQL(
sql="CREATE UNIQUE INDEX IF NOT EXISTS unique_timestamp ON core_snapshot (timestamp);",
reverse_sql="DROP INDEX IF EXISTS unique_timestamp;",
),
],
),
]

View File

@@ -10,20 +10,35 @@ class Migration(migrations.Migration):
]
operations = [
migrations.RenameField(
model_name='archiveresult',
old_name='extractor',
new_name='plugin',
),
migrations.AddField(
model_name='archiveresult',
name='hook_name',
field=models.CharField(
blank=True,
default='',
max_length=255,
db_index=True,
help_text='Full filename of the hook that executed (e.g., on_Snapshot__50_wget.py)'
),
# Use SeparateDatabaseAndState to avoid table rebuilds that would re-add CHECK constraints
migrations.SeparateDatabaseAndState(
state_operations=[
migrations.RenameField(
model_name='archiveresult',
old_name='extractor',
new_name='plugin',
),
migrations.AddField(
model_name='archiveresult',
name='hook_name',
field=models.CharField(
blank=True,
default='',
max_length=255,
db_index=True,
help_text='Full filename of the hook that executed (e.g., on_Snapshot__50_wget.py)'
),
),
],
database_operations=[
migrations.RunSQL(
sql="""
ALTER TABLE core_archiveresult RENAME COLUMN extractor TO plugin;
ALTER TABLE core_archiveresult ADD COLUMN hook_name VARCHAR(255) DEFAULT '' NOT NULL;
CREATE INDEX IF NOT EXISTS core_archiveresult_hook_name_idx ON core_archiveresult (hook_name);
""",
reverse_sql=migrations.RunSQL.noop,
),
],
),
]

View File

@@ -11,13 +11,27 @@ class Migration(migrations.Migration):
]
operations = [
migrations.AddField(
model_name='snapshot',
name='current_step',
field=models.PositiveSmallIntegerField(
default=0,
db_index=True,
help_text='Current hook step being executed (0-9). Used for sequential hook execution.'
),
# Use SeparateDatabaseAndState to avoid table rebuild that would fail on config NOT NULL constraint
migrations.SeparateDatabaseAndState(
state_operations=[
migrations.AddField(
model_name='snapshot',
name='current_step',
field=models.PositiveSmallIntegerField(
default=0,
db_index=True,
help_text='Current hook step being executed (0-9). Used for sequential hook execution.'
),
),
],
database_operations=[
migrations.RunSQL(
sql="""
ALTER TABLE core_snapshot ADD COLUMN current_step SMALLINT UNSIGNED DEFAULT 0 NOT NULL;
CREATE INDEX IF NOT EXISTS core_snapshot_current_step_idx ON core_snapshot (current_step);
""",
reverse_sql=migrations.RunSQL.noop,
),
],
),
]

View File

@@ -54,7 +54,7 @@ class Migration(migrations.Migration):
dependencies = [
('core', '0034_snapshot_current_step'),
('crawls', '0004_alter_crawl_output_dir'),
('crawls', '0005_drop_seed_id_column'),
]
operations = [
@@ -64,16 +64,24 @@ class Migration(migrations.Migration):
reverse_code=migrations.RunPython.noop,
),
# Step 2: Make crawl non-nullable
migrations.AlterField(
model_name='snapshot',
name='crawl',
field=models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='snapshot_set', to='crawls.crawl'),
),
# Step 3: Remove created_by field
migrations.RemoveField(
model_name='snapshot',
name='created_by',
# Step 2 & 3: Update Django's state only - leave created_by_id column in database (unused but harmless)
migrations.SeparateDatabaseAndState(
state_operations=[
# Make crawl non-nullable
migrations.AlterField(
model_name='snapshot',
name='crawl',
field=models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='snapshot_set', to='crawls.crawl'),
),
# Remove created_by field from Django's state
migrations.RemoveField(
model_name='snapshot',
name='created_by',
),
],
database_operations=[
# No database changes - crawl_id already exists and NOT NULL constraint will be enforced by model
# created_by_id column remains in database but is unused
],
),
]

View File

@@ -10,10 +10,18 @@ class Migration(migrations.Migration):
]
operations = [
# Remove created_by field from ArchiveResult
# Remove created_by field from ArchiveResult (state only)
# No data migration needed - created_by can be accessed via snapshot.crawl.created_by
migrations.RemoveField(
model_name='archiveresult',
name='created_by',
# Leave created_by_id column in database (unused but harmless, avoids table rebuild)
migrations.SeparateDatabaseAndState(
state_operations=[
migrations.RemoveField(
model_name='archiveresult',
name='created_by',
),
],
database_operations=[
# No database changes - leave created_by_id column in place to avoid table rebuild
],
),
]

View File

@@ -0,0 +1,44 @@
# Generated by Django 6.0 on 2025-12-29 06:45
from django.db import migrations, models
class Migration(migrations.Migration):
dependencies = [
('core', '0036_remove_archiveresult_created_by'),
]
operations = [
# Update Django's state only - database columns remain for backwards compat
migrations.SeparateDatabaseAndState(
state_operations=[
migrations.RemoveField(
model_name='archiveresult',
name='output_dir',
),
migrations.RemoveField(
model_name='snapshot',
name='output_dir',
),
migrations.AlterField(
model_name='archiveresult',
name='config',
field=models.JSONField(blank=True, default=dict, null=True),
),
migrations.AlterField(
model_name='snapshot',
name='config',
field=models.JSONField(blank=True, default=dict, null=True),
),
migrations.AlterField(
model_name='snapshot',
name='tags',
field=models.ManyToManyField(blank=True, related_name='snapshot_set', through='core.SnapshotTag', through_fields=('snapshot', 'tag'), to='core.tag'),
),
],
database_operations=[
# No database changes - columns remain in place to avoid table rebuilds
],
),
]

View File

@@ -0,0 +1,84 @@
# Add missing columns to ArchiveResult and remove created_by_id from Snapshot
from django.db import migrations, models, connection
import django.utils.timezone
def add_columns_if_not_exist(apps, schema_editor):
"""Add columns to ArchiveResult only if they don't already exist."""
with connection.cursor() as cursor:
# Get existing columns
cursor.execute("PRAGMA table_info(core_archiveresult)")
existing_columns = {row[1] for row in cursor.fetchall()}
# Add num_uses_failed if it doesn't exist
if 'num_uses_failed' not in existing_columns:
cursor.execute("ALTER TABLE core_archiveresult ADD COLUMN num_uses_failed integer unsigned NOT NULL DEFAULT 0 CHECK (num_uses_failed >= 0)")
# Add num_uses_succeeded if it doesn't exist
if 'num_uses_succeeded' not in existing_columns:
cursor.execute("ALTER TABLE core_archiveresult ADD COLUMN num_uses_succeeded integer unsigned NOT NULL DEFAULT 0 CHECK (num_uses_succeeded >= 0)")
# Add config if it doesn't exist
if 'config' not in existing_columns:
cursor.execute("ALTER TABLE core_archiveresult ADD COLUMN config text NULL")
# Add retry_at if it doesn't exist
if 'retry_at' not in existing_columns:
cursor.execute("ALTER TABLE core_archiveresult ADD COLUMN retry_at datetime NULL")
cursor.execute("CREATE INDEX IF NOT EXISTS core_archiveresult_retry_at_idx ON core_archiveresult(retry_at)")
class Migration(migrations.Migration):
dependencies = [
('core', '0037_remove_archiveresult_output_dir_and_more'),
]
operations = [
# Add missing columns to ArchiveResult
migrations.SeparateDatabaseAndState(
state_operations=[
migrations.AddField(
model_name='archiveresult',
name='num_uses_failed',
field=models.PositiveIntegerField(default=0),
),
migrations.AddField(
model_name='archiveresult',
name='num_uses_succeeded',
field=models.PositiveIntegerField(default=0),
),
migrations.AddField(
model_name='archiveresult',
name='config',
field=models.JSONField(blank=True, default=dict, null=True),
),
migrations.AddField(
model_name='archiveresult',
name='retry_at',
field=models.DateTimeField(blank=True, db_index=True, default=django.utils.timezone.now, null=True),
),
],
database_operations=[
migrations.RunPython(add_columns_if_not_exist, reverse_code=migrations.RunPython.noop),
],
),
# Drop created_by_id from Snapshot (database only, already removed from model in 0035)
migrations.SeparateDatabaseAndState(
state_operations=[
# No state changes - field already removed in 0035
],
database_operations=[
migrations.RunSQL(
sql="""
-- Drop index first, then column
DROP INDEX IF EXISTS core_snapshot_created_by_id_6dbd6149;
ALTER TABLE core_snapshot DROP COLUMN created_by_id;
""",
reverse_sql=migrations.RunSQL.noop,
),
],
),
]

View File

@@ -0,0 +1,30 @@
# Fix num_uses_failed and num_uses_succeeded string values to integers
from django.db import migrations
class Migration(migrations.Migration):
dependencies = [
('core', '0038_fix_missing_columns'),
]
operations = [
# Fix string values that got inserted as literals instead of integers
migrations.RunSQL(
sql="""
UPDATE core_snapshot
SET num_uses_failed = 0
WHERE typeof(num_uses_failed) = 'text' OR num_uses_failed = 'num_uses_failed';
UPDATE core_snapshot
SET num_uses_succeeded = 0
WHERE typeof(num_uses_succeeded) = 'text' OR num_uses_succeeded = 'num_uses_succeeded';
UPDATE core_snapshot
SET depth = 0
WHERE typeof(depth) = 'text' OR depth = 'depth';
""",
reverse_sql=migrations.RunSQL.noop,
),
]

View File

@@ -911,7 +911,7 @@ class Snapshot(ModelWithOutputDir, ModelWithConfig, ModelWithNotes, ModelWithHea
)
merged = 0
for dup in duplicates.iterator():
for dup in duplicates.iterator(chunk_size=500):
snapshots = list(
cls.objects
.filter(url=dup['url'], timestamp=dup['timestamp'])

File diff suppressed because it is too large Load Diff

View File

@@ -91,7 +91,11 @@ def plugin_thumbnail(context, result) -> str:
'output_path': output_path,
'plugin': plugin,
})
return mark_safe(tpl.render(ctx))
rendered = tpl.render(ctx)
# Only return non-empty content (strip whitespace to check)
if rendered.strip():
return mark_safe(rendered)
return ''
except Exception:
return ''
@@ -119,7 +123,11 @@ def plugin_embed(context, result) -> str:
'output_path': output_path,
'plugin': plugin,
})
return mark_safe(tpl.render(ctx))
rendered = tpl.render(ctx)
# Only return non-empty content (strip whitespace to check)
if rendered.strip():
return mark_safe(rendered)
return ''
except Exception:
return ''
@@ -147,7 +155,11 @@ def plugin_fullscreen(context, result) -> str:
'output_path': output_path,
'plugin': plugin,
})
return mark_safe(tpl.render(ctx))
rendered = tpl.render(ctx)
# Only return non-empty content (strip whitespace to check)
if rendered.strip():
return mark_safe(rendered)
return ''
except Exception:
return ''

View File

@@ -539,7 +539,7 @@ from django.http import JsonResponse
def live_progress_view(request):
"""Simple JSON endpoint for live progress status - used by admin progress monitor."""
try:
from workers.orchestrator import Orchestrator
from archivebox.workers.orchestrator import Orchestrator
from archivebox.crawls.models import Crawl
from archivebox.core.models import Snapshot, ArchiveResult
from django.db.models import Case, When, Value, IntegerField

View File

@@ -4,3 +4,8 @@ from django.apps import AppConfig
class CrawlsConfig(AppConfig):
default_auto_field = "django.db.models.BigAutoField"
name = "archivebox.crawls"
label = "crawls"
def ready(self):
"""Import models to register state machines with the registry"""
from archivebox.crawls.models import CrawlMachine # noqa: F401

View File

@@ -17,39 +17,62 @@ class Migration(migrations.Migration):
]
operations = [
# Remove the seed foreign key from Crawl
migrations.RemoveField(
model_name='crawl',
name='seed',
# Remove the seed foreign key from Crawl (no-op if already removed by core/0024_d)
migrations.RunPython(
code=lambda apps, schema_editor: None,
reverse_code=migrations.RunPython.noop,
),
# Delete the Seed model entirely
migrations.DeleteModel(
name='Seed',
# Delete the Seed model entirely (already done)
migrations.RunPython(
code=lambda apps, schema_editor: None,
reverse_code=migrations.RunPython.noop,
),
# Update fields to new schema
migrations.AlterField(
model_name='crawl',
name='created_by',
field=models.ForeignKey(default=archivebox.base_models.models.get_or_create_system_user_pk, on_delete=django.db.models.deletion.CASCADE, to=settings.AUTH_USER_MODEL),
),
migrations.AlterField(
model_name='crawl',
name='id',
field=models.UUIDField(default=uuid_compat.uuid7, editable=False, primary_key=True, serialize=False, unique=True),
),
migrations.AlterField(
model_name='crawl',
name='urls',
field=models.TextField(help_text='Newline-separated list of URLs to crawl'),
),
migrations.AlterField(
model_name='crawlschedule',
name='created_by',
field=models.ForeignKey(default=archivebox.base_models.models.get_or_create_system_user_pk, on_delete=django.db.models.deletion.CASCADE, to=settings.AUTH_USER_MODEL),
),
migrations.AlterField(
model_name='crawlschedule',
name='id',
field=models.UUIDField(default=uuid_compat.uuid7, editable=False, primary_key=True, serialize=False, unique=True),
# Drop seed_id column if it exists, then update Django's migration state
migrations.SeparateDatabaseAndState(
state_operations=[
# Update fields to new schema
migrations.AlterField(
model_name='crawl',
name='created_by',
field=models.ForeignKey(default=archivebox.base_models.models.get_or_create_system_user_pk, on_delete=django.db.models.deletion.CASCADE, to=settings.AUTH_USER_MODEL),
),
migrations.AlterField(
model_name='crawl',
name='id',
field=models.UUIDField(default=uuid_compat.uuid7, editable=False, primary_key=True, serialize=False, unique=True),
),
migrations.AlterField(
model_name='crawl',
name='urls',
field=models.TextField(help_text='Newline-separated list of URLs to crawl'),
),
migrations.AlterField(
model_name='crawlschedule',
name='created_by',
field=models.ForeignKey(default=archivebox.base_models.models.get_or_create_system_user_pk, on_delete=django.db.models.deletion.CASCADE, to=settings.AUTH_USER_MODEL),
),
migrations.AlterField(
model_name='crawlschedule',
name='id',
field=models.UUIDField(default=uuid_compat.uuid7, editable=False, primary_key=True, serialize=False, unique=True),
),
],
database_operations=[
# Drop seed table and NULL out seed_id FK values
migrations.RunSQL(
sql="""
PRAGMA foreign_keys=OFF;
-- NULL out seed_id values in crawls_crawl
UPDATE crawls_crawl SET seed_id = NULL;
-- Drop seed table if it exists
DROP TABLE IF EXISTS crawls_seed;
PRAGMA foreign_keys=ON;
""",
reverse_sql=migrations.RunSQL.noop,
),
],
),
]

View File

@@ -8,12 +8,21 @@ class Migration(migrations.Migration):
dependencies = [
('crawls', '0002_drop_seed_model'),
('core', '0024_d_fix_crawls_config'), # Depends on config fix
]
operations = [
migrations.AlterField(
model_name='crawl',
name='output_dir',
field=models.FilePathField(blank=True, default='', path=pathlib.PurePosixPath('/private/tmp/test_archivebox_migrations/archive')),
# Update Django's state only to avoid table rebuild that would re-apply old constraints
migrations.SeparateDatabaseAndState(
state_operations=[
migrations.AlterField(
model_name='crawl',
name='output_dir',
field=models.FilePathField(blank=True, default='', path=pathlib.PurePosixPath('/private/tmp/test_archivebox_migrations/archive')),
),
],
database_operations=[
# No database changes - output_dir type change is cosmetic for Django admin
],
),
]

View File

@@ -11,9 +11,17 @@ class Migration(migrations.Migration):
]
operations = [
migrations.AlterField(
model_name='crawl',
name='output_dir',
field=models.FilePathField(blank=True, default='', path=pathlib.PurePosixPath('/private/tmp/archivebox-makemigrations/archive')),
# Update Django's state only to avoid table rebuild that would re-apply old constraints
migrations.SeparateDatabaseAndState(
state_operations=[
migrations.AlterField(
model_name='crawl',
name='output_dir',
field=models.FilePathField(blank=True, default='', path=pathlib.PurePosixPath('/private/tmp/archivebox-makemigrations/archive')),
),
],
database_operations=[
# No database changes - output_dir type change is cosmetic for Django admin
],
),
]

View File

@@ -0,0 +1,28 @@
# Drop seed_id column from Django's state (leave in database to avoid FK issues)
from django.db import migrations
class Migration(migrations.Migration):
dependencies = [
('crawls', '0004_alter_crawl_output_dir'),
]
operations = [
# Update Django's state only - leave seed_id column in database (unused but harmless)
# This avoids FK mismatch errors with crawls_crawlschedule
migrations.SeparateDatabaseAndState(
state_operations=[
# Remove seed field from Django's migration state
migrations.RemoveField(
model_name='crawl',
name='seed',
),
],
database_operations=[
# No database changes - seed_id column remains to avoid FK rebuild issues
# crawls_seed table can be manually dropped by DBA if needed
],
),
]

View File

@@ -0,0 +1,35 @@
# Generated by Django 6.0 on 2025-12-29 06:45
import pathlib
from django.db import migrations, models
class Migration(migrations.Migration):
dependencies = [
('crawls', '0005_drop_seed_id_column'),
]
operations = [
# Update Django's state only - database already correct
migrations.SeparateDatabaseAndState(
state_operations=[
migrations.AlterField(
model_name='crawl',
name='config',
field=models.JSONField(blank=True, default=dict, null=True),
),
migrations.AlterField(
model_name='crawl',
name='output_dir',
field=models.FilePathField(blank=True, default='', path=pathlib.PurePosixPath('/Users/squash/Local/Code/archiveboxes/archivebox-nue/data/archive')),
),
migrations.DeleteModel(
name='Seed',
),
],
database_operations=[
# No database changes - Seed table already dropped in 0005
],
),
]

View File

@@ -65,7 +65,7 @@ class Crawl(ModelWithOutputDir, ModelWithConfig, ModelWithHealthStats, ModelWith
modified_at = models.DateTimeField(auto_now=True)
urls = models.TextField(blank=False, null=False, help_text='Newline-separated list of URLs to crawl')
config = models.JSONField(default=dict)
config = models.JSONField(default=dict, null=True, blank=True)
max_depth = models.PositiveSmallIntegerField(default=0, validators=[MinValueValidator(0), MaxValueValidator(4)])
tags_str = models.CharField(max_length=1024, blank=True, null=False, default='')
persona_id = models.UUIDField(null=True, blank=True)
@@ -77,7 +77,7 @@ class Crawl(ModelWithOutputDir, ModelWithConfig, ModelWithHealthStats, ModelWith
status = ModelWithStateMachine.StatusField(choices=ModelWithStateMachine.StatusChoices, default=ModelWithStateMachine.StatusChoices.QUEUED)
retry_at = ModelWithStateMachine.RetryAtField(default=timezone.now)
state_machine_name = 'crawls.models.CrawlMachine'
state_machine_name = 'archivebox.crawls.models.CrawlMachine'
retry_at_field_name = 'retry_at'
state_field_name = 'status'
StatusChoices = ModelWithStateMachine.StatusChoices
@@ -190,7 +190,6 @@ class Crawl(ModelWithOutputDir, ModelWithConfig, ModelWithHealthStats, ModelWith
'status': Snapshot.INITIAL_STATE,
'retry_at': timezone.now(),
'timestamp': str(timezone.now().timestamp()),
'created_by_id': self.created_by_id,
'depth': 0,
},
)
@@ -290,7 +289,7 @@ class Crawl(ModelWithOutputDir, ModelWithConfig, ModelWithHealthStats, ModelWith
'timestamp': timestamp or str(timezone.now().timestamp()),
'status': Snapshot.INITIAL_STATE,
'retry_at': timezone.now(),
'created_by_id': self.created_by_id,
# Note: created_by removed in 0.9.0 - Snapshot inherits from Crawl
}
)

View File

@@ -7,8 +7,13 @@ class MachineConfig(AppConfig):
default_auto_field = 'django.db.models.BigAutoField'
name = 'archivebox.machine'
label = 'machine' # Explicit label for migrations
verbose_name = 'Machine Info'
def ready(self):
"""Import models to register state machines with the registry"""
from archivebox.machine import models # noqa: F401
def register_admin(admin_site):
from archivebox.machine.admin import register_admin

View File

@@ -85,6 +85,12 @@ class Migration(migrations.Migration):
('version', models.CharField(blank=True, default=None, max_length=32)),
('sha256', models.CharField(blank=True, default=None, max_length=64)),
('machine', models.ForeignKey(blank=True, default=None, on_delete=django.db.models.deletion.CASCADE, to='machine.machine')),
# Fields added in migration 0005 (included here for fresh installs)
('binproviders', models.CharField(blank=True, default='env', max_length=127)),
('output_dir', models.CharField(blank=True, default='', max_length=255)),
('overrides', models.JSONField(blank=True, default=dict)),
('retry_at', models.DateTimeField(blank=True, db_index=True, default=django.utils.timezone.now, null=True)),
('status', models.CharField(choices=[('queued', 'Queued'), ('started', 'Started'), ('succeeded', 'Succeeded'), ('failed', 'Failed')], db_index=True, default='queued', max_length=16)),
# dependency FK removed - Dependency model deleted
],
options={

View File

@@ -0,0 +1,104 @@
# Generated by Django 6.0 on 2025-12-29 06:45
import django.db.models.deletion
import django.utils.timezone
from archivebox.uuid_compat import uuid7
from django.db import migrations, models
class Migration(migrations.Migration):
dependencies = [
('machine', '0004_drop_dependency_table'),
]
operations = [
# Update Django's state only - database already has correct schema
migrations.SeparateDatabaseAndState(
state_operations=[
migrations.AddField(
model_name='binary',
name='binproviders',
field=models.CharField(blank=True, default='env', help_text='Comma-separated list of allowed providers: apt,brew,pip,npm,env', max_length=127),
),
migrations.AddField(
model_name='binary',
name='output_dir',
field=models.CharField(blank=True, default='', help_text='Directory where installation hook logs are stored', max_length=255),
),
migrations.AddField(
model_name='binary',
name='overrides',
field=models.JSONField(blank=True, default=dict, help_text="Provider-specific overrides: {'apt': {'packages': ['pkg']}, ...}"),
),
migrations.AddField(
model_name='binary',
name='retry_at',
field=models.DateTimeField(blank=True, db_index=True, default=django.utils.timezone.now, help_text='When to retry this binary installation', null=True),
),
migrations.AddField(
model_name='binary',
name='status',
field=models.CharField(choices=[('queued', 'Queued'), ('started', 'Started'), ('succeeded', 'Succeeded'), ('failed', 'Failed')], db_index=True, default='queued', max_length=16),
),
migrations.AlterField(
model_name='binary',
name='abspath',
field=models.CharField(blank=True, default='', max_length=255),
),
migrations.AlterField(
model_name='binary',
name='binprovider',
field=models.CharField(blank=True, default='', help_text='Provider that successfully installed this binary', max_length=31),
),
migrations.AlterField(
model_name='binary',
name='id',
field=models.UUIDField(default=uuid7, editable=False, primary_key=True, serialize=False, unique=True),
),
migrations.AlterField(
model_name='binary',
name='machine',
field=models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='machine.machine'),
),
migrations.AlterField(
model_name='binary',
name='name',
field=models.CharField(blank=True, db_index=True, default='', max_length=63),
),
migrations.AlterField(
model_name='binary',
name='sha256',
field=models.CharField(blank=True, default='', max_length=64),
),
migrations.AlterField(
model_name='binary',
name='version',
field=models.CharField(blank=True, default='', max_length=32),
),
migrations.AlterField(
model_name='machine',
name='config',
field=models.JSONField(blank=True, default=dict, help_text='Machine-specific config overrides (e.g., resolved binary paths like WGET_BINARY)', null=True),
),
migrations.AlterField(
model_name='machine',
name='id',
field=models.UUIDField(default=uuid7, editable=False, primary_key=True, serialize=False, unique=True),
),
migrations.AlterField(
model_name='machine',
name='stats',
field=models.JSONField(blank=True, default=dict, null=True),
),
migrations.AlterField(
model_name='networkinterface',
name='id',
field=models.UUIDField(default=uuid7, editable=False, primary_key=True, serialize=False, unique=True),
),
],
database_operations=[
# No database changes - schema already correct from previous migrations
],
),
]

View File

@@ -44,8 +44,8 @@ class Machine(ModelWithHealthStats):
os_platform = models.CharField(max_length=63, default=None, null=False)
os_release = models.CharField(max_length=63, default=None, null=False)
os_kernel = models.CharField(max_length=255, default=None, null=False)
stats = models.JSONField(default=dict, null=False)
config = models.JSONField(default=dict, null=False, blank=True,
stats = models.JSONField(default=dict, null=True, blank=True)
config = models.JSONField(default=dict, null=True, blank=True,
help_text="Machine-specific config overrides (e.g., resolved binary paths like WGET_BINARY)")
num_uses_failed = models.PositiveIntegerField(default=0)
num_uses_succeeded = models.PositiveIntegerField(default=0)
@@ -213,7 +213,7 @@ class Binary(ModelWithHealthStats):
num_uses_failed = models.PositiveIntegerField(default=0)
num_uses_succeeded = models.PositiveIntegerField(default=0)
state_machine_name: str = 'machine.models.BinaryMachine'
state_machine_name: str = 'archivebox.machine.models.BinaryMachine'
objects: BinaryManager = BinaryManager()

View File

@@ -4,3 +4,4 @@ from django.apps import AppConfig
class SessionsConfig(AppConfig):
default_auto_field = "django.db.models.BigAutoField"
name = "archivebox.personas"
label = "personas"

View File

@@ -21,7 +21,7 @@
# # COOKIES_TXT_FILE: '/path/to/cookies.txt',
# # CHROME_USER_DATA_DIR: '/path/to/chrome/user/data/dir',
# # CHECK_SSL_VALIDITY: False,
# # SAVE_ARCHIVE_DOT_ORG: True,
# # SAVE_ARCHIVEDOTORG: True,
# # CHROME_BINARY: 'chromium'
# # ...
# # }

View File

@@ -63,7 +63,7 @@ def test_ripgrep_hook_detects_binary_from_path():
def test_ripgrep_hook_skips_when_backend_not_ripgrep():
"""Test that ripgrep hook exits silently when search backend is not ripgrep."""
hook_path = Path(__file__).parent.parent / 'on_Crawl__00_validate_ripgrep.py'
hook_path = Path(__file__).parent.parent / 'on_Crawl__00_install_ripgrep.py'
env = os.environ.copy()
env['SEARCH_BACKEND_ENGINE'] = 'sqlite' # Different backend
@@ -82,7 +82,7 @@ def test_ripgrep_hook_skips_when_backend_not_ripgrep():
def test_ripgrep_hook_handles_absolute_path():
"""Test that ripgrep hook works when RIPGREP_BINARY is an absolute path."""
hook_path = Path(__file__).parent.parent / 'on_Crawl__00_validate_ripgrep.py'
hook_path = Path(__file__).parent.parent / 'on_Crawl__00_install_ripgrep.py'
rg_path = shutil.which('rg')
if not rg_path:
@@ -222,7 +222,7 @@ def test_ripgrep_only_detected_when_backend_enabled():
if not shutil.which('rg'):
pytest.skip("ripgrep not installed")
hook_path = Path(__file__).parent.parent / 'on_Crawl__00_validate_ripgrep.py'
hook_path = Path(__file__).parent.parent / 'on_Crawl__00_install_ripgrep.py'
# Test 1: With ripgrep backend - should output Binary record
env1 = os.environ.copy()

View File

@@ -360,9 +360,11 @@
<div class="row header-bottom-frames">
{% for result_info in archiveresults %}
{% if result_info.result %}
{% plugin_thumbnail result_info.result as thumbnail_html %}
{% if thumbnail_html %}
<div class="col-lg-2">
<div class="card{% if forloop.first %} selected-card{% endif %}">
{% plugin_thumbnail result_info.result %}
{{ thumbnail_html }}
<div class="card-body">
<a href="{{ result_info.path }}" title="Open in new tab..." target="_blank" rel="noopener">
<p class="card-text"><code>{{ result_info.path }}</code></p>
@@ -373,6 +375,7 @@
</div>
</div>
</div>
{% endif %}
{% endif %}
{% endfor %}
@@ -395,7 +398,7 @@
</div>
</div>
</header>
<iframe sandbox="allow-same-origin allow-top-navigation-by-user-activation allow-scripts allow-forms" class="full-page-iframe" src="{{singlefile_path}}" name="preview"></iframe>
<iframe sandbox="allow-same-origin allow-top-navigation-by-user-activation allow-scripts allow-forms" class="full-page-iframe" src="{{best_preview_path}}" name="preview"></iframe>
<script>
/*! jQuery v3.2.1 -ajax,-ajax/jsonp,-ajax/load,-ajax/parseXML,-ajax/script,-ajax/var/location,-ajax/var/nonce,-ajax/var/rquery,-ajax/xhr,-manipulation/_evalUrl,-event/ajax,-effects,-effects/Tween,-effects/animatedSelector | (c) JS Foundation and other contributors | jquery.org/license */

View File

@@ -429,19 +429,6 @@ class TestInstallHookOutput(unittest.TestCase):
self.assertEqual(data['name'], 'wget')
self.assertTrue(data['abspath'].startswith('/'))
def test_install_hook_outputs_dependency(self):
"""Install hook should output Dependency JSONL when binary not found."""
hook_output = json.dumps({
'type': 'Dependency',
'bin_name': 'wget',
'bin_providers': 'apt,brew,env',
})
data = json.loads(hook_output)
self.assertEqual(data['type'], 'Dependency')
self.assertEqual(data['bin_name'], 'wget')
self.assertIn('apt', data['bin_providers'])
def test_install_hook_outputs_machine_config(self):
"""Install hook should output Machine config update JSONL."""
hook_output = json.dumps({

View File

@@ -459,7 +459,7 @@ class TestFilesystemMigration08to09(unittest.TestCase):
'SAVE_MERCURY': 'True',
'SAVE_PDF': 'True',
'SAVE_MEDIA': 'True',
'SAVE_ARCHIVE_DOT_ORG': 'True',
'SAVE_ARCHIVEDOTORG': 'True',
'SAVE_HEADERS': 'True',
'SAVE_HTMLTOTEXT': 'True',
'SAVE_GIT': 'True',

View File

@@ -949,19 +949,30 @@ def seed_0_8_data(db_path: Path) -> Dict[str, List[Dict]]:
('core', '0072_rename_added_snapshot_bookmarked_at_and_more'),
('core', '0073_rename_created_archiveresult_created_at_and_more'),
('core', '0074_alter_snapshot_downloaded_at'),
('core', '0023_new_schema'),
# For 0.8.x: DO NOT record 0023_new_schema - it replaces 0023-0074 for fresh installs
# We already recorded 0023-0074 above, so Django will know the state
# For 0.8.x: Record original machine migrations (before squashing)
# DO NOT record 0001_squashed here - it replaces 0001-0004 for fresh installs
('machine', '0001_initial'),
('machine', '0002_alter_machine_stats_installedbinary'),
('machine', '0003_alter_installedbinary_options_and_more'),
('machine', '0004_alter_installedbinary_abspath_and_more'),
('machine', '0001_squashed'),
# Then the new migrations after squashing
('machine', '0002_rename_custom_cmds_to_overrides'),
('machine', '0003_alter_dependency_id_alter_installedbinary_dependency_and_more'),
('machine', '0004_drop_dependency_table'),
# Crawls must come before core.0024 because 0024_b depends on it
('crawls', '0001_initial'),
# Core 0024 migrations chain (in dependency order)
('core', '0024_b_clear_config_fields'),
('core', '0024_c_disable_fk_checks'),
('core', '0024_d_fix_crawls_config'),
('core', '0024_snapshot_crawl'),
('core', '0024_f_add_snapshot_config'),
('core', '0025_allow_duplicate_urls_per_crawl'),
# For 0.8.x: Record original api migration (before squashing)
# DO NOT record 0001_squashed here - it replaces 0001 for fresh installs
('api', '0001_initial'),
('api', '0001_squashed'),
('api', '0002_alter_apitoken_options'),
('api', '0003_rename_user_apitoken_created_by_apitoken_abid_and_more'),
('api', '0004_alter_apitoken_id_alter_apitoken_uuid'),
@@ -970,11 +981,9 @@ def seed_0_8_data(db_path: Path) -> Dict[str, List[Dict]]:
('api', '0007_alter_apitoken_created_by'),
('api', '0008_alter_apitoken_created_alter_apitoken_created_by_and_more'),
('api', '0009_rename_created_apitoken_created_at_and_more'),
('crawls', '0001_initial'),
('crawls', '0002_drop_seed_model'),
('crawls', '0003_alter_crawl_output_dir'),
('crawls', '0004_alter_crawl_output_dir'),
('core', '0035_snapshot_crawl_non_nullable_remove_created_by'),
# Note: crawls.0001_initial moved earlier (before core.0024) due to dependencies
# Stop here - 0.8.x ends at core.0025, crawls.0001, and we want to TEST the later migrations
# Do NOT record 0026+ as they need to be tested during migration
]
for app, name in migrations:
@@ -1000,7 +1009,7 @@ def run_archivebox(data_dir: Path, args: list, timeout: int = 60, env: dict = No
base_env['USE_COLOR'] = 'False'
base_env['SHOW_PROGRESS'] = 'False'
# Disable ALL extractors for faster tests (can be overridden by env parameter)
base_env['SAVE_ARCHIVE_DOT_ORG'] = 'False'
base_env['SAVE_ARCHIVEDOTORG'] = 'False'
base_env['SAVE_TITLE'] = 'False'
base_env['SAVE_FAVICON'] = 'False'
base_env['SAVE_WGET'] = 'False'

View File

@@ -4,4 +4,5 @@ from django.apps import AppConfig
class WorkersConfig(AppConfig):
default_auto_field = 'django.db.models.BigAutoField'
name = 'archivebox.workers'
label = 'workers'

View File

@@ -2,7 +2,7 @@
# mkdir -p ~/archivebox/data && cd ~/archivebox
# curl -fsSL 'https://docker-compose.archivebox.io' > docker-compose.yml
# docker compose run archivebox version
# docker compose run archivebox config --set SAVE_ARCHIVE_DOT_ORG=False
# docker compose run archivebox config --set SAVE_ARCHIVEDOTORG=False
# docker compose run archivebox add --depth=1 'https://news.ycombinator.com'
# docker compose run -T archivebox add < bookmarks.txt
# docker compose up -d && open 'https://localhost:8000'
@@ -35,7 +35,7 @@ services:
# - MEDIA_MAX_SIZE=750m # increase this filesize limit to allow archiving larger audio/video files
# - TIMEOUT=60 # increase this number to 120+ seconds if you see many slow downloads timing out
# - CHECK_SSL_VALIDITY=True # set to False to disable strict SSL checking (allows saving URLs w/ broken certs)
# - SAVE_ARCHIVE_DOT_ORG=True # set to False to disable submitting all URLs to Archive.org when archiving
# - SAVE_ARCHIVEDOTORG=True # set to False to disable submitting all URLs to Archive.org when archiving
# - USER_AGENT="..." # set a custom USER_AGENT to avoid being blocked as a bot
# ...
# For more info, see: https://github.com/ArchiveBox/ArchiveBox/wiki/Docker#configuration

View File

@@ -85,9 +85,9 @@ dependencies = [
### Binary/Package Management
"abx-pkg>=0.1.0", # for: detecting, versioning, and installing binaries via apt/brew/pip/npm
"gallery-dl>=1.31.1",
### UUID7 backport for Python <3.14
"uuid7>=0.1.0; python_version < '3.14'", # for: uuid7 support on Python 3.13 (provides uuid_extensions module)
"pytest-django>=4.11.1",
]
[project.optional-dependencies]
@@ -183,6 +183,7 @@ ignore = ["E731", "E303", "E266", "E241", "E222"]
[tool.pytest.ini_options]
testpaths = [ "tests" ]
DJANGO_SETTINGS_MODULE = "archivebox.core.settings"
[tool.mypy]
mypy_path = "archivebox,archivebox/typings"

View File

@@ -24,7 +24,7 @@ def disable_extractors_dict():
"SAVE_HEADERS": "false",
"USE_GIT": "false",
"SAVE_MEDIA": "false",
"SAVE_ARCHIVE_DOT_ORG": "false",
"SAVE_ARCHIVEDOTORG": "false",
"SAVE_TITLE": "false",
"SAVE_FAVICON": "false",
})

View File

@@ -33,7 +33,7 @@ def test_background_hooks_dont_block_parser_extractors(tmp_path, process):
"SAVE_HEADERS": "false",
"USE_GIT": "false",
"SAVE_MEDIA": "false",
"SAVE_ARCHIVE_DOT_ORG": "false",
"SAVE_ARCHIVEDOTORG": "false",
"SAVE_TITLE": "false",
"SAVE_FAVICON": "false",
# Enable chrome session (required for background hooks to start)
@@ -133,7 +133,7 @@ def test_parser_extractors_emit_snapshot_jsonl(tmp_path, process):
"SAVE_HEADERS": "false",
"USE_GIT": "false",
"SAVE_MEDIA": "false",
"SAVE_ARCHIVE_DOT_ORG": "false",
"SAVE_ARCHIVEDOTORG": "false",
"SAVE_TITLE": "false",
"SAVE_FAVICON": "false",
"USE_CHROME": "false",

14
uv.lock generated
View File

@@ -88,6 +88,7 @@ dependencies = [
{ name = "py-machineid", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "pydantic", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "pydantic-settings", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "pytest-django", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "python-benedict", extra = ["io", "parse"], marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "python-crontab", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "python-statemachine", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
@@ -186,6 +187,7 @@ requires-dist = [
{ name = "py-machineid", specifier = ">=0.6.0" },
{ name = "pydantic", specifier = ">=2.8.0" },
{ name = "pydantic-settings", specifier = ">=2.5.2" },
{ name = "pytest-django", specifier = ">=4.11.1" },
{ name = "python-benedict", extras = ["io", "parse"], specifier = ">=0.33.2" },
{ name = "python-crontab", specifier = ">=3.2.0" },
{ name = "python-ldap", marker = "extra == 'ldap'", specifier = ">=3.4.3" },
@@ -1848,6 +1850,18 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/3b/ab/b3226f0bd7cdcf710fbede2b3548584366da3b19b5021e74f5bde2a8fa3f/pytest-9.0.2-py3-none-any.whl", hash = "sha256:711ffd45bf766d5264d487b917733b453d917afd2b0ad65223959f59089f875b", size = 374801, upload-time = "2025-12-06T21:30:49.154Z" },
]
[[package]]
name = "pytest-django"
version = "4.11.1"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "pytest", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
]
sdist = { url = "https://files.pythonhosted.org/packages/b1/fb/55d580352db26eb3d59ad50c64321ddfe228d3d8ac107db05387a2fadf3a/pytest_django-4.11.1.tar.gz", hash = "sha256:a949141a1ee103cb0e7a20f1451d355f83f5e4a5d07bdd4dcfdd1fd0ff227991", size = 86202, upload-time = "2025-04-03T18:56:09.338Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/be/ac/bd0608d229ec808e51a21044f3f2f27b9a37e7a0ebaca7247882e67876af/pytest_django-4.11.1-py3-none-any.whl", hash = "sha256:1b63773f648aa3d8541000c26929c1ea63934be1cfa674c76436966d73fe6a10", size = 25281, upload-time = "2025-04-03T18:56:07.678Z" },
]
[[package]]
name = "python-benedict"
version = "0.35.0"