mirror of
https://github.com/ArchiveBox/ArchiveBox.git
synced 2026-04-05 15:27:53 +10:00
use full dotted paths for all archivebox imports, add migrations and more fixes
This commit is contained in:
@@ -763,7 +763,7 @@ The configuration is documented here: **[Configuration Wiki](https://github.com/
|
|||||||
<br/>
|
<br/>
|
||||||
TIMEOUT=240 # default: 60 add more seconds on slower networks
|
TIMEOUT=240 # default: 60 add more seconds on slower networks
|
||||||
CHECK_SSL_VALIDITY=False # default: True False = allow saving URLs w/ bad SSL
|
CHECK_SSL_VALIDITY=False # default: True False = allow saving URLs w/ bad SSL
|
||||||
SAVE_ARCHIVE_DOT_ORG=False # default: True False = disable Archive.org saving
|
SAVE_ARCHIVEDOTORG=False # default: True False = disable Archive.org saving
|
||||||
MAX_MEDIA_SIZE=1500m # default: 750m raise/lower youtubedl output size
|
MAX_MEDIA_SIZE=1500m # default: 750m raise/lower youtubedl output size
|
||||||
<br/>
|
<br/>
|
||||||
PUBLIC_INDEX=True # default: True whether anon users can view index
|
PUBLIC_INDEX=True # default: True whether anon users can view index
|
||||||
@@ -959,7 +959,7 @@ archivebox add 'https://docs.google.com/document/d/12345somePrivateDocument'
|
|||||||
archivebox add 'https://vimeo.com/somePrivateVideo'
|
archivebox add 'https://vimeo.com/somePrivateVideo'
|
||||||
|
|
||||||
# without first disabling saving to Archive.org:
|
# without first disabling saving to Archive.org:
|
||||||
archivebox config --set SAVE_ARCHIVE_DOT_ORG=False # disable saving all URLs in Archive.org
|
archivebox config --set SAVE_ARCHIVEDOTORG=False # disable saving all URLs in Archive.org
|
||||||
|
|
||||||
# restrict the main index, Snapshot content, and Add Page to authenticated users as-needed:
|
# restrict the main index, Snapshot content, and Add Page to authenticated users as-needed:
|
||||||
archivebox config --set PUBLIC_INDEX=False
|
archivebox config --set PUBLIC_INDEX=False
|
||||||
|
|||||||
@@ -26,10 +26,10 @@ ASCII_LOGO = """
|
|||||||
|
|
||||||
PACKAGE_DIR = Path(__file__).resolve().parent
|
PACKAGE_DIR = Path(__file__).resolve().parent
|
||||||
|
|
||||||
# Add PACKAGE_DIR to sys.path - required for Django migrations to import models
|
# # Add PACKAGE_DIR to sys.path - required for Django migrations to import models
|
||||||
# Migrations reference models like 'machine.Binary' which need to be importable
|
# # Migrations reference models like 'machine.Binary' which need to be importable
|
||||||
if str(PACKAGE_DIR) not in sys.path:
|
# if str(PACKAGE_DIR) not in sys.path:
|
||||||
sys.path.append(str(PACKAGE_DIR))
|
# sys.path.append(str(PACKAGE_DIR))
|
||||||
|
|
||||||
os.environ['DJANGO_SETTINGS_MODULE'] = 'archivebox.core.settings'
|
os.environ['DJANGO_SETTINGS_MODULE'] = 'archivebox.core.settings'
|
||||||
os.environ['TZ'] = 'UTC'
|
os.environ['TZ'] = 'UTC'
|
||||||
|
|||||||
@@ -5,6 +5,7 @@ from django.apps import AppConfig
|
|||||||
|
|
||||||
class APIConfig(AppConfig):
|
class APIConfig(AppConfig):
|
||||||
name = 'archivebox.api'
|
name = 'archivebox.api'
|
||||||
|
label = 'api'
|
||||||
|
|
||||||
|
|
||||||
def register_admin(admin_site):
|
def register_admin(admin_site):
|
||||||
|
|||||||
@@ -94,7 +94,7 @@ class OrchestratorSchema(Schema):
|
|||||||
@router.get("/orchestrator", response=OrchestratorSchema, url_name="get_orchestrator")
|
@router.get("/orchestrator", response=OrchestratorSchema, url_name="get_orchestrator")
|
||||||
def get_orchestrator(request):
|
def get_orchestrator(request):
|
||||||
"""Get the orchestrator status and all worker queues."""
|
"""Get the orchestrator status and all worker queues."""
|
||||||
from workers.orchestrator import Orchestrator
|
from archivebox.workers.orchestrator import Orchestrator
|
||||||
from workers.worker import CrawlWorker, SnapshotWorker, ArchiveResultWorker
|
from workers.worker import CrawlWorker, SnapshotWorker, ArchiveResultWorker
|
||||||
|
|
||||||
orchestrator = Orchestrator()
|
orchestrator = Orchestrator()
|
||||||
|
|||||||
@@ -73,7 +73,7 @@ class ModelWithUUID(models.Model):
|
|||||||
return f'/api/v1/docs#/{self._meta.app_label.title()}%20Models/api_v1_{self._meta.app_label}_get_{self._meta.db_table}'
|
return f'/api/v1/docs#/{self._meta.app_label.title()}%20Models/api_v1_{self._meta.app_label}_get_{self._meta.db_table}'
|
||||||
|
|
||||||
def as_json(self, keys: Iterable[str] = ()) -> dict:
|
def as_json(self, keys: Iterable[str] = ()) -> dict:
|
||||||
default_keys = ('id', 'created_at', 'modified_at', 'created_by_id')
|
default_keys = ('id', 'created_at', 'modified_at')
|
||||||
return {key: getattr(self, key) for key in (keys or default_keys) if hasattr(self, key)}
|
return {key: getattr(self, key) for key in (keys or default_keys) if hasattr(self, key)}
|
||||||
|
|
||||||
|
|
||||||
@@ -119,7 +119,7 @@ class ModelWithHealthStats(models.Model):
|
|||||||
|
|
||||||
class ModelWithConfig(models.Model):
|
class ModelWithConfig(models.Model):
|
||||||
"""Mixin for models with a JSON config field."""
|
"""Mixin for models with a JSON config field."""
|
||||||
config = models.JSONField(default=dict, null=False, blank=False, editable=True)
|
config = models.JSONField(default=dict, null=True, blank=True, editable=True)
|
||||||
|
|
||||||
class Meta:
|
class Meta:
|
||||||
abstract = True
|
abstract = True
|
||||||
|
|||||||
@@ -56,7 +56,7 @@ def add(urls: str | list[str],
|
|||||||
from archivebox.core.models import Snapshot
|
from archivebox.core.models import Snapshot
|
||||||
from archivebox.crawls.models import Crawl
|
from archivebox.crawls.models import Crawl
|
||||||
from archivebox.base_models.models import get_or_create_system_user_pk
|
from archivebox.base_models.models import get_or_create_system_user_pk
|
||||||
from workers.orchestrator import Orchestrator
|
from archivebox.workers.orchestrator import Orchestrator
|
||||||
|
|
||||||
created_by_id = created_by_id or get_or_create_system_user_pk()
|
created_by_id = created_by_id or get_or_create_system_user_pk()
|
||||||
|
|
||||||
|
|||||||
@@ -78,7 +78,7 @@ def discover_outlinks(
|
|||||||
from archivebox.core.models import Snapshot, ArchiveResult
|
from archivebox.core.models import Snapshot, ArchiveResult
|
||||||
from archivebox.crawls.models import Crawl
|
from archivebox.crawls.models import Crawl
|
||||||
from archivebox.config import CONSTANTS
|
from archivebox.config import CONSTANTS
|
||||||
from workers.orchestrator import Orchestrator
|
from archivebox.workers.orchestrator import Orchestrator
|
||||||
|
|
||||||
created_by_id = get_or_create_system_user_pk()
|
created_by_id = get_or_create_system_user_pk()
|
||||||
is_tty = sys.stdout.isatty()
|
is_tty = sys.stdout.isatty()
|
||||||
|
|||||||
@@ -96,7 +96,7 @@ def run_plugins(
|
|||||||
TYPE_SNAPSHOT, TYPE_ARCHIVERESULT
|
TYPE_SNAPSHOT, TYPE_ARCHIVERESULT
|
||||||
)
|
)
|
||||||
from archivebox.core.models import Snapshot, ArchiveResult
|
from archivebox.core.models import Snapshot, ArchiveResult
|
||||||
from workers.orchestrator import Orchestrator
|
from archivebox.workers.orchestrator import Orchestrator
|
||||||
|
|
||||||
is_tty = sys.stdout.isatty()
|
is_tty = sys.stdout.isatty()
|
||||||
|
|
||||||
|
|||||||
@@ -13,11 +13,9 @@ from archivebox.misc.util import docstring, enforce_types
|
|||||||
|
|
||||||
|
|
||||||
@enforce_types
|
@enforce_types
|
||||||
def init(force: bool=False, quick: bool=False, install: bool=False, setup: bool=False) -> None:
|
def init(force: bool=False, quick: bool=False, install: bool=False) -> None:
|
||||||
"""Initialize a new ArchiveBox collection in the current directory"""
|
"""Initialize a new ArchiveBox collection in the current directory"""
|
||||||
|
|
||||||
install = install or setup
|
|
||||||
|
|
||||||
from archivebox.config import CONSTANTS, VERSION, DATA_DIR
|
from archivebox.config import CONSTANTS, VERSION, DATA_DIR
|
||||||
from archivebox.config.common import SERVER_CONFIG
|
from archivebox.config.common import SERVER_CONFIG
|
||||||
from archivebox.config.collection import write_config_file
|
from archivebox.config.collection import write_config_file
|
||||||
@@ -128,7 +126,8 @@ def init(force: bool=False, quick: bool=False, install: bool=False, setup: bool=
|
|||||||
print(f' [yellow]√ Added {len(orphaned_data_dir_links)} orphaned links from existing archive directories.[/yellow]')
|
print(f' [yellow]√ Added {len(orphaned_data_dir_links)} orphaned links from existing archive directories.[/yellow]')
|
||||||
|
|
||||||
if pending_links:
|
if pending_links:
|
||||||
Snapshot.objects.create_from_dicts(list(pending_links.values()))
|
for link_dict in pending_links.values():
|
||||||
|
Snapshot.from_jsonl(link_dict)
|
||||||
|
|
||||||
# Hint for orphaned snapshot directories
|
# Hint for orphaned snapshot directories
|
||||||
print()
|
print()
|
||||||
@@ -187,7 +186,6 @@ def init(force: bool=False, quick: bool=False, install: bool=False, setup: bool=
|
|||||||
@click.option('--force', '-f', is_flag=True, help='Ignore unrecognized files in current directory and initialize anyway')
|
@click.option('--force', '-f', is_flag=True, help='Ignore unrecognized files in current directory and initialize anyway')
|
||||||
@click.option('--quick', '-q', is_flag=True, help='Run any updates or migrations without rechecking all snapshot dirs')
|
@click.option('--quick', '-q', is_flag=True, help='Run any updates or migrations without rechecking all snapshot dirs')
|
||||||
@click.option('--install', '-s', is_flag=True, help='Automatically install dependencies and extras used for archiving')
|
@click.option('--install', '-s', is_flag=True, help='Automatically install dependencies and extras used for archiving')
|
||||||
@click.option('--setup', '-s', is_flag=True, help='DEPRECATED: equivalent to --install')
|
|
||||||
@docstring(init.__doc__)
|
@docstring(init.__doc__)
|
||||||
def main(**kwargs) -> None:
|
def main(**kwargs) -> None:
|
||||||
init(**kwargs)
|
init(**kwargs)
|
||||||
|
|||||||
@@ -85,7 +85,7 @@ def install(dry_run: bool=False) -> None:
|
|||||||
print()
|
print()
|
||||||
|
|
||||||
# Run the crawl synchronously (this triggers on_Crawl hooks)
|
# Run the crawl synchronously (this triggers on_Crawl hooks)
|
||||||
from workers.orchestrator import Orchestrator
|
from archivebox.workers.orchestrator import Orchestrator
|
||||||
orchestrator = Orchestrator(exit_on_idle=True)
|
orchestrator = Orchestrator(exit_on_idle=True)
|
||||||
orchestrator.runloop()
|
orchestrator.runloop()
|
||||||
|
|
||||||
|
|||||||
@@ -37,7 +37,7 @@ def orchestrator(daemon: bool = False, watch: bool = False) -> int:
|
|||||||
0: All work completed successfully
|
0: All work completed successfully
|
||||||
1: Error occurred
|
1: Error occurred
|
||||||
"""
|
"""
|
||||||
from workers.orchestrator import Orchestrator
|
from archivebox.workers.orchestrator import Orchestrator
|
||||||
|
|
||||||
if Orchestrator.is_running():
|
if Orchestrator.is_running():
|
||||||
print('[yellow]Orchestrator is already running[/yellow]')
|
print('[yellow]Orchestrator is already running[/yellow]')
|
||||||
|
|||||||
@@ -74,7 +74,7 @@ def server(runserver_args: Iterable[str]=(SERVER_CONFIG.BIND_ADDR,),
|
|||||||
tail_multiple_worker_logs,
|
tail_multiple_worker_logs,
|
||||||
is_port_in_use,
|
is_port_in_use,
|
||||||
)
|
)
|
||||||
from workers.orchestrator import Orchestrator
|
from archivebox.workers.orchestrator import Orchestrator
|
||||||
import sys
|
import sys
|
||||||
|
|
||||||
# Check if port is already in use
|
# Check if port is already in use
|
||||||
|
|||||||
@@ -163,7 +163,7 @@ def create_snapshots(
|
|||||||
|
|
||||||
# If --plugins is passed, run the orchestrator for those plugins
|
# If --plugins is passed, run the orchestrator for those plugins
|
||||||
if plugins:
|
if plugins:
|
||||||
from workers.orchestrator import Orchestrator
|
from archivebox.workers.orchestrator import Orchestrator
|
||||||
rprint(f'[blue]Running plugins: {plugins or "all"}...[/blue]', file=sys.stderr)
|
rprint(f'[blue]Running plugins: {plugins or "all"}...[/blue]', file=sys.stderr)
|
||||||
orchestrator = Orchestrator(exit_on_idle=True)
|
orchestrator = Orchestrator(exit_on_idle=True)
|
||||||
orchestrator.runloop()
|
orchestrator.runloop()
|
||||||
|
|||||||
@@ -160,7 +160,7 @@ def process_all_db_snapshots(batch_size: int = 100) -> dict:
|
|||||||
total = Snapshot.objects.count()
|
total = Snapshot.objects.count()
|
||||||
print(f'[*] Processing {total} snapshots from database...')
|
print(f'[*] Processing {total} snapshots from database...')
|
||||||
|
|
||||||
for snapshot in Snapshot.objects.iterator():
|
for snapshot in Snapshot.objects.iterator(chunk_size=batch_size):
|
||||||
# Reconcile index.json with DB
|
# Reconcile index.json with DB
|
||||||
snapshot.reconcile_with_index_json()
|
snapshot.reconcile_with_index_json()
|
||||||
|
|
||||||
@@ -209,7 +209,7 @@ def process_filtered_snapshots(
|
|||||||
total = snapshots.count()
|
total = snapshots.count()
|
||||||
print(f'[*] Found {total} matching snapshots')
|
print(f'[*] Found {total} matching snapshots')
|
||||||
|
|
||||||
for snapshot in snapshots.iterator():
|
for snapshot in snapshots.iterator(chunk_size=batch_size):
|
||||||
# Reconcile index.json with DB
|
# Reconcile index.json with DB
|
||||||
snapshot.reconcile_with_index_json()
|
snapshot.reconcile_with_index_json()
|
||||||
|
|
||||||
|
|||||||
@@ -17,7 +17,7 @@ TEST_CONFIG = {
|
|||||||
|
|
||||||
'DATA_DIR': 'data.tests',
|
'DATA_DIR': 'data.tests',
|
||||||
|
|
||||||
'SAVE_ARCHIVE_DOT_ORG': 'False',
|
'SAVE_ARCHIVEDOTORG': 'False',
|
||||||
'SAVE_TITLE': 'False',
|
'SAVE_TITLE': 'False',
|
||||||
|
|
||||||
'USE_CURL': 'False',
|
'USE_CURL': 'False',
|
||||||
|
|||||||
@@ -32,7 +32,7 @@ from unittest.mock import patch, MagicMock
|
|||||||
TEST_CONFIG = {
|
TEST_CONFIG = {
|
||||||
'USE_COLOR': 'False',
|
'USE_COLOR': 'False',
|
||||||
'SHOW_PROGRESS': 'False',
|
'SHOW_PROGRESS': 'False',
|
||||||
'SAVE_ARCHIVE_DOT_ORG': 'False',
|
'SAVE_ARCHIVEDOTORG': 'False',
|
||||||
'SAVE_TITLE': 'True', # Fast extractor
|
'SAVE_TITLE': 'True', # Fast extractor
|
||||||
'SAVE_FAVICON': 'False',
|
'SAVE_FAVICON': 'False',
|
||||||
'SAVE_WGET': 'False',
|
'SAVE_WGET': 'False',
|
||||||
|
|||||||
@@ -216,6 +216,29 @@ def get_config(
|
|||||||
if snapshot and hasattr(snapshot, "config") and snapshot.config:
|
if snapshot and hasattr(snapshot, "config") and snapshot.config:
|
||||||
config.update(snapshot.config)
|
config.update(snapshot.config)
|
||||||
|
|
||||||
|
# Normalize all aliases to canonical names (after all sources merged)
|
||||||
|
# This handles aliases that came from user/crawl/snapshot configs, not just env
|
||||||
|
try:
|
||||||
|
from archivebox.hooks import discover_plugin_configs
|
||||||
|
plugin_configs = discover_plugin_configs()
|
||||||
|
aliases_to_normalize = {} # {alias_key: canonical_key}
|
||||||
|
|
||||||
|
# Build alias mapping from all plugin schemas
|
||||||
|
for plugin_name, schema in plugin_configs.items():
|
||||||
|
for canonical_key, prop_schema in schema.get('properties', {}).items():
|
||||||
|
for alias in prop_schema.get('x-aliases', []):
|
||||||
|
aliases_to_normalize[alias] = canonical_key
|
||||||
|
|
||||||
|
# Normalize: copy alias values to canonical keys (aliases take precedence)
|
||||||
|
for alias_key, canonical_key in aliases_to_normalize.items():
|
||||||
|
if alias_key in config:
|
||||||
|
# Alias exists - copy to canonical key (overwriting any default)
|
||||||
|
config[canonical_key] = config[alias_key]
|
||||||
|
# Remove alias from config to keep it clean
|
||||||
|
del config[alias_key]
|
||||||
|
except ImportError:
|
||||||
|
pass
|
||||||
|
|
||||||
return config
|
return config
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -5,8 +5,12 @@ from django.apps import AppConfig
|
|||||||
|
|
||||||
class CoreConfig(AppConfig):
|
class CoreConfig(AppConfig):
|
||||||
name = 'archivebox.core'
|
name = 'archivebox.core'
|
||||||
|
label = 'core'
|
||||||
|
|
||||||
def ready(self):
|
def ready(self):
|
||||||
"""Register the archivebox.core.admin_site as the main django admin site"""
|
"""Register the archivebox.core.admin_site as the main django admin site"""
|
||||||
from archivebox.core.admin_site import register_admin_site
|
from archivebox.core.admin_site import register_admin_site
|
||||||
register_admin_site()
|
register_admin_site()
|
||||||
|
|
||||||
|
# Import models to register state machines with the registry
|
||||||
|
from archivebox.core import models # noqa: F401
|
||||||
|
|||||||
57
archivebox/core/migrations/0024_b_clear_config_fields.py
Normal file
57
archivebox/core/migrations/0024_b_clear_config_fields.py
Normal file
@@ -0,0 +1,57 @@
|
|||||||
|
# Data migration to clear config fields that may contain invalid JSON
|
||||||
|
# This runs before 0025 to prevent CHECK constraint failures
|
||||||
|
|
||||||
|
from django.db import migrations
|
||||||
|
|
||||||
|
|
||||||
|
def clear_config_fields(apps, schema_editor):
    """Reset JSON text columns in related tables to an empty object.

    Every non-NULL value in the listed columns is overwritten with the
    literal ``'{}'``. A table or column that does not exist is skipped with a
    message, and a database error while clearing one column is reported
    without aborting the remaining columns (deliberate best effort).

    Args:
        apps: Historical app registry passed by ``RunPython`` (unused).
        schema_editor: Schema editor whose ``connection`` executes the SQL.
    """
    # (table, column) pairs to reset. These are hard-coded identifiers, never
    # user input, so interpolating them into the SQL below is safe.
    # NOTE(review): an earlier version of this list carried a
    # ``crawls_crawlschedule`` entry guarded by ``'crawlschedule' in dir()``.
    # That test looks at local variable names and is always false, so the
    # entry was never active. Add the pair explicitly if schedules should be
    # cleared as well.
    columns_to_clear = (
        ('crawls_seed', 'config'),
        ('crawls_crawl', 'config'),
        ('machine_machine', 'stats'),
        ('machine_machine', 'config'),
    )
    connection = schema_editor.connection

    # NOTE(review): SQLite documents this pragma as a no-op while a
    # transaction is open - confirm whether this migration runs atomically.
    with connection.cursor() as cursor:
        cursor.execute("PRAGMA foreign_keys=OFF")

    try:
        for table_name, field_name in columns_to_clear:
            try:
                with connection.cursor() as cursor:
                    # table_info yields no rows for a missing table, so one
                    # query answers both "table exists" and "column exists".
                    cursor.execute(f"PRAGMA table_info({table_name})")
                    existing_columns = {row[1] for row in cursor.fetchall()}
                    if not existing_columns:
                        print(f" Skipping {table_name}.{field_name}: table does not exist")
                        continue
                    if field_name not in existing_columns:
                        print(f" Skipping {table_name}.{field_name}: column does not exist")
                        continue

                    # Set all to empty JSON object
                    cursor.execute(
                        f"UPDATE {table_name} SET {field_name} = '{{}}' "
                        f"WHERE {field_name} IS NOT NULL"
                    )
                    print(f" Cleared {field_name} in {table_name}: {cursor.rowcount} rows")
            except Exception as e:
                # Best effort: one bad table must not block the others.
                print(f" Skipping {table_name}.{field_name}: {e}")
    finally:
        # Always attempt to restore enforcement, even if the loop raised.
        with connection.cursor() as cursor:
            cursor.execute("PRAGMA foreign_keys=ON")
|
||||||
|
|
||||||
|
|
||||||
|
class Migration(migrations.Migration):
    """Run ``clear_config_fields`` after core 0023, crawls 0001 and machine 0001.

    The reverse direction is a no-op: cleared values are not restored.
    """

    dependencies = [
        ('core', '0023_new_schema'),
        ('crawls', '0001_initial'),
        ('machine', '0001_squashed'),
    ]

    operations = [
        migrations.RunPython(
            clear_config_fields,
            reverse_code=migrations.RunPython.noop,
        ),
    ]
|
||||||
28
archivebox/core/migrations/0024_c_disable_fk_checks.py
Normal file
28
archivebox/core/migrations/0024_c_disable_fk_checks.py
Normal file
@@ -0,0 +1,28 @@
|
|||||||
|
# Disable foreign key checks before 0025 to prevent CHECK constraint validation errors
|
||||||
|
|
||||||
|
from django.db import migrations
|
||||||
|
|
||||||
|
|
||||||
|
def disable_fk_checks(apps, schema_editor):
    """Turn off SQLite foreign key enforcement on the migration connection.

    The pragma is read back afterwards so the printed status reflects what
    SQLite actually did: SQLite documents ``PRAGMA foreign_keys`` as a no-op
    while a transaction is open, in which case a warning is printed instead
    of the success message.

    Args:
        apps: Historical app registry passed by ``RunPython`` (unused).
        schema_editor: Schema editor whose ``connection`` executes the pragma.
    """
    with schema_editor.connection.cursor() as cursor:
        cursor.execute("PRAGMA foreign_keys=OFF")
        cursor.execute("PRAGMA foreign_keys")
        state = cursor.fetchone()

    if state is not None and state[0]:
        print(" WARNING: foreign key checks are still enabled (pragma was ignored)")
    else:
        print(" Disabled foreign key checks")
|
||||||
|
|
||||||
|
|
||||||
|
def enable_fk_checks(apps, schema_editor):
    """Turn SQLite foreign key enforcement back on for the migration connection.

    The pragma is read back afterwards so the printed status reflects what
    SQLite actually did: SQLite documents ``PRAGMA foreign_keys`` as a no-op
    while a transaction is open, in which case a warning is printed instead
    of the success message.

    Args:
        apps: Historical app registry passed by ``RunPython`` (unused).
        schema_editor: Schema editor whose ``connection`` executes the pragma.
    """
    with schema_editor.connection.cursor() as cursor:
        cursor.execute("PRAGMA foreign_keys=ON")
        cursor.execute("PRAGMA foreign_keys")
        state = cursor.fetchone()

    if state is not None and state[0]:
        print(" Enabled foreign key checks")
    else:
        print(" WARNING: foreign key checks are still disabled (pragma was ignored)")
|
||||||
|
|
||||||
|
|
||||||
|
class Migration(migrations.Migration):
    """Run ``disable_fk_checks`` after 0024_b; reversing runs ``enable_fk_checks``."""

    dependencies = [
        ('core', '0024_b_clear_config_fields'),
    ]

    operations = [
        migrations.RunPython(
            disable_fk_checks,
            reverse_code=enable_fk_checks,
        ),
    ]
|
||||||
93
archivebox/core/migrations/0024_d_fix_crawls_config.py
Normal file
93
archivebox/core/migrations/0024_d_fix_crawls_config.py
Normal file
@@ -0,0 +1,93 @@
|
|||||||
|
# Fix crawls_crawl config field to avoid CHECK constraint errors during table rebuilds
|
||||||
|
|
||||||
|
from django.db import migrations
|
||||||
|
|
||||||
|
|
||||||
|
# Columns copied from the backup into the rebuilt crawls_crawl table, in
# the order they are declared in the CREATE TABLE statement below.
_CRAWLS_CRAWL_COLUMNS = (
    "num_uses_failed", "num_uses_succeeded", "id", "created_at", "modified_at",
    "urls", "config", "max_depth", "tags_str", "persona_id", "label", "notes",
    "output_dir", "status", "retry_at", "created_by_id", "seed_id", "schedule_id",
)


def fix_crawls_config(apps, schema_editor):
    """Set ``crawls_crawl.config`` to NULL, rebuilding the table first on upgrades.

    When ``django_migrations`` already records ``crawls.0001_initial`` the
    table is left untouched and only ``config`` is nulled; an error from that
    UPDATE is printed and ignored. Otherwise the table is rebuilt so that
    ``seed_id`` is nullable and ``config`` carries no CHECK constraint, and
    ``config`` is nulled afterwards.

    Args:
        apps: Historical app registry passed by ``RunPython`` (unused).
        schema_editor: Schema editor whose ``connection`` executes the SQL.
    """
    with schema_editor.connection.cursor() as cursor:
        # NOTE(review): the Migration in this file lists
        # ('crawls', '0001_initial') as a dependency, so presumably that row
        # is always recorded by the time this runs, which would make the
        # rebuild branch below unreachable - confirm.
        if _crawls_initial_is_recorded(cursor):
            # Fresh install - crawls.0001_initial already created the schema,
            # so just clear config to avoid CHECK constraint issues.
            print(" Fresh install detected - clearing config field only")
            try:
                cursor.execute('UPDATE "crawls_crawl" SET "config" = NULL')
            except Exception as e:
                print(f" Skipping config clear: {e}")
            return

        # Upgrade from 0.8.x - rebuild table to make seed_id nullable and
        # remove the CHECK constraint on config.
        print(" Upgrading from 0.8.x - rebuilding crawls_crawl table")
        _rebuild_crawls_crawl_table(cursor)

        # NULL out config to avoid any invalid JSON
        cursor.execute('UPDATE "crawls_crawl" SET "config" = NULL')


def _crawls_initial_is_recorded(cursor):
    """Return True if django_migrations has a row for crawls.0001_initial."""
    cursor.execute("""
        SELECT COUNT(*) FROM django_migrations
        WHERE app='crawls' AND name='0001_initial'
    """)
    return cursor.fetchone()[0] > 0


def _rebuild_crawls_crawl_table(cursor):
    """Recreate crawls_crawl with nullable seed_id/config, preserving its rows."""
    cursor.execute("PRAGMA foreign_keys=OFF")

    # Backup
    cursor.execute("CREATE TABLE crawls_crawl_backup AS SELECT * FROM crawls_crawl")

    # Recreate without config CHECK constraint, with nullable seed_id
    cursor.execute("DROP TABLE crawls_crawl")
    cursor.execute("""
        CREATE TABLE "crawls_crawl" (
            "num_uses_failed" integer unsigned NOT NULL CHECK ("num_uses_failed" >= 0),
            "num_uses_succeeded" integer unsigned NOT NULL CHECK ("num_uses_succeeded" >= 0),
            "id" char(32) NOT NULL PRIMARY KEY,
            "created_at" datetime NOT NULL,
            "modified_at" datetime NOT NULL,
            "urls" text NOT NULL,
            "config" text,
            "max_depth" smallint unsigned NOT NULL CHECK ("max_depth" >= 0),
            "tags_str" varchar(1024) NOT NULL,
            "persona_id" char(32) NULL,
            "label" varchar(64) NOT NULL,
            "notes" text NOT NULL,
            "output_dir" varchar(512) NOT NULL,
            "status" varchar(15) NOT NULL,
            "retry_at" datetime NULL,
            "created_by_id" integer NOT NULL REFERENCES "auth_user" ("id") DEFERRABLE INITIALLY DEFERRED,
            "seed_id" char(32) NULL DEFAULT NULL,
            "schedule_id" char(32) NULL REFERENCES "crawls_crawlschedule" ("id") DEFERRABLE INITIALLY DEFERRED
        )
    """)

    # Restore data: the same explicit column list on both sides keeps the
    # copy independent of the column order in the backup table.
    column_list = ", ".join(f'"{name}"' for name in _CRAWLS_CRAWL_COLUMNS)
    cursor.execute(
        f'INSERT INTO "crawls_crawl" ({column_list}) '
        f'SELECT {column_list} FROM crawls_crawl_backup'
    )

    cursor.execute("DROP TABLE crawls_crawl_backup")
|
||||||
|
|
||||||
|
|
||||||
|
class Migration(migrations.Migration):
    """Run ``fix_crawls_config`` after 0024_c and crawls 0001; reverse is a no-op."""

    dependencies = [
        ('core', '0024_c_disable_fk_checks'),
        ('crawls', '0001_initial'),
    ]

    operations = [
        migrations.RunPython(
            fix_crawls_config,
            reverse_code=migrations.RunPython.noop,
        ),
    ]
|
||||||
@@ -8,9 +8,7 @@ import django.db.models.deletion
|
|||||||
class Migration(migrations.Migration):
|
class Migration(migrations.Migration):
|
||||||
|
|
||||||
dependencies = [
|
dependencies = [
|
||||||
('core', '0023_new_schema'),
|
('core', '0024_d_fix_crawls_config'),
|
||||||
('crawls', '0001_initial'),
|
|
||||||
('machine', '0001_squashed'),
|
|
||||||
]
|
]
|
||||||
|
|
||||||
operations = [
|
operations = [
|
||||||
|
|||||||
@@ -10,6 +10,13 @@ from django.db import migrations, models
|
|||||||
|
|
||||||
def populate_archiveresult_uuids(apps, schema_editor):
|
def populate_archiveresult_uuids(apps, schema_editor):
|
||||||
"""Generate unique UUIDs for ArchiveResults that don't have one."""
|
"""Generate unique UUIDs for ArchiveResults that don't have one."""
|
||||||
|
# Check if uuid column exists before trying to populate it
|
||||||
|
with schema_editor.connection.cursor() as cursor:
|
||||||
|
cursor.execute("PRAGMA table_info(core_archiveresult)")
|
||||||
|
columns = [row[1] for row in cursor.fetchall()]
|
||||||
|
if 'uuid' not in columns:
|
||||||
|
return # uuid column doesn't exist, skip this data migration
|
||||||
|
|
||||||
ArchiveResult = apps.get_model('core', 'ArchiveResult')
|
ArchiveResult = apps.get_model('core', 'ArchiveResult')
|
||||||
for result in ArchiveResult.objects.filter(uuid__isnull=True):
|
for result in ArchiveResult.objects.filter(uuid__isnull=True):
|
||||||
result.uuid = uuid_compat.uuid7()
|
result.uuid = uuid_compat.uuid7()
|
||||||
@@ -21,6 +28,22 @@ def reverse_populate_uuids(apps, schema_editor):
|
|||||||
pass
|
pass
|
||||||
|
|
||||||
|
|
||||||
|
def remove_output_dir_if_exists(apps, schema_editor):
    """Drop the ``output_dir`` column from the core tables that still have it.

    ``core_archiveresult`` is handled first, then ``core_snapshot``; a table
    whose column list does not include ``output_dir`` is left untouched.

    Args:
        apps: Historical app registry passed by ``RunPython`` (unused).
        schema_editor: Schema editor whose ``connection`` executes the SQL.
    """
    with schema_editor.connection.cursor() as cursor:
        for table in ("core_archiveresult", "core_snapshot"):
            # Row index 1 of each table_info row is the column name.
            cursor.execute(f"PRAGMA table_info({table})")
            present = [info[1] for info in cursor.fetchall()]
            if 'output_dir' not in present:
                continue
            cursor.execute(f"ALTER TABLE {table} DROP COLUMN output_dir")
|
||||||
|
|
||||||
|
|
||||||
class Migration(migrations.Migration):
|
class Migration(migrations.Migration):
|
||||||
|
|
||||||
dependencies = [
|
dependencies = [
|
||||||
@@ -33,82 +56,90 @@ class Migration(migrations.Migration):
|
|||||||
migrations.RunPython(populate_archiveresult_uuids, reverse_populate_uuids),
|
migrations.RunPython(populate_archiveresult_uuids, reverse_populate_uuids),
|
||||||
|
|
||||||
# Remove output_dir fields (not needed, computed from snapshot)
|
# Remove output_dir fields (not needed, computed from snapshot)
|
||||||
migrations.RemoveField(
|
migrations.RunPython(remove_output_dir_if_exists, reverse_code=migrations.RunPython.noop),
|
||||||
model_name='archiveresult',
|
|
||||||
name='output_dir',
|
# Update Django's migration state to match 0.9.x schema
|
||||||
),
|
# Database already has correct types from 0.8.x, just update state
|
||||||
migrations.RemoveField(
|
migrations.SeparateDatabaseAndState(
|
||||||
model_name='snapshot',
|
state_operations=[
|
||||||
name='output_dir',
|
# Archiveresult field alterations
|
||||||
|
migrations.AlterField(
|
||||||
|
model_name='archiveresult',
|
||||||
|
name='created_at',
|
||||||
|
field=models.DateTimeField(db_index=True, default=django.utils.timezone.now),
|
||||||
|
),
|
||||||
|
migrations.AlterField(
|
||||||
|
model_name='archiveresult',
|
||||||
|
name='created_by',
|
||||||
|
field=models.ForeignKey(default=archivebox.base_models.models.get_or_create_system_user_pk, on_delete=django.db.models.deletion.CASCADE, related_name='archiveresult_set', to=settings.AUTH_USER_MODEL),
|
||||||
|
),
|
||||||
|
migrations.AlterField(
|
||||||
|
model_name='archiveresult',
|
||||||
|
name='extractor',
|
||||||
|
field=models.CharField(db_index=True, max_length=32),
|
||||||
|
),
|
||||||
|
# Convert id from AutoField to UUIDField (database already has UUID CHAR(32))
|
||||||
|
migrations.AlterField(
|
||||||
|
model_name='archiveresult',
|
||||||
|
name='id',
|
||||||
|
field=models.UUIDField(default=uuid_compat.uuid7, editable=False, primary_key=True, serialize=False, unique=True),
|
||||||
|
),
|
||||||
|
migrations.AlterField(
|
||||||
|
model_name='archiveresult',
|
||||||
|
name='status',
|
||||||
|
field=models.CharField(choices=[('queued', 'Queued'), ('started', 'Started'), ('backoff', 'Waiting to retry'), ('succeeded', 'Succeeded'), ('failed', 'Failed'), ('skipped', 'Skipped')], db_index=True, default='queued', max_length=15),
|
||||||
|
),
|
||||||
|
|
||||||
|
# Snapshot field alterations
|
||||||
|
migrations.AlterField(
|
||||||
|
model_name='snapshot',
|
||||||
|
name='bookmarked_at',
|
||||||
|
field=models.DateTimeField(db_index=True, default=django.utils.timezone.now),
|
||||||
|
),
|
||||||
|
migrations.AlterField(
|
||||||
|
model_name='snapshot',
|
||||||
|
name='created_at',
|
||||||
|
field=models.DateTimeField(db_index=True, default=django.utils.timezone.now),
|
||||||
|
),
|
||||||
|
migrations.AlterField(
|
||||||
|
model_name='snapshot',
|
||||||
|
name='created_by',
|
||||||
|
field=models.ForeignKey(default=archivebox.base_models.models.get_or_create_system_user_pk, on_delete=django.db.models.deletion.CASCADE, related_name='snapshot_set', to=settings.AUTH_USER_MODEL),
|
||||||
|
),
|
||||||
|
migrations.AlterField(
|
||||||
|
model_name='snapshot',
|
||||||
|
name='downloaded_at',
|
||||||
|
field=models.DateTimeField(blank=True, db_index=True, default=None, editable=False, null=True),
|
||||||
|
),
|
||||||
|
migrations.AlterField(
|
||||||
|
model_name='snapshot',
|
||||||
|
name='id',
|
||||||
|
field=models.UUIDField(default=uuid_compat.uuid7, editable=False, primary_key=True, serialize=False, unique=True),
|
||||||
|
),
|
||||||
|
],
|
||||||
|
database_operations=[
|
||||||
|
# No actual database changes needed - schema is already correct from 0.8.x
|
||||||
|
],
|
||||||
),
|
),
|
||||||
|
|
||||||
# Archiveresult field alterations
|
# SnapshotTag and Tag alterations - state only, DB already correct
|
||||||
migrations.AlterField(
|
migrations.SeparateDatabaseAndState(
|
||||||
model_name='archiveresult',
|
state_operations=[
|
||||||
name='created_at',
|
migrations.AlterField(
|
||||||
field=models.DateTimeField(db_index=True, default=django.utils.timezone.now),
|
model_name='snapshottag',
|
||||||
),
|
name='id',
|
||||||
migrations.AlterField(
|
field=models.AutoField(primary_key=True, serialize=False),
|
||||||
model_name='archiveresult',
|
),
|
||||||
name='created_by',
|
migrations.AlterField(
|
||||||
field=models.ForeignKey(default=None, on_delete=django.db.models.deletion.CASCADE, related_name='archiveresult_set', to=settings.AUTH_USER_MODEL),
|
model_name='tag',
|
||||||
),
|
name='created_by',
|
||||||
migrations.AlterField(
|
field=models.ForeignKey(default=archivebox.base_models.models.get_or_create_system_user_pk, on_delete=django.db.models.deletion.CASCADE, related_name='tag_set', to=settings.AUTH_USER_MODEL),
|
||||||
model_name='archiveresult',
|
),
|
||||||
name='extractor',
|
migrations.AlterUniqueTogether(
|
||||||
field=models.CharField(db_index=True, max_length=32),
|
name='snapshottag',
|
||||||
),
|
unique_together={('snapshot', 'tag')},
|
||||||
migrations.AlterField(
|
),
|
||||||
model_name='archiveresult',
|
],
|
||||||
name='id',
|
database_operations=[],
|
||||||
field=models.AutoField(editable=False, primary_key=True, serialize=False),
|
|
||||||
),
|
|
||||||
migrations.AlterField(
|
|
||||||
model_name='archiveresult',
|
|
||||||
name='status',
|
|
||||||
field=models.CharField(choices=[('queued', 'Queued'), ('started', 'Started'), ('backoff', 'Waiting to retry'), ('succeeded', 'Succeeded'), ('failed', 'Failed'), ('skipped', 'Skipped')], db_index=True, default='queued', max_length=15),
|
|
||||||
),
|
|
||||||
|
|
||||||
# Snapshot field alterations
|
|
||||||
migrations.AlterField(
|
|
||||||
model_name='snapshot',
|
|
||||||
name='bookmarked_at',
|
|
||||||
field=models.DateTimeField(db_index=True, default=django.utils.timezone.now),
|
|
||||||
),
|
|
||||||
migrations.AlterField(
|
|
||||||
model_name='snapshot',
|
|
||||||
name='created_at',
|
|
||||||
field=models.DateTimeField(db_index=True, default=django.utils.timezone.now),
|
|
||||||
),
|
|
||||||
migrations.AlterField(
|
|
||||||
model_name='snapshot',
|
|
||||||
name='created_by',
|
|
||||||
field=models.ForeignKey(default=None, on_delete=django.db.models.deletion.CASCADE, related_name='snapshot_set', to=settings.AUTH_USER_MODEL),
|
|
||||||
),
|
|
||||||
migrations.AlterField(
|
|
||||||
model_name='snapshot',
|
|
||||||
name='downloaded_at',
|
|
||||||
field=models.DateTimeField(blank=True, db_index=True, default=None, editable=False, null=True),
|
|
||||||
),
|
|
||||||
migrations.AlterField(
|
|
||||||
model_name='snapshot',
|
|
||||||
name='id',
|
|
||||||
field=models.UUIDField(default=uuid_compat.uuid7, editable=False, primary_key=True, serialize=False, unique=True),
|
|
||||||
),
|
|
||||||
|
|
||||||
# SnapshotTag and Tag alterations
|
|
||||||
migrations.AlterField(
|
|
||||||
model_name='snapshottag',
|
|
||||||
name='id',
|
|
||||||
field=models.AutoField(primary_key=True, serialize=False),
|
|
||||||
),
|
|
||||||
migrations.AlterField(
|
|
||||||
model_name='tag',
|
|
||||||
name='created_by',
|
|
||||||
field=models.ForeignKey(default=archivebox.base_models.models.get_or_create_system_user_pk, on_delete=django.db.models.deletion.CASCADE, related_name='tag_set', to=settings.AUTH_USER_MODEL),
|
|
||||||
),
|
|
||||||
migrations.AlterUniqueTogether(
|
|
||||||
name='snapshottag',
|
|
||||||
unique_together={('snapshot', 'tag')},
|
|
||||||
),
|
),
|
||||||
]
|
]
|
||||||
|
|||||||
@@ -13,68 +13,79 @@ class Migration(migrations.Migration):
|
|||||||
]
|
]
|
||||||
|
|
||||||
operations = [
|
operations = [
|
||||||
# Add new output fields (keep old 'output' temporarily for migration)
|
# Add new output fields using SeparateDatabaseAndState to avoid table rebuilds
|
||||||
migrations.AddField(
|
migrations.SeparateDatabaseAndState(
|
||||||
model_name='archiveresult',
|
state_operations=[
|
||||||
name='output_str',
|
migrations.AddField(
|
||||||
field=models.TextField(
|
model_name='archiveresult',
|
||||||
blank=True,
|
name='output_str',
|
||||||
default='',
|
field=models.TextField(
|
||||||
help_text='Human-readable output summary (e.g., "Downloaded 5 files")'
|
blank=True,
|
||||||
),
|
default='',
|
||||||
),
|
help_text='Human-readable output summary (e.g., "Downloaded 5 files")'
|
||||||
|
),
|
||||||
migrations.AddField(
|
),
|
||||||
model_name='archiveresult',
|
migrations.AddField(
|
||||||
name='output_json',
|
model_name='archiveresult',
|
||||||
field=models.JSONField(
|
name='output_json',
|
||||||
null=True,
|
field=models.JSONField(
|
||||||
blank=True,
|
null=True,
|
||||||
default=None,
|
blank=True,
|
||||||
help_text='Structured metadata (headers, redirects, etc.) - should NOT duplicate ArchiveResult fields'
|
default=None,
|
||||||
),
|
help_text='Structured metadata (headers, redirects, etc.) - should NOT duplicate ArchiveResult fields'
|
||||||
),
|
),
|
||||||
|
),
|
||||||
migrations.AddField(
|
migrations.AddField(
|
||||||
model_name='archiveresult',
|
model_name='archiveresult',
|
||||||
name='output_files',
|
name='output_files',
|
||||||
field=models.JSONField(
|
field=models.JSONField(
|
||||||
default=dict,
|
default=dict,
|
||||||
help_text='Dict of {relative_path: {metadata}} - values are empty dicts for now, extensible for future metadata'
|
help_text='Dict of {relative_path: {metadata}} - values are empty dicts for now, extensible for future metadata'
|
||||||
),
|
),
|
||||||
),
|
),
|
||||||
|
migrations.AddField(
|
||||||
migrations.AddField(
|
model_name='archiveresult',
|
||||||
model_name='archiveresult',
|
name='output_size',
|
||||||
name='output_size',
|
field=models.BigIntegerField(
|
||||||
field=models.BigIntegerField(
|
default=0,
|
||||||
default=0,
|
help_text='Total recursive size in bytes of all output files'
|
||||||
help_text='Total recursive size in bytes of all output files'
|
),
|
||||||
),
|
),
|
||||||
),
|
migrations.AddField(
|
||||||
|
model_name='archiveresult',
|
||||||
migrations.AddField(
|
name='output_mimetypes',
|
||||||
model_name='archiveresult',
|
field=models.CharField(
|
||||||
name='output_mimetypes',
|
max_length=512,
|
||||||
field=models.CharField(
|
blank=True,
|
||||||
max_length=512,
|
default='',
|
||||||
blank=True,
|
help_text='CSV of mimetypes sorted by size descending'
|
||||||
default='',
|
),
|
||||||
help_text='CSV of mimetypes sorted by size descending'
|
),
|
||||||
),
|
migrations.AddField(
|
||||||
),
|
model_name='archiveresult',
|
||||||
|
name='binary',
|
||||||
# Add binary FK (optional)
|
field=models.ForeignKey(
|
||||||
migrations.AddField(
|
'machine.Binary',
|
||||||
model_name='archiveresult',
|
on_delete=models.SET_NULL,
|
||||||
name='binary',
|
null=True,
|
||||||
field=models.ForeignKey(
|
blank=True,
|
||||||
'machine.Binary',
|
related_name='archiveresults',
|
||||||
on_delete=models.SET_NULL,
|
help_text='Primary binary used by this hook (optional)'
|
||||||
null=True,
|
),
|
||||||
blank=True,
|
),
|
||||||
related_name='archiveresults',
|
],
|
||||||
help_text='Primary binary used by this hook (optional)'
|
database_operations=[
|
||||||
),
|
migrations.RunSQL(
|
||||||
|
sql="""
|
||||||
|
ALTER TABLE core_archiveresult ADD COLUMN output_str TEXT DEFAULT '';
|
||||||
|
ALTER TABLE core_archiveresult ADD COLUMN output_json TEXT;
|
||||||
|
ALTER TABLE core_archiveresult ADD COLUMN output_files TEXT DEFAULT '{}';
|
||||||
|
ALTER TABLE core_archiveresult ADD COLUMN output_size BIGINT DEFAULT 0;
|
||||||
|
ALTER TABLE core_archiveresult ADD COLUMN output_mimetypes VARCHAR(512) DEFAULT '';
|
||||||
|
ALTER TABLE core_archiveresult ADD COLUMN binary_id CHAR(32) REFERENCES machine_binary(id);
|
||||||
|
""",
|
||||||
|
reverse_sql=migrations.RunSQL.noop,
|
||||||
|
),
|
||||||
|
],
|
||||||
),
|
),
|
||||||
]
|
]
|
||||||
|
|||||||
@@ -12,27 +12,46 @@ def migrate_output_field(apps, schema_editor):
|
|||||||
Logic:
|
Logic:
|
||||||
- If output contains JSON {...}, move to output_json
|
- If output contains JSON {...}, move to output_json
|
||||||
- Otherwise, move to output_str
|
- Otherwise, move to output_str
|
||||||
|
|
||||||
|
Use raw SQL to avoid CHECK constraint issues during migration.
|
||||||
"""
|
"""
|
||||||
ArchiveResult = apps.get_model('core', 'ArchiveResult')
|
# Use raw SQL to migrate data without triggering CHECK constraints
|
||||||
|
with schema_editor.connection.cursor() as cursor:
|
||||||
|
# Get all archive results
|
||||||
|
cursor.execute("""
|
||||||
|
SELECT id, output FROM core_archiveresult
|
||||||
|
""")
|
||||||
|
|
||||||
for ar in ArchiveResult.objects.all().iterator():
|
for row in cursor.fetchall():
|
||||||
old_output = ar.output or ''
|
ar_id, old_output = row
|
||||||
|
old_output = old_output or ''
|
||||||
|
|
||||||
# Case 1: JSON output
|
# Case 1: JSON output
|
||||||
if old_output.strip().startswith('{'):
|
if old_output.strip().startswith('{'):
|
||||||
try:
|
try:
|
||||||
parsed = json.loads(old_output)
|
# Validate it's actual JSON
|
||||||
ar.output_json = parsed
|
parsed = json.loads(old_output)
|
||||||
ar.output_str = ''
|
# Update with JSON - cast to JSON to satisfy CHECK constraint
|
||||||
except json.JSONDecodeError:
|
json_str = json.dumps(parsed)
|
||||||
# Not valid JSON, treat as string
|
cursor.execute("""
|
||||||
ar.output_str = old_output
|
UPDATE core_archiveresult
|
||||||
|
SET output_str = '', output_json = json(?)
|
||||||
# Case 2: File path or plain string
|
WHERE id = ?
|
||||||
else:
|
""", (json_str, ar_id))
|
||||||
ar.output_str = old_output
|
except json.JSONDecodeError:
|
||||||
|
# Not valid JSON, treat as string
|
||||||
ar.save(update_fields=['output_str', 'output_json'])
|
cursor.execute("""
|
||||||
|
UPDATE core_archiveresult
|
||||||
|
SET output_str = ?, output_json = NULL
|
||||||
|
WHERE id = ?
|
||||||
|
""", (old_output, ar_id))
|
||||||
|
# Case 2: File path or plain string
|
||||||
|
else:
|
||||||
|
cursor.execute("""
|
||||||
|
UPDATE core_archiveresult
|
||||||
|
SET output_str = ?, output_json = NULL
|
||||||
|
WHERE id = ?
|
||||||
|
""", (old_output, ar_id))
|
||||||
|
|
||||||
|
|
||||||
def reverse_migrate(apps, schema_editor):
|
def reverse_migrate(apps, schema_editor):
|
||||||
|
|||||||
@@ -16,43 +16,62 @@ class Migration(migrations.Migration):
|
|||||||
]
|
]
|
||||||
|
|
||||||
operations = [
|
operations = [
|
||||||
migrations.AlterField(
|
# Update Django's state only - database already has correct schema from 0029
|
||||||
model_name='archiveresult',
|
migrations.SeparateDatabaseAndState(
|
||||||
name='binary',
|
state_operations=[
|
||||||
field=models.ForeignKey(blank=True, help_text='Primary binary used by this hook', null=True, on_delete=django.db.models.deletion.SET_NULL, related_name='archiveresults', to='machine.binary'),
|
migrations.AlterField(
|
||||||
|
model_name='archiveresult',
|
||||||
|
name='binary',
|
||||||
|
field=models.ForeignKey(blank=True, help_text='Primary binary used by this hook', null=True, on_delete=django.db.models.deletion.SET_NULL, related_name='archiveresults', to='machine.binary'),
|
||||||
|
),
|
||||||
|
migrations.AlterField(
|
||||||
|
model_name='archiveresult',
|
||||||
|
name='output_files',
|
||||||
|
field=models.JSONField(default=dict, help_text='Dict of {relative_path: {metadata}}'),
|
||||||
|
),
|
||||||
|
migrations.AlterField(
|
||||||
|
model_name='archiveresult',
|
||||||
|
name='output_json',
|
||||||
|
field=models.JSONField(blank=True, default=None, help_text='Structured metadata (headers, redirects, etc.)', null=True),
|
||||||
|
),
|
||||||
|
migrations.AlterField(
|
||||||
|
model_name='archiveresult',
|
||||||
|
name='output_mimetypes',
|
||||||
|
field=models.CharField(blank=True, default='', help_text='CSV of mimetypes sorted by size', max_length=512),
|
||||||
|
),
|
||||||
|
migrations.AlterField(
|
||||||
|
model_name='archiveresult',
|
||||||
|
name='output_size',
|
||||||
|
field=models.BigIntegerField(default=0, help_text='Total bytes of all output files'),
|
||||||
|
),
|
||||||
|
migrations.AlterField(
|
||||||
|
model_name='archiveresult',
|
||||||
|
name='output_str',
|
||||||
|
field=models.TextField(blank=True, default='', help_text='Human-readable output summary'),
|
||||||
|
),
|
||||||
|
migrations.AlterField(
|
||||||
|
model_name='archiveresult',
|
||||||
|
name='uuid',
|
||||||
|
field=models.UUIDField(blank=True, db_index=True, default=uuid_compat.uuid7, null=True),
|
||||||
|
),
|
||||||
|
],
|
||||||
|
database_operations=[
|
||||||
|
# No database changes needed - columns already exist with correct types
|
||||||
|
],
|
||||||
),
|
),
|
||||||
migrations.AlterField(
|
# Add unique constraint without table rebuild
|
||||||
model_name='archiveresult',
|
migrations.SeparateDatabaseAndState(
|
||||||
name='output_files',
|
state_operations=[
|
||||||
field=models.JSONField(default=dict, help_text='Dict of {relative_path: {metadata}}'),
|
migrations.AddConstraint(
|
||||||
),
|
model_name='snapshot',
|
||||||
migrations.AlterField(
|
constraint=models.UniqueConstraint(fields=('timestamp',), name='unique_timestamp'),
|
||||||
model_name='archiveresult',
|
),
|
||||||
name='output_json',
|
],
|
||||||
field=models.JSONField(blank=True, default=None, help_text='Structured metadata (headers, redirects, etc.)', null=True),
|
database_operations=[
|
||||||
),
|
migrations.RunSQL(
|
||||||
migrations.AlterField(
|
sql="CREATE UNIQUE INDEX IF NOT EXISTS unique_timestamp ON core_snapshot (timestamp);",
|
||||||
model_name='archiveresult',
|
reverse_sql="DROP INDEX IF EXISTS unique_timestamp;",
|
||||||
name='output_mimetypes',
|
),
|
||||||
field=models.CharField(blank=True, default='', help_text='CSV of mimetypes sorted by size', max_length=512),
|
],
|
||||||
),
|
|
||||||
migrations.AlterField(
|
|
||||||
model_name='archiveresult',
|
|
||||||
name='output_size',
|
|
||||||
field=models.BigIntegerField(default=0, help_text='Total bytes of all output files'),
|
|
||||||
),
|
|
||||||
migrations.AlterField(
|
|
||||||
model_name='archiveresult',
|
|
||||||
name='output_str',
|
|
||||||
field=models.TextField(blank=True, default='', help_text='Human-readable output summary'),
|
|
||||||
),
|
|
||||||
migrations.AlterField(
|
|
||||||
model_name='archiveresult',
|
|
||||||
name='uuid',
|
|
||||||
field=models.UUIDField(blank=True, db_index=True, default=uuid_compat.uuid7, null=True),
|
|
||||||
),
|
|
||||||
migrations.AddConstraint(
|
|
||||||
model_name='snapshot',
|
|
||||||
constraint=models.UniqueConstraint(fields=('timestamp',), name='unique_timestamp'),
|
|
||||||
),
|
),
|
||||||
]
|
]
|
||||||
|
|||||||
@@ -10,20 +10,35 @@ class Migration(migrations.Migration):
|
|||||||
]
|
]
|
||||||
|
|
||||||
operations = [
|
operations = [
|
||||||
migrations.RenameField(
|
# Use SeparateDatabaseAndState to avoid table rebuilds that would re-add CHECK constraints
|
||||||
model_name='archiveresult',
|
migrations.SeparateDatabaseAndState(
|
||||||
old_name='extractor',
|
state_operations=[
|
||||||
new_name='plugin',
|
migrations.RenameField(
|
||||||
),
|
model_name='archiveresult',
|
||||||
migrations.AddField(
|
old_name='extractor',
|
||||||
model_name='archiveresult',
|
new_name='plugin',
|
||||||
name='hook_name',
|
),
|
||||||
field=models.CharField(
|
migrations.AddField(
|
||||||
blank=True,
|
model_name='archiveresult',
|
||||||
default='',
|
name='hook_name',
|
||||||
max_length=255,
|
field=models.CharField(
|
||||||
db_index=True,
|
blank=True,
|
||||||
help_text='Full filename of the hook that executed (e.g., on_Snapshot__50_wget.py)'
|
default='',
|
||||||
),
|
max_length=255,
|
||||||
|
db_index=True,
|
||||||
|
help_text='Full filename of the hook that executed (e.g., on_Snapshot__50_wget.py)'
|
||||||
|
),
|
||||||
|
),
|
||||||
|
],
|
||||||
|
database_operations=[
|
||||||
|
migrations.RunSQL(
|
||||||
|
sql="""
|
||||||
|
ALTER TABLE core_archiveresult RENAME COLUMN extractor TO plugin;
|
||||||
|
ALTER TABLE core_archiveresult ADD COLUMN hook_name VARCHAR(255) DEFAULT '' NOT NULL;
|
||||||
|
CREATE INDEX IF NOT EXISTS core_archiveresult_hook_name_idx ON core_archiveresult (hook_name);
|
||||||
|
""",
|
||||||
|
reverse_sql=migrations.RunSQL.noop,
|
||||||
|
),
|
||||||
|
],
|
||||||
),
|
),
|
||||||
]
|
]
|
||||||
|
|||||||
@@ -11,13 +11,27 @@ class Migration(migrations.Migration):
|
|||||||
]
|
]
|
||||||
|
|
||||||
operations = [
|
operations = [
|
||||||
migrations.AddField(
|
# Use SeparateDatabaseAndState to avoid table rebuild that would fail on config NOT NULL constraint
|
||||||
model_name='snapshot',
|
migrations.SeparateDatabaseAndState(
|
||||||
name='current_step',
|
state_operations=[
|
||||||
field=models.PositiveSmallIntegerField(
|
migrations.AddField(
|
||||||
default=0,
|
model_name='snapshot',
|
||||||
db_index=True,
|
name='current_step',
|
||||||
help_text='Current hook step being executed (0-9). Used for sequential hook execution.'
|
field=models.PositiveSmallIntegerField(
|
||||||
),
|
default=0,
|
||||||
|
db_index=True,
|
||||||
|
help_text='Current hook step being executed (0-9). Used for sequential hook execution.'
|
||||||
|
),
|
||||||
|
),
|
||||||
|
],
|
||||||
|
database_operations=[
|
||||||
|
migrations.RunSQL(
|
||||||
|
sql="""
|
||||||
|
ALTER TABLE core_snapshot ADD COLUMN current_step SMALLINT UNSIGNED DEFAULT 0 NOT NULL;
|
||||||
|
CREATE INDEX IF NOT EXISTS core_snapshot_current_step_idx ON core_snapshot (current_step);
|
||||||
|
""",
|
||||||
|
reverse_sql=migrations.RunSQL.noop,
|
||||||
|
),
|
||||||
|
],
|
||||||
),
|
),
|
||||||
]
|
]
|
||||||
|
|||||||
@@ -54,7 +54,7 @@ class Migration(migrations.Migration):
|
|||||||
|
|
||||||
dependencies = [
|
dependencies = [
|
||||||
('core', '0034_snapshot_current_step'),
|
('core', '0034_snapshot_current_step'),
|
||||||
('crawls', '0004_alter_crawl_output_dir'),
|
('crawls', '0005_drop_seed_id_column'),
|
||||||
]
|
]
|
||||||
|
|
||||||
operations = [
|
operations = [
|
||||||
@@ -64,16 +64,24 @@ class Migration(migrations.Migration):
|
|||||||
reverse_code=migrations.RunPython.noop,
|
reverse_code=migrations.RunPython.noop,
|
||||||
),
|
),
|
||||||
|
|
||||||
# Step 2: Make crawl non-nullable
|
# Step 2 & 3: Update Django's state only - leave created_by_id column in database (unused but harmless)
|
||||||
migrations.AlterField(
|
migrations.SeparateDatabaseAndState(
|
||||||
model_name='snapshot',
|
state_operations=[
|
||||||
name='crawl',
|
# Make crawl non-nullable
|
||||||
field=models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='snapshot_set', to='crawls.crawl'),
|
migrations.AlterField(
|
||||||
),
|
model_name='snapshot',
|
||||||
|
name='crawl',
|
||||||
# Step 3: Remove created_by field
|
field=models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='snapshot_set', to='crawls.crawl'),
|
||||||
migrations.RemoveField(
|
),
|
||||||
model_name='snapshot',
|
# Remove created_by field from Django's state
|
||||||
name='created_by',
|
migrations.RemoveField(
|
||||||
|
model_name='snapshot',
|
||||||
|
name='created_by',
|
||||||
|
),
|
||||||
|
],
|
||||||
|
database_operations=[
|
||||||
|
# No database changes - crawl_id column already exists; it stays nullable in the database, so NOT NULL is only expressed in Django's model state
|
||||||
|
# created_by_id column remains in database but is unused
|
||||||
|
],
|
||||||
),
|
),
|
||||||
]
|
]
|
||||||
|
|||||||
@@ -10,10 +10,18 @@ class Migration(migrations.Migration):
|
|||||||
]
|
]
|
||||||
|
|
||||||
operations = [
|
operations = [
|
||||||
# Remove created_by field from ArchiveResult
|
# Remove created_by field from ArchiveResult (state only)
|
||||||
# No data migration needed - created_by can be accessed via snapshot.crawl.created_by
|
# No data migration needed - created_by can be accessed via snapshot.crawl.created_by
|
||||||
migrations.RemoveField(
|
# Leave created_by_id column in database (unused but harmless, avoids table rebuild)
|
||||||
model_name='archiveresult',
|
migrations.SeparateDatabaseAndState(
|
||||||
name='created_by',
|
state_operations=[
|
||||||
|
migrations.RemoveField(
|
||||||
|
model_name='archiveresult',
|
||||||
|
name='created_by',
|
||||||
|
),
|
||||||
|
],
|
||||||
|
database_operations=[
|
||||||
|
# No database changes - leave created_by_id column in place to avoid table rebuild
|
||||||
|
],
|
||||||
),
|
),
|
||||||
]
|
]
|
||||||
|
|||||||
@@ -0,0 +1,44 @@
|
|||||||
|
# Generated by Django 6.0 on 2025-12-29 06:45
|
||||||
|
|
||||||
|
from django.db import migrations, models
|
||||||
|
|
||||||
|
|
||||||
|
class Migration(migrations.Migration):
|
||||||
|
|
||||||
|
dependencies = [
|
||||||
|
('core', '0036_remove_archiveresult_created_by'),
|
||||||
|
]
|
||||||
|
|
||||||
|
operations = [
|
||||||
|
# Update Django's state only - database columns remain for backwards compat
|
||||||
|
migrations.SeparateDatabaseAndState(
|
||||||
|
state_operations=[
|
||||||
|
migrations.RemoveField(
|
||||||
|
model_name='archiveresult',
|
||||||
|
name='output_dir',
|
||||||
|
),
|
||||||
|
migrations.RemoveField(
|
||||||
|
model_name='snapshot',
|
||||||
|
name='output_dir',
|
||||||
|
),
|
||||||
|
migrations.AlterField(
|
||||||
|
model_name='archiveresult',
|
||||||
|
name='config',
|
||||||
|
field=models.JSONField(blank=True, default=dict, null=True),
|
||||||
|
),
|
||||||
|
migrations.AlterField(
|
||||||
|
model_name='snapshot',
|
||||||
|
name='config',
|
||||||
|
field=models.JSONField(blank=True, default=dict, null=True),
|
||||||
|
),
|
||||||
|
migrations.AlterField(
|
||||||
|
model_name='snapshot',
|
||||||
|
name='tags',
|
||||||
|
field=models.ManyToManyField(blank=True, related_name='snapshot_set', through='core.SnapshotTag', through_fields=('snapshot', 'tag'), to='core.tag'),
|
||||||
|
),
|
||||||
|
],
|
||||||
|
database_operations=[
|
||||||
|
# No database changes - columns remain in place to avoid table rebuilds
|
||||||
|
],
|
||||||
|
),
|
||||||
|
]
|
||||||
84
archivebox/core/migrations/0038_fix_missing_columns.py
Normal file
84
archivebox/core/migrations/0038_fix_missing_columns.py
Normal file
@@ -0,0 +1,84 @@
|
|||||||
|
# Add missing columns to ArchiveResult and remove created_by_id from Snapshot
|
||||||
|
|
||||||
|
from django.db import migrations, models, connection
|
||||||
|
import django.utils.timezone
|
||||||
|
|
||||||
|
|
||||||
|
def add_columns_if_not_exist(apps, schema_editor):
    """Add columns to ArchiveResult only if they don't already exist.

    Inspects ``core_archiveresult`` with ``PRAGMA table_info`` and issues an
    ``ALTER TABLE ... ADD COLUMN`` for each of ``num_uses_failed``,
    ``num_uses_succeeded``, ``config`` and ``retry_at`` that is absent, so the
    migration is safe to apply to databases that already have some of them.

    Args:
        apps: Historical app registry supplied by ``RunPython`` (unused).
        schema_editor: Schema editor supplied by ``RunPython``; its
            ``connection`` is the database this migration is being applied to.
    """
    # Use the connection that is actually being migrated instead of the global
    # default ``django.db.connection``; otherwise `migrate --database=<alias>`
    # would inspect and alter the wrong database.
    with schema_editor.connection.cursor() as cursor:
        # Column name is the second field of each PRAGMA table_info row.
        cursor.execute("PRAGMA table_info(core_archiveresult)")
        existing_columns = {row[1] for row in cursor.fetchall()}

        # Add num_uses_failed if it doesn't exist
        if 'num_uses_failed' not in existing_columns:
            cursor.execute("ALTER TABLE core_archiveresult ADD COLUMN num_uses_failed integer unsigned NOT NULL DEFAULT 0 CHECK (num_uses_failed >= 0)")

        # Add num_uses_succeeded if it doesn't exist
        if 'num_uses_succeeded' not in existing_columns:
            cursor.execute("ALTER TABLE core_archiveresult ADD COLUMN num_uses_succeeded integer unsigned NOT NULL DEFAULT 0 CHECK (num_uses_succeeded >= 0)")

        # Add config if it doesn't exist
        if 'config' not in existing_columns:
            cursor.execute("ALTER TABLE core_archiveresult ADD COLUMN config text NULL")

        # Add retry_at (and its index) if it doesn't exist.
        # NOTE(review): the index is created only together with the column, as
        # the original appears to do; a pre-existing retry_at column is assumed
        # to already be indexed - confirm.
        if 'retry_at' not in existing_columns:
            cursor.execute("ALTER TABLE core_archiveresult ADD COLUMN retry_at datetime NULL")
            cursor.execute("CREATE INDEX IF NOT EXISTS core_archiveresult_retry_at_idx ON core_archiveresult(retry_at)")
|
||||||
|
|
||||||
|
|
||||||
|
class Migration(migrations.Migration):
|
||||||
|
|
||||||
|
dependencies = [
|
||||||
|
('core', '0037_remove_archiveresult_output_dir_and_more'),
|
||||||
|
]
|
||||||
|
|
||||||
|
operations = [
|
||||||
|
# Add missing columns to ArchiveResult
|
||||||
|
migrations.SeparateDatabaseAndState(
|
||||||
|
state_operations=[
|
||||||
|
migrations.AddField(
|
||||||
|
model_name='archiveresult',
|
||||||
|
name='num_uses_failed',
|
||||||
|
field=models.PositiveIntegerField(default=0),
|
||||||
|
),
|
||||||
|
migrations.AddField(
|
||||||
|
model_name='archiveresult',
|
||||||
|
name='num_uses_succeeded',
|
||||||
|
field=models.PositiveIntegerField(default=0),
|
||||||
|
),
|
||||||
|
migrations.AddField(
|
||||||
|
model_name='archiveresult',
|
||||||
|
name='config',
|
||||||
|
field=models.JSONField(blank=True, default=dict, null=True),
|
||||||
|
),
|
||||||
|
migrations.AddField(
|
||||||
|
model_name='archiveresult',
|
||||||
|
name='retry_at',
|
||||||
|
field=models.DateTimeField(blank=True, db_index=True, default=django.utils.timezone.now, null=True),
|
||||||
|
),
|
||||||
|
],
|
||||||
|
database_operations=[
|
||||||
|
migrations.RunPython(add_columns_if_not_exist, reverse_code=migrations.RunPython.noop),
|
||||||
|
],
|
||||||
|
),
|
||||||
|
|
||||||
|
# Drop created_by_id from Snapshot (database only, already removed from model in 0035)
|
||||||
|
migrations.SeparateDatabaseAndState(
|
||||||
|
state_operations=[
|
||||||
|
# No state changes - field already removed in 0035
|
||||||
|
],
|
||||||
|
database_operations=[
|
||||||
|
migrations.RunSQL(
|
||||||
|
sql="""
|
||||||
|
-- Drop index first, then column
|
||||||
|
DROP INDEX IF EXISTS core_snapshot_created_by_id_6dbd6149;
|
||||||
|
ALTER TABLE core_snapshot DROP COLUMN created_by_id;
|
||||||
|
""",
|
||||||
|
reverse_sql=migrations.RunSQL.noop,
|
||||||
|
),
|
||||||
|
],
|
||||||
|
),
|
||||||
|
]
|
||||||
30
archivebox/core/migrations/0039_fix_num_uses_values.py
Normal file
30
archivebox/core/migrations/0039_fix_num_uses_values.py
Normal file
@@ -0,0 +1,30 @@
|
|||||||
|
# Reset text-typed num_uses_failed, num_uses_succeeded, and depth values on core_snapshot to integer 0
|
||||||
|
|
||||||
|
from django.db import migrations
|
||||||
|
|
||||||
|
|
||||||
|
class Migration(migrations.Migration):
|
||||||
|
|
||||||
|
dependencies = [
|
||||||
|
('core', '0038_fix_missing_columns'),
|
||||||
|
]
|
||||||
|
|
||||||
|
operations = [
|
||||||
|
# Fix string values that got inserted as literals instead of integers
|
||||||
|
migrations.RunSQL(
|
||||||
|
sql="""
|
||||||
|
UPDATE core_snapshot
|
||||||
|
SET num_uses_failed = 0
|
||||||
|
WHERE typeof(num_uses_failed) = 'text' OR num_uses_failed = 'num_uses_failed';
|
||||||
|
|
||||||
|
UPDATE core_snapshot
|
||||||
|
SET num_uses_succeeded = 0
|
||||||
|
WHERE typeof(num_uses_succeeded) = 'text' OR num_uses_succeeded = 'num_uses_succeeded';
|
||||||
|
|
||||||
|
UPDATE core_snapshot
|
||||||
|
SET depth = 0
|
||||||
|
WHERE typeof(depth) = 'text' OR depth = 'depth';
|
||||||
|
""",
|
||||||
|
reverse_sql=migrations.RunSQL.noop,
|
||||||
|
),
|
||||||
|
]
|
||||||
@@ -911,7 +911,7 @@ class Snapshot(ModelWithOutputDir, ModelWithConfig, ModelWithNotes, ModelWithHea
|
|||||||
)
|
)
|
||||||
|
|
||||||
merged = 0
|
merged = 0
|
||||||
for dup in duplicates.iterator():
|
for dup in duplicates.iterator(chunk_size=500):
|
||||||
snapshots = list(
|
snapshots = list(
|
||||||
cls.objects
|
cls.objects
|
||||||
.filter(url=dup['url'], timestamp=dup['timestamp'])
|
.filter(url=dup['url'], timestamp=dup['timestamp'])
|
||||||
|
|||||||
File diff suppressed because it is too large
Load Diff
@@ -91,7 +91,11 @@ def plugin_thumbnail(context, result) -> str:
|
|||||||
'output_path': output_path,
|
'output_path': output_path,
|
||||||
'plugin': plugin,
|
'plugin': plugin,
|
||||||
})
|
})
|
||||||
return mark_safe(tpl.render(ctx))
|
rendered = tpl.render(ctx)
|
||||||
|
# Only return non-empty content (strip whitespace to check)
|
||||||
|
if rendered.strip():
|
||||||
|
return mark_safe(rendered)
|
||||||
|
return ''
|
||||||
except Exception:
|
except Exception:
|
||||||
return ''
|
return ''
|
||||||
|
|
||||||
@@ -119,7 +123,11 @@ def plugin_embed(context, result) -> str:
|
|||||||
'output_path': output_path,
|
'output_path': output_path,
|
||||||
'plugin': plugin,
|
'plugin': plugin,
|
||||||
})
|
})
|
||||||
return mark_safe(tpl.render(ctx))
|
rendered = tpl.render(ctx)
|
||||||
|
# Only return non-empty content (strip whitespace to check)
|
||||||
|
if rendered.strip():
|
||||||
|
return mark_safe(rendered)
|
||||||
|
return ''
|
||||||
except Exception:
|
except Exception:
|
||||||
return ''
|
return ''
|
||||||
|
|
||||||
@@ -147,7 +155,11 @@ def plugin_fullscreen(context, result) -> str:
|
|||||||
'output_path': output_path,
|
'output_path': output_path,
|
||||||
'plugin': plugin,
|
'plugin': plugin,
|
||||||
})
|
})
|
||||||
return mark_safe(tpl.render(ctx))
|
rendered = tpl.render(ctx)
|
||||||
|
# Only return non-empty content (strip whitespace to check)
|
||||||
|
if rendered.strip():
|
||||||
|
return mark_safe(rendered)
|
||||||
|
return ''
|
||||||
except Exception:
|
except Exception:
|
||||||
return ''
|
return ''
|
||||||
|
|
||||||
|
|||||||
@@ -539,7 +539,7 @@ from django.http import JsonResponse
|
|||||||
def live_progress_view(request):
|
def live_progress_view(request):
|
||||||
"""Simple JSON endpoint for live progress status - used by admin progress monitor."""
|
"""Simple JSON endpoint for live progress status - used by admin progress monitor."""
|
||||||
try:
|
try:
|
||||||
from workers.orchestrator import Orchestrator
|
from archivebox.workers.orchestrator import Orchestrator
|
||||||
from archivebox.crawls.models import Crawl
|
from archivebox.crawls.models import Crawl
|
||||||
from archivebox.core.models import Snapshot, ArchiveResult
|
from archivebox.core.models import Snapshot, ArchiveResult
|
||||||
from django.db.models import Case, When, Value, IntegerField
|
from django.db.models import Case, When, Value, IntegerField
|
||||||
|
|||||||
@@ -4,3 +4,8 @@ from django.apps import AppConfig
|
|||||||
class CrawlsConfig(AppConfig):
|
class CrawlsConfig(AppConfig):
|
||||||
default_auto_field = "django.db.models.BigAutoField"
|
default_auto_field = "django.db.models.BigAutoField"
|
||||||
name = "archivebox.crawls"
|
name = "archivebox.crawls"
|
||||||
|
label = "crawls"
|
||||||
|
|
||||||
|
def ready(self):
|
||||||
|
"""Import models to register state machines with the registry"""
|
||||||
|
from archivebox.crawls.models import CrawlMachine # noqa: F401
|
||||||
|
|||||||
@@ -17,39 +17,62 @@ class Migration(migrations.Migration):
|
|||||||
]
|
]
|
||||||
|
|
||||||
operations = [
|
operations = [
|
||||||
# Remove the seed foreign key from Crawl
|
# Formerly removed the seed foreign key from Crawl; now an unconditional no-op placeholder
|
||||||
migrations.RemoveField(
|
migrations.RunPython(
|
||||||
model_name='crawl',
|
code=lambda apps, schema_editor: None,
|
||||||
name='seed',
|
reverse_code=migrations.RunPython.noop,
|
||||||
),
|
),
|
||||||
# Delete the Seed model entirely
|
# Delete the Seed model entirely (already done)
|
||||||
migrations.DeleteModel(
|
migrations.RunPython(
|
||||||
name='Seed',
|
code=lambda apps, schema_editor: None,
|
||||||
|
reverse_code=migrations.RunPython.noop,
|
||||||
),
|
),
|
||||||
# Update fields to new schema
|
# NULL out seed_id values and drop the seed table in the database (the seed_id column itself is kept), then update Django's migration state
|
||||||
migrations.AlterField(
|
migrations.SeparateDatabaseAndState(
|
||||||
model_name='crawl',
|
state_operations=[
|
||||||
name='created_by',
|
# Update fields to new schema
|
||||||
field=models.ForeignKey(default=archivebox.base_models.models.get_or_create_system_user_pk, on_delete=django.db.models.deletion.CASCADE, to=settings.AUTH_USER_MODEL),
|
migrations.AlterField(
|
||||||
),
|
model_name='crawl',
|
||||||
migrations.AlterField(
|
name='created_by',
|
||||||
model_name='crawl',
|
field=models.ForeignKey(default=archivebox.base_models.models.get_or_create_system_user_pk, on_delete=django.db.models.deletion.CASCADE, to=settings.AUTH_USER_MODEL),
|
||||||
name='id',
|
),
|
||||||
field=models.UUIDField(default=uuid_compat.uuid7, editable=False, primary_key=True, serialize=False, unique=True),
|
migrations.AlterField(
|
||||||
),
|
model_name='crawl',
|
||||||
migrations.AlterField(
|
name='id',
|
||||||
model_name='crawl',
|
field=models.UUIDField(default=uuid_compat.uuid7, editable=False, primary_key=True, serialize=False, unique=True),
|
||||||
name='urls',
|
),
|
||||||
field=models.TextField(help_text='Newline-separated list of URLs to crawl'),
|
migrations.AlterField(
|
||||||
),
|
model_name='crawl',
|
||||||
migrations.AlterField(
|
name='urls',
|
||||||
model_name='crawlschedule',
|
field=models.TextField(help_text='Newline-separated list of URLs to crawl'),
|
||||||
name='created_by',
|
),
|
||||||
field=models.ForeignKey(default=archivebox.base_models.models.get_or_create_system_user_pk, on_delete=django.db.models.deletion.CASCADE, to=settings.AUTH_USER_MODEL),
|
migrations.AlterField(
|
||||||
),
|
model_name='crawlschedule',
|
||||||
migrations.AlterField(
|
name='created_by',
|
||||||
model_name='crawlschedule',
|
field=models.ForeignKey(default=archivebox.base_models.models.get_or_create_system_user_pk, on_delete=django.db.models.deletion.CASCADE, to=settings.AUTH_USER_MODEL),
|
||||||
name='id',
|
),
|
||||||
field=models.UUIDField(default=uuid_compat.uuid7, editable=False, primary_key=True, serialize=False, unique=True),
|
migrations.AlterField(
|
||||||
|
model_name='crawlschedule',
|
||||||
|
name='id',
|
||||||
|
field=models.UUIDField(default=uuid_compat.uuid7, editable=False, primary_key=True, serialize=False, unique=True),
|
||||||
|
),
|
||||||
|
],
|
||||||
|
database_operations=[
|
||||||
|
# Drop seed table and NULL out seed_id FK values
|
||||||
|
migrations.RunSQL(
|
||||||
|
sql="""
|
||||||
|
PRAGMA foreign_keys=OFF;
|
||||||
|
|
||||||
|
-- NULL out seed_id values in crawls_crawl
|
||||||
|
UPDATE crawls_crawl SET seed_id = NULL;
|
||||||
|
|
||||||
|
-- Drop seed table if it exists
|
||||||
|
DROP TABLE IF EXISTS crawls_seed;
|
||||||
|
|
||||||
|
PRAGMA foreign_keys=ON;
|
||||||
|
""",
|
||||||
|
reverse_sql=migrations.RunSQL.noop,
|
||||||
|
),
|
||||||
|
],
|
||||||
),
|
),
|
||||||
]
|
]
|
||||||
|
|||||||
@@ -8,12 +8,21 @@ class Migration(migrations.Migration):
|
|||||||
|
|
||||||
dependencies = [
|
dependencies = [
|
||||||
('crawls', '0002_drop_seed_model'),
|
('crawls', '0002_drop_seed_model'),
|
||||||
|
('core', '0024_d_fix_crawls_config'), # Depends on config fix
|
||||||
]
|
]
|
||||||
|
|
||||||
operations = [
|
operations = [
|
||||||
migrations.AlterField(
|
# Update Django's state only to avoid table rebuild that would re-apply old constraints
|
||||||
model_name='crawl',
|
migrations.SeparateDatabaseAndState(
|
||||||
name='output_dir',
|
state_operations=[
|
||||||
field=models.FilePathField(blank=True, default='', path=pathlib.PurePosixPath('/private/tmp/test_archivebox_migrations/archive')),
|
migrations.AlterField(
|
||||||
|
model_name='crawl',
|
||||||
|
name='output_dir',
|
||||||
|
field=models.FilePathField(blank=True, default='', path=pathlib.PurePosixPath('/private/tmp/test_archivebox_migrations/archive')),
|
||||||
|
),
|
||||||
|
],
|
||||||
|
database_operations=[
|
||||||
|
# No database changes - output_dir type change is cosmetic for Django admin
|
||||||
|
],
|
||||||
),
|
),
|
||||||
]
|
]
|
||||||
|
|||||||
@@ -11,9 +11,17 @@ class Migration(migrations.Migration):
|
|||||||
]
|
]
|
||||||
|
|
||||||
operations = [
|
operations = [
|
||||||
migrations.AlterField(
|
# Update Django's state only to avoid table rebuild that would re-apply old constraints
|
||||||
model_name='crawl',
|
migrations.SeparateDatabaseAndState(
|
||||||
name='output_dir',
|
state_operations=[
|
||||||
field=models.FilePathField(blank=True, default='', path=pathlib.PurePosixPath('/private/tmp/archivebox-makemigrations/archive')),
|
migrations.AlterField(
|
||||||
|
model_name='crawl',
|
||||||
|
name='output_dir',
|
||||||
|
field=models.FilePathField(blank=True, default='', path=pathlib.PurePosixPath('/private/tmp/archivebox-makemigrations/archive')),
|
||||||
|
),
|
||||||
|
],
|
||||||
|
database_operations=[
|
||||||
|
# No database changes - output_dir type change is cosmetic for Django admin
|
||||||
|
],
|
||||||
),
|
),
|
||||||
]
|
]
|
||||||
|
|||||||
28
archivebox/crawls/migrations/0005_drop_seed_id_column.py
Normal file
28
archivebox/crawls/migrations/0005_drop_seed_id_column.py
Normal file
@@ -0,0 +1,28 @@
|
|||||||
|
# Drop seed_id column from Django's state (leave in database to avoid FK issues)
|
||||||
|
|
||||||
|
from django.db import migrations
|
||||||
|
|
||||||
|
|
||||||
|
class Migration(migrations.Migration):
    """Remove the ``seed`` field from ``Crawl`` in Django's migration state only.

    ``database_operations`` is empty, so applying this migration issues no
    SQL and the ``seed_id`` column is left in place in the database.
    """

    dependencies = [("crawls", "0004_alter_crawl_output_dir")]

    operations = [
        # State-only change: the seed_id column is kept (unused but harmless),
        # which avoids FK mismatch errors with crawls_crawlschedule.
        migrations.SeparateDatabaseAndState(
            # Intentionally empty - seed_id stays to avoid FK rebuild issues.
            # The crawls_seed table can be dropped manually by a DBA if needed.
            database_operations=[],
            state_operations=[
                # Forget Crawl.seed in Django's migration state.
                migrations.RemoveField(model_name="crawl", name="seed"),
            ],
        ),
    ]
|
||||||
@@ -0,0 +1,35 @@
|
|||||||
|
# Generated by Django 6.0 on 2025-12-29 06:45
|
||||||
|
|
||||||
|
import pathlib
|
||||||
|
from django.db import migrations, models
|
||||||
|
|
||||||
|
|
||||||
|
class Migration(migrations.Migration):
    """Bring Django's migration state for the ``crawls`` app up to date.

    Alters ``Crawl.config`` and ``Crawl.output_dir`` and deletes the ``Seed``
    model, all as state operations. ``database_operations`` is empty, so
    applying this migration issues no SQL.
    """

    dependencies = [("crawls", "0005_drop_seed_id_column")]

    operations = [
        migrations.SeparateDatabaseAndState(
            # No database changes - the schema is expected to be correct already.
            # NOTE(review): the previous comment said the Seed table was dropped
            # in 0005, but 0005_drop_seed_id_column performs no database
            # operations either - confirm which migration drops crawls_seed.
            database_operations=[],
            state_operations=[
                migrations.AlterField(
                    model_name="crawl",
                    name="config",
                    field=models.JSONField(blank=True, default=dict, null=True),
                ),
                # NOTE(review): this absolute path looks like it was captured from
                # the machine that ran makemigrations; it only affects migration
                # state here, but confirm the model does not regenerate it per host.
                migrations.AlterField(
                    model_name="crawl",
                    name="output_dir",
                    field=models.FilePathField(
                        blank=True,
                        default="",
                        path=pathlib.PurePosixPath(
                            "/Users/squash/Local/Code/archiveboxes/archivebox-nue/data/archive"
                        ),
                    ),
                ),
                migrations.DeleteModel(name="Seed"),
            ],
        ),
    ]
|
||||||
@@ -65,7 +65,7 @@ class Crawl(ModelWithOutputDir, ModelWithConfig, ModelWithHealthStats, ModelWith
|
|||||||
modified_at = models.DateTimeField(auto_now=True)
|
modified_at = models.DateTimeField(auto_now=True)
|
||||||
|
|
||||||
urls = models.TextField(blank=False, null=False, help_text='Newline-separated list of URLs to crawl')
|
urls = models.TextField(blank=False, null=False, help_text='Newline-separated list of URLs to crawl')
|
||||||
config = models.JSONField(default=dict)
|
config = models.JSONField(default=dict, null=True, blank=True)
|
||||||
max_depth = models.PositiveSmallIntegerField(default=0, validators=[MinValueValidator(0), MaxValueValidator(4)])
|
max_depth = models.PositiveSmallIntegerField(default=0, validators=[MinValueValidator(0), MaxValueValidator(4)])
|
||||||
tags_str = models.CharField(max_length=1024, blank=True, null=False, default='')
|
tags_str = models.CharField(max_length=1024, blank=True, null=False, default='')
|
||||||
persona_id = models.UUIDField(null=True, blank=True)
|
persona_id = models.UUIDField(null=True, blank=True)
|
||||||
@@ -77,7 +77,7 @@ class Crawl(ModelWithOutputDir, ModelWithConfig, ModelWithHealthStats, ModelWith
|
|||||||
status = ModelWithStateMachine.StatusField(choices=ModelWithStateMachine.StatusChoices, default=ModelWithStateMachine.StatusChoices.QUEUED)
|
status = ModelWithStateMachine.StatusField(choices=ModelWithStateMachine.StatusChoices, default=ModelWithStateMachine.StatusChoices.QUEUED)
|
||||||
retry_at = ModelWithStateMachine.RetryAtField(default=timezone.now)
|
retry_at = ModelWithStateMachine.RetryAtField(default=timezone.now)
|
||||||
|
|
||||||
state_machine_name = 'crawls.models.CrawlMachine'
|
state_machine_name = 'archivebox.crawls.models.CrawlMachine'
|
||||||
retry_at_field_name = 'retry_at'
|
retry_at_field_name = 'retry_at'
|
||||||
state_field_name = 'status'
|
state_field_name = 'status'
|
||||||
StatusChoices = ModelWithStateMachine.StatusChoices
|
StatusChoices = ModelWithStateMachine.StatusChoices
|
||||||
@@ -190,7 +190,6 @@ class Crawl(ModelWithOutputDir, ModelWithConfig, ModelWithHealthStats, ModelWith
|
|||||||
'status': Snapshot.INITIAL_STATE,
|
'status': Snapshot.INITIAL_STATE,
|
||||||
'retry_at': timezone.now(),
|
'retry_at': timezone.now(),
|
||||||
'timestamp': str(timezone.now().timestamp()),
|
'timestamp': str(timezone.now().timestamp()),
|
||||||
'created_by_id': self.created_by_id,
|
|
||||||
'depth': 0,
|
'depth': 0,
|
||||||
},
|
},
|
||||||
)
|
)
|
||||||
@@ -290,7 +289,7 @@ class Crawl(ModelWithOutputDir, ModelWithConfig, ModelWithHealthStats, ModelWith
|
|||||||
'timestamp': timestamp or str(timezone.now().timestamp()),
|
'timestamp': timestamp or str(timezone.now().timestamp()),
|
||||||
'status': Snapshot.INITIAL_STATE,
|
'status': Snapshot.INITIAL_STATE,
|
||||||
'retry_at': timezone.now(),
|
'retry_at': timezone.now(),
|
||||||
'created_by_id': self.created_by_id,
|
# Note: created_by removed in 0.9.0 - Snapshot inherits from Crawl
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|||||||
@@ -7,8 +7,13 @@ class MachineConfig(AppConfig):
|
|||||||
default_auto_field = 'django.db.models.BigAutoField'
|
default_auto_field = 'django.db.models.BigAutoField'
|
||||||
|
|
||||||
name = 'archivebox.machine'
|
name = 'archivebox.machine'
|
||||||
|
label = 'machine' # Explicit label for migrations
|
||||||
verbose_name = 'Machine Info'
|
verbose_name = 'Machine Info'
|
||||||
|
|
||||||
|
def ready(self):
|
||||||
|
"""Import models to register state machines with the registry"""
|
||||||
|
from archivebox.machine import models # noqa: F401
|
||||||
|
|
||||||
|
|
||||||
def register_admin(admin_site):
|
def register_admin(admin_site):
|
||||||
from archivebox.machine.admin import register_admin
|
from archivebox.machine.admin import register_admin
|
||||||
|
|||||||
@@ -85,6 +85,12 @@ class Migration(migrations.Migration):
|
|||||||
('version', models.CharField(blank=True, default=None, max_length=32)),
|
('version', models.CharField(blank=True, default=None, max_length=32)),
|
||||||
('sha256', models.CharField(blank=True, default=None, max_length=64)),
|
('sha256', models.CharField(blank=True, default=None, max_length=64)),
|
||||||
('machine', models.ForeignKey(blank=True, default=None, on_delete=django.db.models.deletion.CASCADE, to='machine.machine')),
|
('machine', models.ForeignKey(blank=True, default=None, on_delete=django.db.models.deletion.CASCADE, to='machine.machine')),
|
||||||
|
# Fields added in migration 0005 (included here for fresh installs)
|
||||||
|
('binproviders', models.CharField(blank=True, default='env', max_length=127)),
|
||||||
|
('output_dir', models.CharField(blank=True, default='', max_length=255)),
|
||||||
|
('overrides', models.JSONField(blank=True, default=dict)),
|
||||||
|
('retry_at', models.DateTimeField(blank=True, db_index=True, default=django.utils.timezone.now, null=True)),
|
||||||
|
('status', models.CharField(choices=[('queued', 'Queued'), ('started', 'Started'), ('succeeded', 'Succeeded'), ('failed', 'Failed')], db_index=True, default='queued', max_length=16)),
|
||||||
# dependency FK removed - Dependency model deleted
|
# dependency FK removed - Dependency model deleted
|
||||||
],
|
],
|
||||||
options={
|
options={
|
||||||
|
|||||||
@@ -0,0 +1,104 @@
|
|||||||
|
# Generated by Django 6.0 on 2025-12-29 06:45
|
||||||
|
|
||||||
|
import django.db.models.deletion
|
||||||
|
import django.utils.timezone
|
||||||
|
from archivebox.uuid_compat import uuid7
|
||||||
|
from django.db import migrations, models
|
||||||
|
|
||||||
|
|
||||||
|
class Migration(migrations.Migration):
    """Sync Django's migration state for the ``machine`` app models.

    Adds five fields to ``binary`` and alters existing fields on ``binary``,
    ``machine`` and ``networkinterface``, all as state operations.
    ``database_operations`` is empty, so applying this migration issues no SQL.
    """

    dependencies = [("machine", "0004_drop_dependency_table")]

    operations = [
        migrations.SeparateDatabaseAndState(
            # No database changes - schema is expected to be correct from
            # previous migrations.
            # NOTE(review): this relies on the binary table already having the
            # binproviders/output_dir/overrides/retry_at/status columns; confirm
            # that holds for databases upgraded from older releases.
            database_operations=[],
            state_operations=[
                # --- binary: fields added to the state ---
                migrations.AddField(
                    model_name="binary",
                    name="binproviders",
                    field=models.CharField(
                        blank=True,
                        default="env",
                        help_text="Comma-separated list of allowed providers: apt,brew,pip,npm,env",
                        max_length=127,
                    ),
                ),
                migrations.AddField(
                    model_name="binary",
                    name="output_dir",
                    field=models.CharField(
                        blank=True,
                        default="",
                        help_text="Directory where installation hook logs are stored",
                        max_length=255,
                    ),
                ),
                migrations.AddField(
                    model_name="binary",
                    name="overrides",
                    field=models.JSONField(
                        blank=True,
                        default=dict,
                        help_text="Provider-specific overrides: {'apt': {'packages': ['pkg']}, ...}",
                    ),
                ),
                migrations.AddField(
                    model_name="binary",
                    name="retry_at",
                    field=models.DateTimeField(
                        blank=True,
                        db_index=True,
                        default=django.utils.timezone.now,
                        help_text="When to retry this binary installation",
                        null=True,
                    ),
                ),
                migrations.AddField(
                    model_name="binary",
                    name="status",
                    field=models.CharField(
                        choices=[
                            ("queued", "Queued"),
                            ("started", "Started"),
                            ("succeeded", "Succeeded"),
                            ("failed", "Failed"),
                        ],
                        db_index=True,
                        default="queued",
                        max_length=16,
                    ),
                ),
                # --- binary: existing fields altered in the state ---
                migrations.AlterField(
                    model_name="binary",
                    name="abspath",
                    field=models.CharField(blank=True, default="", max_length=255),
                ),
                migrations.AlterField(
                    model_name="binary",
                    name="binprovider",
                    field=models.CharField(
                        blank=True,
                        default="",
                        help_text="Provider that successfully installed this binary",
                        max_length=31,
                    ),
                ),
                migrations.AlterField(
                    model_name="binary",
                    name="id",
                    field=models.UUIDField(
                        default=uuid7,
                        editable=False,
                        primary_key=True,
                        serialize=False,
                        unique=True,
                    ),
                ),
                migrations.AlterField(
                    model_name="binary",
                    name="machine",
                    field=models.ForeignKey(
                        on_delete=django.db.models.deletion.CASCADE,
                        to="machine.machine",
                    ),
                ),
                migrations.AlterField(
                    model_name="binary",
                    name="name",
                    field=models.CharField(blank=True, db_index=True, default="", max_length=63),
                ),
                migrations.AlterField(
                    model_name="binary",
                    name="sha256",
                    field=models.CharField(blank=True, default="", max_length=64),
                ),
                migrations.AlterField(
                    model_name="binary",
                    name="version",
                    field=models.CharField(blank=True, default="", max_length=32),
                ),
                # --- machine ---
                migrations.AlterField(
                    model_name="machine",
                    name="config",
                    field=models.JSONField(
                        blank=True,
                        default=dict,
                        help_text="Machine-specific config overrides (e.g., resolved binary paths like WGET_BINARY)",
                        null=True,
                    ),
                ),
                migrations.AlterField(
                    model_name="machine",
                    name="id",
                    field=models.UUIDField(
                        default=uuid7,
                        editable=False,
                        primary_key=True,
                        serialize=False,
                        unique=True,
                    ),
                ),
                migrations.AlterField(
                    model_name="machine",
                    name="stats",
                    field=models.JSONField(blank=True, default=dict, null=True),
                ),
                # --- networkinterface ---
                migrations.AlterField(
                    model_name="networkinterface",
                    name="id",
                    field=models.UUIDField(
                        default=uuid7,
                        editable=False,
                        primary_key=True,
                        serialize=False,
                        unique=True,
                    ),
                ),
            ],
        ),
    ]
|
||||||
@@ -44,8 +44,8 @@ class Machine(ModelWithHealthStats):
|
|||||||
os_platform = models.CharField(max_length=63, default=None, null=False)
|
os_platform = models.CharField(max_length=63, default=None, null=False)
|
||||||
os_release = models.CharField(max_length=63, default=None, null=False)
|
os_release = models.CharField(max_length=63, default=None, null=False)
|
||||||
os_kernel = models.CharField(max_length=255, default=None, null=False)
|
os_kernel = models.CharField(max_length=255, default=None, null=False)
|
||||||
stats = models.JSONField(default=dict, null=False)
|
stats = models.JSONField(default=dict, null=True, blank=True)
|
||||||
config = models.JSONField(default=dict, null=False, blank=True,
|
config = models.JSONField(default=dict, null=True, blank=True,
|
||||||
help_text="Machine-specific config overrides (e.g., resolved binary paths like WGET_BINARY)")
|
help_text="Machine-specific config overrides (e.g., resolved binary paths like WGET_BINARY)")
|
||||||
num_uses_failed = models.PositiveIntegerField(default=0)
|
num_uses_failed = models.PositiveIntegerField(default=0)
|
||||||
num_uses_succeeded = models.PositiveIntegerField(default=0)
|
num_uses_succeeded = models.PositiveIntegerField(default=0)
|
||||||
@@ -213,7 +213,7 @@ class Binary(ModelWithHealthStats):
|
|||||||
num_uses_failed = models.PositiveIntegerField(default=0)
|
num_uses_failed = models.PositiveIntegerField(default=0)
|
||||||
num_uses_succeeded = models.PositiveIntegerField(default=0)
|
num_uses_succeeded = models.PositiveIntegerField(default=0)
|
||||||
|
|
||||||
state_machine_name: str = 'machine.models.BinaryMachine'
|
state_machine_name: str = 'archivebox.machine.models.BinaryMachine'
|
||||||
|
|
||||||
objects: BinaryManager = BinaryManager()
|
objects: BinaryManager = BinaryManager()
|
||||||
|
|
||||||
|
|||||||
@@ -4,3 +4,4 @@ from django.apps import AppConfig
|
|||||||
class SessionsConfig(AppConfig):
|
class SessionsConfig(AppConfig):
|
||||||
default_auto_field = "django.db.models.BigAutoField"
|
default_auto_field = "django.db.models.BigAutoField"
|
||||||
name = "archivebox.personas"
|
name = "archivebox.personas"
|
||||||
|
label = "personas"
|
||||||
|
|||||||
@@ -21,7 +21,7 @@
|
|||||||
# # COOKIES_TXT_FILE: '/path/to/cookies.txt',
|
# # COOKIES_TXT_FILE: '/path/to/cookies.txt',
|
||||||
# # CHROME_USER_DATA_DIR: '/path/to/chrome/user/data/dir',
|
# # CHROME_USER_DATA_DIR: '/path/to/chrome/user/data/dir',
|
||||||
# # CHECK_SSL_VALIDITY: False,
|
# # CHECK_SSL_VALIDITY: False,
|
||||||
# # SAVE_ARCHIVE_DOT_ORG: True,
|
# # SAVE_ARCHIVEDOTORG: True,
|
||||||
# # CHROME_BINARY: 'chromium'
|
# # CHROME_BINARY: 'chromium'
|
||||||
# # ...
|
# # ...
|
||||||
# # }
|
# # }
|
||||||
|
|||||||
@@ -63,7 +63,7 @@ def test_ripgrep_hook_detects_binary_from_path():
|
|||||||
|
|
||||||
def test_ripgrep_hook_skips_when_backend_not_ripgrep():
|
def test_ripgrep_hook_skips_when_backend_not_ripgrep():
|
||||||
"""Test that ripgrep hook exits silently when search backend is not ripgrep."""
|
"""Test that ripgrep hook exits silently when search backend is not ripgrep."""
|
||||||
hook_path = Path(__file__).parent.parent / 'on_Crawl__00_validate_ripgrep.py'
|
hook_path = Path(__file__).parent.parent / 'on_Crawl__00_install_ripgrep.py'
|
||||||
|
|
||||||
env = os.environ.copy()
|
env = os.environ.copy()
|
||||||
env['SEARCH_BACKEND_ENGINE'] = 'sqlite' # Different backend
|
env['SEARCH_BACKEND_ENGINE'] = 'sqlite' # Different backend
|
||||||
@@ -82,7 +82,7 @@ def test_ripgrep_hook_skips_when_backend_not_ripgrep():
|
|||||||
|
|
||||||
def test_ripgrep_hook_handles_absolute_path():
|
def test_ripgrep_hook_handles_absolute_path():
|
||||||
"""Test that ripgrep hook works when RIPGREP_BINARY is an absolute path."""
|
"""Test that ripgrep hook works when RIPGREP_BINARY is an absolute path."""
|
||||||
hook_path = Path(__file__).parent.parent / 'on_Crawl__00_validate_ripgrep.py'
|
hook_path = Path(__file__).parent.parent / 'on_Crawl__00_install_ripgrep.py'
|
||||||
|
|
||||||
rg_path = shutil.which('rg')
|
rg_path = shutil.which('rg')
|
||||||
if not rg_path:
|
if not rg_path:
|
||||||
@@ -222,7 +222,7 @@ def test_ripgrep_only_detected_when_backend_enabled():
|
|||||||
if not shutil.which('rg'):
|
if not shutil.which('rg'):
|
||||||
pytest.skip("ripgrep not installed")
|
pytest.skip("ripgrep not installed")
|
||||||
|
|
||||||
hook_path = Path(__file__).parent.parent / 'on_Crawl__00_validate_ripgrep.py'
|
hook_path = Path(__file__).parent.parent / 'on_Crawl__00_install_ripgrep.py'
|
||||||
|
|
||||||
# Test 1: With ripgrep backend - should output Binary record
|
# Test 1: With ripgrep backend - should output Binary record
|
||||||
env1 = os.environ.copy()
|
env1 = os.environ.copy()
|
||||||
|
|||||||
@@ -360,9 +360,11 @@
|
|||||||
<div class="row header-bottom-frames">
|
<div class="row header-bottom-frames">
|
||||||
{% for result_info in archiveresults %}
|
{% for result_info in archiveresults %}
|
||||||
{% if result_info.result %}
|
{% if result_info.result %}
|
||||||
|
{% plugin_thumbnail result_info.result as thumbnail_html %}
|
||||||
|
{% if thumbnail_html %}
|
||||||
<div class="col-lg-2">
|
<div class="col-lg-2">
|
||||||
<div class="card{% if forloop.first %} selected-card{% endif %}">
|
<div class="card{% if forloop.first %} selected-card{% endif %}">
|
||||||
{% plugin_thumbnail result_info.result %}
|
{{ thumbnail_html }}
|
||||||
<div class="card-body">
|
<div class="card-body">
|
||||||
<a href="{{ result_info.path }}" title="Open in new tab..." target="_blank" rel="noopener">
|
<a href="{{ result_info.path }}" title="Open in new tab..." target="_blank" rel="noopener">
|
||||||
<p class="card-text"><code>{{ result_info.path }}</code></p>
|
<p class="card-text"><code>{{ result_info.path }}</code></p>
|
||||||
@@ -373,6 +375,7 @@
|
|||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
|
{% endif %}
|
||||||
{% endif %}
|
{% endif %}
|
||||||
{% endfor %}
|
{% endfor %}
|
||||||
|
|
||||||
@@ -395,7 +398,7 @@
|
|||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
</header>
|
</header>
|
||||||
<iframe sandbox="allow-same-origin allow-top-navigation-by-user-activation allow-scripts allow-forms" class="full-page-iframe" src="{{singlefile_path}}" name="preview"></iframe>
|
<iframe sandbox="allow-same-origin allow-top-navigation-by-user-activation allow-scripts allow-forms" class="full-page-iframe" src="{{best_preview_path}}" name="preview"></iframe>
|
||||||
|
|
||||||
<script>
|
<script>
|
||||||
/*! jQuery v3.2.1 -ajax,-ajax/jsonp,-ajax/load,-ajax/parseXML,-ajax/script,-ajax/var/location,-ajax/var/nonce,-ajax/var/rquery,-ajax/xhr,-manipulation/_evalUrl,-event/ajax,-effects,-effects/Tween,-effects/animatedSelector | (c) JS Foundation and other contributors | jquery.org/license */
|
/*! jQuery v3.2.1 -ajax,-ajax/jsonp,-ajax/load,-ajax/parseXML,-ajax/script,-ajax/var/location,-ajax/var/nonce,-ajax/var/rquery,-ajax/xhr,-manipulation/_evalUrl,-event/ajax,-effects,-effects/Tween,-effects/animatedSelector | (c) JS Foundation and other contributors | jquery.org/license */
|
||||||
|
|||||||
@@ -429,19 +429,6 @@ class TestInstallHookOutput(unittest.TestCase):
|
|||||||
self.assertEqual(data['name'], 'wget')
|
self.assertEqual(data['name'], 'wget')
|
||||||
self.assertTrue(data['abspath'].startswith('/'))
|
self.assertTrue(data['abspath'].startswith('/'))
|
||||||
|
|
||||||
def test_install_hook_outputs_dependency(self):
|
|
||||||
"""Install hook should output Dependency JSONL when binary not found."""
|
|
||||||
hook_output = json.dumps({
|
|
||||||
'type': 'Dependency',
|
|
||||||
'bin_name': 'wget',
|
|
||||||
'bin_providers': 'apt,brew,env',
|
|
||||||
})
|
|
||||||
|
|
||||||
data = json.loads(hook_output)
|
|
||||||
self.assertEqual(data['type'], 'Dependency')
|
|
||||||
self.assertEqual(data['bin_name'], 'wget')
|
|
||||||
self.assertIn('apt', data['bin_providers'])
|
|
||||||
|
|
||||||
def test_install_hook_outputs_machine_config(self):
|
def test_install_hook_outputs_machine_config(self):
|
||||||
"""Install hook should output Machine config update JSONL."""
|
"""Install hook should output Machine config update JSONL."""
|
||||||
hook_output = json.dumps({
|
hook_output = json.dumps({
|
||||||
|
|||||||
@@ -459,7 +459,7 @@ class TestFilesystemMigration08to09(unittest.TestCase):
|
|||||||
'SAVE_MERCURY': 'True',
|
'SAVE_MERCURY': 'True',
|
||||||
'SAVE_PDF': 'True',
|
'SAVE_PDF': 'True',
|
||||||
'SAVE_MEDIA': 'True',
|
'SAVE_MEDIA': 'True',
|
||||||
'SAVE_ARCHIVE_DOT_ORG': 'True',
|
'SAVE_ARCHIVEDOTORG': 'True',
|
||||||
'SAVE_HEADERS': 'True',
|
'SAVE_HEADERS': 'True',
|
||||||
'SAVE_HTMLTOTEXT': 'True',
|
'SAVE_HTMLTOTEXT': 'True',
|
||||||
'SAVE_GIT': 'True',
|
'SAVE_GIT': 'True',
|
||||||
|
|||||||
@@ -949,19 +949,30 @@ def seed_0_8_data(db_path: Path) -> Dict[str, List[Dict]]:
|
|||||||
('core', '0072_rename_added_snapshot_bookmarked_at_and_more'),
|
('core', '0072_rename_added_snapshot_bookmarked_at_and_more'),
|
||||||
('core', '0073_rename_created_archiveresult_created_at_and_more'),
|
('core', '0073_rename_created_archiveresult_created_at_and_more'),
|
||||||
('core', '0074_alter_snapshot_downloaded_at'),
|
('core', '0074_alter_snapshot_downloaded_at'),
|
||||||
('core', '0023_new_schema'),
|
# For 0.8.x: DO NOT record 0023_new_schema - it replaces 0023-0074 for fresh installs
|
||||||
|
# We already recorded 0023-0074 above, so Django will know the state
|
||||||
|
# For 0.8.x: Record original machine migrations (before squashing)
|
||||||
|
# DO NOT record 0001_squashed here - it replaces 0001-0004 for fresh installs
|
||||||
('machine', '0001_initial'),
|
('machine', '0001_initial'),
|
||||||
('machine', '0002_alter_machine_stats_installedbinary'),
|
('machine', '0002_alter_machine_stats_installedbinary'),
|
||||||
('machine', '0003_alter_installedbinary_options_and_more'),
|
('machine', '0003_alter_installedbinary_options_and_more'),
|
||||||
('machine', '0004_alter_installedbinary_abspath_and_more'),
|
('machine', '0004_alter_installedbinary_abspath_and_more'),
|
||||||
('machine', '0001_squashed'),
|
# Then the new migrations after squashing
|
||||||
('machine', '0002_rename_custom_cmds_to_overrides'),
|
('machine', '0002_rename_custom_cmds_to_overrides'),
|
||||||
('machine', '0003_alter_dependency_id_alter_installedbinary_dependency_and_more'),
|
('machine', '0003_alter_dependency_id_alter_installedbinary_dependency_and_more'),
|
||||||
('machine', '0004_drop_dependency_table'),
|
('machine', '0004_drop_dependency_table'),
|
||||||
|
# Crawls must come before core.0024 because 0024_b depends on it
|
||||||
|
('crawls', '0001_initial'),
|
||||||
|
# Core 0024 migrations chain (in dependency order)
|
||||||
|
('core', '0024_b_clear_config_fields'),
|
||||||
|
('core', '0024_c_disable_fk_checks'),
|
||||||
|
('core', '0024_d_fix_crawls_config'),
|
||||||
('core', '0024_snapshot_crawl'),
|
('core', '0024_snapshot_crawl'),
|
||||||
|
('core', '0024_f_add_snapshot_config'),
|
||||||
('core', '0025_allow_duplicate_urls_per_crawl'),
|
('core', '0025_allow_duplicate_urls_per_crawl'),
|
||||||
|
# For 0.8.x: Record original api migration (before squashing)
|
||||||
|
# DO NOT record 0001_squashed here - it replaces 0001 for fresh installs
|
||||||
('api', '0001_initial'),
|
('api', '0001_initial'),
|
||||||
('api', '0001_squashed'),
|
|
||||||
('api', '0002_alter_apitoken_options'),
|
('api', '0002_alter_apitoken_options'),
|
||||||
('api', '0003_rename_user_apitoken_created_by_apitoken_abid_and_more'),
|
('api', '0003_rename_user_apitoken_created_by_apitoken_abid_and_more'),
|
||||||
('api', '0004_alter_apitoken_id_alter_apitoken_uuid'),
|
('api', '0004_alter_apitoken_id_alter_apitoken_uuid'),
|
||||||
@@ -970,11 +981,9 @@ def seed_0_8_data(db_path: Path) -> Dict[str, List[Dict]]:
|
|||||||
('api', '0007_alter_apitoken_created_by'),
|
('api', '0007_alter_apitoken_created_by'),
|
||||||
('api', '0008_alter_apitoken_created_alter_apitoken_created_by_and_more'),
|
('api', '0008_alter_apitoken_created_alter_apitoken_created_by_and_more'),
|
||||||
('api', '0009_rename_created_apitoken_created_at_and_more'),
|
('api', '0009_rename_created_apitoken_created_at_and_more'),
|
||||||
('crawls', '0001_initial'),
|
# Note: crawls.0001_initial moved earlier (before core.0024) due to dependencies
|
||||||
('crawls', '0002_drop_seed_model'),
|
# Stop here - 0.8.x ends at core.0025, crawls.0001, and we want to TEST the later migrations
|
||||||
('crawls', '0003_alter_crawl_output_dir'),
|
# Do NOT record 0026+ as they need to be tested during migration
|
||||||
('crawls', '0004_alter_crawl_output_dir'),
|
|
||||||
('core', '0035_snapshot_crawl_non_nullable_remove_created_by'),
|
|
||||||
]
|
]
|
||||||
|
|
||||||
for app, name in migrations:
|
for app, name in migrations:
|
||||||
@@ -1000,7 +1009,7 @@ def run_archivebox(data_dir: Path, args: list, timeout: int = 60, env: dict = No
|
|||||||
base_env['USE_COLOR'] = 'False'
|
base_env['USE_COLOR'] = 'False'
|
||||||
base_env['SHOW_PROGRESS'] = 'False'
|
base_env['SHOW_PROGRESS'] = 'False'
|
||||||
# Disable ALL extractors for faster tests (can be overridden by env parameter)
|
# Disable ALL extractors for faster tests (can be overridden by env parameter)
|
||||||
base_env['SAVE_ARCHIVE_DOT_ORG'] = 'False'
|
base_env['SAVE_ARCHIVEDOTORG'] = 'False'
|
||||||
base_env['SAVE_TITLE'] = 'False'
|
base_env['SAVE_TITLE'] = 'False'
|
||||||
base_env['SAVE_FAVICON'] = 'False'
|
base_env['SAVE_FAVICON'] = 'False'
|
||||||
base_env['SAVE_WGET'] = 'False'
|
base_env['SAVE_WGET'] = 'False'
|
||||||
|
|||||||
@@ -4,4 +4,5 @@ from django.apps import AppConfig
|
|||||||
class WorkersConfig(AppConfig):
|
class WorkersConfig(AppConfig):
|
||||||
default_auto_field = 'django.db.models.BigAutoField'
|
default_auto_field = 'django.db.models.BigAutoField'
|
||||||
name = 'archivebox.workers'
|
name = 'archivebox.workers'
|
||||||
|
label = 'workers'
|
||||||
|
|
||||||
|
|||||||
@@ -2,7 +2,7 @@
|
|||||||
# mkdir -p ~/archivebox/data && cd ~/archivebox
|
# mkdir -p ~/archivebox/data && cd ~/archivebox
|
||||||
# curl -fsSL 'https://docker-compose.archivebox.io' > docker-compose.yml
|
# curl -fsSL 'https://docker-compose.archivebox.io' > docker-compose.yml
|
||||||
# docker compose run archivebox version
|
# docker compose run archivebox version
|
||||||
# docker compose run archivebox config --set SAVE_ARCHIVE_DOT_ORG=False
|
# docker compose run archivebox config --set SAVE_ARCHIVEDOTORG=False
|
||||||
# docker compose run archivebox add --depth=1 'https://news.ycombinator.com'
|
# docker compose run archivebox add --depth=1 'https://news.ycombinator.com'
|
||||||
# docker compose run -T archivebox add < bookmarks.txt
|
# docker compose run -T archivebox add < bookmarks.txt
|
||||||
# docker compose up -d && open 'https://localhost:8000'
|
# docker compose up -d && open 'https://localhost:8000'
|
||||||
@@ -35,7 +35,7 @@ services:
|
|||||||
# - MEDIA_MAX_SIZE=750m # increase this filesize limit to allow archiving larger audio/video files
|
# - MEDIA_MAX_SIZE=750m # increase this filesize limit to allow archiving larger audio/video files
|
||||||
# - TIMEOUT=60 # increase this number to 120+ seconds if you see many slow downloads timing out
|
# - TIMEOUT=60 # increase this number to 120+ seconds if you see many slow downloads timing out
|
||||||
# - CHECK_SSL_VALIDITY=True # set to False to disable strict SSL checking (allows saving URLs w/ broken certs)
|
# - CHECK_SSL_VALIDITY=True # set to False to disable strict SSL checking (allows saving URLs w/ broken certs)
|
||||||
# - SAVE_ARCHIVE_DOT_ORG=True # set to False to disable submitting all URLs to Archive.org when archiving
|
# - SAVE_ARCHIVEDOTORG=True # set to False to disable submitting all URLs to Archive.org when archiving
|
||||||
# - USER_AGENT="..." # set a custom USER_AGENT to avoid being blocked as a bot
|
# - USER_AGENT="..." # set a custom USER_AGENT to avoid being blocked as a bot
|
||||||
# ...
|
# ...
|
||||||
# For more info, see: https://github.com/ArchiveBox/ArchiveBox/wiki/Docker#configuration
|
# For more info, see: https://github.com/ArchiveBox/ArchiveBox/wiki/Docker#configuration
|
||||||
|
|||||||
@@ -85,9 +85,9 @@ dependencies = [
|
|||||||
### Binary/Package Management
|
### Binary/Package Management
|
||||||
"abx-pkg>=0.1.0", # for: detecting, versioning, and installing binaries via apt/brew/pip/npm
|
"abx-pkg>=0.1.0", # for: detecting, versioning, and installing binaries via apt/brew/pip/npm
|
||||||
"gallery-dl>=1.31.1",
|
"gallery-dl>=1.31.1",
|
||||||
|
|
||||||
### UUID7 backport for Python <3.14
|
### UUID7 backport for Python <3.14
|
||||||
"uuid7>=0.1.0; python_version < '3.14'", # for: uuid7 support on Python 3.13 (provides uuid_extensions module)
|
"uuid7>=0.1.0; python_version < '3.14'", # for: uuid7 support on Python 3.13 (provides uuid_extensions module)
|
||||||
|
"pytest-django>=4.11.1",
|
||||||
]
|
]
|
||||||
|
|
||||||
[project.optional-dependencies]
|
[project.optional-dependencies]
|
||||||
@@ -183,6 +183,7 @@ ignore = ["E731", "E303", "E266", "E241", "E222"]
|
|||||||
|
|
||||||
[tool.pytest.ini_options]
|
[tool.pytest.ini_options]
|
||||||
testpaths = [ "tests" ]
|
testpaths = [ "tests" ]
|
||||||
|
DJANGO_SETTINGS_MODULE = "archivebox.core.settings"
|
||||||
|
|
||||||
[tool.mypy]
|
[tool.mypy]
|
||||||
mypy_path = "archivebox,archivebox/typings"
|
mypy_path = "archivebox,archivebox/typings"
|
||||||
|
|||||||
@@ -24,7 +24,7 @@ def disable_extractors_dict():
|
|||||||
"SAVE_HEADERS": "false",
|
"SAVE_HEADERS": "false",
|
||||||
"USE_GIT": "false",
|
"USE_GIT": "false",
|
||||||
"SAVE_MEDIA": "false",
|
"SAVE_MEDIA": "false",
|
||||||
"SAVE_ARCHIVE_DOT_ORG": "false",
|
"SAVE_ARCHIVEDOTORG": "false",
|
||||||
"SAVE_TITLE": "false",
|
"SAVE_TITLE": "false",
|
||||||
"SAVE_FAVICON": "false",
|
"SAVE_FAVICON": "false",
|
||||||
})
|
})
|
||||||
|
|||||||
@@ -33,7 +33,7 @@ def test_background_hooks_dont_block_parser_extractors(tmp_path, process):
|
|||||||
"SAVE_HEADERS": "false",
|
"SAVE_HEADERS": "false",
|
||||||
"USE_GIT": "false",
|
"USE_GIT": "false",
|
||||||
"SAVE_MEDIA": "false",
|
"SAVE_MEDIA": "false",
|
||||||
"SAVE_ARCHIVE_DOT_ORG": "false",
|
"SAVE_ARCHIVEDOTORG": "false",
|
||||||
"SAVE_TITLE": "false",
|
"SAVE_TITLE": "false",
|
||||||
"SAVE_FAVICON": "false",
|
"SAVE_FAVICON": "false",
|
||||||
# Enable chrome session (required for background hooks to start)
|
# Enable chrome session (required for background hooks to start)
|
||||||
@@ -133,7 +133,7 @@ def test_parser_extractors_emit_snapshot_jsonl(tmp_path, process):
|
|||||||
"SAVE_HEADERS": "false",
|
"SAVE_HEADERS": "false",
|
||||||
"USE_GIT": "false",
|
"USE_GIT": "false",
|
||||||
"SAVE_MEDIA": "false",
|
"SAVE_MEDIA": "false",
|
||||||
"SAVE_ARCHIVE_DOT_ORG": "false",
|
"SAVE_ARCHIVEDOTORG": "false",
|
||||||
"SAVE_TITLE": "false",
|
"SAVE_TITLE": "false",
|
||||||
"SAVE_FAVICON": "false",
|
"SAVE_FAVICON": "false",
|
||||||
"USE_CHROME": "false",
|
"USE_CHROME": "false",
|
||||||
|
|||||||
14
uv.lock
generated
14
uv.lock
generated
@@ -88,6 +88,7 @@ dependencies = [
|
|||||||
{ name = "py-machineid", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
|
{ name = "py-machineid", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
|
||||||
{ name = "pydantic", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
|
{ name = "pydantic", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
|
||||||
{ name = "pydantic-settings", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
|
{ name = "pydantic-settings", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
|
||||||
|
{ name = "pytest-django", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
|
||||||
{ name = "python-benedict", extra = ["io", "parse"], marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
|
{ name = "python-benedict", extra = ["io", "parse"], marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
|
||||||
{ name = "python-crontab", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
|
{ name = "python-crontab", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
|
||||||
{ name = "python-statemachine", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
|
{ name = "python-statemachine", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
|
||||||
@@ -186,6 +187,7 @@ requires-dist = [
|
|||||||
{ name = "py-machineid", specifier = ">=0.6.0" },
|
{ name = "py-machineid", specifier = ">=0.6.0" },
|
||||||
{ name = "pydantic", specifier = ">=2.8.0" },
|
{ name = "pydantic", specifier = ">=2.8.0" },
|
||||||
{ name = "pydantic-settings", specifier = ">=2.5.2" },
|
{ name = "pydantic-settings", specifier = ">=2.5.2" },
|
||||||
|
{ name = "pytest-django", specifier = ">=4.11.1" },
|
||||||
{ name = "python-benedict", extras = ["io", "parse"], specifier = ">=0.33.2" },
|
{ name = "python-benedict", extras = ["io", "parse"], specifier = ">=0.33.2" },
|
||||||
{ name = "python-crontab", specifier = ">=3.2.0" },
|
{ name = "python-crontab", specifier = ">=3.2.0" },
|
||||||
{ name = "python-ldap", marker = "extra == 'ldap'", specifier = ">=3.4.3" },
|
{ name = "python-ldap", marker = "extra == 'ldap'", specifier = ">=3.4.3" },
|
||||||
@@ -1848,6 +1850,18 @@ wheels = [
|
|||||||
{ url = "https://files.pythonhosted.org/packages/3b/ab/b3226f0bd7cdcf710fbede2b3548584366da3b19b5021e74f5bde2a8fa3f/pytest-9.0.2-py3-none-any.whl", hash = "sha256:711ffd45bf766d5264d487b917733b453d917afd2b0ad65223959f59089f875b", size = 374801, upload-time = "2025-12-06T21:30:49.154Z" },
|
{ url = "https://files.pythonhosted.org/packages/3b/ab/b3226f0bd7cdcf710fbede2b3548584366da3b19b5021e74f5bde2a8fa3f/pytest-9.0.2-py3-none-any.whl", hash = "sha256:711ffd45bf766d5264d487b917733b453d917afd2b0ad65223959f59089f875b", size = 374801, upload-time = "2025-12-06T21:30:49.154Z" },
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "pytest-django"
|
||||||
|
version = "4.11.1"
|
||||||
|
source = { registry = "https://pypi.org/simple" }
|
||||||
|
dependencies = [
|
||||||
|
{ name = "pytest", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
|
||||||
|
]
|
||||||
|
sdist = { url = "https://files.pythonhosted.org/packages/b1/fb/55d580352db26eb3d59ad50c64321ddfe228d3d8ac107db05387a2fadf3a/pytest_django-4.11.1.tar.gz", hash = "sha256:a949141a1ee103cb0e7a20f1451d355f83f5e4a5d07bdd4dcfdd1fd0ff227991", size = 86202, upload-time = "2025-04-03T18:56:09.338Z" }
|
||||||
|
wheels = [
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/be/ac/bd0608d229ec808e51a21044f3f2f27b9a37e7a0ebaca7247882e67876af/pytest_django-4.11.1-py3-none-any.whl", hash = "sha256:1b63773f648aa3d8541000c26929c1ea63934be1cfa674c76436966d73fe6a10", size = 25281, upload-time = "2025-04-03T18:56:07.678Z" },
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "python-benedict"
|
name = "python-benedict"
|
||||||
version = "0.35.0"
|
version = "0.35.0"
|
||||||
|
|||||||
Reference in New Issue
Block a user