# Generated by hand on 2025-12-29
# Creates Crawl and CrawlSchedule tables using raw SQL

from django.db import migrations, models
import django.db.models.deletion
import django.utils.timezone
import django.core.validators
from django.conf import settings
from archivebox.uuid_compat import uuid7
from archivebox.base_models.models import get_or_create_system_user_pk


# Forward DDL. Every statement uses IF NOT EXISTS, so tables/indexes that are
# already present are left untouched.
# NOTE: no FOREIGN KEY constraint is declared for crawls_crawlschedule.template_id
# anywhere in this migration's SQL; the CrawlSchedule.template -> Crawl relation is
# only recorded in the Django migration state (see the AddField state operation).
_CREATE_TABLES_SQL = """
    -- Create crawls_crawlschedule table first (circular FK will be added later)
    CREATE TABLE IF NOT EXISTS crawls_crawlschedule (
        id TEXT PRIMARY KEY NOT NULL,
        created_at DATETIME NOT NULL,
        modified_at DATETIME NOT NULL,
        num_uses_succeeded INTEGER NOT NULL DEFAULT 0,
        num_uses_failed INTEGER NOT NULL DEFAULT 0,

        schedule VARCHAR(64) NOT NULL,
        is_enabled BOOLEAN NOT NULL DEFAULT 1,
        label VARCHAR(64) NOT NULL DEFAULT '',
        notes TEXT NOT NULL DEFAULT '',

        template_id TEXT NOT NULL,
        created_by_id INTEGER NOT NULL,

        FOREIGN KEY (created_by_id) REFERENCES auth_user(id) ON DELETE CASCADE
    );
    CREATE INDEX IF NOT EXISTS crawls_crawlschedule_created_at_idx ON crawls_crawlschedule(created_at);
    CREATE INDEX IF NOT EXISTS crawls_crawlschedule_created_by_id_idx ON crawls_crawlschedule(created_by_id);
    CREATE INDEX IF NOT EXISTS crawls_crawlschedule_template_id_idx ON crawls_crawlschedule(template_id);

    -- Create crawls_crawl table
    CREATE TABLE IF NOT EXISTS crawls_crawl (
        id TEXT PRIMARY KEY NOT NULL,
        created_at DATETIME NOT NULL,
        modified_at DATETIME NOT NULL,
        num_uses_succeeded INTEGER NOT NULL DEFAULT 0,
        num_uses_failed INTEGER NOT NULL DEFAULT 0,

        urls TEXT NOT NULL,
        config TEXT,
        max_depth INTEGER NOT NULL DEFAULT 0,
        tags_str VARCHAR(1024) NOT NULL DEFAULT '',
        persona_id TEXT,
        label VARCHAR(64) NOT NULL DEFAULT '',
        notes TEXT NOT NULL DEFAULT '',
        output_dir VARCHAR(512) NOT NULL DEFAULT '',

        status VARCHAR(15) NOT NULL DEFAULT 'queued',
        retry_at DATETIME,
        created_by_id INTEGER NOT NULL,
        schedule_id TEXT,

        FOREIGN KEY (created_by_id) REFERENCES auth_user(id) ON DELETE CASCADE,
        FOREIGN KEY (schedule_id) REFERENCES crawls_crawlschedule(id) ON DELETE SET NULL
    );
    CREATE INDEX IF NOT EXISTS crawls_crawl_status_idx ON crawls_crawl(status);
    CREATE INDEX IF NOT EXISTS crawls_crawl_retry_at_idx ON crawls_crawl(retry_at);
    CREATE INDEX IF NOT EXISTS crawls_crawl_created_at_idx ON crawls_crawl(created_at);
    CREATE INDEX IF NOT EXISTS crawls_crawl_created_by_id_idx ON crawls_crawl(created_by_id);
    CREATE INDEX IF NOT EXISTS crawls_crawl_schedule_id_idx ON crawls_crawl(schedule_id);
"""

# Reverse DDL: crawls_crawl is dropped before crawls_crawlschedule, the table
# its schedule_id column references.
_DROP_TABLES_SQL = """
    DROP TABLE IF EXISTS crawls_crawl;
    DROP TABLE IF EXISTS crawls_crawlschedule;
"""

# What actually runs against the database: a single raw-SQL operation.
_DATABASE_OPERATIONS = [
    migrations.RunSQL(
        sql=_CREATE_TABLES_SQL,
        reverse_sql=_DROP_TABLES_SQL,
    ),
]

# What Django records in its in-memory migration state. These operations are
# passed as state_operations only, so they emit no SQL of their own.
_STATE_OPERATIONS = [
    migrations.CreateModel(
        name='CrawlSchedule',
        fields=[
            (
                'id',
                models.UUIDField(
                    default=uuid7,
                    editable=False,
                    primary_key=True,
                    serialize=False,
                    unique=True,
                ),
            ),
            (
                'created_at',
                models.DateTimeField(db_index=True, default=django.utils.timezone.now),
            ),
            ('modified_at', models.DateTimeField(auto_now=True)),
            ('num_uses_succeeded', models.PositiveIntegerField(default=0)),
            ('num_uses_failed', models.PositiveIntegerField(default=0)),
            ('schedule', models.CharField(max_length=64)),
            ('is_enabled', models.BooleanField(default=True)),
            ('label', models.CharField(blank=True, default='', max_length=64)),
            ('notes', models.TextField(blank=True, default='')),
            (
                'created_by',
                models.ForeignKey(
                    default=get_or_create_system_user_pk,
                    on_delete=django.db.models.deletion.CASCADE,
                    to=settings.AUTH_USER_MODEL,
                ),
            ),
        ],
        options={
            'verbose_name': 'Scheduled Crawl',
            'verbose_name_plural': 'Scheduled Crawls',
            'app_label': 'crawls',
        },
    ),
    migrations.CreateModel(
        name='Crawl',
        fields=[
            (
                'id',
                models.UUIDField(
                    default=uuid7,
                    editable=False,
                    primary_key=True,
                    serialize=False,
                    unique=True,
                ),
            ),
            (
                'created_at',
                models.DateTimeField(db_index=True, default=django.utils.timezone.now),
            ),
            ('modified_at', models.DateTimeField(auto_now=True)),
            ('num_uses_succeeded', models.PositiveIntegerField(default=0)),
            ('num_uses_failed', models.PositiveIntegerField(default=0)),
            (
                'urls',
                models.TextField(help_text='Newline-separated list of URLs to crawl'),
            ),
            ('config', models.JSONField(blank=True, default=dict, null=True)),
            (
                'max_depth',
                models.PositiveSmallIntegerField(
                    default=0,
                    validators=[
                        django.core.validators.MinValueValidator(0),
                        django.core.validators.MaxValueValidator(4),
                    ],
                ),
            ),
            ('tags_str', models.CharField(blank=True, default='', max_length=1024)),
            ('persona_id', models.UUIDField(blank=True, null=True)),
            ('label', models.CharField(blank=True, default='', max_length=64)),
            ('notes', models.TextField(blank=True, default='')),
            ('output_dir', models.CharField(blank=True, default='', max_length=512)),
            (
                'status',
                models.CharField(
                    choices=[
                        ('queued', 'Queued'),
                        ('started', 'Started'),
                        ('sealed', 'Sealed'),
                    ],
                    db_index=True,
                    default='queued',
                    max_length=15,
                ),
            ),
            (
                'retry_at',
                models.DateTimeField(
                    blank=True,
                    db_index=True,
                    default=django.utils.timezone.now,
                    null=True,
                ),
            ),
            (
                'created_by',
                models.ForeignKey(
                    default=get_or_create_system_user_pk,
                    on_delete=django.db.models.deletion.CASCADE,
                    to=settings.AUTH_USER_MODEL,
                ),
            ),
            (
                'schedule',
                models.ForeignKey(
                    blank=True,
                    editable=True,
                    null=True,
                    on_delete=django.db.models.deletion.SET_NULL,
                    to='crawls.crawlschedule',
                ),
            ),
        ],
        options={
            'verbose_name': 'Crawl',
            'verbose_name_plural': 'Crawls',
            'app_label': 'crawls',
        },
    ),
    # CrawlSchedule.template points at Crawl, which is declared after
    # CrawlSchedule above, so the field is attached in a separate step.
    migrations.AddField(
        model_name='crawlschedule',
        name='template',
        field=models.ForeignKey(
            on_delete=django.db.models.deletion.CASCADE,
            to='crawls.crawl',
        ),
    ),
]


class Migration(migrations.Migration):
    """Initial migration creating the CrawlSchedule and Crawl tables.

    The schema is created with hand-written SQL, while the matching model
    definitions are registered in Django's migration state only; the two halves
    are paired through ``SeparateDatabaseAndState``.
    """

    initial = True

    dependencies = [
        ('auth', '0012_alter_user_first_name_max_length'),
        migrations.swappable_dependency(settings.AUTH_USER_MODEL),
    ]

    operations = [
        migrations.SeparateDatabaseAndState(
            database_operations=_DATABASE_OPERATIONS,
            state_operations=_STATE_OPERATIONS,
        ),
    ]