mirror of
https://github.com/ArchiveBox/ArchiveBox.git
synced 2026-01-03 09:25:42 +10:00
142 lines
7.7 KiB
Python
142 lines
7.7 KiB
Python
# Generated by hand on 2025-12-29
|
|
# Creates Crawl and CrawlSchedule tables using raw SQL
|
|
|
|
from django.db import migrations, models
|
|
import django.db.models.deletion
|
|
import django.utils.timezone
|
|
import django.core.validators
|
|
from django.conf import settings
|
|
from archivebox.uuid_compat import uuid7
|
|
from archivebox.base_models.models import get_or_create_system_user_pk
|
|
|
|
|
|
# Forward DDL executed by the RunSQL operation below. Every statement uses
# IF NOT EXISTS, so it is a no-op for tables/indexes that are already present.
# NOTE(review): crawls_crawlschedule.template_id gets an index here but no
# FOREIGN KEY clause, although the SQL comment says the circular FK "will be
# added later" -- confirm whether a DB-level constraint is intended.
_CREATE_TABLES_SQL = """
    -- Create crawls_crawlschedule table first (circular FK will be added later)
    CREATE TABLE IF NOT EXISTS crawls_crawlschedule (
        id TEXT PRIMARY KEY NOT NULL,
        created_at DATETIME NOT NULL,
        modified_at DATETIME NOT NULL,
        num_uses_succeeded INTEGER NOT NULL DEFAULT 0,
        num_uses_failed INTEGER NOT NULL DEFAULT 0,

        schedule VARCHAR(64) NOT NULL,
        is_enabled BOOLEAN NOT NULL DEFAULT 1,
        label VARCHAR(64) NOT NULL DEFAULT '',
        notes TEXT NOT NULL DEFAULT '',

        template_id TEXT NOT NULL,
        created_by_id INTEGER NOT NULL,

        FOREIGN KEY (created_by_id) REFERENCES auth_user(id) ON DELETE CASCADE
    );
    CREATE INDEX IF NOT EXISTS crawls_crawlschedule_created_at_idx ON crawls_crawlschedule(created_at);
    CREATE INDEX IF NOT EXISTS crawls_crawlschedule_created_by_id_idx ON crawls_crawlschedule(created_by_id);
    CREATE INDEX IF NOT EXISTS crawls_crawlschedule_template_id_idx ON crawls_crawlschedule(template_id);

    -- Create crawls_crawl table
    CREATE TABLE IF NOT EXISTS crawls_crawl (
        id TEXT PRIMARY KEY NOT NULL,
        created_at DATETIME NOT NULL,
        modified_at DATETIME NOT NULL,
        num_uses_succeeded INTEGER NOT NULL DEFAULT 0,
        num_uses_failed INTEGER NOT NULL DEFAULT 0,

        urls TEXT NOT NULL,
        config TEXT,
        max_depth INTEGER NOT NULL DEFAULT 0,
        tags_str VARCHAR(1024) NOT NULL DEFAULT '',
        persona_id TEXT,
        label VARCHAR(64) NOT NULL DEFAULT '',
        notes TEXT NOT NULL DEFAULT '',
        output_dir VARCHAR(512) NOT NULL DEFAULT '',

        status VARCHAR(15) NOT NULL DEFAULT 'queued',
        retry_at DATETIME,
        created_by_id INTEGER NOT NULL,
        schedule_id TEXT,

        FOREIGN KEY (created_by_id) REFERENCES auth_user(id) ON DELETE CASCADE,
        FOREIGN KEY (schedule_id) REFERENCES crawls_crawlschedule(id) ON DELETE SET NULL
    );
    CREATE INDEX IF NOT EXISTS crawls_crawl_status_idx ON crawls_crawl(status);
    CREATE INDEX IF NOT EXISTS crawls_crawl_retry_at_idx ON crawls_crawl(retry_at);
    CREATE INDEX IF NOT EXISTS crawls_crawl_created_at_idx ON crawls_crawl(created_at);
    CREATE INDEX IF NOT EXISTS crawls_crawl_created_by_id_idx ON crawls_crawl(created_by_id);
    CREATE INDEX IF NOT EXISTS crawls_crawl_schedule_id_idx ON crawls_crawl(schedule_id);
"""

# Reverse DDL: crawls_crawl is dropped before crawls_crawlschedule, the table
# its schedule_id column references.
_DROP_TABLES_SQL = """
    DROP TABLE IF EXISTS crawls_crawl;
    DROP TABLE IF EXISTS crawls_crawlschedule;
"""


def _common_header_fields():
    """Build the five leading (name, field) pairs declared by both models.

    A new list of new field objects is constructed on every call, so the two
    CreateModel operations never share a field instance.
    """
    return [
        (
            'id',
            models.UUIDField(
                default=uuid7,
                editable=False,
                primary_key=True,
                serialize=False,
                unique=True,
            ),
        ),
        (
            'created_at',
            models.DateTimeField(db_index=True, default=django.utils.timezone.now),
        ),
        ('modified_at', models.DateTimeField(auto_now=True)),
        ('num_uses_succeeded', models.PositiveIntegerField(default=0)),
        ('num_uses_failed', models.PositiveIntegerField(default=0)),
    ]


def _created_by_field():
    """Build the ``created_by`` (name, field) pair used by both models."""
    owner_fk = models.ForeignKey(
        default=get_or_create_system_user_pk,
        on_delete=django.db.models.deletion.CASCADE,
        to=settings.AUTH_USER_MODEL,
    )
    return ('created_by', owner_fk)


class Migration(migrations.Migration):
    """Initial migration for the ``crawls`` app (Crawl and CrawlSchedule).

    The database side is plain SQL run through ``RunSQL``; the state side
    declares the matching models via ``CreateModel``/``AddField``. Both halves
    are wrapped in one ``SeparateDatabaseAndState`` operation.
    """

    initial = True

    dependencies = [
        ('auth', '0012_alter_user_first_name_max_length'),
        migrations.swappable_dependency(settings.AUTH_USER_MODEL),
    ]

    operations = [
        migrations.SeparateDatabaseAndState(
            database_operations=[
                migrations.RunSQL(
                    sql=_CREATE_TABLES_SQL,
                    reverse_sql=_DROP_TABLES_SQL,
                ),
            ],
            state_operations=[
                migrations.CreateModel(
                    name='CrawlSchedule',
                    fields=[
                        *_common_header_fields(),
                        ('schedule', models.CharField(max_length=64)),
                        ('is_enabled', models.BooleanField(default=True)),
                        ('label', models.CharField(blank=True, default='', max_length=64)),
                        ('notes', models.TextField(blank=True, default='')),
                        _created_by_field(),
                    ],
                    options={
                        'verbose_name': 'Scheduled Crawl',
                        'verbose_name_plural': 'Scheduled Crawls',
                        'app_label': 'crawls',
                    },
                ),
                migrations.CreateModel(
                    name='Crawl',
                    fields=[
                        *_common_header_fields(),
                        (
                            'urls',
                            models.TextField(help_text='Newline-separated list of URLs to crawl'),
                        ),
                        ('config', models.JSONField(blank=True, default=dict, null=True)),
                        (
                            'max_depth',
                            models.PositiveSmallIntegerField(
                                default=0,
                                validators=[
                                    django.core.validators.MinValueValidator(0),
                                    django.core.validators.MaxValueValidator(4),
                                ],
                            ),
                        ),
                        ('tags_str', models.CharField(blank=True, default='', max_length=1024)),
                        ('persona_id', models.UUIDField(blank=True, null=True)),
                        ('label', models.CharField(blank=True, default='', max_length=64)),
                        ('notes', models.TextField(blank=True, default='')),
                        ('output_dir', models.CharField(blank=True, default='', max_length=512)),
                        (
                            'status',
                            models.CharField(
                                choices=[
                                    ('queued', 'Queued'),
                                    ('started', 'Started'),
                                    ('sealed', 'Sealed'),
                                ],
                                db_index=True,
                                default='queued',
                                max_length=15,
                            ),
                        ),
                        (
                            'retry_at',
                            models.DateTimeField(
                                blank=True,
                                db_index=True,
                                default=django.utils.timezone.now,
                                null=True,
                            ),
                        ),
                        _created_by_field(),
                        (
                            'schedule',
                            models.ForeignKey(
                                blank=True,
                                editable=True,
                                null=True,
                                on_delete=django.db.models.deletion.SET_NULL,
                                to='crawls.crawlschedule',
                            ),
                        ),
                    ],
                    options={
                        'verbose_name': 'Crawl',
                        'verbose_name_plural': 'Crawls',
                        'app_label': 'crawls',
                    },
                ),
                # CrawlSchedule.template points at Crawl while Crawl.schedule
                # points at CrawlSchedule, so this FK is attached only after
                # both models have been declared above.
                migrations.AddField(
                    model_name='crawlschedule',
                    name='template',
                    field=models.ForeignKey(
                        on_delete=django.db.models.deletion.CASCADE,
                        to='crawls.crawl',
                    ),
                ),
            ],
        ),
    ]