Mirror of https://github.com/ArchiveBox/ArchiveBox.git
Synced 2026-04-06 07:47:53 +10:00
178 lines · 8.8 KiB · Python
# Written by hand on 2025-12-29.
# Creates the Crawl and CrawlSchedule tables using raw SQL.
|
|
|
|
from django.db import migrations, models
|
|
import django.db.models.deletion
|
|
import django.utils.timezone
|
|
import django.core.validators
|
|
from django.conf import settings
|
|
from archivebox.uuid_compat import uuid7
|
|
from archivebox.base_models.models import get_or_create_system_user_pk
|
|
|
|
|
|
class Migration(migrations.Migration):
    """Initial hand-written migration for the ``crawls`` app.

    The ``crawls_crawlschedule`` and ``crawls_crawl`` tables (and their
    indexes) are created with raw SQL, while the matching Django model state
    for ``CrawlSchedule`` and ``Crawl`` is declared separately through
    ``SeparateDatabaseAndState``.
    """

    initial = True

    dependencies = [
        ("auth", "0012_alter_user_first_name_max_length"),
        migrations.swappable_dependency(settings.AUTH_USER_MODEL),
    ]

    operations = [
        migrations.SeparateDatabaseAndState(
            # Database side: raw SQL only. Every statement uses IF NOT EXISTS,
            # so tables/indexes that are already present are left untouched.
            #
            # NOTE: template_id is created as a plain indexed column; this SQL
            # does not declare a database-level FOREIGN KEY for it, even though
            # the state below declares CrawlSchedule.template as a ForeignKey.
            #
            # NOTE(review): the SQL references auth_user directly while the
            # state uses settings.AUTH_USER_MODEL -- confirm that a swapped
            # user model does not need to be supported here.
            database_operations=[
                migrations.RunSQL(
                    sql="""
                        -- Create crawls_crawlschedule table first (circular FK will be added later)
                        CREATE TABLE IF NOT EXISTS crawls_crawlschedule (
                            id TEXT PRIMARY KEY NOT NULL,
                            created_at DATETIME NOT NULL,
                            modified_at DATETIME NOT NULL,
                            num_uses_succeeded INTEGER NOT NULL DEFAULT 0,
                            num_uses_failed INTEGER NOT NULL DEFAULT 0,

                            schedule VARCHAR(64) NOT NULL,
                            is_enabled BOOLEAN NOT NULL DEFAULT 1,
                            label VARCHAR(64) NOT NULL DEFAULT '',
                            notes TEXT NOT NULL DEFAULT '',

                            template_id TEXT NOT NULL,
                            created_by_id INTEGER NOT NULL,

                            FOREIGN KEY (created_by_id) REFERENCES auth_user(id) ON DELETE CASCADE
                        );
                        CREATE INDEX IF NOT EXISTS crawls_crawlschedule_created_at_idx ON crawls_crawlschedule(created_at);
                        CREATE INDEX IF NOT EXISTS crawls_crawlschedule_created_by_id_idx ON crawls_crawlschedule(created_by_id);
                        CREATE INDEX IF NOT EXISTS crawls_crawlschedule_template_id_idx ON crawls_crawlschedule(template_id);

                        -- Create crawls_crawl table
                        CREATE TABLE IF NOT EXISTS crawls_crawl (
                            id TEXT PRIMARY KEY NOT NULL,
                            created_at DATETIME NOT NULL,
                            modified_at DATETIME NOT NULL,
                            num_uses_succeeded INTEGER NOT NULL DEFAULT 0,
                            num_uses_failed INTEGER NOT NULL DEFAULT 0,

                            urls TEXT NOT NULL,
                            config TEXT,
                            max_depth INTEGER NOT NULL DEFAULT 0,
                            tags_str VARCHAR(1024) NOT NULL DEFAULT '',
                            persona_id TEXT,
                            label VARCHAR(64) NOT NULL DEFAULT '',
                            notes TEXT NOT NULL DEFAULT '',
                            output_dir VARCHAR(512) NOT NULL DEFAULT '',

                            status VARCHAR(15) NOT NULL DEFAULT 'queued',
                            retry_at DATETIME,
                            created_by_id INTEGER NOT NULL,
                            schedule_id TEXT,

                            FOREIGN KEY (created_by_id) REFERENCES auth_user(id) ON DELETE CASCADE,
                            FOREIGN KEY (schedule_id) REFERENCES crawls_crawlschedule(id) ON DELETE SET NULL
                        );
                        CREATE INDEX IF NOT EXISTS crawls_crawl_status_idx ON crawls_crawl(status);
                        CREATE INDEX IF NOT EXISTS crawls_crawl_retry_at_idx ON crawls_crawl(retry_at);
                        CREATE INDEX IF NOT EXISTS crawls_crawl_created_at_idx ON crawls_crawl(created_at);
                        CREATE INDEX IF NOT EXISTS crawls_crawl_created_by_id_idx ON crawls_crawl(created_by_id);
                        CREATE INDEX IF NOT EXISTS crawls_crawl_schedule_id_idx ON crawls_crawl(schedule_id);
                    """,
                    # Drop crawls_crawl first: it holds the FK to crawls_crawlschedule.
                    reverse_sql="""
                        DROP TABLE IF EXISTS crawls_crawl;
                        DROP TABLE IF EXISTS crawls_crawlschedule;
                    """,
                ),
            ],
            # State side: these operations only update Django's in-memory model
            # state; the schema itself is produced by the SQL above.
            state_operations=[
                migrations.CreateModel(
                    name="CrawlSchedule",
                    fields=[
                        ("id", models.UUIDField(default=uuid7, editable=False, primary_key=True, serialize=False, unique=True)),
                        ("created_at", models.DateTimeField(db_index=True, default=django.utils.timezone.now)),
                        ("modified_at", models.DateTimeField(auto_now=True)),
                        ("num_uses_succeeded", models.PositiveIntegerField(default=0)),
                        ("num_uses_failed", models.PositiveIntegerField(default=0)),
                        ("schedule", models.CharField(max_length=64)),
                        ("is_enabled", models.BooleanField(default=True)),
                        ("label", models.CharField(blank=True, default="", max_length=64)),
                        ("notes", models.TextField(blank=True, default="")),
                        (
                            "created_by",
                            models.ForeignKey(
                                default=get_or_create_system_user_pk,
                                on_delete=django.db.models.deletion.CASCADE,
                                to=settings.AUTH_USER_MODEL,
                            ),
                        ),
                    ],
                    options={
                        "verbose_name": "Scheduled Crawl",
                        "verbose_name_plural": "Scheduled Crawls",
                        "app_label": "crawls",
                    },
                ),
                migrations.CreateModel(
                    name="Crawl",
                    fields=[
                        ("id", models.UUIDField(default=uuid7, editable=False, primary_key=True, serialize=False, unique=True)),
                        ("created_at", models.DateTimeField(db_index=True, default=django.utils.timezone.now)),
                        ("modified_at", models.DateTimeField(auto_now=True)),
                        ("num_uses_succeeded", models.PositiveIntegerField(default=0)),
                        ("num_uses_failed", models.PositiveIntegerField(default=0)),
                        ("urls", models.TextField(help_text="Newline-separated list of URLs to crawl")),
                        ("config", models.JSONField(blank=True, default=dict, null=True)),
                        (
                            "max_depth",
                            models.PositiveSmallIntegerField(
                                default=0,
                                validators=[django.core.validators.MinValueValidator(0), django.core.validators.MaxValueValidator(4)],
                            ),
                        ),
                        ("tags_str", models.CharField(blank=True, default="", max_length=1024)),
                        ("persona_id", models.UUIDField(blank=True, null=True)),
                        ("label", models.CharField(blank=True, default="", max_length=64)),
                        ("notes", models.TextField(blank=True, default="")),
                        ("output_dir", models.CharField(blank=True, default="", max_length=512)),
                        (
                            "status",
                            models.CharField(
                                choices=[("queued", "Queued"), ("started", "Started"), ("sealed", "Sealed")],
                                db_index=True,
                                default="queued",
                                max_length=15,
                            ),
                        ),
                        ("retry_at", models.DateTimeField(blank=True, db_index=True, default=django.utils.timezone.now, null=True)),
                        (
                            "created_by",
                            models.ForeignKey(
                                default=get_or_create_system_user_pk,
                                on_delete=django.db.models.deletion.CASCADE,
                                to=settings.AUTH_USER_MODEL,
                            ),
                        ),
                        (
                            "schedule",
                            models.ForeignKey(
                                blank=True,
                                editable=True,
                                null=True,
                                on_delete=django.db.models.deletion.SET_NULL,
                                to="crawls.crawlschedule",
                            ),
                        ),
                    ],
                    options={
                        "verbose_name": "Crawl",
                        "verbose_name_plural": "Crawls",
                        "app_label": "crawls",
                    },
                ),
                # CrawlSchedule.template and Crawl.schedule point at each other,
                # so this FK is added only after both models exist in the state.
                migrations.AddField(
                    model_name="crawlschedule",
                    name="template",
                    field=models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to="crawls.crawl"),
                ),
            ],
        ),
    ]
|