mirror of
https://github.com/ArchiveBox/ArchiveBox.git
synced 2026-01-02 17:05:38 +10:00
fix archivebox add
This commit is contained in:
@@ -164,7 +164,7 @@ class BaseModelWithStateMachine(models.Model, MachineMixin):
|
||||
|
||||
@classproperty
|
||||
def ACTIVE_STATE(cls) -> str:
    """Return the class's active state, converted with ``cls._state_to_str``."""
    return cls._state_to_str(cls.StateMachineClass.active_state)
    # NOTE(review): unreachable as written. The statement below repeats the
    # return above with a different attribute path (``cls.active_state`` instead
    # of ``cls.StateMachineClass.active_state``). This looks like the removed and
    # added sides of a diff hunk rendered together -- confirm which lookup is
    # intended and keep only that one.
    return cls._state_to_str(cls.active_state)
|
||||
|
||||
@classproperty
|
||||
def INITIAL_STATE(cls) -> str:
|
||||
|
||||
19
archivebox/core/migrations/0075_archiveresult_retry_at.py
Normal file
19
archivebox/core/migrations/0075_archiveresult_retry_at.py
Normal file
@@ -0,0 +1,19 @@
|
||||
# Generated by Django 5.1.2 on 2025-12-29 10:16
|
||||
|
||||
import django.utils.timezone
|
||||
from django.db import migrations, models
|
||||
|
||||
|
||||
class Migration(migrations.Migration):
    """Add an indexed ``retry_at`` datetime column to the ``archiveresult`` model."""

    # Applies on top of the previous ``core`` migration.
    dependencies = [
        ('core', '0074_alter_snapshot_downloaded_at'),
    ]

    operations = [
        migrations.AddField(
            model_name='archiveresult',
            name='retry_at',
            # The default is the callable ``timezone.now`` (not a fixed timestamp).
            field=models.DateTimeField(
                db_index=True,
                default=django.utils.timezone.now,
            ),
        ),
    ]
|
||||
@@ -0,0 +1,71 @@
|
||||
# Generated by Django 5.1.2 on 2025-12-29 10:16
|
||||
|
||||
import django.db.models.deletion
|
||||
import django.utils.timezone
|
||||
from django.db import migrations, models
|
||||
|
||||
|
||||
class Migration(migrations.Migration):
    """Add ``crawl``, ``retry_at`` and ``status`` to ``snapshot`` and redefine
    several ``archiveresult`` columns.

    The ``snapshot.crawl`` foreign key targets ``crawls.crawl``, which is why
    this migration depends on the initial ``crawls`` migration.
    """

    dependencies = [
        ('core', '0075_archiveresult_retry_at'),
        ('crawls', '0001_initial'),
    ]

    operations = [
        # --- new columns on snapshot -------------------------------------
        migrations.AddField(
            model_name='snapshot',
            name='crawl',
            field=models.ForeignKey(
                blank=True,
                default=None,
                null=True,
                on_delete=django.db.models.deletion.CASCADE,
                related_name='snapshot_set',
                to='crawls.crawl',
            ),
        ),
        migrations.AddField(
            model_name='snapshot',
            name='retry_at',
            field=models.DateTimeField(
                db_index=True,
                default=django.utils.timezone.now,
            ),
        ),
        migrations.AddField(
            model_name='snapshot',
            name='status',
            field=models.CharField(
                choices=[
                    ('queued', 'Queued'),
                    ('started', 'Started'),
                    ('sealed', 'Sealed'),
                ],
                db_index=True,
                default='queued',
                max_length=15,
            ),
        ),

        # --- redefined columns on archiveresult --------------------------
        migrations.AlterField(
            model_name='archiveresult',
            name='cmd',
            field=models.JSONField(blank=True, default=None, null=True),
        ),
        migrations.AlterField(
            model_name='archiveresult',
            name='end_ts',
            field=models.DateTimeField(blank=True, default=None, null=True),
        ),
        migrations.AlterField(
            model_name='archiveresult',
            name='extractor',
            field=models.CharField(
                choices=[
                    ('htmltotext', 'htmltotext'),
                    ('git', 'git'),
                    ('singlefile', 'singlefile'),
                    ('media', 'media'),
                    ('archive_org', 'archive_org'),
                    ('readability', 'readability'),
                    ('mercury', 'mercury'),
                    ('favicon', 'favicon'),
                    ('pdf', 'pdf'),
                    ('headers', 'headers'),
                    ('screenshot', 'screenshot'),
                    ('dom', 'dom'),
                    ('title', 'title'),
                    ('wget', 'wget'),
                ],
                db_index=True,
                max_length=32,
            ),
        ),
        migrations.AlterField(
            model_name='archiveresult',
            name='output',
            field=models.CharField(
                blank=True,
                default=None,
                max_length=1024,
                null=True,
            ),
        ),
        migrations.AlterField(
            model_name='archiveresult',
            name='pwd',
            field=models.CharField(
                blank=True,
                default=None,
                max_length=256,
                null=True,
            ),
        ),
        migrations.AlterField(
            model_name='archiveresult',
            name='snapshot',
            field=models.ForeignKey(
                on_delete=django.db.models.deletion.CASCADE,
                to='core.snapshot',
            ),
        ),
        migrations.AlterField(
            model_name='archiveresult',
            name='start_ts',
            field=models.DateTimeField(blank=True, default=None, null=True),
        ),
        migrations.AlterField(
            model_name='archiveresult',
            name='status',
            field=models.CharField(
                choices=[
                    ('queued', 'Queued'),
                    ('started', 'Started'),
                    ('backoff', 'Waiting to retry'),
                    ('succeeded', 'Succeeded'),
                    ('failed', 'Failed'),
                    ('skipped', 'Skipped'),
                ],
                db_index=True,
                default='queued',
                max_length=15,
            ),
        ),
    ]
|
||||
78
archivebox/crawls/migrations/0001_initial.py
Normal file
78
archivebox/crawls/migrations/0001_initial.py
Normal file
@@ -0,0 +1,78 @@
|
||||
# Generated by Django 5.1.2 on 2025-12-29 10:16
|
||||
|
||||
import abid_utils.models
|
||||
import charidfield.fields
|
||||
import django.core.validators
|
||||
import django.db.models.deletion
|
||||
import django.utils.timezone
|
||||
import statemachine.mixins
|
||||
from django.conf import settings
|
||||
from django.db import migrations, models
|
||||
|
||||
|
||||
class Migration(migrations.Migration):
    """Initial ``crawls`` schema: create ``CrawlSchedule``, ``Crawl`` and ``Outlink``.

    ``Crawl`` references ``seeds.seed`` and ``Outlink`` references
    ``core.archiveresult``, hence the dependencies on those two apps in
    addition to the swappable user model.
    """

    initial = True

    dependencies = [
        ('core', '0075_archiveresult_retry_at'),
        ('seeds', '0001_initial'),
        migrations.swappable_dependency(settings.AUTH_USER_MODEL),
    ]

    operations = [
        migrations.CreateModel(
            name='CrawlSchedule',
            fields=[
                (
                    'id',
                    models.BigAutoField(
                        auto_created=True,
                        primary_key=True,
                        serialize=False,
                        verbose_name='ID',
                    ),
                ),
                ('num_uses_failed', models.PositiveIntegerField(default=0)),
                ('num_uses_succeeded', models.PositiveIntegerField(default=0)),
                ('schedule', models.CharField(max_length=64)),
                ('is_enabled', models.BooleanField(default=True)),
                (
                    'created_at',
                    abid_utils.models.AutoDateTimeField(db_index=True, default=None),
                ),
                ('modified_at', models.DateTimeField(auto_now=True)),
                (
                    'created_by',
                    models.ForeignKey(
                        default=None,
                        on_delete=django.db.models.deletion.CASCADE,
                        to=settings.AUTH_USER_MODEL,
                    ),
                ),
            ],
            options={
                'abstract': False,
            },
        ),
        migrations.CreateModel(
            name='Crawl',
            fields=[
                ('num_uses_failed', models.PositiveIntegerField(default=0)),
                ('num_uses_succeeded', models.PositiveIntegerField(default=0)),
                (
                    'id',
                    models.UUIDField(
                        default=None,
                        editable=False,
                        primary_key=True,
                        serialize=False,
                        unique=True,
                        verbose_name='ID',
                    ),
                ),
                (
                    'abid',
                    # NOTE(review): the help_text example shows an ``snp_`` id while
                    # this field's prefix is ``crl_`` -- presumably shared wording;
                    # confirm against the model definition.
                    charidfield.fields.CharIDField(
                        blank=True,
                        db_index=True,
                        default=None,
                        help_text='ABID-format identifier for this entity (e.g. snp_01BJQMF54D093DXEAWZ6JYRPAQ)',
                        max_length=30,
                        null=True,
                        prefix='crl_',
                        unique=True,
                    ),
                ),
                (
                    'created_at',
                    abid_utils.models.AutoDateTimeField(db_index=True, default=None),
                ),
                ('modified_at', models.DateTimeField(auto_now=True)),
                (
                    'status',
                    models.CharField(
                        choices=[
                            ('queued', 'Queued'),
                            ('started', 'Started'),
                            ('sealed', 'Sealed'),
                        ],
                        db_index=True,
                        default='queued',
                        max_length=15,
                    ),
                ),
                (
                    'retry_at',
                    models.DateTimeField(
                        db_index=True,
                        default=django.utils.timezone.now,
                    ),
                ),
                (
                    'max_depth',
                    # Validators bound the value to the inclusive range 0..4.
                    models.PositiveSmallIntegerField(
                        default=0,
                        validators=[
                            django.core.validators.MinValueValidator(0),
                            django.core.validators.MaxValueValidator(4),
                        ],
                    ),
                ),
                ('tags_str', models.CharField(blank=True, default='', max_length=1024)),
                ('persona', models.CharField(blank=True, default='auto', max_length=32)),
                ('config', models.JSONField(default=dict)),
                (
                    'created_by',
                    models.ForeignKey(
                        default=None,
                        on_delete=django.db.models.deletion.CASCADE,
                        related_name='crawl_set',
                        to=settings.AUTH_USER_MODEL,
                    ),
                ),
                (
                    'seed',
                    models.ForeignKey(
                        on_delete=django.db.models.deletion.PROTECT,
                        related_name='crawl_set',
                        to='seeds.seed',
                    ),
                ),
                (
                    'schedule',
                    models.ForeignKey(
                        blank=True,
                        null=True,
                        on_delete=django.db.models.deletion.SET_NULL,
                        to='crawls.crawlschedule',
                    ),
                ),
            ],
            options={
                'verbose_name': 'Crawl',
                'verbose_name_plural': 'Crawls',
            },
            bases=(models.Model, statemachine.mixins.MachineMixin),
        ),
        migrations.CreateModel(
            name='Outlink',
            fields=[
                (
                    'id',
                    models.UUIDField(
                        default=None,
                        editable=False,
                        primary_key=True,
                        serialize=False,
                        unique=True,
                        verbose_name='ID',
                    ),
                ),
                ('src', models.URLField()),
                ('dst', models.URLField()),
                (
                    'crawl',
                    models.ForeignKey(
                        on_delete=django.db.models.deletion.CASCADE,
                        related_name='outlink_set',
                        to='crawls.crawl',
                    ),
                ),
                (
                    'via',
                    models.ForeignKey(
                        blank=True,
                        null=True,
                        on_delete=django.db.models.deletion.SET_NULL,
                        related_name='outlink_set',
                        to='core.archiveresult',
                    ),
                ),
            ],
            options={
                # NOTE(review): ``via`` is nullable; on many databases NULLs compare
                # as distinct in unique constraints, so rows with via=NULL may not
                # be deduplicated by this -- confirm that is acceptable.
                'unique_together': {('src', 'dst', 'via')},
            },
        ),
    ]
|
||||
@@ -80,8 +80,8 @@ def write_html_link_details(link: Link, out_dir: Optional[str]=None) -> None:
|
||||
|
||||
@enforce_types
|
||||
def link_details_template(link: Link) -> str:
|
||||
|
||||
from abx_plugin_wget_extractor.wget import wget_output_path
|
||||
|
||||
from abx_plugin_wget.wget import wget_output_path
|
||||
|
||||
SAVE_ARCHIVE_DOT_ORG = abx.pm.hook.get_FLAT_CONFIG().SAVE_ARCHIVE_DOT_ORG
|
||||
|
||||
|
||||
@@ -427,8 +427,7 @@ class Link:
|
||||
"""predict the expected output paths that should be present after archiving"""
|
||||
|
||||
from abx_plugin_wget.wget import wget_output_path
|
||||
|
||||
FAVICON_CONFIG = abx.pm.hook.get_CONFIGS().favicon
|
||||
from abx_plugin_favicon.config import FAVICON_CONFIG
|
||||
|
||||
# TODO: banish this awful duplication from the codebase and import these
|
||||
# from their respective extractor files
|
||||
|
||||
@@ -0,0 +1,26 @@
|
||||
# Generated by Django 5.1.2 on 2025-12-29 10:16
|
||||
|
||||
from django.db import migrations
|
||||
|
||||
|
||||
class Migration(migrations.Migration):
    """Register ``SinglefileResult`` as a proxy model over ``core.archiveresult``.

    The model declares no fields, indexes or constraints of its own.
    """

    initial = True

    dependencies = [
        ('core', '0074_alter_snapshot_downloaded_at'),
    ]

    operations = [
        migrations.CreateModel(
            name='SinglefileResult',
            fields=[],
            options={
                'proxy': True,
                'indexes': [],
                'constraints': [],
            },
            bases=('core.archiveresult',),
        ),
    ]
|
||||
40
archivebox/seeds/migrations/0001_initial.py
Normal file
40
archivebox/seeds/migrations/0001_initial.py
Normal file
@@ -0,0 +1,40 @@
|
||||
# Generated by Django 5.1.2 on 2025-12-29 10:16
|
||||
|
||||
import abid_utils.models
|
||||
import charidfield.fields
|
||||
import django.db.models.deletion
|
||||
from django.conf import settings
|
||||
from django.db import migrations, models
|
||||
|
||||
|
||||
class Migration(migrations.Migration):
    """Initial ``seeds`` schema: create the ``Seed`` model.

    A seed is unique per ``(created_by, uri, extractor)`` combination.
    """

    initial = True

    dependencies = [
        migrations.swappable_dependency(settings.AUTH_USER_MODEL),
    ]

    operations = [
        migrations.CreateModel(
            name='Seed',
            fields=[
                ('num_uses_failed', models.PositiveIntegerField(default=0)),
                ('num_uses_succeeded', models.PositiveIntegerField(default=0)),
                (
                    'id',
                    models.UUIDField(
                        default=None,
                        editable=False,
                        primary_key=True,
                        serialize=False,
                        unique=True,
                        verbose_name='ID',
                    ),
                ),
                (
                    'abid',
                    # NOTE(review): the help_text example shows an ``snp_`` id while
                    # this field's prefix is ``src_`` -- presumably shared wording;
                    # confirm against the model definition.
                    charidfield.fields.CharIDField(
                        blank=True,
                        db_index=True,
                        default=None,
                        help_text='ABID-format identifier for this entity (e.g. snp_01BJQMF54D093DXEAWZ6JYRPAQ)',
                        max_length=30,
                        null=True,
                        prefix='src_',
                        unique=True,
                    ),
                ),
                ('uri', models.URLField(max_length=2000)),
                ('extractor', models.CharField(default='auto', max_length=32)),
                ('tags_str', models.CharField(blank=True, default='', max_length=255)),
                ('config', models.JSONField(default=dict)),
                (
                    'created_at',
                    abid_utils.models.AutoDateTimeField(db_index=True, default=None),
                ),
                ('modified_at', models.DateTimeField(auto_now=True)),
                (
                    'created_by',
                    models.ForeignKey(
                        default=None,
                        on_delete=django.db.models.deletion.CASCADE,
                        to=settings.AUTH_USER_MODEL,
                    ),
                ),
            ],
            options={
                'verbose_name': 'Seed',
                'verbose_name_plural': 'Seeds',
                'unique_together': {('created_by', 'uri', 'extractor')},
            },
        ),
    ]
|
||||
Reference in New Issue
Block a user