Files
ArchiveBox/archivebox/api/v1_crawls.py
Nick Sweeting c1335fed37 Remove ABID system and KVTag model - use UUIDv7 IDs exclusively
This commit completes the simplification of the ID system by:

- Removing the ABID (ArchiveBox ID) system entirely
- Removing the base_models/abid.py file
- Removing KVTag model in favor of the existing Tag model in core/models.py
- Simplifying all models to use standard UUIDv7 primary keys
- Removing ABID-related admin functionality
- Cleaning up commented-out ABID code from views and statemachines
- Deleting migration files for ABID field removal (no longer needed)

All models now use simple UUIDv7 ids via `id = models.UUIDField(primary_key=True, default=uuid7)`

Note: Old migrations containing ABID references are preserved for database
migration history compatibility.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2025-12-24 06:13:49 -08:00

119 lines
3.2 KiB
Python

__package__ = 'archivebox.api'
from uuid import UUID
from typing import List
from datetime import datetime
from django.db.models import Q
from django.contrib.auth import get_user_model
from ninja import Router, Schema
from core.models import Snapshot
from crawls.models import Seed, Crawl
from .auth import API_AUTH_METHODS
router = Router(tags=['Crawl Models'], auth=API_AUTH_METHODS)
class SeedSchema(Schema):
TYPE: str = 'crawls.models.Seed'
id: UUID
modified_at: datetime
created_at: datetime
created_by_id: str
created_by_username: str
uri: str
tags_str: str
config: dict
@staticmethod
def resolve_created_by_id(obj):
return str(obj.created_by_id)
@staticmethod
def resolve_created_by_username(obj):
User = get_user_model()
return User.objects.get(id=obj.created_by_id).username
@router.get("/seeds", response=List[SeedSchema], url_name="get_seeds")
def get_seeds(request):
return Seed.objects.all().distinct()
@router.get("/seed/{seed_id}", response=SeedSchema, url_name="get_seed")
def get_seed(request, seed_id: str):
seed = None
request.with_snapshots = False
request.with_archiveresults = False
try:
seed = Seed.objects.get(Q(id__icontains=seed_id))
except Exception:
pass
return seed
class CrawlSchema(Schema):
TYPE: str = 'crawls.models.Crawl'
id: UUID
modified_at: datetime
created_at: datetime
created_by_id: str
created_by_username: str
status: str
retry_at: datetime | None
seed: SeedSchema
max_depth: int
# snapshots: List[SnapshotSchema]
@staticmethod
def resolve_created_by_id(obj):
return str(obj.created_by_id)
@staticmethod
def resolve_created_by_username(obj):
User = get_user_model()
return User.objects.get(id=obj.created_by_id).username
@staticmethod
def resolve_snapshots(obj, context):
if context['request'].with_snapshots:
return obj.snapshot_set.all().distinct()
return Snapshot.objects.none()
@router.get("/crawls", response=List[CrawlSchema], url_name="get_crawls")
def get_crawls(request):
return Crawl.objects.all().distinct()
@router.get("/crawl/{crawl_id}", response=CrawlSchema | str, url_name="get_crawl")
def get_crawl(request, crawl_id: str, as_rss: bool=False, with_snapshots: bool=False, with_archiveresults: bool=False):
"""Get a specific Crawl by id."""
request.with_snapshots = with_snapshots
request.with_archiveresults = with_archiveresults
crawl = Crawl.objects.get(id__icontains=crawl_id)
if crawl and as_rss:
# return snapshots as XML rss feed
urls = [
{'url': snapshot.url, 'title': snapshot.title, 'bookmarked_at': snapshot.bookmarked_at, 'tags': snapshot.tags_str}
for snapshot in crawl.snapshot_set.all()
]
xml = '<rss version="2.0"><channel>'
for url in urls:
xml += f'<item><url>{url["url"]}</url><title>{url["title"]}</title><bookmarked_at>{url["bookmarked_at"]}</bookmarked_at><tags>{url["tags"]}</tags></item>'
xml += '</channel></rss>'
return xml
return crawl