Remove ABID system and KVTag model - use UUIDv7 IDs exclusively

This commit completes the simplification of the ID system by:

- Removing the ABID (ArchiveBox ID) system entirely
- Removing the base_models/abid.py file
- Removing KVTag model in favor of the existing Tag model in core/models.py
- Simplifying all models to use standard UUIDv7 primary keys
- Removing ABID-related admin functionality
- Cleaning up commented-out ABID code from views and statemachines
- Deleting migration files for ABID field removal (no longer needed)

All models now use simple UUIDv7 ids via `id = models.UUIDField(primary_key=True, default=uuid7)`

Note: Old migrations containing ABID references are preserved for database
migration history compatibility.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
Nick Sweeting
2025-12-24 06:13:49 -08:00
parent c3024815f3
commit c1335fed37
26 changed files with 497 additions and 3537 deletions

View File

@@ -15,24 +15,18 @@ from ninja.pagination import paginate, PaginationBase
from ninja.errors import HttpError
from core.models import Snapshot, ArchiveResult, Tag
from api.models import APIToken, OutboundWebhook
from api.v1_crawls import CrawlSchema, SeedSchema
# from .auth import API_AUTH_METHODS
router = Router(tags=['Core Models'])
class CustomPagination(PaginationBase):
class Input(Schema):
limit: int = 200
offset: int = 0
page: int = 0
class Output(Schema):
total_items: int
total_pages: int
@@ -64,87 +58,67 @@ class CustomPagination(PaginationBase):
class MinimalArchiveResultSchema(Schema):
TYPE: str = 'core.models.ArchiveResult'
id: UUID
abid: str
created_at: datetime | None
modified_at: datetime | None
created_by_id: str
created_by_username: str
status: str
retry_at: datetime | None
extractor: str
cmd_version: str | None
cmd: list[str] | None
pwd: str | None
output: str | None
start_ts: datetime | None
end_ts: datetime | None
@staticmethod
def resolve_created_by_id(obj):
return str(obj.created_by_id)
@staticmethod
def resolve_created_by_username(obj) -> str:
User = get_user_model()
return User.objects.filter(pk=obj.created_by_id).values_list('username', flat=True)[0]
@staticmethod
def resolve_abid(obj):
return str(obj.ABID)
class ArchiveResultSchema(MinimalArchiveResultSchema):
TYPE: str = 'core.models.ArchiveResult'
snapshot_id: UUID
snapshot_timestamp: str
snapshot_url: str
snapshot_tags: List[str]
@staticmethod
def resolve_snapshot_timestamp(obj):
return obj.snapshot.timestamp
@staticmethod
def resolve_snapshot_url(obj):
return obj.snapshot.url
@staticmethod
def resolve_snapshot_id(obj):
return str(obj.snapshot_id)
@staticmethod
def resolve_snapshot_abid(obj):
return str(obj.snapshot.ABID)
return obj.snapshot_id
@staticmethod
def resolve_snapshot_tags(obj):
return sorted(tag.name for tag in obj.snapshot.tags.all())
class ArchiveResultSchema(MinimalArchiveResultSchema):
TYPE: str = 'core.models.ArchiveResult'
# ... Extends MinimalArchiveResultSchema fields ...
snapshot_id: UUID
snapshot_abid: str
snapshot_timestamp: str
snapshot_url: str
snapshot_tags: List[str]
class ArchiveResultFilterSchema(FilterSchema):
id: Optional[str] = Field(None, q=['id__startswith', 'abid__icontains', 'snapshot__id__startswith', 'snapshot__abid__icontains', 'snapshot__timestamp__startswith'])
search: Optional[str] = Field(None, q=['snapshot__url__icontains', 'snapshot__title__icontains', 'snapshot__tags__name__icontains', 'extractor', 'output__icontains', 'id__startswith', 'abid__icontains', 'snapshot__id__startswith', 'snapshot__abid__icontains', 'snapshot__timestamp__startswith'])
snapshot_id: Optional[str] = Field(None, q=['snapshot__id__startswith', 'snapshot__abid__icontains', 'snapshot__timestamp__startswith'])
id: Optional[str] = Field(None, q=['id__startswith', 'snapshot__id__startswith', 'snapshot__timestamp__startswith'])
search: Optional[str] = Field(None, q=['snapshot__url__icontains', 'snapshot__title__icontains', 'snapshot__tags__name__icontains', 'extractor', 'output__icontains', 'id__startswith', 'snapshot__id__startswith', 'snapshot__timestamp__startswith'])
snapshot_id: Optional[str] = Field(None, q=['snapshot__id__startswith', 'snapshot__timestamp__startswith'])
snapshot_url: Optional[str] = Field(None, q='snapshot__url__icontains')
snapshot_tag: Optional[str] = Field(None, q='snapshot__tags__name__icontains')
status: Optional[str] = Field(None, q='status')
output: Optional[str] = Field(None, q='output__icontains')
extractor: Optional[str] = Field(None, q='extractor__icontains')
cmd: Optional[str] = Field(None, q='cmd__0__icontains')
pwd: Optional[str] = Field(None, q='pwd__icontains')
cmd_version: Optional[str] = Field(None, q='cmd_version')
created_at: Optional[datetime] = Field(None, q='created_at')
created_at__gte: Optional[datetime] = Field(None, q='created_at__gte')
created_at__lt: Optional[datetime] = Field(None, q='created_at__lt')
@@ -154,99 +128,49 @@ class ArchiveResultFilterSchema(FilterSchema):
@paginate(CustomPagination)
def get_archiveresults(request, filters: ArchiveResultFilterSchema = Query(...)):
"""List all ArchiveResult entries matching these filters."""
qs = ArchiveResult.objects.all()
results = filters.filter(qs).distinct()
return results
return filters.filter(ArchiveResult.objects.all()).distinct()
@router.get("/archiveresult/{archiveresult_id}", response=ArchiveResultSchema, url_name="get_archiveresult")
def get_archiveresult(request, archiveresult_id: str):
"""Get a specific ArchiveResult by id or abid."""
return ArchiveResult.objects.get(Q(id__icontains=archiveresult_id) | Q(abid__icontains=archiveresult_id))
# @router.post("/archiveresult", response=ArchiveResultSchema)
# def create_archiveresult(request, payload: ArchiveResultSchema):
# archiveresult = ArchiveResult.objects.create(**payload.dict())
# return archiveresult
#
# @router.put("/archiveresult/{archiveresult_id}", response=ArchiveResultSchema)
# def update_archiveresult(request, archiveresult_id: str, payload: ArchiveResultSchema):
# archiveresult = get_object_or_404(ArchiveResult, id=archiveresult_id)
#
# for attr, value in payload.dict().items():
# setattr(archiveresult, attr, value)
# archiveresult.save()
#
# return archiveresult
#
# @router.delete("/archiveresult/{archiveresult_id}")
# def delete_archiveresult(request, archiveresult_id: str):
# archiveresult = get_object_or_404(ArchiveResult, id=archiveresult_id)
# archiveresult.delete()
# return {"success": True}
"""Get a specific ArchiveResult by id."""
return ArchiveResult.objects.get(Q(id__icontains=archiveresult_id))
### Snapshot #########################################################################
class SnapshotSchema(Schema):
TYPE: str = 'core.models.Snapshot'
id: UUID
abid: str
created_by_id: str
created_by_username: str
created_at: datetime
modified_at: datetime
status: str
retry_at: datetime | None
bookmarked_at: datetime
downloaded_at: Optional[datetime]
url: str
tags: List[str]
title: Optional[str]
timestamp: str
archive_path: str
# url_for_admin: str
# url_for_view: str
num_archiveresults: int
archiveresults: List[MinimalArchiveResultSchema]
@staticmethod
def resolve_created_by_id(obj):
return str(obj.created_by_id)
@staticmethod
def resolve_created_by_username(obj):
User = get_user_model()
return User.objects.get(id=obj.created_by_id).username
@staticmethod
def resolve_abid(obj):
return str(obj.ABID)
@staticmethod
def resolve_tags(obj):
return sorted(tag.name for tag in obj.tags.all())
# @staticmethod
# def resolve_url_for_admin(obj):
# return f"/admin/core/snapshot/{obj.id}/change/"
# @staticmethod
# def resolve_url_for_view(obj):
# return f"/{obj.archive_path}"
@staticmethod
def resolve_num_archiveresults(obj, context):
return obj.archiveresult_set.all().distinct().count()
@@ -259,98 +183,51 @@ class SnapshotSchema(Schema):
class SnapshotFilterSchema(FilterSchema):
id: Optional[str] = Field(None, q=['id__icontains', 'abid__icontains', 'timestamp__startswith'])
abid: Optional[str] = Field(None, q='abid__icontains')
id: Optional[str] = Field(None, q=['id__icontains', 'timestamp__startswith'])
created_by_id: str = Field(None, q='created_by_id')
created_by_username: str = Field(None, q='created_by__username__icontains')
created_at__gte: datetime = Field(None, q='created_at__gte')
created_at__lt: datetime = Field(None, q='created_at__lt')
created_at: datetime = Field(None, q='created_at')
modified_at: datetime = Field(None, q='modified_at')
modified_at__gte: datetime = Field(None, q='modified_at__gte')
modified_at__lt: datetime = Field(None, q='modified_at__lt')
search: Optional[str] = Field(None, q=['url__icontains', 'title__icontains', 'tags__name__icontains', 'id__icontains', 'abid__icontains', 'timestamp__startswith'])
search: Optional[str] = Field(None, q=['url__icontains', 'title__icontains', 'tags__name__icontains', 'id__icontains', 'timestamp__startswith'])
url: Optional[str] = Field(None, q='url')
tag: Optional[str] = Field(None, q='tags__name')
title: Optional[str] = Field(None, q='title__icontains')
timestamp: Optional[str] = Field(None, q='timestamp__startswith')
bookmarked_at__gte: Optional[datetime] = Field(None, q='bookmarked_at__gte')
bookmarked_at__lt: Optional[datetime] = Field(None, q='bookmarked_at__lt')
@router.get("/snapshots", response=List[SnapshotSchema], url_name="get_snapshots")
@paginate(CustomPagination)
def get_snapshots(request, filters: SnapshotFilterSchema = Query(...), with_archiveresults: bool=False):
def get_snapshots(request, filters: SnapshotFilterSchema = Query(...), with_archiveresults: bool = False):
"""List all Snapshot entries matching these filters."""
request.with_archiveresults = with_archiveresults
return filters.filter(Snapshot.objects.all()).distinct()
qs = Snapshot.objects.all()
results = filters.filter(qs).distinct()
return results
@router.get("/snapshot/{snapshot_id}", response=SnapshotSchema, url_name="get_snapshot")
def get_snapshot(request, snapshot_id: str, with_archiveresults: bool=True):
"""Get a specific Snapshot by abid or id."""
def get_snapshot(request, snapshot_id: str, with_archiveresults: bool = True):
"""Get a specific Snapshot by id."""
request.with_archiveresults = with_archiveresults
snapshot = None
try:
snapshot = Snapshot.objects.get(Q(abid__startswith=snapshot_id) | Q(id__startswith=snapshot_id) | Q(timestamp__startswith=snapshot_id))
return Snapshot.objects.get(Q(id__startswith=snapshot_id) | Q(timestamp__startswith=snapshot_id))
except Snapshot.DoesNotExist:
pass
try:
snapshot = snapshot or Snapshot.objects.get(Q(abid__icontains=snapshot_id) | Q(id__icontains=snapshot_id))
except Snapshot.DoesNotExist:
pass
if not snapshot:
raise Snapshot.DoesNotExist
return snapshot
# @router.post("/snapshot", response=SnapshotSchema)
# def create_snapshot(request, payload: SnapshotSchema):
# snapshot = Snapshot.objects.create(**payload.dict())
# return snapshot
#
# @router.put("/snapshot/{snapshot_id}", response=SnapshotSchema)
# def update_snapshot(request, snapshot_id: str, payload: SnapshotSchema):
# snapshot = get_object_or_404(Snapshot, id=snapshot_id)
#
# for attr, value in payload.dict().items():
# setattr(snapshot, attr, value)
# snapshot.save()
#
# return snapshot
#
# @router.delete("/snapshot/{snapshot_id}")
# def delete_snapshot(request, snapshot_id: str):
# snapshot = get_object_or_404(Snapshot, id=snapshot_id)
# snapshot.delete()
# return {"success": True}
return Snapshot.objects.get(Q(id__icontains=snapshot_id))
### Tag #########################################################################
class TagSchema(Schema):
TYPE: str = 'core.models.Tag'
id: UUID
abid: str
modified_at: datetime
created_at: datetime
created_by_id: str
created_by_username: str
name: str
slug: str
num_snapshots: int
@@ -359,12 +236,12 @@ class TagSchema(Schema):
@staticmethod
def resolve_created_by_id(obj):
return str(obj.created_by_id)
@staticmethod
def resolve_created_by_username(obj):
User = get_user_model()
return User.objects.get(id=obj.created_by_id).username
@staticmethod
def resolve_num_snapshots(obj, context):
return obj.snapshot_set.all().distinct().count()
@@ -375,6 +252,7 @@ class TagSchema(Schema):
return obj.snapshot_set.all().distinct()
return Snapshot.objects.none()
@router.get("/tags", response=List[TagSchema], url_name="get_tags")
@paginate(CustomPagination)
def get_tags(request):
@@ -382,65 +260,45 @@ def get_tags(request):
request.with_archiveresults = False
return Tag.objects.all().distinct()
@router.get("/tag/{tag_id}", response=TagSchema, url_name="get_tag")
def get_tag(request, tag_id: str, with_snapshots: bool=True):
def get_tag(request, tag_id: str, with_snapshots: bool = True):
request.with_snapshots = with_snapshots
request.with_archiveresults = False
tag = None
try:
tag = Tag.objects.get(abid__icontains=tag_id)
return Tag.objects.get(id__icontains=tag_id)
except (Tag.DoesNotExist, ValidationError):
pass
return Tag.objects.get(slug__icontains=tag_id)
try:
tag = tag or Tag.objects.get(id__icontains=tag_id)
except (Tag.DoesNotExist, ValidationError):
pass
return tag
@router.get("/any/{abid}", response=Union[SnapshotSchema, ArchiveResultSchema, TagSchema, SeedSchema, CrawlSchema], url_name="get_any", summary="Get any object by its ABID or ID (e.g. snapshot, archiveresult, tag, seed, crawl, etc.)")
def get_any(request, abid: str):
"""Get any object by its ABID or ID (e.g. snapshot, archiveresult, tag, seed, crawl, etc.)."""
@router.get("/any/{id}", response=Union[SnapshotSchema, ArchiveResultSchema, TagSchema, SeedSchema, CrawlSchema], url_name="get_any", summary="Get any object by its ID")
def get_any(request, id: str):
"""Get any object by its ID (e.g. snapshot, archiveresult, tag, seed, crawl, etc.)."""
request.with_snapshots = False
request.with_archiveresults = False
if abid.startswith(APIToken.abid_prefix):
raise HttpError(403, 'APIToken objects are not accessible via REST API')
if abid.startswith(OutboundWebhook.abid_prefix):
raise HttpError(403, 'OutboundWebhook objects are not accessible via REST API')
response = None
try:
response = response or get_snapshot(request, abid)
except Exception:
pass
for getter in [get_snapshot, get_archiveresult, get_tag]:
try:
response = getter(request, id)
if response:
return redirect(f"/api/v1/{response._meta.app_label}/{response._meta.model_name}/{response.id}?{request.META['QUERY_STRING']}")
except Exception:
pass
try:
response = response or get_archiveresult(request, abid)
except Exception:
pass
try:
response = response or get_tag(request, abid)
except Exception:
pass
try:
from api.v1_crawls import get_seed
response = response or get_seed(request, abid)
response = get_seed(request, id)
if response:
return redirect(f"/api/v1/{response._meta.app_label}/{response._meta.model_name}/{response.id}?{request.META['QUERY_STRING']}")
except Exception:
pass
try:
from api.v1_crawls import get_crawl
response = response or get_crawl(request, abid)
response = get_crawl(request, id)
if response:
return redirect(f"/api/v1/{response._meta.app_label}/{response._meta.model_name}/{response.id}?{request.META['QUERY_STRING']}")
except Exception:
pass
if response:
app_label, model_name = response._meta.app_label, response._meta.model_name
return redirect(f"/api/v1/{app_label}/{model_name}/{response.abid}?{request.META['QUERY_STRING']}")
raise HttpError(404, 'Object with given ABID not found')
raise HttpError(404, 'Object with given ID not found')