Remove ABID system and KVTag model - use UUIDv7 IDs exclusively

This commit completes the simplification of the ID system by:

- Removing the ABID (ArchiveBox ID) system entirely
- Removing the base_models/abid.py file
- Removing the KVTag model in favor of the existing Tag model in core/models.py
- Simplifying all models to use standard UUIDv7 primary keys
- Removing ABID-related admin functionality
- Cleaning up commented-out ABID code from views and statemachines
- Deleting migration files for ABID field removal (no longer needed)

All models now use simple UUIDv7 ids via `id = models.UUIDField(primary_key=True, default=uuid7)`.

Note: Old migrations containing ABID references are preserved for database migration history compatibility.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-04-05 07:17:52 +10:00 · 2025-12-24 06:13:49 -08:00
parent c3024815f3
commit c1335fed37
26 changed files with 497 additions and 3537 deletions
--- a/archivebox/api/v1_core.py
+++ b/archivebox/api/v1_core.py
@@ -15,24 +15,18 @@ from ninja.pagination import paginate, PaginationBase
 from ninja.errors import HttpError

 from core.models import Snapshot, ArchiveResult, Tag
-from api.models import APIToken, OutboundWebhook
 from api.v1_crawls import CrawlSchema, SeedSchema

-# from .auth import API_AUTH_METHODS
-
-

 router = Router(tags=['Core Models'])


-
 class CustomPagination(PaginationBase):
    class Input(Schema):
        limit: int = 200
        offset: int = 0
        page: int = 0

-
    class Output(Schema):
        total_items: int
        total_pages: int
@@ -64,87 +58,67 @@ class CustomPagination(PaginationBase):

 class MinimalArchiveResultSchema(Schema):
    TYPE: str = 'core.models.ArchiveResult'
-
    id: UUID
-    abid: str
-
    created_at: datetime | None
    modified_at: datetime | None
    created_by_id: str
    created_by_username: str
-
    status: str
    retry_at: datetime | None
-    
    extractor: str
    cmd_version: str | None
    cmd: list[str] | None
    pwd: str | None
    output: str | None
-
    start_ts: datetime | None
    end_ts: datetime | None

    @staticmethod
    def resolve_created_by_id(obj):
        return str(obj.created_by_id)
-    
+
    @staticmethod
    def resolve_created_by_username(obj) -> str:
        User = get_user_model()
        return User.objects.filter(pk=obj.created_by_id).values_list('username', flat=True)[0]

-    @staticmethod
-    def resolve_abid(obj):
-        return str(obj.ABID)
+
+class ArchiveResultSchema(MinimalArchiveResultSchema):
+    TYPE: str = 'core.models.ArchiveResult'
+    snapshot_id: UUID
+    snapshot_timestamp: str
+    snapshot_url: str
+    snapshot_tags: List[str]

    @staticmethod
    def resolve_snapshot_timestamp(obj):
        return obj.snapshot.timestamp
-    
+
    @staticmethod
    def resolve_snapshot_url(obj):
        return obj.snapshot.url

    @staticmethod
    def resolve_snapshot_id(obj):
-        return str(obj.snapshot_id)
-    
-    @staticmethod
-    def resolve_snapshot_abid(obj):
-        return str(obj.snapshot.ABID)
+        return obj.snapshot_id

    @staticmethod
    def resolve_snapshot_tags(obj):
        return sorted(tag.name for tag in obj.snapshot.tags.all())

-class ArchiveResultSchema(MinimalArchiveResultSchema):
-    TYPE: str = 'core.models.ArchiveResult'
-
-    # ... Extends MinimalArchiveResultSchema fields ...
-
-    snapshot_id: UUID
-    snapshot_abid: str
-    snapshot_timestamp: str
-    snapshot_url: str
-    snapshot_tags: List[str]
-

 class ArchiveResultFilterSchema(FilterSchema):
-    id: Optional[str] = Field(None, q=['id__startswith', 'abid__icontains', 'snapshot__id__startswith', 'snapshot__abid__icontains', 'snapshot__timestamp__startswith'])
-
-    search: Optional[str] = Field(None, q=['snapshot__url__icontains', 'snapshot__title__icontains', 'snapshot__tags__name__icontains', 'extractor', 'output__icontains', 'id__startswith', 'abid__icontains', 'snapshot__id__startswith', 'snapshot__abid__icontains', 'snapshot__timestamp__startswith'])
-    snapshot_id: Optional[str] = Field(None, q=['snapshot__id__startswith', 'snapshot__abid__icontains', 'snapshot__timestamp__startswith'])
+    id: Optional[str] = Field(None, q=['id__startswith', 'snapshot__id__startswith', 'snapshot__timestamp__startswith'])
+    search: Optional[str] = Field(None, q=['snapshot__url__icontains', 'snapshot__title__icontains', 'snapshot__tags__name__icontains', 'extractor', 'output__icontains', 'id__startswith', 'snapshot__id__startswith', 'snapshot__timestamp__startswith'])
+    snapshot_id: Optional[str] = Field(None, q=['snapshot__id__startswith', 'snapshot__timestamp__startswith'])
    snapshot_url: Optional[str] = Field(None, q='snapshot__url__icontains')
    snapshot_tag: Optional[str] = Field(None, q='snapshot__tags__name__icontains')
-    
    status: Optional[str] = Field(None, q='status')
    output: Optional[str] = Field(None, q='output__icontains')
    extractor: Optional[str] = Field(None, q='extractor__icontains')
    cmd: Optional[str] = Field(None, q='cmd__0__icontains')
    pwd: Optional[str] = Field(None, q='pwd__icontains')
    cmd_version: Optional[str] = Field(None, q='cmd_version')
-
    created_at: Optional[datetime] = Field(None, q='created_at')
    created_at__gte: Optional[datetime] = Field(None, q='created_at__gte')
    created_at__lt: Optional[datetime] = Field(None, q='created_at__lt')
@@ -154,99 +128,49 @@ class ArchiveResultFilterSchema(FilterSchema):
@paginate(CustomPagination)
 def get_archiveresults(request, filters: ArchiveResultFilterSchema = Query(...)):
    """List all ArchiveResult entries matching these filters."""
-    qs = ArchiveResult.objects.all()
-    results = filters.filter(qs).distinct()
-    return results
+    return filters.filter(ArchiveResult.objects.all()).distinct()


@router.get("/archiveresult/{archiveresult_id}", response=ArchiveResultSchema, url_name="get_archiveresult")
 def get_archiveresult(request, archiveresult_id: str):
-    """Get a specific ArchiveResult by id or abid."""
-    return ArchiveResult.objects.get(Q(id__icontains=archiveresult_id) | Q(abid__icontains=archiveresult_id))
-
-
-# @router.post("/archiveresult", response=ArchiveResultSchema)
-# def create_archiveresult(request, payload: ArchiveResultSchema):
-#     archiveresult = ArchiveResult.objects.create(**payload.dict())
-#     return archiveresult
-#
-# @router.put("/archiveresult/{archiveresult_id}", response=ArchiveResultSchema)
-# def update_archiveresult(request, archiveresult_id: str, payload: ArchiveResultSchema):
-#     archiveresult = get_object_or_404(ArchiveResult, id=archiveresult_id)
-#   
-#     for attr, value in payload.dict().items():
-#         setattr(archiveresult, attr, value)
-#     archiveresult.save()
-#
-#     return archiveresult
-#
-# @router.delete("/archiveresult/{archiveresult_id}")
-# def delete_archiveresult(request, archiveresult_id: str):
-#     archiveresult = get_object_or_404(ArchiveResult, id=archiveresult_id)
-#     archiveresult.delete()
-#     return {"success": True}
-
-
-
+    """Get a specific ArchiveResult by id."""
+    return ArchiveResult.objects.get(Q(id__icontains=archiveresult_id))


 ### Snapshot #########################################################################

-
 class SnapshotSchema(Schema):
    TYPE: str = 'core.models.Snapshot'
-
    id: UUID
-    abid: str
-
    created_by_id: str
    created_by_username: str
    created_at: datetime
    modified_at: datetime
-    
    status: str
    retry_at: datetime | None
-
    bookmarked_at: datetime
    downloaded_at: Optional[datetime]
-
    url: str
    tags: List[str]
    title: Optional[str]
    timestamp: str
    archive_path: str
-
-    # url_for_admin: str
-    # url_for_view: str
-
    num_archiveresults: int
    archiveresults: List[MinimalArchiveResultSchema]

    @staticmethod
    def resolve_created_by_id(obj):
        return str(obj.created_by_id)
-    
+
    @staticmethod
    def resolve_created_by_username(obj):
        User = get_user_model()
        return User.objects.get(id=obj.created_by_id).username

-    @staticmethod
-    def resolve_abid(obj):
-        return str(obj.ABID)
-
    @staticmethod
    def resolve_tags(obj):
        return sorted(tag.name for tag in obj.tags.all())

-    # @staticmethod
-    # def resolve_url_for_admin(obj):
-    #     return f"/admin/core/snapshot/{obj.id}/change/"
-    
-    # @staticmethod
-    # def resolve_url_for_view(obj):
-    #     return f"/{obj.archive_path}"
-
    @staticmethod
    def resolve_num_archiveresults(obj, context):
        return obj.archiveresult_set.all().distinct().count()
@@ -259,98 +183,51 @@ class SnapshotSchema(Schema):


 class SnapshotFilterSchema(FilterSchema):
-    id: Optional[str] = Field(None, q=['id__icontains', 'abid__icontains', 'timestamp__startswith'])
-    abid: Optional[str] = Field(None, q='abid__icontains')
-
+    id: Optional[str] = Field(None, q=['id__icontains', 'timestamp__startswith'])
    created_by_id: str = Field(None, q='created_by_id')
    created_by_username: str = Field(None, q='created_by__username__icontains')
-
    created_at__gte: datetime = Field(None, q='created_at__gte')
    created_at__lt: datetime = Field(None, q='created_at__lt')
    created_at: datetime = Field(None, q='created_at')
    modified_at: datetime = Field(None, q='modified_at')
    modified_at__gte: datetime = Field(None, q='modified_at__gte')
    modified_at__lt: datetime = Field(None, q='modified_at__lt')
-
-    search: Optional[str] = Field(None, q=['url__icontains', 'title__icontains', 'tags__name__icontains', 'id__icontains', 'abid__icontains', 'timestamp__startswith'])
+    search: Optional[str] = Field(None, q=['url__icontains', 'title__icontains', 'tags__name__icontains', 'id__icontains', 'timestamp__startswith'])
    url: Optional[str] = Field(None, q='url')
    tag: Optional[str] = Field(None, q='tags__name')
    title: Optional[str] = Field(None, q='title__icontains')
    timestamp: Optional[str] = Field(None, q='timestamp__startswith')
-    
    bookmarked_at__gte: Optional[datetime] = Field(None, q='bookmarked_at__gte')
    bookmarked_at__lt: Optional[datetime] = Field(None, q='bookmarked_at__lt')


-
@router.get("/snapshots", response=List[SnapshotSchema], url_name="get_snapshots")
@paginate(CustomPagination)
-def get_snapshots(request, filters: SnapshotFilterSchema = Query(...), with_archiveresults: bool=False):
+def get_snapshots(request, filters: SnapshotFilterSchema = Query(...), with_archiveresults: bool = False):
    """List all Snapshot entries matching these filters."""
    request.with_archiveresults = with_archiveresults
+    return filters.filter(Snapshot.objects.all()).distinct()

-    qs = Snapshot.objects.all()
-    results = filters.filter(qs).distinct()
-    return results

@router.get("/snapshot/{snapshot_id}", response=SnapshotSchema, url_name="get_snapshot")
-def get_snapshot(request, snapshot_id: str, with_archiveresults: bool=True):
-    """Get a specific Snapshot by abid or id."""
+def get_snapshot(request, snapshot_id: str, with_archiveresults: bool = True):
+    """Get a specific Snapshot by id."""
    request.with_archiveresults = with_archiveresults
-    snapshot = None
    try:
-        snapshot = Snapshot.objects.get(Q(abid__startswith=snapshot_id) | Q(id__startswith=snapshot_id) | Q(timestamp__startswith=snapshot_id))
+        return Snapshot.objects.get(Q(id__startswith=snapshot_id) | Q(timestamp__startswith=snapshot_id))
    except Snapshot.DoesNotExist:
-        pass
-
-    try:
-        snapshot = snapshot or Snapshot.objects.get(Q(abid__icontains=snapshot_id) | Q(id__icontains=snapshot_id))
-    except Snapshot.DoesNotExist:
-        pass
-
-    if not snapshot:
-        raise Snapshot.DoesNotExist
-
-    return snapshot
-
-
-# @router.post("/snapshot", response=SnapshotSchema)
-# def create_snapshot(request, payload: SnapshotSchema):
-#     snapshot = Snapshot.objects.create(**payload.dict())
-#     return snapshot
-#
-# @router.put("/snapshot/{snapshot_id}", response=SnapshotSchema)
-# def update_snapshot(request, snapshot_id: str, payload: SnapshotSchema):
-#     snapshot = get_object_or_404(Snapshot, id=snapshot_id)
-#
-#     for attr, value in payload.dict().items():
-#         setattr(snapshot, attr, value)
-#     snapshot.save()
-#
-#     return snapshot
-#
-# @router.delete("/snapshot/{snapshot_id}")
-# def delete_snapshot(request, snapshot_id: str):
-#     snapshot = get_object_or_404(Snapshot, id=snapshot_id)
-#     snapshot.delete()
-#     return {"success": True}
-
+        return Snapshot.objects.get(Q(id__icontains=snapshot_id))


 ### Tag #########################################################################

-
 class TagSchema(Schema):
    TYPE: str = 'core.models.Tag'
-
    id: UUID
-    abid: str
-
    modified_at: datetime
    created_at: datetime
    created_by_id: str
    created_by_username: str
-
    name: str
    slug: str
    num_snapshots: int
@@ -359,12 +236,12 @@ class TagSchema(Schema):
    @staticmethod
    def resolve_created_by_id(obj):
        return str(obj.created_by_id)
-    
+
    @staticmethod
    def resolve_created_by_username(obj):
        User = get_user_model()
        return User.objects.get(id=obj.created_by_id).username
-    
+
    @staticmethod
    def resolve_num_snapshots(obj, context):
        return obj.snapshot_set.all().distinct().count()
@@ -375,6 +252,7 @@ class TagSchema(Schema):
            return obj.snapshot_set.all().distinct()
        return Snapshot.objects.none()

+
@router.get("/tags", response=List[TagSchema], url_name="get_tags")
@paginate(CustomPagination)
 def get_tags(request):
@@ -382,65 +260,45 @@ def get_tags(request):
    request.with_archiveresults = False
    return Tag.objects.all().distinct()

+
@router.get("/tag/{tag_id}", response=TagSchema, url_name="get_tag")
-def get_tag(request, tag_id: str, with_snapshots: bool=True):
+def get_tag(request, tag_id: str, with_snapshots: bool = True):
    request.with_snapshots = with_snapshots
    request.with_archiveresults = False
-    tag = None
    try:
-        tag = Tag.objects.get(abid__icontains=tag_id)
+        return Tag.objects.get(id__icontains=tag_id)
    except (Tag.DoesNotExist, ValidationError):
-        pass
+        return Tag.objects.get(slug__icontains=tag_id)

-    try:
-        tag = tag or Tag.objects.get(id__icontains=tag_id)
-    except (Tag.DoesNotExist, ValidationError):
-        pass
-    return tag

-@router.get("/any/{abid}", response=Union[SnapshotSchema, ArchiveResultSchema, TagSchema, SeedSchema, CrawlSchema], url_name="get_any", summary="Get any object by its ABID or ID (e.g. snapshot, archiveresult, tag, seed, crawl, etc.)")
-def get_any(request, abid: str):
-    """Get any object by its ABID or ID (e.g. snapshot, archiveresult, tag, seed, crawl, etc.)."""
-    
+@router.get("/any/{id}", response=Union[SnapshotSchema, ArchiveResultSchema, TagSchema, SeedSchema, CrawlSchema], url_name="get_any", summary="Get any object by its ID")
+def get_any(request, id: str):
+    """Get any object by its ID (e.g. snapshot, archiveresult, tag, seed, crawl, etc.)."""
    request.with_snapshots = False
    request.with_archiveresults = False

-    if abid.startswith(APIToken.abid_prefix):
-        raise HttpError(403, 'APIToken objects are not accessible via REST API')
-    
-    if abid.startswith(OutboundWebhook.abid_prefix):
-        raise HttpError(403, 'OutboundWebhook objects are not accessible via REST API')
-    
-    response = None
-    try:
-        response = response or get_snapshot(request, abid)
-    except Exception:
-        pass
+    for getter in [get_snapshot, get_archiveresult, get_tag]:
+        try:
+            response = getter(request, id)
+            if response:
+                return redirect(f"/api/v1/{response._meta.app_label}/{response._meta.model_name}/{response.id}?{request.META['QUERY_STRING']}")
+        except Exception:
+            pass

-    try:
-        response = response or get_archiveresult(request, abid)
-    except Exception:
-        pass
-
-    try:
-        response = response or get_tag(request, abid)
-    except Exception:
-        pass
-    
    try:
        from api.v1_crawls import get_seed
-        response = response or get_seed(request, abid)
+        response = get_seed(request, id)
+        if response:
+            return redirect(f"/api/v1/{response._meta.app_label}/{response._meta.model_name}/{response.id}?{request.META['QUERY_STRING']}")
    except Exception:
        pass
-    
+
    try:
        from api.v1_crawls import get_crawl
-        response = response or get_crawl(request, abid)
+        response = get_crawl(request, id)
+        if response:
+            return redirect(f"/api/v1/{response._meta.app_label}/{response._meta.model_name}/{response.id}?{request.META['QUERY_STRING']}")
    except Exception:
        pass
-    
-    if response:
-        app_label, model_name = response._meta.app_label, response._meta.model_name
-        return redirect(f"/api/v1/{app_label}/{model_name}/{response.abid}?{request.META['QUERY_STRING']}")

-    raise HttpError(404, 'Object with given ABID not found')
+    raise HttpError(404, 'Object with given ID not found')