Remove ABID system and KVTag model - use UUIDv7 IDs exclusively

This commit completes the simplification of the ID system by:

- Removing the ABID (ArchiveBox ID) system entirely
- Removing the base_models/abid.py file
- Removing the KVTag model in favor of the existing Tag model in core/models.py
- Simplifying all models to use standard UUIDv7 primary keys
- Removing ABID-related admin functionality
- Cleaning up commented-out ABID code from views and statemachines
- Deleting migration files for ABID field removal (no longer needed)

All models now use simple UUIDv7 IDs via `id = models.UUIDField(primary_key=True, default=uuid7)`

Note: Old migrations containing ABID references are preserved for database migration history compatibility.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-04-04 23:07:56 +10:00 · 2025-12-24 06:13:49 -08:00
parent c3024815f3
commit c1335fed37
26 changed files with 497 additions and 3537 deletions
--- a/archivebox/core/admin_archiveresults.py
+++ b/archivebox/core/admin_archiveresults.py
@@ -16,7 +16,7 @@ import abx
 from archivebox.config import DATA_DIR
 from archivebox.config.common import SERVER_CONFIG
 from archivebox.misc.paginators import AccelleratedPaginator
-from archivebox.base_models.admin import ABIDModelAdmin
+from archivebox.base_models.admin import BaseModelAdmin


 from core.models import ArchiveResult, Snapshot
@@ -50,7 +50,7 @@ class ArchiveResultInline(admin.TabularInline):
        try:
            return self.parent_model.objects.get(pk=resolved.kwargs['object_id'])
        except (self.parent_model.DoesNotExist, ValidationError):
-            return self.parent_model.objects.get(pk=self.parent_model.id_from_abid(resolved.kwargs['object_id']))
+            return None

    @admin.display(
        description='Completed',
@@ -60,7 +60,7 @@ class ArchiveResultInline(admin.TabularInline):
        return format_html('<p style="white-space: nowrap">{}</p>', obj.end_ts.strftime('%Y-%m-%d %H:%M:%S'))

    def result_id(self, obj):
-        return format_html('<a href="{}"><code style="font-size: 10px">[{}]</code></a>', reverse('admin:core_archiveresult_change', args=(obj.id,)), obj.abid)
+        return format_html('<a href="{}"><code style="font-size: 10px">[{}]</code></a>', reverse('admin:core_archiveresult_change', args=(obj.id,)), str(obj.id)[:8])
    
    def command(self, obj):
        return format_html('<small><code>{}</code></small>', " ".join(obj.cmd or []))
@@ -103,11 +103,11 @@ class ArchiveResultInline(admin.TabularInline):



-class ArchiveResultAdmin(ABIDModelAdmin):
-    list_display = ('abid', 'created_by', 'created_at', 'snapshot_info', 'tags_str', 'status', 'extractor', 'cmd_str', 'output_str')
-    sort_fields = ('abid', 'created_by', 'created_at', 'extractor', 'status')
-    readonly_fields = ('cmd_str', 'snapshot_info', 'tags_str', 'created_at', 'modified_at', 'abid_info', 'output_summary')
-    search_fields = ('id', 'abid', 'snapshot__url', 'extractor', 'output', 'cmd_version', 'cmd', 'snapshot__timestamp')
+class ArchiveResultAdmin(BaseModelAdmin):
+    list_display = ('id', 'created_by', 'created_at', 'snapshot_info', 'tags_str', 'status', 'extractor', 'cmd_str', 'output_str')
+    sort_fields = ('id', 'created_by', 'created_at', 'extractor', 'status')
+    readonly_fields = ('cmd_str', 'snapshot_info', 'tags_str', 'created_at', 'modified_at', 'output_summary')
+    search_fields = ('id', 'snapshot__url', 'extractor', 'output', 'cmd_version', 'cmd', 'snapshot__timestamp')
    fields = ('snapshot', 'extractor', 'status', 'retry_at', 'start_ts', 'end_ts', 'created_by', 'pwd', 'cmd_version', 'cmd', 'output', *readonly_fields)
    autocomplete_fields = ['snapshot']

@@ -135,7 +135,7 @@ class ArchiveResultAdmin(ABIDModelAdmin):
        return format_html(
            '<a href="/archive/{}/index.html"><b><code>[{}]</code></b> &nbsp; {} &nbsp; {}</a><br/>',
            result.snapshot.timestamp,
-            result.snapshot.abid,
+            str(result.snapshot.id)[:8],
            result.snapshot.bookmarked_at.strftime('%Y-%m-%d %H:%M'),
            result.snapshot.url[:128],
        )
--- a/archivebox/core/admin_snapshots.py
+++ b/archivebox/core/admin_snapshots.py
@@ -22,7 +22,7 @@ from archivebox.search.admin import SearchResultsAdminMixin
 from archivebox.index.html import snapshot_icons
 from archivebox.extractors import archive_links

-from archivebox.base_models.admin import ABIDModelAdmin
+from archivebox.base_models.admin import BaseModelAdmin
 from archivebox.workers.tasks import bg_archive_links, bg_add

 from core.models import Tag
@@ -53,11 +53,11 @@ class SnapshotActionForm(ActionForm):
    # )


-class SnapshotAdmin(SearchResultsAdminMixin, ABIDModelAdmin):
+class SnapshotAdmin(SearchResultsAdminMixin, BaseModelAdmin):
    list_display = ('created_at', 'title_str', 'status', 'files', 'size', 'url_str')
    sort_fields = ('title_str', 'url_str', 'created_at', 'status', 'crawl')
-    readonly_fields = ('admin_actions', 'status_info', 'tags_str', 'imported_timestamp', 'created_at', 'modified_at', 'downloaded_at', 'abid_info', 'link_dir')
-    search_fields = ('id', 'url', 'abid', 'timestamp', 'title', 'tags__name')
+    readonly_fields = ('admin_actions', 'status_info', 'tags_str', 'imported_timestamp', 'created_at', 'modified_at', 'downloaded_at', 'link_dir')
+    search_fields = ('id', 'url', 'timestamp', 'title', 'tags__name')
    list_filter = ('created_at', 'downloaded_at', 'archiveresult__status', 'created_by', 'tags__name')
    fields = ('url', 'title', 'created_by', 'bookmarked_at', 'status', 'retry_at', 'crawl', *readonly_fields)
    ordering = ['-created_at']
--- a/archivebox/core/admin_tags.py
+++ b/archivebox/core/admin_tags.py
@@ -6,7 +6,7 @@ from django.utils.html import format_html, mark_safe
 import abx

 from archivebox.misc.paginators import AccelleratedPaginator
-from archivebox.base_models.admin import ABIDModelAdmin
+from archivebox.base_models.admin import BaseModelAdmin

 from core.models import Tag

@@ -47,12 +47,12 @@ class TagInline(admin.TabularInline):
 #         return format_html('<a href="/admin/{}/{}/{}/change"><b>[{}]</b></a>', obj._meta.app_label, obj._meta.model_name, obj.pk, str(obj))

    
-class TagAdmin(ABIDModelAdmin):
-    list_display = ('created_at', 'created_by', 'abid', 'name', 'num_snapshots', 'snapshots')
+class TagAdmin(BaseModelAdmin):
+    list_display = ('created_at', 'created_by', 'id', 'name', 'num_snapshots', 'snapshots')
    list_filter = ('created_at', 'created_by')
-    sort_fields = ('name', 'slug', 'abid', 'created_by', 'created_at')
-    readonly_fields = ('slug', 'abid', 'created_at', 'modified_at', 'abid_info', 'snapshots')
-    search_fields = ('abid', 'name', 'slug')
+    sort_fields = ('name', 'slug', 'id', 'created_by', 'created_at')
+    readonly_fields = ('slug', 'id', 'created_at', 'modified_at', 'snapshots')
+    search_fields = ('id', 'name', 'slug')
    fields = ('name', 'created_by', *readonly_fields)
    actions = ['delete_selected', 'merge_tags']
    ordering = ['-created_at']
--- a/archivebox/core/admin_users.py
+++ b/archivebox/core/admin_users.py
@@ -21,7 +21,7 @@ class CustomUserAdmin(UserAdmin):
            format_html(
                '<code><a href="/admin/core/snapshot/{}/change"><b>[{}]</b></a></code> <b>📅 {}</b> {}',
                snap.pk,
-                snap.abid,
+                str(snap.id)[:8],
                snap.downloaded_at.strftime('%Y-%m-%d %H:%M') if snap.downloaded_at else 'pending...',
                snap.url[:64],
            )
@@ -35,7 +35,7 @@ class CustomUserAdmin(UserAdmin):
            format_html(
                '<code><a href="/admin/core/archiveresult/{}/change"><b>[{}]</b></a></code> <b>📅 {}</b> <b>📄 {}</b> {}',
                result.pk,
-                result.abid,
+                str(result.id)[:8],
                result.snapshot.downloaded_at.strftime('%Y-%m-%d %H:%M') if result.snapshot.downloaded_at else 'pending...',
                result.extractor,
                result.snapshot.url[:64],
@@ -62,7 +62,7 @@ class CustomUserAdmin(UserAdmin):
            format_html(
                '<code><a href="/admin/api/apitoken/{}/change"><b>[{}]</b></a></code> {} (expires {})',
                apitoken.pk,
-                apitoken.abid,
+                str(apitoken.id)[:8],
                apitoken.token_redacted[:64],
                apitoken.expires,
            )
@@ -76,7 +76,7 @@ class CustomUserAdmin(UserAdmin):
            format_html(
                '<code><a href="/admin/api/outboundwebhook/{}/change"><b>[{}]</b></a></code> {} -> {}',
                outboundwebhook.pk,
-                outboundwebhook.abid,
+                str(outboundwebhook.id)[:8],
                outboundwebhook.referenced_model,
                outboundwebhook.endpoint,
            )
--- a/archivebox/core/models.py
+++ b/archivebox/core/models.py
--- a/archivebox/core/statemachines.py
+++ b/archivebox/core/statemachines.py
@@ -43,7 +43,7 @@ class SnapshotMachine(StateMachine, strict_states=True):
        super().__init__(snapshot, *args, **kwargs)
        
    def __repr__(self) -> str:
-        return f'[grey53]Snapshot\\[{self.snapshot.ABID}] 🏃‍♂️ Worker\\[pid={os.getpid()}].tick()[/grey53] [blue]{self.snapshot.status.upper()}[/blue] ⚙️ [grey37]Machine[/grey37]'
+        return f'[grey53]Snapshot\\[{self.snapshot.id}] 🏃‍♂️ Worker\\[pid={os.getpid()}].tick()[/grey53] [blue]{self.snapshot.status.upper()}[/blue] ⚙️ [grey37]Machine[/grey37]'
    
    def __str__(self) -> str:
        return self.__repr__()
@@ -93,11 +93,6 @@ class SnapshotMachine(StateMachine, strict_states=True):
            status=Snapshot.StatusChoices.STARTED,
        )
        
-        # run_subcommand([
-        #     'archivebox', 'snapshot', self.snapshot.ABID,
-        #     '--start',
-        # ])
-        
    @sealed.enter
    def enter_sealed(self):
        print(f'{self}.on_sealed() ↳ snapshot.retry_at=None')
@@ -160,7 +155,7 @@ class ArchiveResultMachine(StateMachine, strict_states=True):
        super().__init__(archiveresult, *args, **kwargs)
    
    def __repr__(self) -> str:
-        return f'[grey53]ArchiveResult\\[{self.archiveresult.ABID}] 🏃‍♂️ Worker\\[pid={os.getpid()}].tick()[/grey53] [blue]{self.archiveresult.status.upper()}[/blue] ⚙️ [grey37]Machine[/grey37]'
+        return f'[grey53]ArchiveResult\\[{self.archiveresult.id}] 🏃‍♂️ Worker\\[pid={os.getpid()}].tick()[/grey53] [blue]{self.archiveresult.status.upper()}[/blue] ⚙️ [grey37]Machine[/grey37]'
    
    def __str__(self) -> str:
        return self.__repr__()
@@ -207,11 +202,7 @@ class ArchiveResultMachine(StateMachine, strict_states=True):
            status=ArchiveResult.StatusChoices.QUEUED,
            start_ts=timezone.now(),
        )   # lock the obj for the next ~30s to limit racing with other workers
-        
-        # run_subcommand([
-        #     'archivebox', 'extract', self.archiveresult.ABID,
-        # ])
-        
+
        # create the output directory and fork the new extractor job subprocess
        self.archiveresult.create_output_dir()
        # self.archiveresult.extract(background=True)
--- a/archivebox/core/views.py
+++ b/archivebox/core/views.py
@@ -205,7 +205,7 @@ class SnapshotView(View):
                    format_html(
                        (
                            '<center><br/><br/><br/>'
-                            'No Snapshot directories match the given timestamp/ID/ABID: <code>{}</code><br/><br/>'
+                            'No Snapshot directories match the given timestamp/ID: <code>{}</code><br/><br/>'
                            'You can <a href="/add/" target="_top">add a new Snapshot</a>, or return to the <a href="/" target="_top">Main Index</a>'
                            '</center>'
                        ),
@@ -230,7 +230,7 @@ class SnapshotView(View):
                return HttpResponse(
                    format_html(
                        (
-                            'Multiple Snapshots match the given timestamp/ID/ABID <code>{}</code><br/><pre>'
+                            'Multiple Snapshots match the given timestamp/ID <code>{}</code><br/><pre>'
                        ),
                        slug,
                    ) + snapshot_hrefs + format_html(
@@ -282,34 +282,12 @@ class SnapshotView(View):
                    status=404,
                )
            
-        # # slud is an ID
-        # ulid = slug.split('_', 1)[-1]
-        # try:
-        #     try:
-        #         snapshot = snapshot or Snapshot.objects.get(Q(abid=ulid) | Q(id=ulid))
-        #     except Snapshot.DoesNotExist:
-        #         pass
-
-        #     try:
-        #         snapshot = Snapshot.objects.get(Q(abid__startswith=slug) | Q(abid__startswith=Snapshot.abid_prefix + slug) | Q(id__startswith=slug))
-        #     except (Snapshot.DoesNotExist, Snapshot.MultipleObjectsReturned):
-        #         pass
-
-        #     try:
-        #         snapshot = snapshot or Snapshot.objects.get(Q(abid__icontains=snapshot_id) | Q(id__icontains=snapshot_id))
-        #     except Snapshot.DoesNotExist:
-        #         pass
-        #     return redirect(f'/archive/{snapshot.timestamp}/index.html')
-        # except Snapshot.DoesNotExist:
-        #     pass
-
        # slug is a URL
        try:
            try:
-                # try exact match on full url / ABID first
+                # try exact match on full url / ID first
                snapshot = Snapshot.objects.get(
-                    Q(url='http://' + path) | Q(url='https://' + path) | Q(id__startswith=path)
-                    | Q(abid__icontains=path) | Q(id__icontains=path)
+                    Q(url='http://' + path) | Q(url='https://' + path) | Q(id__icontains=path)
                )
            except Snapshot.DoesNotExist:
                # fall back to match on exact base_url
@@ -345,7 +323,7 @@ class SnapshotView(View):
                format_html(
                    '{} <code style="font-size: 0.8em">{}</code> <a href="/archive/{}/index.html"><b><code>{}</code></b></a> {} <b>{}</b>',
                    snap.bookmarked_at.strftime('%Y-%m-%d %H:%M:%S'),
-                    snap.abid,
+                    str(snap.id)[:8],
                    snap.timestamp,
                    snap.timestamp,
                    snap.url,
@@ -353,7 +331,7 @@ class SnapshotView(View):
                )
                for snap in Snapshot.objects.filter(
                    Q(url__startswith='http://' + base_url(path)) | Q(url__startswith='https://' + base_url(path))
-                    | Q(abid__icontains=path) | Q(id__icontains=path)
+                    | Q(id__icontains=path)
                ).only('url', 'timestamp', 'title', 'bookmarked_at').order_by('-bookmarked_at')
            )
            return HttpResponse(