Remove ABID system and KVTag model - use UUIDv7 IDs exclusively

This commit completes the simplification of the ID system by:

- Removing the ABID (ArchiveBox ID) system entirely
- Removing the base_models/abid.py file
- Removing the KVTag model in favor of the existing Tag model in core/models.py
- Simplifying all models to use standard UUIDv7 primary keys
- Removing ABID-related admin functionality
- Cleaning up commented-out ABID code from views and statemachines
- Deleting migration files for ABID field removal (no longer needed)

All models now use simple UUIDv7 IDs via `id = models.UUIDField(primary_key=True, default=uuid7)`

Note: Older, already-applied migrations that still contain ABID references are
preserved so that the migration history of existing databases stays compatible.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
Nick Sweeting
2025-12-24 06:13:49 -08:00
parent c3024815f3
commit c1335fed37
26 changed files with 497 additions and 3537 deletions

View File

@@ -16,7 +16,7 @@ import abx
from archivebox.config import DATA_DIR
from archivebox.config.common import SERVER_CONFIG
from archivebox.misc.paginators import AccelleratedPaginator
from archivebox.base_models.admin import ABIDModelAdmin
from archivebox.base_models.admin import BaseModelAdmin
from core.models import ArchiveResult, Snapshot
@@ -50,7 +50,7 @@ class ArchiveResultInline(admin.TabularInline):
try:
return self.parent_model.objects.get(pk=resolved.kwargs['object_id'])
except (self.parent_model.DoesNotExist, ValidationError):
return self.parent_model.objects.get(pk=self.parent_model.id_from_abid(resolved.kwargs['object_id']))
return None
@admin.display(
description='Completed',
@@ -60,7 +60,7 @@ class ArchiveResultInline(admin.TabularInline):
return format_html('<p style="white-space: nowrap">{}</p>', obj.end_ts.strftime('%Y-%m-%d %H:%M:%S'))
def result_id(self, obj):
return format_html('<a href="{}"><code style="font-size: 10px">[{}]</code></a>', reverse('admin:core_archiveresult_change', args=(obj.id,)), obj.abid)
return format_html('<a href="{}"><code style="font-size: 10px">[{}]</code></a>', reverse('admin:core_archiveresult_change', args=(obj.id,)), str(obj.id)[:8])
def command(self, obj):
return format_html('<small><code>{}</code></small>', " ".join(obj.cmd or []))
@@ -103,11 +103,11 @@ class ArchiveResultInline(admin.TabularInline):
class ArchiveResultAdmin(ABIDModelAdmin):
list_display = ('abid', 'created_by', 'created_at', 'snapshot_info', 'tags_str', 'status', 'extractor', 'cmd_str', 'output_str')
sort_fields = ('abid', 'created_by', 'created_at', 'extractor', 'status')
readonly_fields = ('cmd_str', 'snapshot_info', 'tags_str', 'created_at', 'modified_at', 'abid_info', 'output_summary')
search_fields = ('id', 'abid', 'snapshot__url', 'extractor', 'output', 'cmd_version', 'cmd', 'snapshot__timestamp')
class ArchiveResultAdmin(BaseModelAdmin):
list_display = ('id', 'created_by', 'created_at', 'snapshot_info', 'tags_str', 'status', 'extractor', 'cmd_str', 'output_str')
sort_fields = ('id', 'created_by', 'created_at', 'extractor', 'status')
readonly_fields = ('cmd_str', 'snapshot_info', 'tags_str', 'created_at', 'modified_at', 'output_summary')
search_fields = ('id', 'snapshot__url', 'extractor', 'output', 'cmd_version', 'cmd', 'snapshot__timestamp')
fields = ('snapshot', 'extractor', 'status', 'retry_at', 'start_ts', 'end_ts', 'created_by', 'pwd', 'cmd_version', 'cmd', 'output', *readonly_fields)
autocomplete_fields = ['snapshot']
@@ -135,7 +135,7 @@ class ArchiveResultAdmin(ABIDModelAdmin):
return format_html(
'<a href="/archive/{}/index.html"><b><code>[{}]</code></b> &nbsp; {} &nbsp; {}</a><br/>',
result.snapshot.timestamp,
result.snapshot.abid,
str(result.snapshot.id)[:8],
result.snapshot.bookmarked_at.strftime('%Y-%m-%d %H:%M'),
result.snapshot.url[:128],
)

View File

@@ -22,7 +22,7 @@ from archivebox.search.admin import SearchResultsAdminMixin
from archivebox.index.html import snapshot_icons
from archivebox.extractors import archive_links
from archivebox.base_models.admin import ABIDModelAdmin
from archivebox.base_models.admin import BaseModelAdmin
from archivebox.workers.tasks import bg_archive_links, bg_add
from core.models import Tag
@@ -53,11 +53,11 @@ class SnapshotActionForm(ActionForm):
# )
class SnapshotAdmin(SearchResultsAdminMixin, ABIDModelAdmin):
class SnapshotAdmin(SearchResultsAdminMixin, BaseModelAdmin):
list_display = ('created_at', 'title_str', 'status', 'files', 'size', 'url_str')
sort_fields = ('title_str', 'url_str', 'created_at', 'status', 'crawl')
readonly_fields = ('admin_actions', 'status_info', 'tags_str', 'imported_timestamp', 'created_at', 'modified_at', 'downloaded_at', 'abid_info', 'link_dir')
search_fields = ('id', 'url', 'abid', 'timestamp', 'title', 'tags__name')
readonly_fields = ('admin_actions', 'status_info', 'tags_str', 'imported_timestamp', 'created_at', 'modified_at', 'downloaded_at', 'link_dir')
search_fields = ('id', 'url', 'timestamp', 'title', 'tags__name')
list_filter = ('created_at', 'downloaded_at', 'archiveresult__status', 'created_by', 'tags__name')
fields = ('url', 'title', 'created_by', 'bookmarked_at', 'status', 'retry_at', 'crawl', *readonly_fields)
ordering = ['-created_at']

View File

@@ -6,7 +6,7 @@ from django.utils.html import format_html, mark_safe
import abx
from archivebox.misc.paginators import AccelleratedPaginator
from archivebox.base_models.admin import ABIDModelAdmin
from archivebox.base_models.admin import BaseModelAdmin
from core.models import Tag
@@ -47,12 +47,12 @@ class TagInline(admin.TabularInline):
# return format_html('<a href="/admin/{}/{}/{}/change"><b>[{}]</b></a>', obj._meta.app_label, obj._meta.model_name, obj.pk, str(obj))
class TagAdmin(ABIDModelAdmin):
list_display = ('created_at', 'created_by', 'abid', 'name', 'num_snapshots', 'snapshots')
class TagAdmin(BaseModelAdmin):
list_display = ('created_at', 'created_by', 'id', 'name', 'num_snapshots', 'snapshots')
list_filter = ('created_at', 'created_by')
sort_fields = ('name', 'slug', 'abid', 'created_by', 'created_at')
readonly_fields = ('slug', 'abid', 'created_at', 'modified_at', 'abid_info', 'snapshots')
search_fields = ('abid', 'name', 'slug')
sort_fields = ('name', 'slug', 'id', 'created_by', 'created_at')
readonly_fields = ('slug', 'id', 'created_at', 'modified_at', 'snapshots')
search_fields = ('id', 'name', 'slug')
fields = ('name', 'created_by', *readonly_fields)
actions = ['delete_selected', 'merge_tags']
ordering = ['-created_at']

View File

@@ -21,7 +21,7 @@ class CustomUserAdmin(UserAdmin):
format_html(
'<code><a href="/admin/core/snapshot/{}/change"><b>[{}]</b></a></code> <b>📅 {}</b> {}',
snap.pk,
snap.abid,
str(snap.id)[:8],
snap.downloaded_at.strftime('%Y-%m-%d %H:%M') if snap.downloaded_at else 'pending...',
snap.url[:64],
)
@@ -35,7 +35,7 @@ class CustomUserAdmin(UserAdmin):
format_html(
'<code><a href="/admin/core/archiveresult/{}/change"><b>[{}]</b></a></code> <b>📅 {}</b> <b>📄 {}</b> {}',
result.pk,
result.abid,
str(result.id)[:8],
result.snapshot.downloaded_at.strftime('%Y-%m-%d %H:%M') if result.snapshot.downloaded_at else 'pending...',
result.extractor,
result.snapshot.url[:64],
@@ -62,7 +62,7 @@ class CustomUserAdmin(UserAdmin):
format_html(
'<code><a href="/admin/api/apitoken/{}/change"><b>[{}]</b></a></code> {} (expires {})',
apitoken.pk,
apitoken.abid,
str(apitoken.id)[:8],
apitoken.token_redacted[:64],
apitoken.expires,
)
@@ -76,7 +76,7 @@ class CustomUserAdmin(UserAdmin):
format_html(
'<code><a href="/admin/api/outboundwebhook/{}/change"><b>[{}]</b></a></code> {} -> {}',
outboundwebhook.pk,
outboundwebhook.abid,
str(outboundwebhook.id)[:8],
outboundwebhook.referenced_model,
outboundwebhook.endpoint,
)

File diff suppressed because it is too large Load Diff

View File

@@ -43,7 +43,7 @@ class SnapshotMachine(StateMachine, strict_states=True):
super().__init__(snapshot, *args, **kwargs)
def __repr__(self) -> str:
return f'[grey53]Snapshot\\[{self.snapshot.ABID}] 🏃‍♂️ Worker\\[pid={os.getpid()}].tick()[/grey53] [blue]{self.snapshot.status.upper()}[/blue] ⚙️ [grey37]Machine[/grey37]'
return f'[grey53]Snapshot\\[{self.snapshot.id}] 🏃‍♂️ Worker\\[pid={os.getpid()}].tick()[/grey53] [blue]{self.snapshot.status.upper()}[/blue] ⚙️ [grey37]Machine[/grey37]'
def __str__(self) -> str:
return self.__repr__()
@@ -93,11 +93,6 @@ class SnapshotMachine(StateMachine, strict_states=True):
status=Snapshot.StatusChoices.STARTED,
)
# run_subcommand([
# 'archivebox', 'snapshot', self.snapshot.ABID,
# '--start',
# ])
@sealed.enter
def enter_sealed(self):
print(f'{self}.on_sealed() ↳ snapshot.retry_at=None')
@@ -160,7 +155,7 @@ class ArchiveResultMachine(StateMachine, strict_states=True):
super().__init__(archiveresult, *args, **kwargs)
def __repr__(self) -> str:
return f'[grey53]ArchiveResult\\[{self.archiveresult.ABID}] 🏃‍♂️ Worker\\[pid={os.getpid()}].tick()[/grey53] [blue]{self.archiveresult.status.upper()}[/blue] ⚙️ [grey37]Machine[/grey37]'
return f'[grey53]ArchiveResult\\[{self.archiveresult.id}] 🏃‍♂️ Worker\\[pid={os.getpid()}].tick()[/grey53] [blue]{self.archiveresult.status.upper()}[/blue] ⚙️ [grey37]Machine[/grey37]'
def __str__(self) -> str:
return self.__repr__()
@@ -207,11 +202,7 @@ class ArchiveResultMachine(StateMachine, strict_states=True):
status=ArchiveResult.StatusChoices.QUEUED,
start_ts=timezone.now(),
) # lock the obj for the next ~30s to limit racing with other workers
# run_subcommand([
# 'archivebox', 'extract', self.archiveresult.ABID,
# ])
# create the output directory and fork the new extractor job subprocess
self.archiveresult.create_output_dir()
# self.archiveresult.extract(background=True)

View File

@@ -205,7 +205,7 @@ class SnapshotView(View):
format_html(
(
'<center><br/><br/><br/>'
'No Snapshot directories match the given timestamp/ID/ABID: <code>{}</code><br/><br/>'
'No Snapshot directories match the given timestamp/ID: <code>{}</code><br/><br/>'
'You can <a href="/add/" target="_top">add a new Snapshot</a>, or return to the <a href="/" target="_top">Main Index</a>'
'</center>'
),
@@ -230,7 +230,7 @@ class SnapshotView(View):
return HttpResponse(
format_html(
(
'Multiple Snapshots match the given timestamp/ID/ABID <code>{}</code><br/><pre>'
'Multiple Snapshots match the given timestamp/ID <code>{}</code><br/><pre>'
),
slug,
) + snapshot_hrefs + format_html(
@@ -282,34 +282,12 @@ class SnapshotView(View):
status=404,
)
# # slud is an ID
# ulid = slug.split('_', 1)[-1]
# try:
# try:
# snapshot = snapshot or Snapshot.objects.get(Q(abid=ulid) | Q(id=ulid))
# except Snapshot.DoesNotExist:
# pass
# try:
# snapshot = Snapshot.objects.get(Q(abid__startswith=slug) | Q(abid__startswith=Snapshot.abid_prefix + slug) | Q(id__startswith=slug))
# except (Snapshot.DoesNotExist, Snapshot.MultipleObjectsReturned):
# pass
# try:
# snapshot = snapshot or Snapshot.objects.get(Q(abid__icontains=snapshot_id) | Q(id__icontains=snapshot_id))
# except Snapshot.DoesNotExist:
# pass
# return redirect(f'/archive/{snapshot.timestamp}/index.html')
# except Snapshot.DoesNotExist:
# pass
# slug is a URL
try:
try:
# try exact match on full url / ABID first
# try exact match on full url / ID first
snapshot = Snapshot.objects.get(
Q(url='http://' + path) | Q(url='https://' + path) | Q(id__startswith=path)
| Q(abid__icontains=path) | Q(id__icontains=path)
Q(url='http://' + path) | Q(url='https://' + path) | Q(id__icontains=path)
)
except Snapshot.DoesNotExist:
# fall back to match on exact base_url
@@ -345,7 +323,7 @@ class SnapshotView(View):
format_html(
'{} <code style="font-size: 0.8em">{}</code> <a href="/archive/{}/index.html"><b><code>{}</code></b></a> {} <b>{}</b>',
snap.bookmarked_at.strftime('%Y-%m-%d %H:%M:%S'),
snap.abid,
str(snap.id)[:8],
snap.timestamp,
snap.timestamp,
snap.url,
@@ -353,7 +331,7 @@ class SnapshotView(View):
)
for snap in Snapshot.objects.filter(
Q(url__startswith='http://' + base_url(path)) | Q(url__startswith='https://' + base_url(path))
| Q(abid__icontains=path) | Q(id__icontains=path)
| Q(id__icontains=path)
).only('url', 'timestamp', 'title', 'bookmarked_at').order_by('-bookmarked_at')
)
return HttpResponse(