mirror of
https://github.com/ArchiveBox/ArchiveBox.git
synced 2026-04-04 23:07:56 +10:00
Remove ABID system and KVTag model - use UUIDv7 IDs exclusively
This commit completes the simplification of the ID system by:

- Removing the ABID (ArchiveBox ID) system entirely
- Removing the base_models/abid.py file
- Removing KVTag model in favor of the existing Tag model in core/models.py
- Simplifying all models to use standard UUIDv7 primary keys
- Removing ABID-related admin functionality
- Cleaning up commented-out ABID code from views and statemachines
- Deleting migration files for ABID field removal (no longer needed)

All models now use simple UUIDv7 ids via `id = models.UUIDField(primary_key=True, default=uuid7)`

Note: Old migrations containing ABID references are preserved for database migration history compatibility.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
@@ -16,7 +16,7 @@ import abx
|
||||
from archivebox.config import DATA_DIR
|
||||
from archivebox.config.common import SERVER_CONFIG
|
||||
from archivebox.misc.paginators import AccelleratedPaginator
|
||||
from archivebox.base_models.admin import ABIDModelAdmin
|
||||
from archivebox.base_models.admin import BaseModelAdmin
|
||||
|
||||
|
||||
from core.models import ArchiveResult, Snapshot
|
||||
@@ -50,7 +50,7 @@ class ArchiveResultInline(admin.TabularInline):
|
||||
try:
|
||||
return self.parent_model.objects.get(pk=resolved.kwargs['object_id'])
|
||||
except (self.parent_model.DoesNotExist, ValidationError):
|
||||
return self.parent_model.objects.get(pk=self.parent_model.id_from_abid(resolved.kwargs['object_id']))
|
||||
return None
|
||||
|
||||
@admin.display(
|
||||
description='Completed',
|
||||
@@ -60,7 +60,7 @@ class ArchiveResultInline(admin.TabularInline):
|
||||
return format_html('<p style="white-space: nowrap">{}</p>', obj.end_ts.strftime('%Y-%m-%d %H:%M:%S'))
|
||||
|
||||
def result_id(self, obj):
|
||||
return format_html('<a href="{}"><code style="font-size: 10px">[{}]</code></a>', reverse('admin:core_archiveresult_change', args=(obj.id,)), obj.abid)
|
||||
return format_html('<a href="{}"><code style="font-size: 10px">[{}]</code></a>', reverse('admin:core_archiveresult_change', args=(obj.id,)), str(obj.id)[:8])
|
||||
|
||||
def command(self, obj):
|
||||
return format_html('<small><code>{}</code></small>', " ".join(obj.cmd or []))
|
||||
@@ -103,11 +103,11 @@ class ArchiveResultInline(admin.TabularInline):
|
||||
|
||||
|
||||
|
||||
class ArchiveResultAdmin(ABIDModelAdmin):
|
||||
list_display = ('abid', 'created_by', 'created_at', 'snapshot_info', 'tags_str', 'status', 'extractor', 'cmd_str', 'output_str')
|
||||
sort_fields = ('abid', 'created_by', 'created_at', 'extractor', 'status')
|
||||
readonly_fields = ('cmd_str', 'snapshot_info', 'tags_str', 'created_at', 'modified_at', 'abid_info', 'output_summary')
|
||||
search_fields = ('id', 'abid', 'snapshot__url', 'extractor', 'output', 'cmd_version', 'cmd', 'snapshot__timestamp')
|
||||
class ArchiveResultAdmin(BaseModelAdmin):
|
||||
list_display = ('id', 'created_by', 'created_at', 'snapshot_info', 'tags_str', 'status', 'extractor', 'cmd_str', 'output_str')
|
||||
sort_fields = ('id', 'created_by', 'created_at', 'extractor', 'status')
|
||||
readonly_fields = ('cmd_str', 'snapshot_info', 'tags_str', 'created_at', 'modified_at', 'output_summary')
|
||||
search_fields = ('id', 'snapshot__url', 'extractor', 'output', 'cmd_version', 'cmd', 'snapshot__timestamp')
|
||||
fields = ('snapshot', 'extractor', 'status', 'retry_at', 'start_ts', 'end_ts', 'created_by', 'pwd', 'cmd_version', 'cmd', 'output', *readonly_fields)
|
||||
autocomplete_fields = ['snapshot']
|
||||
|
||||
@@ -135,7 +135,7 @@ class ArchiveResultAdmin(ABIDModelAdmin):
|
||||
return format_html(
|
||||
'<a href="/archive/{}/index.html"><b><code>[{}]</code></b> {} {}</a><br/>',
|
||||
result.snapshot.timestamp,
|
||||
result.snapshot.abid,
|
||||
str(result.snapshot.id)[:8],
|
||||
result.snapshot.bookmarked_at.strftime('%Y-%m-%d %H:%M'),
|
||||
result.snapshot.url[:128],
|
||||
)
|
||||
|
||||
@@ -22,7 +22,7 @@ from archivebox.search.admin import SearchResultsAdminMixin
|
||||
from archivebox.index.html import snapshot_icons
|
||||
from archivebox.extractors import archive_links
|
||||
|
||||
from archivebox.base_models.admin import ABIDModelAdmin
|
||||
from archivebox.base_models.admin import BaseModelAdmin
|
||||
from archivebox.workers.tasks import bg_archive_links, bg_add
|
||||
|
||||
from core.models import Tag
|
||||
@@ -53,11 +53,11 @@ class SnapshotActionForm(ActionForm):
|
||||
# )
|
||||
|
||||
|
||||
class SnapshotAdmin(SearchResultsAdminMixin, ABIDModelAdmin):
|
||||
class SnapshotAdmin(SearchResultsAdminMixin, BaseModelAdmin):
|
||||
list_display = ('created_at', 'title_str', 'status', 'files', 'size', 'url_str')
|
||||
sort_fields = ('title_str', 'url_str', 'created_at', 'status', 'crawl')
|
||||
readonly_fields = ('admin_actions', 'status_info', 'tags_str', 'imported_timestamp', 'created_at', 'modified_at', 'downloaded_at', 'abid_info', 'link_dir')
|
||||
search_fields = ('id', 'url', 'abid', 'timestamp', 'title', 'tags__name')
|
||||
readonly_fields = ('admin_actions', 'status_info', 'tags_str', 'imported_timestamp', 'created_at', 'modified_at', 'downloaded_at', 'link_dir')
|
||||
search_fields = ('id', 'url', 'timestamp', 'title', 'tags__name')
|
||||
list_filter = ('created_at', 'downloaded_at', 'archiveresult__status', 'created_by', 'tags__name')
|
||||
fields = ('url', 'title', 'created_by', 'bookmarked_at', 'status', 'retry_at', 'crawl', *readonly_fields)
|
||||
ordering = ['-created_at']
|
||||
|
||||
@@ -6,7 +6,7 @@ from django.utils.html import format_html, mark_safe
|
||||
import abx
|
||||
|
||||
from archivebox.misc.paginators import AccelleratedPaginator
|
||||
from archivebox.base_models.admin import ABIDModelAdmin
|
||||
from archivebox.base_models.admin import BaseModelAdmin
|
||||
|
||||
from core.models import Tag
|
||||
|
||||
@@ -47,12 +47,12 @@ class TagInline(admin.TabularInline):
|
||||
# return format_html('<a href="/admin/{}/{}/{}/change"><b>[{}]</b></a>', obj._meta.app_label, obj._meta.model_name, obj.pk, str(obj))
|
||||
|
||||
|
||||
class TagAdmin(ABIDModelAdmin):
|
||||
list_display = ('created_at', 'created_by', 'abid', 'name', 'num_snapshots', 'snapshots')
|
||||
class TagAdmin(BaseModelAdmin):
|
||||
list_display = ('created_at', 'created_by', 'id', 'name', 'num_snapshots', 'snapshots')
|
||||
list_filter = ('created_at', 'created_by')
|
||||
sort_fields = ('name', 'slug', 'abid', 'created_by', 'created_at')
|
||||
readonly_fields = ('slug', 'abid', 'created_at', 'modified_at', 'abid_info', 'snapshots')
|
||||
search_fields = ('abid', 'name', 'slug')
|
||||
sort_fields = ('name', 'slug', 'id', 'created_by', 'created_at')
|
||||
readonly_fields = ('slug', 'id', 'created_at', 'modified_at', 'snapshots')
|
||||
search_fields = ('id', 'name', 'slug')
|
||||
fields = ('name', 'created_by', *readonly_fields)
|
||||
actions = ['delete_selected', 'merge_tags']
|
||||
ordering = ['-created_at']
|
||||
|
||||
@@ -21,7 +21,7 @@ class CustomUserAdmin(UserAdmin):
|
||||
format_html(
|
||||
'<code><a href="/admin/core/snapshot/{}/change"><b>[{}]</b></a></code> <b>📅 {}</b> {}',
|
||||
snap.pk,
|
||||
snap.abid,
|
||||
str(snap.id)[:8],
|
||||
snap.downloaded_at.strftime('%Y-%m-%d %H:%M') if snap.downloaded_at else 'pending...',
|
||||
snap.url[:64],
|
||||
)
|
||||
@@ -35,7 +35,7 @@ class CustomUserAdmin(UserAdmin):
|
||||
format_html(
|
||||
'<code><a href="/admin/core/archiveresult/{}/change"><b>[{}]</b></a></code> <b>📅 {}</b> <b>📄 {}</b> {}',
|
||||
result.pk,
|
||||
result.abid,
|
||||
str(result.id)[:8],
|
||||
result.snapshot.downloaded_at.strftime('%Y-%m-%d %H:%M') if result.snapshot.downloaded_at else 'pending...',
|
||||
result.extractor,
|
||||
result.snapshot.url[:64],
|
||||
@@ -62,7 +62,7 @@ class CustomUserAdmin(UserAdmin):
|
||||
format_html(
|
||||
'<code><a href="/admin/api/apitoken/{}/change"><b>[{}]</b></a></code> {} (expires {})',
|
||||
apitoken.pk,
|
||||
apitoken.abid,
|
||||
str(apitoken.id)[:8],
|
||||
apitoken.token_redacted[:64],
|
||||
apitoken.expires,
|
||||
)
|
||||
@@ -76,7 +76,7 @@ class CustomUserAdmin(UserAdmin):
|
||||
format_html(
|
||||
'<code><a href="/admin/api/outboundwebhook/{}/change"><b>[{}]</b></a></code> {} -> {}',
|
||||
outboundwebhook.pk,
|
||||
outboundwebhook.abid,
|
||||
str(outboundwebhook.id)[:8],
|
||||
outboundwebhook.referenced_model,
|
||||
outboundwebhook.endpoint,
|
||||
)
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -43,7 +43,7 @@ class SnapshotMachine(StateMachine, strict_states=True):
|
||||
super().__init__(snapshot, *args, **kwargs)
|
||||
|
||||
def __repr__(self) -> str:
|
||||
return f'[grey53]Snapshot\\[{self.snapshot.ABID}] 🏃♂️ Worker\\[pid={os.getpid()}].tick()[/grey53] [blue]{self.snapshot.status.upper()}[/blue] ⚙️ [grey37]Machine[/grey37]'
|
||||
return f'[grey53]Snapshot\\[{self.snapshot.id}] 🏃♂️ Worker\\[pid={os.getpid()}].tick()[/grey53] [blue]{self.snapshot.status.upper()}[/blue] ⚙️ [grey37]Machine[/grey37]'
|
||||
|
||||
def __str__(self) -> str:
|
||||
return self.__repr__()
|
||||
@@ -93,11 +93,6 @@ class SnapshotMachine(StateMachine, strict_states=True):
|
||||
status=Snapshot.StatusChoices.STARTED,
|
||||
)
|
||||
|
||||
# run_subcommand([
|
||||
# 'archivebox', 'snapshot', self.snapshot.ABID,
|
||||
# '--start',
|
||||
# ])
|
||||
|
||||
@sealed.enter
|
||||
def enter_sealed(self):
|
||||
print(f'{self}.on_sealed() ↳ snapshot.retry_at=None')
|
||||
@@ -160,7 +155,7 @@ class ArchiveResultMachine(StateMachine, strict_states=True):
|
||||
super().__init__(archiveresult, *args, **kwargs)
|
||||
|
||||
def __repr__(self) -> str:
|
||||
return f'[grey53]ArchiveResult\\[{self.archiveresult.ABID}] 🏃♂️ Worker\\[pid={os.getpid()}].tick()[/grey53] [blue]{self.archiveresult.status.upper()}[/blue] ⚙️ [grey37]Machine[/grey37]'
|
||||
return f'[grey53]ArchiveResult\\[{self.archiveresult.id}] 🏃♂️ Worker\\[pid={os.getpid()}].tick()[/grey53] [blue]{self.archiveresult.status.upper()}[/blue] ⚙️ [grey37]Machine[/grey37]'
|
||||
|
||||
def __str__(self) -> str:
|
||||
return self.__repr__()
|
||||
@@ -207,11 +202,7 @@ class ArchiveResultMachine(StateMachine, strict_states=True):
|
||||
status=ArchiveResult.StatusChoices.QUEUED,
|
||||
start_ts=timezone.now(),
|
||||
) # lock the obj for the next ~30s to limit racing with other workers
|
||||
|
||||
# run_subcommand([
|
||||
# 'archivebox', 'extract', self.archiveresult.ABID,
|
||||
# ])
|
||||
|
||||
|
||||
# create the output directory and fork the new extractor job subprocess
|
||||
self.archiveresult.create_output_dir()
|
||||
# self.archiveresult.extract(background=True)
|
||||
|
||||
@@ -205,7 +205,7 @@ class SnapshotView(View):
|
||||
format_html(
|
||||
(
|
||||
'<center><br/><br/><br/>'
|
||||
'No Snapshot directories match the given timestamp/ID/ABID: <code>{}</code><br/><br/>'
|
||||
'No Snapshot directories match the given timestamp/ID: <code>{}</code><br/><br/>'
|
||||
'You can <a href="/add/" target="_top">add a new Snapshot</a>, or return to the <a href="/" target="_top">Main Index</a>'
|
||||
'</center>'
|
||||
),
|
||||
@@ -230,7 +230,7 @@ class SnapshotView(View):
|
||||
return HttpResponse(
|
||||
format_html(
|
||||
(
|
||||
'Multiple Snapshots match the given timestamp/ID/ABID <code>{}</code><br/><pre>'
|
||||
'Multiple Snapshots match the given timestamp/ID <code>{}</code><br/><pre>'
|
||||
),
|
||||
slug,
|
||||
) + snapshot_hrefs + format_html(
|
||||
@@ -282,34 +282,12 @@ class SnapshotView(View):
|
||||
status=404,
|
||||
)
|
||||
|
||||
# # slud is an ID
|
||||
# ulid = slug.split('_', 1)[-1]
|
||||
# try:
|
||||
# try:
|
||||
# snapshot = snapshot or Snapshot.objects.get(Q(abid=ulid) | Q(id=ulid))
|
||||
# except Snapshot.DoesNotExist:
|
||||
# pass
|
||||
|
||||
# try:
|
||||
# snapshot = Snapshot.objects.get(Q(abid__startswith=slug) | Q(abid__startswith=Snapshot.abid_prefix + slug) | Q(id__startswith=slug))
|
||||
# except (Snapshot.DoesNotExist, Snapshot.MultipleObjectsReturned):
|
||||
# pass
|
||||
|
||||
# try:
|
||||
# snapshot = snapshot or Snapshot.objects.get(Q(abid__icontains=snapshot_id) | Q(id__icontains=snapshot_id))
|
||||
# except Snapshot.DoesNotExist:
|
||||
# pass
|
||||
# return redirect(f'/archive/{snapshot.timestamp}/index.html')
|
||||
# except Snapshot.DoesNotExist:
|
||||
# pass
|
||||
|
||||
# slug is a URL
|
||||
try:
|
||||
try:
|
||||
# try exact match on full url / ABID first
|
||||
# try exact match on full url / ID first
|
||||
snapshot = Snapshot.objects.get(
|
||||
Q(url='http://' + path) | Q(url='https://' + path) | Q(id__startswith=path)
|
||||
| Q(abid__icontains=path) | Q(id__icontains=path)
|
||||
Q(url='http://' + path) | Q(url='https://' + path) | Q(id__icontains=path)
|
||||
)
|
||||
except Snapshot.DoesNotExist:
|
||||
# fall back to match on exact base_url
|
||||
@@ -345,7 +323,7 @@ class SnapshotView(View):
|
||||
format_html(
|
||||
'{} <code style="font-size: 0.8em">{}</code> <a href="/archive/{}/index.html"><b><code>{}</code></b></a> {} <b>{}</b>',
|
||||
snap.bookmarked_at.strftime('%Y-%m-%d %H:%M:%S'),
|
||||
snap.abid,
|
||||
str(snap.id)[:8],
|
||||
snap.timestamp,
|
||||
snap.timestamp,
|
||||
snap.url,
|
||||
@@ -353,7 +331,7 @@ class SnapshotView(View):
|
||||
)
|
||||
for snap in Snapshot.objects.filter(
|
||||
Q(url__startswith='http://' + base_url(path)) | Q(url__startswith='https://' + base_url(path))
|
||||
| Q(abid__icontains=path) | Q(id__icontains=path)
|
||||
| Q(id__icontains=path)
|
||||
).only('url', 'timestamp', 'title', 'bookmarked_at').order_by('-bookmarked_at')
|
||||
)
|
||||
return HttpResponse(
|
||||
|
||||
Reference in New Issue
Block a user