mirror of
https://github.com/ArchiveBox/ArchiveBox.git
synced 2026-04-06 07:47:53 +10:00
wip
This commit is contained in:
@@ -1 +1 @@
|
||||
__package__ = 'archivebox.api'
|
||||
__package__ = "archivebox.api"
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
__package__ = 'archivebox.api'
|
||||
__package__ = "archivebox.api"
|
||||
|
||||
from django.contrib import admin
|
||||
from django.http import HttpRequest
|
||||
@@ -11,57 +11,81 @@ from archivebox.api.models import APIToken
|
||||
|
||||
|
||||
class APITokenAdmin(BaseModelAdmin):
|
||||
list_display = ('created_at', 'id', 'created_by', 'token_redacted', 'expires')
|
||||
sort_fields = ('id', 'created_at', 'created_by', 'expires')
|
||||
readonly_fields = ('created_at', 'modified_at')
|
||||
search_fields = ('id', 'created_by__username', 'token')
|
||||
list_display = ("created_at", "id", "created_by", "token_redacted", "expires")
|
||||
sort_fields = ("id", "created_at", "created_by", "expires")
|
||||
readonly_fields = ("created_at", "modified_at")
|
||||
search_fields = ("id", "created_by__username", "token")
|
||||
|
||||
fieldsets = (
|
||||
('Token', {
|
||||
'fields': ('token', 'expires'),
|
||||
'classes': ('card',),
|
||||
}),
|
||||
('Owner', {
|
||||
'fields': ('created_by',),
|
||||
'classes': ('card',),
|
||||
}),
|
||||
('Timestamps', {
|
||||
'fields': ('created_at', 'modified_at'),
|
||||
'classes': ('card',),
|
||||
}),
|
||||
(
|
||||
"Token",
|
||||
{
|
||||
"fields": ("token", "expires"),
|
||||
"classes": ("card",),
|
||||
},
|
||||
),
|
||||
(
|
||||
"Owner",
|
||||
{
|
||||
"fields": ("created_by",),
|
||||
"classes": ("card",),
|
||||
},
|
||||
),
|
||||
(
|
||||
"Timestamps",
|
||||
{
|
||||
"fields": ("created_at", "modified_at"),
|
||||
"classes": ("card",),
|
||||
},
|
||||
),
|
||||
)
|
||||
|
||||
list_filter = ('created_by',)
|
||||
ordering = ['-created_at']
|
||||
list_filter = ("created_by",)
|
||||
ordering = ["-created_at"]
|
||||
list_per_page = 100
|
||||
|
||||
|
||||
class CustomWebhookAdmin(WebhookAdmin, BaseModelAdmin):
|
||||
list_display = ('created_at', 'created_by', 'id', *WebhookAdmin.list_display)
|
||||
sort_fields = ('created_at', 'created_by', 'id', 'referenced_model', 'endpoint', 'last_success', 'last_error')
|
||||
readonly_fields = ('created_at', 'modified_at', *WebhookAdmin.readonly_fields)
|
||||
list_display = ("created_at", "created_by", "id", *WebhookAdmin.list_display)
|
||||
sort_fields = ("created_at", "created_by", "id", "referenced_model", "endpoint", "last_success", "last_error")
|
||||
readonly_fields = ("created_at", "modified_at", *WebhookAdmin.readonly_fields)
|
||||
|
||||
fieldsets = (
|
||||
('Webhook', {
|
||||
'fields': ('name', 'signal', 'referenced_model', 'endpoint'),
|
||||
'classes': ('card', 'wide'),
|
||||
}),
|
||||
('Authentication', {
|
||||
'fields': ('auth_token',),
|
||||
'classes': ('card',),
|
||||
}),
|
||||
('Status', {
|
||||
'fields': ('enabled', 'last_success', 'last_error'),
|
||||
'classes': ('card',),
|
||||
}),
|
||||
('Owner', {
|
||||
'fields': ('created_by',),
|
||||
'classes': ('card',),
|
||||
}),
|
||||
('Timestamps', {
|
||||
'fields': ('created_at', 'modified_at'),
|
||||
'classes': ('card',),
|
||||
}),
|
||||
(
|
||||
"Webhook",
|
||||
{
|
||||
"fields": ("name", "signal", "referenced_model", "endpoint"),
|
||||
"classes": ("card", "wide"),
|
||||
},
|
||||
),
|
||||
(
|
||||
"Authentication",
|
||||
{
|
||||
"fields": ("auth_token",),
|
||||
"classes": ("card",),
|
||||
},
|
||||
),
|
||||
(
|
||||
"Status",
|
||||
{
|
||||
"fields": ("enabled", "last_success", "last_error"),
|
||||
"classes": ("card",),
|
||||
},
|
||||
),
|
||||
(
|
||||
"Owner",
|
||||
{
|
||||
"fields": ("created_by",),
|
||||
"classes": ("card",),
|
||||
},
|
||||
),
|
||||
(
|
||||
"Timestamps",
|
||||
{
|
||||
"fields": ("created_at", "modified_at"),
|
||||
"classes": ("card",),
|
||||
},
|
||||
),
|
||||
)
|
||||
|
||||
def lookup_allowed(self, lookup: str, value: str, request: HttpRequest | None = None) -> bool:
|
||||
|
||||
@@ -1,13 +1,14 @@
|
||||
__package__ = 'archivebox.api'
|
||||
__package__ = "archivebox.api"
|
||||
|
||||
from django.apps import AppConfig
|
||||
|
||||
|
||||
class APIConfig(AppConfig):
|
||||
name = 'archivebox.api'
|
||||
label = 'api'
|
||||
name = "archivebox.api"
|
||||
label = "api"
|
||||
|
||||
|
||||
def register_admin(admin_site):
|
||||
from archivebox.api.admin import register_admin
|
||||
|
||||
register_admin(admin_site)
|
||||
|
||||
@@ -1,6 +1,5 @@
|
||||
__package__ = 'archivebox.api'
|
||||
__package__ = "archivebox.api"
|
||||
|
||||
from typing import Optional
|
||||
from datetime import timedelta
|
||||
|
||||
from django.utils import timezone
|
||||
@@ -14,7 +13,7 @@ from ninja.errors import HttpError
|
||||
|
||||
def get_or_create_api_token(user: User | None):
|
||||
from archivebox.api.models import APIToken
|
||||
|
||||
|
||||
if user and user.is_superuser:
|
||||
api_tokens = APIToken.objects.filter(created_by_id=user.pk, expires__gt=timezone.now())
|
||||
if api_tokens.exists():
|
||||
@@ -34,18 +33,18 @@ def get_or_create_api_token(user: User | None):
|
||||
|
||||
def auth_using_token(token: str | None, request: HttpRequest | None = None) -> User | None:
|
||||
"""Given an API token string, check if a corresponding non-expired APIToken exists, and return its user"""
|
||||
from archivebox.api.models import APIToken # lazy import model to avoid loading it at urls.py import time
|
||||
|
||||
from archivebox.api.models import APIToken # lazy import model to avoid loading it at urls.py import time
|
||||
|
||||
user: User | None = None
|
||||
|
||||
submitted_empty_form = str(token).strip() in ('string', '', 'None', 'null')
|
||||
submitted_empty_form = str(token).strip() in ("string", "", "None", "null")
|
||||
if not submitted_empty_form:
|
||||
try:
|
||||
api_token = APIToken.objects.get(token=token)
|
||||
if api_token.is_valid() and isinstance(api_token.created_by, User):
|
||||
user = api_token.created_by
|
||||
if request is not None:
|
||||
setattr(request, '_api_token', api_token)
|
||||
setattr(request, "_api_token", api_token)
|
||||
except APIToken.DoesNotExist:
|
||||
pass
|
||||
|
||||
@@ -55,8 +54,8 @@ def auth_using_token(token: str | None, request: HttpRequest | None = None) -> U
|
||||
def auth_using_password(username: str | None, password: str | None, request: HttpRequest | None = None) -> User | None:
|
||||
"""Given a username and password, check if they are valid and return the corresponding user"""
|
||||
user: User | None = None
|
||||
|
||||
submitted_empty_form = (username, password) in (('string', 'string'), ('', ''), (None, None))
|
||||
|
||||
submitted_empty_form = (username, password) in (("string", "string"), ("", ""), (None, None))
|
||||
if not submitted_empty_form:
|
||||
authenticated_user = authenticate(
|
||||
username=username,
|
||||
@@ -73,34 +72,40 @@ def auth_using_password(username: str | None, password: str | None, request: Htt
|
||||
def _require_superuser(user: User | None, request: HttpRequest, auth_method: str) -> User | None:
|
||||
if user and user.pk:
|
||||
request.user = user
|
||||
setattr(request, '_api_auth_method', auth_method)
|
||||
setattr(request, "_api_auth_method", auth_method)
|
||||
if not user.is_superuser:
|
||||
raise HttpError(403, 'Valid credentials but User does not have permission (make sure user.is_superuser=True)')
|
||||
raise HttpError(403, "Valid credentials but User does not have permission (make sure user.is_superuser=True)")
|
||||
return user
|
||||
|
||||
|
||||
### Django-Ninja-Provided Auth Methods
|
||||
|
||||
|
||||
class HeaderTokenAuth(APIKeyHeader):
|
||||
"""Allow authenticating by passing X-ArchiveBox-API-Key=xyz as a request header"""
|
||||
|
||||
param_name = "X-ArchiveBox-API-Key"
|
||||
|
||||
def authenticate(self, request: HttpRequest, key: Optional[str]) -> User | None:
|
||||
def authenticate(self, request: HttpRequest, key: str | None) -> User | None:
|
||||
return _require_superuser(auth_using_token(token=key, request=request), request, self.__class__.__name__)
|
||||
|
||||
|
||||
class BearerTokenAuth(HttpBearer):
|
||||
"""Allow authenticating by passing Authorization: Bearer xyz as a request header"""
|
||||
|
||||
def authenticate(self, request: HttpRequest, token: str) -> User | None:
|
||||
return _require_superuser(auth_using_token(token=token, request=request), request, self.__class__.__name__)
|
||||
|
||||
|
||||
class QueryParamTokenAuth(APIKeyQuery):
|
||||
"""Allow authenticating by passing api_key=xyz as a GET/POST query parameter"""
|
||||
|
||||
param_name = "api_key"
|
||||
|
||||
def authenticate(self, request: HttpRequest, key: Optional[str]) -> User | None:
|
||||
def authenticate(self, request: HttpRequest, key: str | None) -> User | None:
|
||||
return _require_superuser(auth_using_token(token=key, request=request), request, self.__class__.__name__)
|
||||
|
||||
|
||||
class UsernameAndPasswordAuth(HttpBasicAuth):
|
||||
"""Allow authenticating by passing username & password via HTTP Basic Authentication (not recommended)"""
|
||||
|
||||
@@ -111,25 +116,28 @@ class UsernameAndPasswordAuth(HttpBasicAuth):
|
||||
self.__class__.__name__,
|
||||
)
|
||||
|
||||
|
||||
class DjangoSessionAuth:
|
||||
"""Allow authenticating with existing Django session cookies (same-origin only)."""
|
||||
|
||||
def __call__(self, request: HttpRequest) -> User | None:
|
||||
return self.authenticate(request)
|
||||
|
||||
def authenticate(self, request: HttpRequest, **kwargs) -> User | None:
|
||||
user = getattr(request, 'user', None)
|
||||
user = getattr(request, "user", None)
|
||||
if isinstance(user, User) and user.is_authenticated:
|
||||
setattr(request, '_api_auth_method', self.__class__.__name__)
|
||||
setattr(request, "_api_auth_method", self.__class__.__name__)
|
||||
if not user.is_superuser:
|
||||
raise HttpError(403, 'Valid session but User does not have permission (make sure user.is_superuser=True)')
|
||||
raise HttpError(403, "Valid session but User does not have permission (make sure user.is_superuser=True)")
|
||||
return user
|
||||
return None
|
||||
|
||||
|
||||
### Enabled Auth Methods
|
||||
|
||||
API_AUTH_METHODS = [
|
||||
HeaderTokenAuth(),
|
||||
BearerTokenAuth(),
|
||||
QueryParamTokenAuth(),
|
||||
QueryParamTokenAuth(),
|
||||
# django_auth_superuser, # django admin cookie auth, not secure to use with csrf=False
|
||||
]
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
__package__ = 'archivebox.api'
|
||||
__package__ = "archivebox.api"
|
||||
|
||||
from django.http import HttpResponse
|
||||
|
||||
@@ -10,8 +10,8 @@ class ApiCorsMiddleware:
|
||||
self.get_response = get_response
|
||||
|
||||
def __call__(self, request):
|
||||
if request.path.startswith('/api/'):
|
||||
if request.method == 'OPTIONS' and request.META.get('HTTP_ACCESS_CONTROL_REQUEST_METHOD'):
|
||||
if request.path.startswith("/api/"):
|
||||
if request.method == "OPTIONS" and request.META.get("HTTP_ACCESS_CONTROL_REQUEST_METHOD"):
|
||||
response = HttpResponse(status=204)
|
||||
return self._add_cors_headers(request, response)
|
||||
|
||||
@@ -21,14 +21,12 @@ class ApiCorsMiddleware:
|
||||
return self.get_response(request)
|
||||
|
||||
def _add_cors_headers(self, request, response):
|
||||
origin = request.META.get('HTTP_ORIGIN')
|
||||
origin = request.META.get("HTTP_ORIGIN")
|
||||
if not origin:
|
||||
return response
|
||||
|
||||
response['Access-Control-Allow-Origin'] = '*'
|
||||
response['Access-Control-Allow-Methods'] = 'GET, POST, PUT, PATCH, DELETE, OPTIONS'
|
||||
response['Access-Control-Allow-Headers'] = (
|
||||
'Authorization, X-ArchiveBox-API-Key, Content-Type, X-CSRFToken'
|
||||
)
|
||||
response['Access-Control-Max-Age'] = '600'
|
||||
response["Access-Control-Allow-Origin"] = "*"
|
||||
response["Access-Control-Allow-Methods"] = "GET, POST, PUT, PATCH, DELETE, OPTIONS"
|
||||
response["Access-Control-Allow-Headers"] = "Authorization, X-ArchiveBox-API-Key, Content-Type, X-CSRFToken"
|
||||
response["Access-Control-Max-Age"] = "600"
|
||||
return response
|
||||
|
||||
@@ -13,11 +13,10 @@ import signal_webhooks.utils
|
||||
|
||||
|
||||
class Migration(migrations.Migration):
|
||||
|
||||
initial = True
|
||||
|
||||
dependencies = [
|
||||
('auth', '0012_alter_user_first_name_max_length'),
|
||||
("auth", "0012_alter_user_first_name_max_length"),
|
||||
migrations.swappable_dependency(settings.AUTH_USER_MODEL),
|
||||
]
|
||||
|
||||
@@ -75,55 +74,165 @@ class Migration(migrations.Migration):
|
||||
reverse_sql="""
|
||||
DROP TABLE IF EXISTS api_outboundwebhook;
|
||||
DROP TABLE IF EXISTS api_apitoken;
|
||||
"""
|
||||
""",
|
||||
),
|
||||
],
|
||||
state_operations=[
|
||||
migrations.CreateModel(
|
||||
name='APIToken',
|
||||
name="APIToken",
|
||||
fields=[
|
||||
('id', models.UUIDField(default=uuid7, editable=False, primary_key=True, serialize=False, unique=True)),
|
||||
('created_at', models.DateTimeField(db_index=True, default=django.utils.timezone.now)),
|
||||
('modified_at', models.DateTimeField(auto_now=True)),
|
||||
('token', models.CharField(default=archivebox.api.models.generate_secret_token, max_length=32, unique=True)),
|
||||
('expires', models.DateTimeField(blank=True, null=True)),
|
||||
('created_by', models.ForeignKey(default=get_or_create_system_user_pk, on_delete=django.db.models.deletion.CASCADE, to=settings.AUTH_USER_MODEL)),
|
||||
("id", models.UUIDField(default=uuid7, editable=False, primary_key=True, serialize=False, unique=True)),
|
||||
("created_at", models.DateTimeField(db_index=True, default=django.utils.timezone.now)),
|
||||
("modified_at", models.DateTimeField(auto_now=True)),
|
||||
("token", models.CharField(default=archivebox.api.models.generate_secret_token, max_length=32, unique=True)),
|
||||
("expires", models.DateTimeField(blank=True, null=True)),
|
||||
(
|
||||
"created_by",
|
||||
models.ForeignKey(
|
||||
default=get_or_create_system_user_pk,
|
||||
on_delete=django.db.models.deletion.CASCADE,
|
||||
to=settings.AUTH_USER_MODEL,
|
||||
),
|
||||
),
|
||||
],
|
||||
options={
|
||||
'verbose_name': 'API Key',
|
||||
'verbose_name_plural': 'API Keys',
|
||||
'app_label': 'api',
|
||||
"verbose_name": "API Key",
|
||||
"verbose_name_plural": "API Keys",
|
||||
"app_label": "api",
|
||||
},
|
||||
),
|
||||
migrations.CreateModel(
|
||||
name='OutboundWebhook',
|
||||
name="OutboundWebhook",
|
||||
fields=[
|
||||
('id', models.UUIDField(default=uuid7, editable=False, primary_key=True, serialize=False, unique=True)),
|
||||
('created_at', models.DateTimeField(db_index=True, default=django.utils.timezone.now)),
|
||||
('modified_at', models.DateTimeField(auto_now=True)),
|
||||
('name', models.CharField(db_index=True, help_text='Webhook name.', max_length=255, unique=True, verbose_name='name')),
|
||||
('signal', models.CharField(choices=[('CREATE', 'Create'), ('UPDATE', 'Update'), ('DELETE', 'Delete'), ('M2M', 'M2M changed'), ('CREATE_OR_UPDATE', 'Create or Update'), ('CREATE_OR_DELETE', 'Create or Delete'), ('CREATE_OR_M2M', 'Create or M2M changed'), ('UPDATE_OR_DELETE', 'Update or Delete'), ('UPDATE_OR_M2M', 'Update or M2M changed'), ('DELETE_OR_M2M', 'Delete or M2M changed'), ('CREATE_UPDATE_OR_DELETE', 'Create, Update or Delete'), ('CREATE_UPDATE_OR_M2M', 'Create, Update or M2M changed'), ('CREATE_DELETE_OR_M2M', 'Create, Delete or M2M changed'), ('UPDATE_DELETE_OR_M2M', 'Update, Delete or M2M changed'), ('CREATE_UPDATE_DELETE_OR_M2M', 'Create, Update or Delete, or M2M changed')], help_text='Signal the webhook fires to.', max_length=255, verbose_name='signal')),
|
||||
('ref', models.CharField(db_index=True, help_text='Dot import notation to the model the webhook is for.', max_length=1023, validators=[signal_webhooks.utils.model_from_reference], verbose_name='referenced model')),
|
||||
('endpoint', models.URLField(help_text='Target endpoint for this webhook.', max_length=2047, verbose_name='endpoint')),
|
||||
('headers', models.JSONField(blank=True, default=dict, help_text='Headers to send with the webhook request.', validators=[signal_webhooks.utils.is_dict], verbose_name='headers')),
|
||||
('auth_token', signal_webhooks.fields.TokenField(blank=True, default='', help_text='Authentication token to use in an Authorization header.', max_length=8000, validators=[signal_webhooks.utils.decode_cipher_key], verbose_name='authentication token')),
|
||||
('enabled', models.BooleanField(default=True, help_text='Is this webhook enabled?', verbose_name='enabled')),
|
||||
('keep_last_response', models.BooleanField(default=False, help_text='Should the webhook keep a log of the latest response it got?', verbose_name='keep last response')),
|
||||
('created', models.DateTimeField(auto_now_add=True, help_text='When the webhook was created.', verbose_name='created')),
|
||||
('updated', models.DateTimeField(auto_now=True, help_text='When the webhook was last updated.', verbose_name='updated')),
|
||||
('last_response', models.CharField(blank=True, default='', help_text='Latest response to this webhook.', max_length=8000, verbose_name='last response')),
|
||||
('last_success', models.DateTimeField(default=None, help_text='When the webhook last succeeded.', null=True, verbose_name='last success')),
|
||||
('last_failure', models.DateTimeField(default=None, help_text='When the webhook last failed.', null=True, verbose_name='last failure')),
|
||||
('created_by', models.ForeignKey(default=get_or_create_system_user_pk, on_delete=django.db.models.deletion.CASCADE, to=settings.AUTH_USER_MODEL)),
|
||||
("id", models.UUIDField(default=uuid7, editable=False, primary_key=True, serialize=False, unique=True)),
|
||||
("created_at", models.DateTimeField(db_index=True, default=django.utils.timezone.now)),
|
||||
("modified_at", models.DateTimeField(auto_now=True)),
|
||||
(
|
||||
"name",
|
||||
models.CharField(db_index=True, help_text="Webhook name.", max_length=255, unique=True, verbose_name="name"),
|
||||
),
|
||||
(
|
||||
"signal",
|
||||
models.CharField(
|
||||
choices=[
|
||||
("CREATE", "Create"),
|
||||
("UPDATE", "Update"),
|
||||
("DELETE", "Delete"),
|
||||
("M2M", "M2M changed"),
|
||||
("CREATE_OR_UPDATE", "Create or Update"),
|
||||
("CREATE_OR_DELETE", "Create or Delete"),
|
||||
("CREATE_OR_M2M", "Create or M2M changed"),
|
||||
("UPDATE_OR_DELETE", "Update or Delete"),
|
||||
("UPDATE_OR_M2M", "Update or M2M changed"),
|
||||
("DELETE_OR_M2M", "Delete or M2M changed"),
|
||||
("CREATE_UPDATE_OR_DELETE", "Create, Update or Delete"),
|
||||
("CREATE_UPDATE_OR_M2M", "Create, Update or M2M changed"),
|
||||
("CREATE_DELETE_OR_M2M", "Create, Delete or M2M changed"),
|
||||
("UPDATE_DELETE_OR_M2M", "Update, Delete or M2M changed"),
|
||||
("CREATE_UPDATE_DELETE_OR_M2M", "Create, Update or Delete, or M2M changed"),
|
||||
],
|
||||
help_text="Signal the webhook fires to.",
|
||||
max_length=255,
|
||||
verbose_name="signal",
|
||||
),
|
||||
),
|
||||
(
|
||||
"ref",
|
||||
models.CharField(
|
||||
db_index=True,
|
||||
help_text="Dot import notation to the model the webhook is for.",
|
||||
max_length=1023,
|
||||
validators=[signal_webhooks.utils.model_from_reference],
|
||||
verbose_name="referenced model",
|
||||
),
|
||||
),
|
||||
(
|
||||
"endpoint",
|
||||
models.URLField(help_text="Target endpoint for this webhook.", max_length=2047, verbose_name="endpoint"),
|
||||
),
|
||||
(
|
||||
"headers",
|
||||
models.JSONField(
|
||||
blank=True,
|
||||
default=dict,
|
||||
help_text="Headers to send with the webhook request.",
|
||||
validators=[signal_webhooks.utils.is_dict],
|
||||
verbose_name="headers",
|
||||
),
|
||||
),
|
||||
(
|
||||
"auth_token",
|
||||
signal_webhooks.fields.TokenField(
|
||||
blank=True,
|
||||
default="",
|
||||
help_text="Authentication token to use in an Authorization header.",
|
||||
max_length=8000,
|
||||
validators=[signal_webhooks.utils.decode_cipher_key],
|
||||
verbose_name="authentication token",
|
||||
),
|
||||
),
|
||||
("enabled", models.BooleanField(default=True, help_text="Is this webhook enabled?", verbose_name="enabled")),
|
||||
(
|
||||
"keep_last_response",
|
||||
models.BooleanField(
|
||||
default=False,
|
||||
help_text="Should the webhook keep a log of the latest response it got?",
|
||||
verbose_name="keep last response",
|
||||
),
|
||||
),
|
||||
(
|
||||
"created",
|
||||
models.DateTimeField(auto_now_add=True, help_text="When the webhook was created.", verbose_name="created"),
|
||||
),
|
||||
(
|
||||
"updated",
|
||||
models.DateTimeField(auto_now=True, help_text="When the webhook was last updated.", verbose_name="updated"),
|
||||
),
|
||||
(
|
||||
"last_response",
|
||||
models.CharField(
|
||||
blank=True,
|
||||
default="",
|
||||
help_text="Latest response to this webhook.",
|
||||
max_length=8000,
|
||||
verbose_name="last response",
|
||||
),
|
||||
),
|
||||
(
|
||||
"last_success",
|
||||
models.DateTimeField(
|
||||
default=None,
|
||||
help_text="When the webhook last succeeded.",
|
||||
null=True,
|
||||
verbose_name="last success",
|
||||
),
|
||||
),
|
||||
(
|
||||
"last_failure",
|
||||
models.DateTimeField(
|
||||
default=None,
|
||||
help_text="When the webhook last failed.",
|
||||
null=True,
|
||||
verbose_name="last failure",
|
||||
),
|
||||
),
|
||||
(
|
||||
"created_by",
|
||||
models.ForeignKey(
|
||||
default=get_or_create_system_user_pk,
|
||||
on_delete=django.db.models.deletion.CASCADE,
|
||||
to=settings.AUTH_USER_MODEL,
|
||||
),
|
||||
),
|
||||
],
|
||||
options={
|
||||
'verbose_name': 'API Outbound Webhook',
|
||||
'app_label': 'api',
|
||||
"verbose_name": "API Outbound Webhook",
|
||||
"app_label": "api",
|
||||
},
|
||||
),
|
||||
migrations.AddConstraint(
|
||||
model_name='outboundwebhook',
|
||||
constraint=models.UniqueConstraint(fields=['ref', 'endpoint'], name='prevent_duplicate_hooks_api_outboundwebhook'),
|
||||
model_name="outboundwebhook",
|
||||
constraint=models.UniqueConstraint(fields=["ref", "endpoint"], name="prevent_duplicate_hooks_api_outboundwebhook"),
|
||||
),
|
||||
],
|
||||
),
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
__package__ = 'archivebox.api'
|
||||
__package__ = "archivebox.api"
|
||||
|
||||
import secrets
|
||||
from archivebox.uuid_compat import uuid7
|
||||
@@ -25,7 +25,7 @@ class APIToken(models.Model):
|
||||
expires = models.DateTimeField(null=True, blank=True)
|
||||
|
||||
class Meta(TypedModelMeta):
|
||||
app_label = 'api'
|
||||
app_label = "api"
|
||||
verbose_name = "API Key"
|
||||
verbose_name_plural = "API Keys"
|
||||
|
||||
@@ -34,7 +34,7 @@ class APIToken(models.Model):
|
||||
|
||||
@property
|
||||
def token_redacted(self):
|
||||
return f'************{self.token[-4:]}'
|
||||
return f"************{self.token[-4:]}"
|
||||
|
||||
def is_valid(self, for_date=None):
|
||||
return not self.expires or self.expires >= (for_date or timezone.now())
|
||||
@@ -47,8 +47,8 @@ class OutboundWebhook(WebhookBase):
|
||||
modified_at = models.DateTimeField(auto_now=True)
|
||||
|
||||
class Meta(WebhookBase.Meta):
|
||||
app_label = 'api'
|
||||
verbose_name = 'API Outbound Webhook'
|
||||
app_label = "api"
|
||||
verbose_name = "API Outbound Webhook"
|
||||
|
||||
def __str__(self) -> str:
|
||||
return f'[{self.id}] {self.ref} -> {self.endpoint}'
|
||||
return f"[{self.id}] {self.ref} -> {self.endpoint}"
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
__package__ = 'archivebox.api'
|
||||
__package__ = "archivebox.api"
|
||||
|
||||
from django.urls import path
|
||||
from django.views.generic.base import RedirectView
|
||||
@@ -6,12 +6,10 @@ from django.views.generic.base import RedirectView
|
||||
from .v1_api import urls as v1_api_urls
|
||||
|
||||
urlpatterns = [
|
||||
path("", RedirectView.as_view(url='/api/v1/docs')),
|
||||
|
||||
path("v1/", RedirectView.as_view(url='/api/v1/docs')),
|
||||
path("v1/", v1_api_urls),
|
||||
path("v1", RedirectView.as_view(url='/api/v1/docs')),
|
||||
|
||||
path("", RedirectView.as_view(url="/api/v1/docs")),
|
||||
path("v1/", RedirectView.as_view(url="/api/v1/docs")),
|
||||
path("v1/", v1_api_urls),
|
||||
path("v1", RedirectView.as_view(url="/api/v1/docs")),
|
||||
# ... v2 can be added here ...
|
||||
# path("v2/", v2_api_urls),
|
||||
# path("v2", RedirectView.as_view(url='/api/v2/docs')),
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
__package__ = 'archivebox.api'
|
||||
__package__ = "archivebox.api"
|
||||
|
||||
|
||||
from io import StringIO
|
||||
@@ -20,9 +20,9 @@ from archivebox.api.auth import API_AUTH_METHODS
|
||||
from archivebox.api.models import APIToken
|
||||
|
||||
|
||||
COMMIT_HASH = get_COMMIT_HASH() or 'unknown'
|
||||
COMMIT_HASH = get_COMMIT_HASH() or "unknown"
|
||||
|
||||
html_description=f'''
|
||||
html_description = f"""
|
||||
<h3>Welcome to your ArchiveBox server's REST API <code>[v1 ALPHA]</code> homepage!</h3>
|
||||
<br/>
|
||||
<i><b>WARNING: This API is still in an early development stage and may change!</b></i>
|
||||
@@ -35,47 +35,47 @@ html_description=f'''
|
||||
<li>📜 See the API source code: <a href="https://github.com/ArchiveBox/ArchiveBox/blob/dev/archivebox/api"><code>archivebox/api/</code></a></li>
|
||||
</ul>
|
||||
<small>Served by ArchiveBox v{VERSION} (<a href="https://github.com/ArchiveBox/ArchiveBox/commit/{COMMIT_HASH}"><code>{COMMIT_HASH[:8]}</code></a>), API powered by <a href="https://django-ninja.dev/"><code>django-ninja</code></a>.</small>
|
||||
'''
|
||||
"""
|
||||
|
||||
|
||||
def register_urls(api: NinjaAPI) -> NinjaAPI:
|
||||
api.add_router('/auth/', 'archivebox.api.v1_auth.router')
|
||||
api.add_router('/core/', 'archivebox.api.v1_core.router')
|
||||
api.add_router('/crawls/', 'archivebox.api.v1_crawls.router')
|
||||
api.add_router('/cli/', 'archivebox.api.v1_cli.router')
|
||||
api.add_router('/machine/', 'archivebox.api.v1_machine.router')
|
||||
api.add_router("/auth/", "archivebox.api.v1_auth.router")
|
||||
api.add_router("/core/", "archivebox.api.v1_core.router")
|
||||
api.add_router("/crawls/", "archivebox.api.v1_crawls.router")
|
||||
api.add_router("/cli/", "archivebox.api.v1_cli.router")
|
||||
api.add_router("/machine/", "archivebox.api.v1_machine.router")
|
||||
return api
|
||||
|
||||
|
||||
class NinjaAPIWithIOCapture(NinjaAPI):
|
||||
class NinjaAPIWithIOCapture(NinjaAPI):
|
||||
def create_temporal_response(self, request: HttpRequest) -> HttpResponse:
|
||||
stdout, stderr = StringIO(), StringIO()
|
||||
|
||||
with redirect_stderr(stderr):
|
||||
with redirect_stdout(stdout):
|
||||
setattr(request, 'stdout', stdout)
|
||||
setattr(request, 'stderr', stderr)
|
||||
setattr(request, "stdout", stdout)
|
||||
setattr(request, "stderr", stderr)
|
||||
|
||||
response = super().create_temporal_response(request)
|
||||
|
||||
# Diable caching of API responses entirely
|
||||
response['Cache-Control'] = 'no-store'
|
||||
# Disable caching of API responses entirely
|
||||
response["Cache-Control"] = "no-store"
|
||||
|
||||
# Add debug stdout and stderr headers to response
|
||||
response['X-ArchiveBox-Stdout'] = stdout.getvalue().replace('\n', '\\n')[:200]
|
||||
response['X-ArchiveBox-Stderr'] = stderr.getvalue().replace('\n', '\\n')[:200]
|
||||
response["X-ArchiveBox-Stdout"] = stdout.getvalue().replace("\n", "\\n")[:200]
|
||||
response["X-ArchiveBox-Stderr"] = stderr.getvalue().replace("\n", "\\n")[:200]
|
||||
# response['X-ArchiveBox-View'] = self.get_openapi_operation_id(request) or 'Unknown'
|
||||
|
||||
# Add Auth Headers to response
|
||||
api_token_attr = getattr(request, '_api_token', None)
|
||||
api_token_attr = getattr(request, "_api_token", None)
|
||||
api_token = api_token_attr if isinstance(api_token_attr, APIToken) else None
|
||||
token_expiry = api_token.expires.isoformat() if api_token and api_token.expires else 'Never'
|
||||
token_expiry = api_token.expires.isoformat() if api_token and api_token.expires else "Never"
|
||||
|
||||
response['X-ArchiveBox-Auth-Method'] = str(getattr(request, '_api_auth_method', 'None'))
|
||||
response['X-ArchiveBox-Auth-Expires'] = token_expiry
|
||||
response['X-ArchiveBox-Auth-Token-Id'] = str(api_token.id) if api_token else 'None'
|
||||
response['X-ArchiveBox-Auth-User-Id'] = str(request.user.pk) if getattr(request.user, 'pk', None) else 'None'
|
||||
response['X-ArchiveBox-Auth-User-Username'] = request.user.username if isinstance(request.user, User) else 'None'
|
||||
response["X-ArchiveBox-Auth-Method"] = str(getattr(request, "_api_auth_method", "None"))
|
||||
response["X-ArchiveBox-Auth-Expires"] = token_expiry
|
||||
response["X-ArchiveBox-Auth-Token-Id"] = str(api_token.id) if api_token else "None"
|
||||
response["X-ArchiveBox-Auth-User-Id"] = str(request.user.pk) if getattr(request.user, "pk", None) else "None"
|
||||
response["X-ArchiveBox-Auth-User-Username"] = request.user.username if isinstance(request.user, User) else "None"
|
||||
|
||||
# import ipdb; ipdb.set_trace()
|
||||
# print('RESPONDING NOW', response)
|
||||
@@ -84,7 +84,7 @@ class NinjaAPIWithIOCapture(NinjaAPI):
|
||||
|
||||
|
||||
api = NinjaAPIWithIOCapture(
|
||||
title='ArchiveBox API',
|
||||
title="ArchiveBox API",
|
||||
description=html_description,
|
||||
version=VERSION,
|
||||
auth=API_AUTH_METHODS,
|
||||
@@ -103,15 +103,15 @@ def generic_exception_handler(request, err):
|
||||
if isinstance(err, (ObjectDoesNotExist, EmptyResultSet, PermissionDenied)):
|
||||
status = 404
|
||||
|
||||
print(''.join(format_exception(err)))
|
||||
print("".join(format_exception(err)))
|
||||
|
||||
return api.create_response(
|
||||
request,
|
||||
{
|
||||
"succeeded": False,
|
||||
"message": f'{err.__class__.__name__}: {err}',
|
||||
"message": f"{err.__class__.__name__}: {err}",
|
||||
"errors": [
|
||||
''.join(format_exception(err)),
|
||||
"".join(format_exception(err)),
|
||||
# or send simpler parent-only traceback:
|
||||
# *([str(err.__context__)] if getattr(err, '__context__', None) else []),
|
||||
],
|
||||
@@ -120,7 +120,6 @@ def generic_exception_handler(request, err):
|
||||
)
|
||||
|
||||
|
||||
|
||||
# import orjson
|
||||
# from ninja.renderers import BaseRenderer
|
||||
# class ORJSONRenderer(BaseRenderer):
|
||||
|
||||
@@ -1,6 +1,5 @@
|
||||
__package__ = 'archivebox.api'
|
||||
__package__ = "archivebox.api"
|
||||
|
||||
from typing import Optional
|
||||
from django.http import HttpRequest
|
||||
|
||||
from ninja import Router, Schema
|
||||
@@ -8,16 +7,21 @@ from ninja import Router, Schema
|
||||
from archivebox.api.auth import auth_using_token, auth_using_password, get_or_create_api_token
|
||||
|
||||
|
||||
router = Router(tags=['Authentication'], auth=None)
|
||||
router = Router(tags=["Authentication"], auth=None)
|
||||
|
||||
|
||||
class PasswordAuthSchema(Schema):
|
||||
"""Schema for a /get_api_token request"""
|
||||
username: Optional[str] = None
|
||||
password: Optional[str] = None
|
||||
|
||||
username: str | None = None
|
||||
password: str | None = None
|
||||
|
||||
|
||||
@router.post("/get_api_token", auth=None, summary='Generate an API token for a given username & password (or currently logged-in user)') # auth=None because they are not authed yet
|
||||
@router.post(
|
||||
"/get_api_token",
|
||||
auth=None,
|
||||
summary="Generate an API token for a given username & password (or currently logged-in user)",
|
||||
) # auth=None because they are not authed yet
|
||||
def get_api_token(request: HttpRequest, auth_data: PasswordAuthSchema):
|
||||
user = auth_using_password(
|
||||
username=auth_data.username,
|
||||
@@ -35,17 +39,21 @@ def get_api_token(request: HttpRequest, auth_data: PasswordAuthSchema):
|
||||
"token": api_token.token,
|
||||
"expires": api_token.expires.isoformat() if api_token.expires else None,
|
||||
}
|
||||
|
||||
return {"success": False, "errors": ["Invalid credentials"]}
|
||||
|
||||
return {"success": False, "errors": ["Invalid credentials"]}
|
||||
|
||||
|
||||
class TokenAuthSchema(Schema):
|
||||
"""Schema for a /check_api_token request"""
|
||||
|
||||
token: str
|
||||
|
||||
|
||||
@router.post("/check_api_token", auth=None, summary='Validate an API token to make sure its valid and non-expired') # auth=None because they are not authed yet
|
||||
@router.post(
|
||||
"/check_api_token",
|
||||
auth=None,
|
||||
summary="Validate an API token to make sure its valid and non-expired",
|
||||
) # auth=None because they are not authed yet
|
||||
def check_api_token(request: HttpRequest, token_data: TokenAuthSchema):
|
||||
user = auth_using_token(
|
||||
token=token_data.token,
|
||||
@@ -53,5 +61,5 @@ def check_api_token(request: HttpRequest, token_data: TokenAuthSchema):
|
||||
)
|
||||
if user:
|
||||
return {"success": True, "user_id": str(user.pk)}
|
||||
|
||||
|
||||
return {"success": False, "user_id": None}
|
||||
|
||||
@@ -1,8 +1,8 @@
|
||||
__package__ = 'archivebox.api'
|
||||
__package__ = "archivebox.api"
|
||||
|
||||
import json
|
||||
from io import StringIO
|
||||
from typing import List, Dict, Any, Optional
|
||||
from typing import Any
|
||||
from enum import Enum
|
||||
|
||||
from django.http import HttpRequest
|
||||
@@ -16,44 +16,47 @@ from archivebox.config.common import ARCHIVING_CONFIG
|
||||
# from .auth import API_AUTH_METHODS
|
||||
|
||||
# router for API that exposes archivebox cli subcommands as REST endpoints
|
||||
router = Router(tags=['ArchiveBox CLI Sub-Commands'])
|
||||
router = Router(tags=["ArchiveBox CLI Sub-Commands"])
|
||||
|
||||
|
||||
# Schemas
|
||||
|
||||
JSONType = List[Any] | Dict[str, Any] | bool | int | str | None
|
||||
JSONType = list[Any] | dict[str, Any] | bool | int | str | None
|
||||
|
||||
|
||||
class CLICommandResponseSchema(Schema):
|
||||
success: bool
|
||||
errors: List[str]
|
||||
errors: list[str]
|
||||
result: JSONType
|
||||
result_format: str = 'str'
|
||||
result_format: str = "str"
|
||||
stdout: str
|
||||
stderr: str
|
||||
|
||||
|
||||
class FilterTypeChoices(str, Enum):
|
||||
exact = 'exact'
|
||||
substring = 'substring'
|
||||
regex = 'regex'
|
||||
domain = 'domain'
|
||||
tag = 'tag'
|
||||
timestamp = 'timestamp'
|
||||
exact = "exact"
|
||||
substring = "substring"
|
||||
regex = "regex"
|
||||
domain = "domain"
|
||||
tag = "tag"
|
||||
timestamp = "timestamp"
|
||||
|
||||
|
||||
class StatusChoices(str, Enum):
|
||||
indexed = 'indexed'
|
||||
archived = 'archived'
|
||||
unarchived = 'unarchived'
|
||||
present = 'present'
|
||||
valid = 'valid'
|
||||
invalid = 'invalid'
|
||||
duplicate = 'duplicate'
|
||||
orphaned = 'orphaned'
|
||||
corrupted = 'corrupted'
|
||||
unrecognized = 'unrecognized'
|
||||
indexed = "indexed"
|
||||
archived = "archived"
|
||||
unarchived = "unarchived"
|
||||
present = "present"
|
||||
valid = "valid"
|
||||
invalid = "invalid"
|
||||
duplicate = "duplicate"
|
||||
orphaned = "orphaned"
|
||||
corrupted = "corrupted"
|
||||
unrecognized = "unrecognized"
|
||||
|
||||
|
||||
class AddCommandSchema(Schema):
|
||||
urls: List[str]
|
||||
urls: list[str]
|
||||
tag: str = ""
|
||||
depth: int = 0
|
||||
parser: str = "auto"
|
||||
@@ -62,53 +65,54 @@ class AddCommandSchema(Schema):
|
||||
overwrite: bool = False
|
||||
index_only: bool = False
|
||||
|
||||
|
||||
class UpdateCommandSchema(Schema):
|
||||
resume: Optional[str] = None
|
||||
after: Optional[float] = 0
|
||||
before: Optional[float] = 999999999999999
|
||||
filter_type: Optional[str] = FilterTypeChoices.substring
|
||||
filter_patterns: Optional[List[str]] = ['https://example.com']
|
||||
resume: str | None = None
|
||||
after: float | None = 0
|
||||
before: float | None = 999999999999999
|
||||
filter_type: str | None = FilterTypeChoices.substring
|
||||
filter_patterns: list[str] | None = ["https://example.com"]
|
||||
batch_size: int = 100
|
||||
continuous: bool = False
|
||||
|
||||
|
||||
class ScheduleCommandSchema(Schema):
|
||||
import_path: Optional[str] = None
|
||||
import_path: str | None = None
|
||||
add: bool = False
|
||||
show: bool = False
|
||||
foreground: bool = False
|
||||
run_all: bool = False
|
||||
quiet: bool = False
|
||||
every: Optional[str] = None
|
||||
tag: str = ''
|
||||
every: str | None = None
|
||||
tag: str = ""
|
||||
depth: int = 0
|
||||
overwrite: bool = False
|
||||
update: bool = not ARCHIVING_CONFIG.ONLY_NEW
|
||||
clear: bool = False
|
||||
|
||||
|
||||
class ListCommandSchema(Schema):
|
||||
filter_patterns: Optional[List[str]] = ['https://example.com']
|
||||
filter_patterns: list[str] | None = ["https://example.com"]
|
||||
filter_type: str = FilterTypeChoices.substring
|
||||
status: StatusChoices = StatusChoices.indexed
|
||||
after: Optional[float] = 0
|
||||
before: Optional[float] = 999999999999999
|
||||
sort: str = 'bookmarked_at'
|
||||
after: float | None = 0
|
||||
before: float | None = 999999999999999
|
||||
sort: str = "bookmarked_at"
|
||||
as_json: bool = True
|
||||
as_html: bool = False
|
||||
as_csv: str | None = 'timestamp,url'
|
||||
as_csv: str | None = "timestamp,url"
|
||||
with_headers: bool = False
|
||||
|
||||
|
||||
class RemoveCommandSchema(Schema):
|
||||
delete: bool = True
|
||||
after: Optional[float] = 0
|
||||
before: Optional[float] = 999999999999999
|
||||
after: float | None = 0
|
||||
before: float | None = 999999999999999
|
||||
filter_type: str = FilterTypeChoices.exact
|
||||
filter_patterns: Optional[List[str]] = ['https://example.com']
|
||||
filter_patterns: list[str] | None = ["https://example.com"]
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
@router.post("/add", response=CLICommandResponseSchema, summary='archivebox add [args] [urls]')
|
||||
@router.post("/add", response=CLICommandResponseSchema, summary="archivebox add [args] [urls]")
|
||||
def cli_add(request: HttpRequest, args: AddCommandSchema):
|
||||
from archivebox.cli.archivebox_add import add
|
||||
|
||||
@@ -125,30 +129,30 @@ def cli_add(request: HttpRequest, args: AddCommandSchema):
|
||||
created_by_id=request.user.pk,
|
||||
)
|
||||
|
||||
snapshot_ids = [str(snapshot_id) for snapshot_id in snapshots.values_list('id', flat=True)]
|
||||
snapshot_ids = [str(snapshot_id) for snapshot_id in snapshots.values_list("id", flat=True)]
|
||||
result_payload = {
|
||||
"crawl_id": str(crawl.id),
|
||||
"num_snapshots": len(snapshot_ids),
|
||||
"snapshot_ids": snapshot_ids,
|
||||
"queued_urls": args.urls,
|
||||
}
|
||||
stdout = getattr(request, 'stdout', None)
|
||||
stderr = getattr(request, 'stderr', None)
|
||||
stdout = getattr(request, "stdout", None)
|
||||
stderr = getattr(request, "stderr", None)
|
||||
|
||||
return {
|
||||
"success": True,
|
||||
"errors": [],
|
||||
"result": result_payload,
|
||||
"result_format": "json",
|
||||
"stdout": ansi_to_html(stdout.getvalue().strip()) if isinstance(stdout, StringIO) else '',
|
||||
"stderr": ansi_to_html(stderr.getvalue().strip()) if isinstance(stderr, StringIO) else '',
|
||||
"stdout": ansi_to_html(stdout.getvalue().strip()) if isinstance(stdout, StringIO) else "",
|
||||
"stderr": ansi_to_html(stderr.getvalue().strip()) if isinstance(stderr, StringIO) else "",
|
||||
}
|
||||
|
||||
|
||||
@router.post("/update", response=CLICommandResponseSchema, summary='archivebox update [args] [filter_patterns]')
|
||||
@router.post("/update", response=CLICommandResponseSchema, summary="archivebox update [args] [filter_patterns]")
|
||||
def cli_update(request: HttpRequest, args: UpdateCommandSchema):
|
||||
from archivebox.cli.archivebox_update import update
|
||||
|
||||
|
||||
result = update(
|
||||
filter_patterns=args.filter_patterns or [],
|
||||
filter_type=args.filter_type or FilterTypeChoices.substring,
|
||||
@@ -158,21 +162,21 @@ def cli_update(request: HttpRequest, args: UpdateCommandSchema):
|
||||
batch_size=args.batch_size,
|
||||
continuous=args.continuous,
|
||||
)
|
||||
stdout = getattr(request, 'stdout', None)
|
||||
stderr = getattr(request, 'stderr', None)
|
||||
stdout = getattr(request, "stdout", None)
|
||||
stderr = getattr(request, "stderr", None)
|
||||
return {
|
||||
"success": True,
|
||||
"errors": [],
|
||||
"result": result,
|
||||
"stdout": ansi_to_html(stdout.getvalue().strip()) if isinstance(stdout, StringIO) else '',
|
||||
"stderr": ansi_to_html(stderr.getvalue().strip()) if isinstance(stderr, StringIO) else '',
|
||||
"stdout": ansi_to_html(stdout.getvalue().strip()) if isinstance(stdout, StringIO) else "",
|
||||
"stderr": ansi_to_html(stderr.getvalue().strip()) if isinstance(stderr, StringIO) else "",
|
||||
}
|
||||
|
||||
|
||||
@router.post("/schedule", response=CLICommandResponseSchema, summary='archivebox schedule [args] [import_path]')
|
||||
@router.post("/schedule", response=CLICommandResponseSchema, summary="archivebox schedule [args] [import_path]")
|
||||
def cli_schedule(request: HttpRequest, args: ScheduleCommandSchema):
|
||||
from archivebox.cli.archivebox_schedule import schedule
|
||||
|
||||
|
||||
result = schedule(
|
||||
import_path=args.import_path,
|
||||
add=args.add,
|
||||
@@ -188,23 +192,22 @@ def cli_schedule(request: HttpRequest, args: ScheduleCommandSchema):
|
||||
update=args.update,
|
||||
)
|
||||
|
||||
stdout = getattr(request, 'stdout', None)
|
||||
stderr = getattr(request, 'stderr', None)
|
||||
stdout = getattr(request, "stdout", None)
|
||||
stderr = getattr(request, "stderr", None)
|
||||
return {
|
||||
"success": True,
|
||||
"errors": [],
|
||||
"result": result,
|
||||
"result_format": "json",
|
||||
"stdout": ansi_to_html(stdout.getvalue().strip()) if isinstance(stdout, StringIO) else '',
|
||||
"stderr": ansi_to_html(stderr.getvalue().strip()) if isinstance(stderr, StringIO) else '',
|
||||
"stdout": ansi_to_html(stdout.getvalue().strip()) if isinstance(stdout, StringIO) else "",
|
||||
"stderr": ansi_to_html(stderr.getvalue().strip()) if isinstance(stderr, StringIO) else "",
|
||||
}
|
||||
|
||||
|
||||
|
||||
@router.post("/search", response=CLICommandResponseSchema, summary='archivebox search [args] [filter_patterns]')
|
||||
@router.post("/search", response=CLICommandResponseSchema, summary="archivebox search [args] [filter_patterns]")
|
||||
def cli_search(request: HttpRequest, args: ListCommandSchema):
|
||||
from archivebox.cli.archivebox_search import search
|
||||
|
||||
|
||||
result = search(
|
||||
filter_patterns=args.filter_patterns,
|
||||
filter_type=args.filter_type,
|
||||
@@ -218,7 +221,7 @@ def cli_search(request: HttpRequest, args: ListCommandSchema):
|
||||
with_headers=args.with_headers,
|
||||
)
|
||||
|
||||
result_format = 'txt'
|
||||
result_format = "txt"
|
||||
if args.as_json:
|
||||
result_format = "json"
|
||||
result = json.loads(result)
|
||||
@@ -227,20 +230,19 @@ def cli_search(request: HttpRequest, args: ListCommandSchema):
|
||||
elif args.as_csv:
|
||||
result_format = "csv"
|
||||
|
||||
stdout = getattr(request, 'stdout', None)
|
||||
stderr = getattr(request, 'stderr', None)
|
||||
stdout = getattr(request, "stdout", None)
|
||||
stderr = getattr(request, "stderr", None)
|
||||
return {
|
||||
"success": True,
|
||||
"errors": [],
|
||||
"result": result,
|
||||
"result_format": result_format,
|
||||
"stdout": ansi_to_html(stdout.getvalue().strip()) if isinstance(stdout, StringIO) else '',
|
||||
"stderr": ansi_to_html(stderr.getvalue().strip()) if isinstance(stderr, StringIO) else '',
|
||||
"stdout": ansi_to_html(stdout.getvalue().strip()) if isinstance(stdout, StringIO) else "",
|
||||
"stderr": ansi_to_html(stderr.getvalue().strip()) if isinstance(stderr, StringIO) else "",
|
||||
}
|
||||
|
||||
|
||||
|
||||
@router.post("/remove", response=CLICommandResponseSchema, summary='archivebox remove [args] [filter_patterns]')
|
||||
@router.post("/remove", response=CLICommandResponseSchema, summary="archivebox remove [args] [filter_patterns]")
|
||||
def cli_remove(request: HttpRequest, args: RemoveCommandSchema):
|
||||
from archivebox.cli.archivebox_remove import remove
|
||||
from archivebox.cli.archivebox_search import get_snapshots
|
||||
@@ -253,10 +255,10 @@ def cli_remove(request: HttpRequest, args: RemoveCommandSchema):
|
||||
after=args.after,
|
||||
before=args.before,
|
||||
)
|
||||
removed_snapshot_ids = [str(snapshot_id) for snapshot_id in snapshots_to_remove.values_list('id', flat=True)]
|
||||
|
||||
removed_snapshot_ids = [str(snapshot_id) for snapshot_id in snapshots_to_remove.values_list("id", flat=True)]
|
||||
|
||||
remove(
|
||||
yes=True, # no way to interactively ask for confirmation via API, so we force yes
|
||||
yes=True, # no way to interactively ask for confirmation via API, so we force yes
|
||||
delete=args.delete,
|
||||
snapshots=snapshots_to_remove,
|
||||
before=args.before,
|
||||
@@ -270,14 +272,13 @@ def cli_remove(request: HttpRequest, args: RemoveCommandSchema):
|
||||
"removed_snapshot_ids": removed_snapshot_ids,
|
||||
"remaining_snapshots": Snapshot.objects.count(),
|
||||
}
|
||||
stdout = getattr(request, 'stdout', None)
|
||||
stderr = getattr(request, 'stderr', None)
|
||||
stdout = getattr(request, "stdout", None)
|
||||
stderr = getattr(request, "stderr", None)
|
||||
return {
|
||||
"success": True,
|
||||
"errors": [],
|
||||
"result": result,
|
||||
"result_format": "json",
|
||||
"stdout": ansi_to_html(stdout.getvalue().strip()) if isinstance(stdout, StringIO) else '',
|
||||
"stderr": ansi_to_html(stderr.getvalue().strip()) if isinstance(stderr, StringIO) else '',
|
||||
"stdout": ansi_to_html(stdout.getvalue().strip()) if isinstance(stdout, StringIO) else "",
|
||||
"stderr": ansi_to_html(stderr.getvalue().strip()) if isinstance(stderr, StringIO) else "",
|
||||
}
|
||||
|
||||
|
||||
@@ -1,11 +1,13 @@
|
||||
__package__ = 'archivebox.api'
|
||||
__package__ = "archivebox.api"
|
||||
|
||||
import math
|
||||
from collections import defaultdict
|
||||
from uuid import UUID
|
||||
from typing import List, Optional, Union, Any, Annotated
|
||||
from typing import Union, Any, Annotated
|
||||
from datetime import datetime
|
||||
|
||||
from django.db.models import Model, Q
|
||||
from django.db.models import Model, Q, Sum
|
||||
from django.db.models.functions import Coalesce
|
||||
from django.conf import settings
|
||||
from django.http import HttpRequest, HttpResponse
|
||||
from django.core.exceptions import ValidationError
|
||||
@@ -39,7 +41,7 @@ from archivebox.crawls.models import Crawl
|
||||
from archivebox.api.v1_crawls import CrawlSchema
|
||||
|
||||
|
||||
router = Router(tags=['Core Models'])
|
||||
router = Router(tags=["Core Models"])
|
||||
|
||||
|
||||
class CustomPagination(PaginationBase):
|
||||
@@ -49,13 +51,14 @@ class CustomPagination(PaginationBase):
|
||||
page: int = 0
|
||||
|
||||
class Output(PaginationBase.Output):
|
||||
count: int
|
||||
total_items: int
|
||||
total_pages: int
|
||||
page: int
|
||||
limit: int
|
||||
offset: int
|
||||
num_items: int
|
||||
items: List[Any]
|
||||
items: list[Any]
|
||||
|
||||
def paginate_queryset(self, queryset, pagination: Input, request: HttpRequest, **params):
|
||||
limit = min(pagination.limit, 500)
|
||||
@@ -65,27 +68,29 @@ class CustomPagination(PaginationBase):
|
||||
current_page = math.ceil(offset / (limit + 1))
|
||||
items = queryset[offset : offset + limit]
|
||||
return {
|
||||
'total_items': total,
|
||||
'total_pages': total_pages,
|
||||
'page': current_page,
|
||||
'limit': limit,
|
||||
'offset': offset,
|
||||
'num_items': len(items),
|
||||
'items': items,
|
||||
"count": total,
|
||||
"total_items": total,
|
||||
"total_pages": total_pages,
|
||||
"page": current_page,
|
||||
"limit": limit,
|
||||
"offset": offset,
|
||||
"num_items": len(items),
|
||||
"items": items,
|
||||
}
|
||||
|
||||
|
||||
### ArchiveResult #########################################################################
|
||||
|
||||
|
||||
class MinimalArchiveResultSchema(Schema):
|
||||
TYPE: str = 'core.models.ArchiveResult'
|
||||
TYPE: str = "core.models.ArchiveResult"
|
||||
id: UUID
|
||||
created_at: datetime | None
|
||||
modified_at: datetime | None
|
||||
created_by_id: str
|
||||
created_by_username: str
|
||||
status: str
|
||||
retry_at: datetime | None
|
||||
retry_at: datetime | None = None
|
||||
plugin: str
|
||||
hook_name: str
|
||||
process_id: UUID | None
|
||||
@@ -93,8 +98,8 @@ class MinimalArchiveResultSchema(Schema):
|
||||
cmd: list[str] | None
|
||||
pwd: str | None
|
||||
output_str: str
|
||||
output_json: dict | None
|
||||
output_files: dict | None
|
||||
output_json: dict[str, Any] | None
|
||||
output_files: dict[str, dict[str, Any]] | None
|
||||
output_size: int
|
||||
output_mimetypes: str
|
||||
start_ts: datetime | None
|
||||
@@ -108,13 +113,34 @@ class MinimalArchiveResultSchema(Schema):
|
||||
def resolve_created_by_username(obj) -> str:
|
||||
return obj.created_by.username
|
||||
|
||||
@staticmethod
|
||||
def resolve_output_files(obj):
|
||||
return obj.output_file_map()
|
||||
|
||||
@staticmethod
|
||||
def resolve_output_mimetypes(obj) -> str:
|
||||
mime_sizes: dict[str, int] = defaultdict(int)
|
||||
for metadata in obj.output_file_map().values():
|
||||
if not isinstance(metadata, dict):
|
||||
continue
|
||||
mimetype = str(metadata.get("mimetype") or "").strip()
|
||||
try:
|
||||
size = max(int(metadata.get("size") or 0), 0)
|
||||
except (TypeError, ValueError):
|
||||
size = 0
|
||||
if mimetype and size:
|
||||
mime_sizes[mimetype] += size
|
||||
if mime_sizes:
|
||||
return ",".join(mime for mime, _size in sorted(mime_sizes.items(), key=lambda item: item[1], reverse=True))
|
||||
return obj.output_mimetypes or ""
|
||||
|
||||
|
||||
class ArchiveResultSchema(MinimalArchiveResultSchema):
|
||||
TYPE: str = 'core.models.ArchiveResult'
|
||||
TYPE: str = "core.models.ArchiveResult"
|
||||
snapshot_id: UUID
|
||||
snapshot_timestamp: str
|
||||
snapshot_url: str
|
||||
snapshot_tags: List[str]
|
||||
snapshot_tags: list[str]
|
||||
|
||||
@staticmethod
|
||||
def resolve_snapshot_timestamp(obj):
|
||||
@@ -134,25 +160,39 @@ class ArchiveResultSchema(MinimalArchiveResultSchema):
|
||||
|
||||
|
||||
class ArchiveResultFilterSchema(FilterSchema):
|
||||
id: Annotated[Optional[str], FilterLookup(['id__startswith', 'snapshot__id__startswith', 'snapshot__timestamp__startswith'])] = None
|
||||
search: Annotated[Optional[str], FilterLookup(['snapshot__url__icontains', 'snapshot__title__icontains', 'snapshot__tags__name__icontains', 'plugin', 'output_str__icontains', 'id__startswith', 'snapshot__id__startswith', 'snapshot__timestamp__startswith'])] = None
|
||||
snapshot_id: Annotated[Optional[str], FilterLookup(['snapshot__id__startswith', 'snapshot__timestamp__startswith'])] = None
|
||||
snapshot_url: Annotated[Optional[str], FilterLookup('snapshot__url__icontains')] = None
|
||||
snapshot_tag: Annotated[Optional[str], FilterLookup('snapshot__tags__name__icontains')] = None
|
||||
status: Annotated[Optional[str], FilterLookup('status')] = None
|
||||
output_str: Annotated[Optional[str], FilterLookup('output_str__icontains')] = None
|
||||
plugin: Annotated[Optional[str], FilterLookup('plugin__icontains')] = None
|
||||
hook_name: Annotated[Optional[str], FilterLookup('hook_name__icontains')] = None
|
||||
process_id: Annotated[Optional[str], FilterLookup('process__id__startswith')] = None
|
||||
cmd: Annotated[Optional[str], FilterLookup('cmd__0__icontains')] = None
|
||||
pwd: Annotated[Optional[str], FilterLookup('pwd__icontains')] = None
|
||||
cmd_version: Annotated[Optional[str], FilterLookup('cmd_version')] = None
|
||||
created_at: Annotated[Optional[datetime], FilterLookup('created_at')] = None
|
||||
created_at__gte: Annotated[Optional[datetime], FilterLookup('created_at__gte')] = None
|
||||
created_at__lt: Annotated[Optional[datetime], FilterLookup('created_at__lt')] = None
|
||||
id: Annotated[str | None, FilterLookup(["id__startswith", "snapshot__id__startswith", "snapshot__timestamp__startswith"])] = None
|
||||
search: Annotated[
|
||||
str | None,
|
||||
FilterLookup(
|
||||
[
|
||||
"snapshot__url__icontains",
|
||||
"snapshot__title__icontains",
|
||||
"snapshot__tags__name__icontains",
|
||||
"plugin",
|
||||
"output_str__icontains",
|
||||
"id__startswith",
|
||||
"snapshot__id__startswith",
|
||||
"snapshot__timestamp__startswith",
|
||||
],
|
||||
),
|
||||
] = None
|
||||
snapshot_id: Annotated[str | None, FilterLookup(["snapshot__id__startswith", "snapshot__timestamp__startswith"])] = None
|
||||
snapshot_url: Annotated[str | None, FilterLookup("snapshot__url__icontains")] = None
|
||||
snapshot_tag: Annotated[str | None, FilterLookup("snapshot__tags__name__icontains")] = None
|
||||
status: Annotated[str | None, FilterLookup("status")] = None
|
||||
output_str: Annotated[str | None, FilterLookup("output_str__icontains")] = None
|
||||
plugin: Annotated[str | None, FilterLookup("plugin__icontains")] = None
|
||||
hook_name: Annotated[str | None, FilterLookup("hook_name__icontains")] = None
|
||||
process_id: Annotated[str | None, FilterLookup("process__id__startswith")] = None
|
||||
cmd: Annotated[str | None, FilterLookup("cmd__0__icontains")] = None
|
||||
pwd: Annotated[str | None, FilterLookup("pwd__icontains")] = None
|
||||
cmd_version: Annotated[str | None, FilterLookup("cmd_version")] = None
|
||||
created_at: Annotated[datetime | None, FilterLookup("created_at")] = None
|
||||
created_at__gte: Annotated[datetime | None, FilterLookup("created_at__gte")] = None
|
||||
created_at__lt: Annotated[datetime | None, FilterLookup("created_at__lt")] = None
|
||||
|
||||
|
||||
@router.get("/archiveresults", response=List[ArchiveResultSchema], url_name="get_archiveresult")
|
||||
@router.get("/archiveresults", response=list[ArchiveResultSchema], url_name="get_archiveresult")
|
||||
@paginate(CustomPagination)
|
||||
def get_archiveresults(request: HttpRequest, filters: Query[ArchiveResultFilterSchema]):
|
||||
"""List all ArchiveResult entries matching these filters."""
|
||||
@@ -167,8 +207,9 @@ def get_archiveresult(request: HttpRequest, archiveresult_id: str):
|
||||
|
||||
### Snapshot #########################################################################
|
||||
|
||||
|
||||
class SnapshotSchema(Schema):
|
||||
TYPE: str = 'core.models.Snapshot'
|
||||
TYPE: str = "core.models.Snapshot"
|
||||
id: UUID
|
||||
created_by_id: str
|
||||
created_by_username: str
|
||||
@@ -177,14 +218,16 @@ class SnapshotSchema(Schema):
|
||||
status: str
|
||||
retry_at: datetime | None
|
||||
bookmarked_at: datetime
|
||||
downloaded_at: Optional[datetime]
|
||||
downloaded_at: datetime | None
|
||||
url: str
|
||||
tags: List[str]
|
||||
title: Optional[str]
|
||||
tags: list[str]
|
||||
title: str | None
|
||||
timestamp: str
|
||||
archive_path: str
|
||||
archive_size: int
|
||||
output_size: int
|
||||
num_archiveresults: int
|
||||
archiveresults: List[MinimalArchiveResultSchema]
|
||||
archiveresults: list[MinimalArchiveResultSchema]
|
||||
|
||||
@staticmethod
|
||||
def resolve_created_by_id(obj):
|
||||
@@ -198,13 +241,21 @@ class SnapshotSchema(Schema):
|
||||
def resolve_tags(obj):
|
||||
return sorted(tag.name for tag in obj.tags.all())
|
||||
|
||||
@staticmethod
|
||||
def resolve_archive_size(obj):
|
||||
return int(getattr(obj, "output_size_sum", obj.archive_size) or 0)
|
||||
|
||||
@staticmethod
|
||||
def resolve_output_size(obj):
|
||||
return SnapshotSchema.resolve_archive_size(obj)
|
||||
|
||||
@staticmethod
|
||||
def resolve_num_archiveresults(obj, context):
|
||||
return obj.archiveresult_set.all().distinct().count()
|
||||
|
||||
@staticmethod
|
||||
def resolve_archiveresults(obj, context):
|
||||
if bool(getattr(context['request'], 'with_archiveresults', False)):
|
||||
if bool(getattr(context["request"], "with_archiveresults", False)):
|
||||
return obj.archiveresult_set.all().distinct()
|
||||
return ArchiveResult.objects.none()
|
||||
|
||||
@@ -212,16 +263,16 @@ class SnapshotSchema(Schema):
|
||||
class SnapshotUpdateSchema(Schema):
|
||||
status: str | None = None
|
||||
retry_at: datetime | None = None
|
||||
tags: Optional[List[str]] = None
|
||||
tags: list[str] | None = None
|
||||
|
||||
|
||||
class SnapshotCreateSchema(Schema):
|
||||
url: str
|
||||
crawl_id: Optional[str] = None
|
||||
crawl_id: str | None = None
|
||||
depth: int = 0
|
||||
title: Optional[str] = None
|
||||
tags: Optional[List[str]] = None
|
||||
status: Optional[str] = None
|
||||
title: str | None = None
|
||||
tags: list[str] | None = None
|
||||
status: str | None = None
|
||||
|
||||
|
||||
class SnapshotDeleteResponseSchema(Schema):
|
||||
@@ -231,77 +282,82 @@ class SnapshotDeleteResponseSchema(Schema):
|
||||
deleted_count: int
|
||||
|
||||
|
||||
def normalize_tag_list(tags: Optional[List[str]] = None) -> List[str]:
|
||||
def normalize_tag_list(tags: list[str] | None = None) -> list[str]:
|
||||
return [tag.strip() for tag in (tags or []) if tag and tag.strip()]
|
||||
|
||||
|
||||
class SnapshotFilterSchema(FilterSchema):
|
||||
id: Annotated[Optional[str], FilterLookup(['id__icontains', 'timestamp__startswith'])] = None
|
||||
created_by_id: Annotated[Optional[str], FilterLookup('crawl__created_by_id')] = None
|
||||
created_by_username: Annotated[Optional[str], FilterLookup('crawl__created_by__username__icontains')] = None
|
||||
created_at__gte: Annotated[Optional[datetime], FilterLookup('created_at__gte')] = None
|
||||
created_at__lt: Annotated[Optional[datetime], FilterLookup('created_at__lt')] = None
|
||||
created_at: Annotated[Optional[datetime], FilterLookup('created_at')] = None
|
||||
modified_at: Annotated[Optional[datetime], FilterLookup('modified_at')] = None
|
||||
modified_at__gte: Annotated[Optional[datetime], FilterLookup('modified_at__gte')] = None
|
||||
modified_at__lt: Annotated[Optional[datetime], FilterLookup('modified_at__lt')] = None
|
||||
search: Annotated[Optional[str], FilterLookup(['url__icontains', 'title__icontains', 'tags__name__icontains', 'id__icontains', 'timestamp__startswith'])] = None
|
||||
url: Annotated[Optional[str], FilterLookup('url')] = None
|
||||
tag: Annotated[Optional[str], FilterLookup('tags__name')] = None
|
||||
title: Annotated[Optional[str], FilterLookup('title__icontains')] = None
|
||||
timestamp: Annotated[Optional[str], FilterLookup('timestamp__startswith')] = None
|
||||
bookmarked_at__gte: Annotated[Optional[datetime], FilterLookup('bookmarked_at__gte')] = None
|
||||
bookmarked_at__lt: Annotated[Optional[datetime], FilterLookup('bookmarked_at__lt')] = None
|
||||
id: Annotated[str | None, FilterLookup(["id__icontains", "timestamp__startswith"])] = None
|
||||
created_by_id: Annotated[str | None, FilterLookup("crawl__created_by_id")] = None
|
||||
created_by_username: Annotated[str | None, FilterLookup("crawl__created_by__username__icontains")] = None
|
||||
created_at__gte: Annotated[datetime | None, FilterLookup("created_at__gte")] = None
|
||||
created_at__lt: Annotated[datetime | None, FilterLookup("created_at__lt")] = None
|
||||
created_at: Annotated[datetime | None, FilterLookup("created_at")] = None
|
||||
modified_at: Annotated[datetime | None, FilterLookup("modified_at")] = None
|
||||
modified_at__gte: Annotated[datetime | None, FilterLookup("modified_at__gte")] = None
|
||||
modified_at__lt: Annotated[datetime | None, FilterLookup("modified_at__lt")] = None
|
||||
search: Annotated[
|
||||
str | None,
|
||||
FilterLookup(["url__icontains", "title__icontains", "tags__name__icontains", "id__icontains", "timestamp__startswith"]),
|
||||
] = None
|
||||
url: Annotated[str | None, FilterLookup("url")] = None
|
||||
tag: Annotated[str | None, FilterLookup("tags__name")] = None
|
||||
title: Annotated[str | None, FilterLookup("title__icontains")] = None
|
||||
timestamp: Annotated[str | None, FilterLookup("timestamp__startswith")] = None
|
||||
bookmarked_at__gte: Annotated[datetime | None, FilterLookup("bookmarked_at__gte")] = None
|
||||
bookmarked_at__lt: Annotated[datetime | None, FilterLookup("bookmarked_at__lt")] = None
|
||||
|
||||
|
||||
@router.get("/snapshots", response=List[SnapshotSchema], url_name="get_snapshots")
|
||||
@router.get("/snapshots", response=list[SnapshotSchema], url_name="get_snapshots")
|
||||
@paginate(CustomPagination)
|
||||
def get_snapshots(request: HttpRequest, filters: Query[SnapshotFilterSchema], with_archiveresults: bool = False):
|
||||
"""List all Snapshot entries matching these filters."""
|
||||
setattr(request, 'with_archiveresults', with_archiveresults)
|
||||
return filters.filter(Snapshot.objects.all()).distinct()
|
||||
setattr(request, "with_archiveresults", with_archiveresults)
|
||||
queryset = Snapshot.objects.annotate(output_size_sum=Coalesce(Sum("archiveresult__output_size"), 0))
|
||||
return filters.filter(queryset).distinct()
|
||||
|
||||
|
||||
@router.get("/snapshot/{snapshot_id}", response=SnapshotSchema, url_name="get_snapshot")
|
||||
def get_snapshot(request: HttpRequest, snapshot_id: str, with_archiveresults: bool = True):
|
||||
"""Get a specific Snapshot by id."""
|
||||
setattr(request, 'with_archiveresults', with_archiveresults)
|
||||
setattr(request, "with_archiveresults", with_archiveresults)
|
||||
queryset = Snapshot.objects.annotate(output_size_sum=Coalesce(Sum("archiveresult__output_size"), 0))
|
||||
try:
|
||||
return Snapshot.objects.get(Q(id__startswith=snapshot_id) | Q(timestamp__startswith=snapshot_id))
|
||||
return queryset.get(Q(id__startswith=snapshot_id) | Q(timestamp__startswith=snapshot_id))
|
||||
except Snapshot.DoesNotExist:
|
||||
return Snapshot.objects.get(Q(id__icontains=snapshot_id))
|
||||
return queryset.get(Q(id__icontains=snapshot_id))
|
||||
|
||||
|
||||
@router.post("/snapshots", response=SnapshotSchema, url_name="create_snapshot")
|
||||
def create_snapshot(request: HttpRequest, data: SnapshotCreateSchema):
|
||||
tags = normalize_tag_list(data.tags)
|
||||
if data.status is not None and data.status not in Snapshot.StatusChoices.values:
|
||||
raise HttpError(400, f'Invalid status: {data.status}')
|
||||
raise HttpError(400, f"Invalid status: {data.status}")
|
||||
if not data.url.strip():
|
||||
raise HttpError(400, 'URL is required')
|
||||
raise HttpError(400, "URL is required")
|
||||
if data.depth not in (0, 1, 2, 3, 4):
|
||||
raise HttpError(400, 'depth must be between 0 and 4')
|
||||
raise HttpError(400, "depth must be between 0 and 4")
|
||||
|
||||
if data.crawl_id:
|
||||
crawl = Crawl.objects.get(id__icontains=data.crawl_id)
|
||||
crawl_tags = normalize_tag_list(crawl.tags_str.split(','))
|
||||
crawl_tags = normalize_tag_list(crawl.tags_str.split(","))
|
||||
tags = tags or crawl_tags
|
||||
else:
|
||||
crawl = Crawl.objects.create(
|
||||
urls=data.url,
|
||||
max_depth=max(data.depth, 0),
|
||||
tags_str=','.join(tags),
|
||||
tags_str=",".join(tags),
|
||||
status=Crawl.StatusChoices.QUEUED,
|
||||
retry_at=timezone.now(),
|
||||
created_by=request.user if isinstance(request.user, User) else None,
|
||||
)
|
||||
|
||||
snapshot_defaults = {
|
||||
'depth': data.depth,
|
||||
'title': data.title,
|
||||
'timestamp': str(timezone.now().timestamp()),
|
||||
'status': data.status or Snapshot.StatusChoices.QUEUED,
|
||||
'retry_at': timezone.now(),
|
||||
"depth": data.depth,
|
||||
"title": data.title,
|
||||
"timestamp": str(timezone.now().timestamp()),
|
||||
"status": data.status or Snapshot.StatusChoices.QUEUED,
|
||||
"retry_at": timezone.now(),
|
||||
}
|
||||
snapshot, _ = Snapshot.objects.get_or_create(
|
||||
url=data.url,
|
||||
@@ -309,17 +365,17 @@ def create_snapshot(request: HttpRequest, data: SnapshotCreateSchema):
|
||||
defaults=snapshot_defaults,
|
||||
)
|
||||
|
||||
update_fields: List[str] = []
|
||||
update_fields: list[str] = []
|
||||
if data.title is not None and snapshot.title != data.title:
|
||||
snapshot.title = data.title
|
||||
update_fields.append('title')
|
||||
update_fields.append("title")
|
||||
if data.status is not None and snapshot.status != data.status:
|
||||
if data.status not in Snapshot.StatusChoices.values:
|
||||
raise HttpError(400, f'Invalid status: {data.status}')
|
||||
raise HttpError(400, f"Invalid status: {data.status}")
|
||||
snapshot.status = data.status
|
||||
update_fields.append('status')
|
||||
update_fields.append("status")
|
||||
if update_fields:
|
||||
update_fields.append('modified_at')
|
||||
update_fields.append("modified_at")
|
||||
snapshot.save(update_fields=update_fields)
|
||||
|
||||
if tags:
|
||||
@@ -330,7 +386,7 @@ def create_snapshot(request: HttpRequest, data: SnapshotCreateSchema):
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
setattr(request, 'with_archiveresults', False)
|
||||
setattr(request, "with_archiveresults", False)
|
||||
return snapshot
|
||||
|
||||
|
||||
@@ -343,26 +399,26 @@ def patch_snapshot(request: HttpRequest, snapshot_id: str, data: SnapshotUpdateS
|
||||
snapshot = Snapshot.objects.get(Q(id__icontains=snapshot_id))
|
||||
|
||||
payload = data.dict(exclude_unset=True)
|
||||
update_fields = ['modified_at']
|
||||
tags = payload.pop('tags', None)
|
||||
update_fields = ["modified_at"]
|
||||
tags = payload.pop("tags", None)
|
||||
|
||||
if 'status' in payload:
|
||||
if payload['status'] not in Snapshot.StatusChoices.values:
|
||||
raise HttpError(400, f'Invalid status: {payload["status"]}')
|
||||
snapshot.status = payload['status']
|
||||
if snapshot.status == Snapshot.StatusChoices.SEALED and 'retry_at' not in payload:
|
||||
if "status" in payload:
|
||||
if payload["status"] not in Snapshot.StatusChoices.values:
|
||||
raise HttpError(400, f"Invalid status: {payload['status']}")
|
||||
snapshot.status = payload["status"]
|
||||
if snapshot.status == Snapshot.StatusChoices.SEALED and "retry_at" not in payload:
|
||||
snapshot.retry_at = None
|
||||
update_fields.append('status')
|
||||
update_fields.append("status")
|
||||
|
||||
if 'retry_at' in payload:
|
||||
snapshot.retry_at = payload['retry_at']
|
||||
update_fields.append('retry_at')
|
||||
if "retry_at" in payload:
|
||||
snapshot.retry_at = payload["retry_at"]
|
||||
update_fields.append("retry_at")
|
||||
|
||||
if tags is not None:
|
||||
snapshot.save_tags(normalize_tag_list(tags))
|
||||
|
||||
snapshot.save(update_fields=update_fields)
|
||||
setattr(request, 'with_archiveresults', False)
|
||||
setattr(request, "with_archiveresults", False)
|
||||
return snapshot
|
||||
|
||||
|
||||
@@ -373,17 +429,18 @@ def delete_snapshot(request: HttpRequest, snapshot_id: str):
|
||||
crawl_id_str = str(snapshot.crawl.pk)
|
||||
deleted_count, _ = snapshot.delete()
|
||||
return {
|
||||
'success': True,
|
||||
'snapshot_id': snapshot_id_str,
|
||||
'crawl_id': crawl_id_str,
|
||||
'deleted_count': deleted_count,
|
||||
"success": True,
|
||||
"snapshot_id": snapshot_id_str,
|
||||
"crawl_id": crawl_id_str,
|
||||
"deleted_count": deleted_count,
|
||||
}
|
||||
|
||||
|
||||
### Tag #########################################################################
|
||||
|
||||
|
||||
class TagSchema(Schema):
|
||||
TYPE: str = 'core.models.Tag'
|
||||
TYPE: str = "core.models.Tag"
|
||||
id: int
|
||||
modified_at: datetime
|
||||
created_at: datetime
|
||||
@@ -392,7 +449,7 @@ class TagSchema(Schema):
|
||||
name: str
|
||||
slug: str
|
||||
num_snapshots: int
|
||||
snapshots: List[SnapshotSchema]
|
||||
snapshots: list[SnapshotSchema]
|
||||
|
||||
@staticmethod
|
||||
def resolve_created_by_id(obj):
|
||||
@@ -402,7 +459,7 @@ class TagSchema(Schema):
|
||||
def resolve_created_by_username(obj):
|
||||
user_model = get_user_model()
|
||||
user = user_model.objects.get(id=obj.created_by_id)
|
||||
username = getattr(user, 'username', None)
|
||||
username = getattr(user, "username", None)
|
||||
return username if isinstance(username, str) else str(user)
|
||||
|
||||
@staticmethod
|
||||
@@ -411,58 +468,67 @@ class TagSchema(Schema):
|
||||
|
||||
@staticmethod
|
||||
def resolve_snapshots(obj, context):
|
||||
if bool(getattr(context['request'], 'with_snapshots', False)):
|
||||
if bool(getattr(context["request"], "with_snapshots", False)):
|
||||
return obj.snapshot_set.all().distinct()
|
||||
return Snapshot.objects.none()
|
||||
|
||||
|
||||
@router.get("/tags", response=List[TagSchema], url_name="get_tags")
|
||||
@router.get("/tags", response=list[TagSchema], url_name="get_tags")
|
||||
@paginate(CustomPagination)
|
||||
def get_tags(request: HttpRequest):
|
||||
setattr(request, 'with_snapshots', False)
|
||||
setattr(request, 'with_archiveresults', False)
|
||||
setattr(request, "with_snapshots", False)
|
||||
setattr(request, "with_archiveresults", False)
|
||||
return get_matching_tags()
|
||||
|
||||
|
||||
@router.get("/tag/{tag_id}", response=TagSchema, url_name="get_tag")
|
||||
def get_tag(request: HttpRequest, tag_id: str, with_snapshots: bool = True):
|
||||
setattr(request, 'with_snapshots', with_snapshots)
|
||||
setattr(request, 'with_archiveresults', False)
|
||||
setattr(request, "with_snapshots", with_snapshots)
|
||||
setattr(request, "with_archiveresults", False)
|
||||
try:
|
||||
return get_tag_by_ref(tag_id)
|
||||
except (Tag.DoesNotExist, ValidationError):
|
||||
raise HttpError(404, 'Tag not found')
|
||||
raise HttpError(404, "Tag not found")
|
||||
|
||||
|
||||
@router.get("/any/{id}", response=Union[SnapshotSchema, ArchiveResultSchema, TagSchema, CrawlSchema], url_name="get_any", summary="Get any object by its ID")
|
||||
@router.get(
|
||||
"/any/{id}",
|
||||
response=Union[SnapshotSchema, ArchiveResultSchema, TagSchema, CrawlSchema],
|
||||
url_name="get_any",
|
||||
summary="Get any object by its ID",
|
||||
)
|
||||
def get_any(request: HttpRequest, id: str):
|
||||
"""Get any object by its ID (e.g. snapshot, archiveresult, tag, crawl, etc.)."""
|
||||
setattr(request, 'with_snapshots', False)
|
||||
setattr(request, 'with_archiveresults', False)
|
||||
setattr(request, "with_snapshots", False)
|
||||
setattr(request, "with_archiveresults", False)
|
||||
|
||||
for getter in [get_snapshot, get_archiveresult, get_tag]:
|
||||
try:
|
||||
response = getter(request, id)
|
||||
if isinstance(response, Model):
|
||||
return redirect(f"/api/v1/{response._meta.app_label}/{response._meta.model_name}/{response.pk}?{request.META['QUERY_STRING']}")
|
||||
return redirect(
|
||||
f"/api/v1/{response._meta.app_label}/{response._meta.model_name}/{response.pk}?{request.META['QUERY_STRING']}",
|
||||
)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
try:
|
||||
from archivebox.api.v1_crawls import get_crawl
|
||||
|
||||
response = get_crawl(request, id)
|
||||
if isinstance(response, Model):
|
||||
return redirect(f"/api/v1/{response._meta.app_label}/{response._meta.model_name}/{response.pk}?{request.META['QUERY_STRING']}")
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
raise HttpError(404, 'Object with given ID not found')
|
||||
raise HttpError(404, "Object with given ID not found")
|
||||
|
||||
|
||||
### Tag Editor API Endpoints #########################################################################
|
||||
|
||||
|
||||
class TagAutocompleteSchema(Schema):
|
||||
tags: List[dict]
|
||||
tags: list[dict]
|
||||
|
||||
|
||||
class TagCreateSchema(Schema):
|
||||
@@ -483,7 +549,7 @@ class TagSearchSnapshotSchema(Schema):
|
||||
favicon_url: str
|
||||
admin_url: str
|
||||
archive_url: str
|
||||
downloaded_at: Optional[str] = None
|
||||
downloaded_at: str | None = None
|
||||
|
||||
|
||||
class TagSearchCardSchema(Schema):
|
||||
@@ -497,11 +563,11 @@ class TagSearchCardSchema(Schema):
|
||||
export_jsonl_url: str
|
||||
rename_url: str
|
||||
delete_url: str
|
||||
snapshots: List[TagSearchSnapshotSchema]
|
||||
snapshots: list[TagSearchSnapshotSchema]
|
||||
|
||||
|
||||
class TagSearchResponseSchema(Schema):
|
||||
tags: List[TagSearchCardSchema]
|
||||
tags: list[TagSearchCardSchema]
|
||||
sort: str
|
||||
created_by: str
|
||||
year: str
|
||||
@@ -527,8 +593,8 @@ class TagDeleteResponseSchema(Schema):
|
||||
|
||||
class TagSnapshotRequestSchema(Schema):
|
||||
snapshot_id: str
|
||||
tag_name: Optional[str] = None
|
||||
tag_id: Optional[int] = None
|
||||
tag_name: str | None = None
|
||||
tag_id: int | None = None
|
||||
|
||||
|
||||
class TagSnapshotResponseSchema(Schema):
|
||||
@@ -541,10 +607,10 @@ class TagSnapshotResponseSchema(Schema):
|
||||
def search_tags(
|
||||
request: HttpRequest,
|
||||
q: str = "",
|
||||
sort: str = 'created_desc',
|
||||
created_by: str = '',
|
||||
year: str = '',
|
||||
has_snapshots: str = 'all',
|
||||
sort: str = "created_desc",
|
||||
created_by: str = "",
|
||||
year: str = "",
|
||||
has_snapshots: str = "all",
|
||||
):
|
||||
"""Return detailed tag cards for admin/live-search UIs."""
|
||||
normalized_sort = normalize_tag_sort(sort)
|
||||
@@ -552,7 +618,7 @@ def search_tags(
|
||||
normalized_year = normalize_created_year_filter(year)
|
||||
normalized_has_snapshots = normalize_has_snapshots_filter(has_snapshots)
|
||||
return {
|
||||
'tags': build_tag_cards(
|
||||
"tags": build_tag_cards(
|
||||
query=q,
|
||||
request=request,
|
||||
sort=normalized_sort,
|
||||
@@ -560,28 +626,28 @@ def search_tags(
|
||||
year=normalized_year,
|
||||
has_snapshots=normalized_has_snapshots,
|
||||
),
|
||||
'sort': normalized_sort,
|
||||
'created_by': normalized_created_by,
|
||||
'year': normalized_year,
|
||||
'has_snapshots': normalized_has_snapshots,
|
||||
"sort": normalized_sort,
|
||||
"created_by": normalized_created_by,
|
||||
"year": normalized_year,
|
||||
"has_snapshots": normalized_has_snapshots,
|
||||
}
|
||||
|
||||
|
||||
def _public_tag_listing_enabled() -> bool:
|
||||
explicit = getattr(settings, 'PUBLIC_SNAPSHOTS_LIST', None)
|
||||
explicit = getattr(settings, "PUBLIC_SNAPSHOTS_LIST", None)
|
||||
if explicit is not None:
|
||||
return bool(explicit)
|
||||
return bool(getattr(settings, 'PUBLIC_INDEX', SERVER_CONFIG.PUBLIC_INDEX))
|
||||
return bool(getattr(settings, "PUBLIC_INDEX", SERVER_CONFIG.PUBLIC_INDEX))
|
||||
|
||||
|
||||
def _request_has_tag_autocomplete_access(request: HttpRequest) -> bool:
|
||||
user = getattr(request, 'user', None)
|
||||
if getattr(user, 'is_authenticated', False):
|
||||
user = getattr(request, "user", None)
|
||||
if getattr(user, "is_authenticated", False):
|
||||
return True
|
||||
|
||||
token = request.GET.get('api_key') or request.headers.get('X-ArchiveBox-API-Key')
|
||||
auth_header = request.headers.get('Authorization', '')
|
||||
if not token and auth_header.lower().startswith('bearer '):
|
||||
token = request.GET.get("api_key") or request.headers.get("X-ArchiveBox-API-Key")
|
||||
auth_header = request.headers.get("Authorization", "")
|
||||
if not token and auth_header.lower().startswith("bearer "):
|
||||
token = auth_header.split(None, 1)[1].strip()
|
||||
|
||||
if token and auth_using_token(token=token, request=request):
|
||||
@@ -594,12 +660,12 @@ def _request_has_tag_autocomplete_access(request: HttpRequest) -> bool:
|
||||
def tags_autocomplete(request: HttpRequest, q: str = ""):
|
||||
"""Return tags matching the query for autocomplete."""
|
||||
if not _request_has_tag_autocomplete_access(request):
|
||||
raise HttpError(401, 'Authentication required')
|
||||
raise HttpError(401, "Authentication required")
|
||||
|
||||
tags = get_matching_tags(q)[:50 if not q else 20]
|
||||
tags = get_matching_tags(q)[: 50 if not q else 20]
|
||||
|
||||
return {
|
||||
'tags': [{'id': tag.pk, 'name': tag.name, 'slug': tag.slug, 'num_snapshots': getattr(tag, 'num_snapshots', 0)} for tag in tags]
|
||||
"tags": [{"id": tag.pk, "name": tag.name, "slug": tag.slug, "num_snapshots": getattr(tag, "num_snapshots", 0)} for tag in tags],
|
||||
}
|
||||
|
||||
|
||||
@@ -615,10 +681,10 @@ def tags_create(request: HttpRequest, data: TagCreateSchema):
|
||||
raise HttpError(400, str(err)) from err
|
||||
|
||||
return {
|
||||
'success': True,
|
||||
'tag_id': tag.pk,
|
||||
'tag_name': tag.name,
|
||||
'created': created,
|
||||
"success": True,
|
||||
"tag_id": tag.pk,
|
||||
"tag_name": tag.name,
|
||||
"created": created,
|
||||
}
|
||||
|
||||
|
||||
@@ -627,15 +693,15 @@ def rename_tag(request: HttpRequest, tag_id: int, data: TagUpdateSchema):
|
||||
try:
|
||||
tag = rename_tag_record(get_tag_by_ref(tag_id), data.name)
|
||||
except Tag.DoesNotExist as err:
|
||||
raise HttpError(404, 'Tag not found') from err
|
||||
raise HttpError(404, "Tag not found") from err
|
||||
except ValueError as err:
|
||||
raise HttpError(400, str(err)) from err
|
||||
|
||||
return {
|
||||
'success': True,
|
||||
'tag_id': tag.pk,
|
||||
'tag_name': tag.name,
|
||||
'slug': tag.slug,
|
||||
"success": True,
|
||||
"tag_id": tag.pk,
|
||||
"tag_name": tag.name,
|
||||
"slug": tag.slug,
|
||||
}
|
||||
|
||||
|
||||
@@ -644,13 +710,13 @@ def delete_tag(request: HttpRequest, tag_id: int):
|
||||
try:
|
||||
tag = get_tag_by_ref(tag_id)
|
||||
except Tag.DoesNotExist as err:
|
||||
raise HttpError(404, 'Tag not found') from err
|
||||
raise HttpError(404, "Tag not found") from err
|
||||
|
||||
deleted_count, _ = delete_tag_record(tag)
|
||||
return {
|
||||
'success': True,
|
||||
'tag_id': int(tag_id),
|
||||
'deleted_count': deleted_count,
|
||||
"success": True,
|
||||
"tag_id": int(tag_id),
|
||||
"deleted_count": deleted_count,
|
||||
}
|
||||
|
||||
|
||||
@@ -659,10 +725,10 @@ def tag_urls_export(request: HttpRequest, tag_id: int):
|
||||
try:
|
||||
tag = get_tag_by_ref(tag_id)
|
||||
except Tag.DoesNotExist as err:
|
||||
raise HttpError(404, 'Tag not found') from err
|
||||
raise HttpError(404, "Tag not found") from err
|
||||
|
||||
response = HttpResponse(export_tag_urls(tag), content_type='text/plain; charset=utf-8')
|
||||
response['Content-Disposition'] = f'attachment; filename="tag-{tag.slug}-urls.txt"'
|
||||
response = HttpResponse(export_tag_urls(tag), content_type="text/plain; charset=utf-8")
|
||||
response["Content-Disposition"] = f'attachment; filename="tag-{tag.slug}-urls.txt"'
|
||||
return response
|
||||
|
||||
|
||||
@@ -671,10 +737,10 @@ def tag_snapshots_export(request: HttpRequest, tag_id: int):
|
||||
try:
|
||||
tag = get_tag_by_ref(tag_id)
|
||||
except Tag.DoesNotExist as err:
|
||||
raise HttpError(404, 'Tag not found') from err
|
||||
raise HttpError(404, "Tag not found") from err
|
||||
|
||||
response = HttpResponse(export_tag_snapshots_jsonl(tag), content_type='application/x-ndjson; charset=utf-8')
|
||||
response['Content-Disposition'] = f'attachment; filename="tag-{tag.slug}-snapshots.jsonl"'
|
||||
response = HttpResponse(export_tag_snapshots_jsonl(tag), content_type="application/x-ndjson; charset=utf-8")
|
||||
response["Content-Disposition"] = f'attachment; filename="tag-{tag.slug}-snapshots.jsonl"'
|
||||
return response
|
||||
|
||||
|
||||
@@ -684,16 +750,16 @@ def tags_add_to_snapshot(request: HttpRequest, data: TagSnapshotRequestSchema):
|
||||
# Get the snapshot
|
||||
try:
|
||||
snapshot = Snapshot.objects.get(
|
||||
Q(id__startswith=data.snapshot_id) | Q(timestamp__startswith=data.snapshot_id)
|
||||
Q(id__startswith=data.snapshot_id) | Q(timestamp__startswith=data.snapshot_id),
|
||||
)
|
||||
except Snapshot.DoesNotExist:
|
||||
raise HttpError(404, 'Snapshot not found')
|
||||
raise HttpError(404, "Snapshot not found")
|
||||
except Snapshot.MultipleObjectsReturned:
|
||||
snapshot = Snapshot.objects.filter(
|
||||
Q(id__startswith=data.snapshot_id) | Q(timestamp__startswith=data.snapshot_id)
|
||||
Q(id__startswith=data.snapshot_id) | Q(timestamp__startswith=data.snapshot_id),
|
||||
).first()
|
||||
if snapshot is None:
|
||||
raise HttpError(404, 'Snapshot not found')
|
||||
raise HttpError(404, "Snapshot not found")
|
||||
|
||||
# Get or create the tag
|
||||
if data.tag_name:
|
||||
@@ -708,17 +774,17 @@ def tags_add_to_snapshot(request: HttpRequest, data: TagSnapshotRequestSchema):
|
||||
try:
|
||||
tag = get_tag_by_ref(data.tag_id)
|
||||
except Tag.DoesNotExist:
|
||||
raise HttpError(404, 'Tag not found')
|
||||
raise HttpError(404, "Tag not found")
|
||||
else:
|
||||
raise HttpError(400, 'Either tag_name or tag_id is required')
|
||||
raise HttpError(400, "Either tag_name or tag_id is required")
|
||||
|
||||
# Add the tag to the snapshot
|
||||
snapshot.tags.add(tag.pk)
|
||||
|
||||
return {
|
||||
'success': True,
|
||||
'tag_id': tag.pk,
|
||||
'tag_name': tag.name,
|
||||
"success": True,
|
||||
"tag_id": tag.pk,
|
||||
"tag_name": tag.name,
|
||||
}
|
||||
|
||||
|
||||
@@ -728,36 +794,36 @@ def tags_remove_from_snapshot(request: HttpRequest, data: TagSnapshotRequestSche
|
||||
# Get the snapshot
|
||||
try:
|
||||
snapshot = Snapshot.objects.get(
|
||||
Q(id__startswith=data.snapshot_id) | Q(timestamp__startswith=data.snapshot_id)
|
||||
Q(id__startswith=data.snapshot_id) | Q(timestamp__startswith=data.snapshot_id),
|
||||
)
|
||||
except Snapshot.DoesNotExist:
|
||||
raise HttpError(404, 'Snapshot not found')
|
||||
raise HttpError(404, "Snapshot not found")
|
||||
except Snapshot.MultipleObjectsReturned:
|
||||
snapshot = Snapshot.objects.filter(
|
||||
Q(id__startswith=data.snapshot_id) | Q(timestamp__startswith=data.snapshot_id)
|
||||
Q(id__startswith=data.snapshot_id) | Q(timestamp__startswith=data.snapshot_id),
|
||||
).first()
|
||||
if snapshot is None:
|
||||
raise HttpError(404, 'Snapshot not found')
|
||||
raise HttpError(404, "Snapshot not found")
|
||||
|
||||
# Get the tag
|
||||
if data.tag_id:
|
||||
try:
|
||||
tag = Tag.objects.get(pk=data.tag_id)
|
||||
except Tag.DoesNotExist:
|
||||
raise HttpError(404, 'Tag not found')
|
||||
raise HttpError(404, "Tag not found")
|
||||
elif data.tag_name:
|
||||
try:
|
||||
tag = Tag.objects.get(name__iexact=data.tag_name.strip())
|
||||
except Tag.DoesNotExist:
|
||||
raise HttpError(404, 'Tag not found')
|
||||
raise HttpError(404, "Tag not found")
|
||||
else:
|
||||
raise HttpError(400, 'Either tag_name or tag_id is required')
|
||||
raise HttpError(400, "Either tag_name or tag_id is required")
|
||||
|
||||
# Remove the tag from the snapshot
|
||||
snapshot.tags.remove(tag.pk)
|
||||
|
||||
return {
|
||||
'success': True,
|
||||
'tag_id': tag.pk,
|
||||
'tag_name': tag.name,
|
||||
"success": True,
|
||||
"tag_id": tag.pk,
|
||||
"tag_name": tag.name,
|
||||
}
|
||||
|
||||
@@ -1,7 +1,6 @@
|
||||
__package__ = 'archivebox.api'
|
||||
__package__ = "archivebox.api"
|
||||
|
||||
from uuid import UUID
|
||||
from typing import List, Optional
|
||||
from datetime import datetime
|
||||
from django.http import HttpRequest
|
||||
from django.utils import timezone
|
||||
@@ -17,11 +16,11 @@ from archivebox.crawls.models import Crawl
|
||||
|
||||
from .auth import API_AUTH_METHODS
|
||||
|
||||
router = Router(tags=['Crawl Models'], auth=API_AUTH_METHODS)
|
||||
router = Router(tags=["Crawl Models"], auth=API_AUTH_METHODS)
|
||||
|
||||
|
||||
class CrawlSchema(Schema):
|
||||
TYPE: str = 'crawls.models.Crawl'
|
||||
TYPE: str = "crawls.models.Crawl"
|
||||
|
||||
id: UUID
|
||||
|
||||
@@ -35,6 +34,8 @@ class CrawlSchema(Schema):
|
||||
|
||||
urls: str
|
||||
max_depth: int
|
||||
max_urls: int
|
||||
max_size: int
|
||||
tags_str: str
|
||||
config: dict
|
||||
|
||||
@@ -48,12 +49,12 @@ class CrawlSchema(Schema):
|
||||
def resolve_created_by_username(obj):
|
||||
user_model = get_user_model()
|
||||
user = user_model.objects.get(id=obj.created_by_id)
|
||||
username = getattr(user, 'username', None)
|
||||
username = getattr(user, "username", None)
|
||||
return username if isinstance(username, str) else str(user)
|
||||
|
||||
@staticmethod
|
||||
def resolve_snapshots(obj, context):
|
||||
if bool(getattr(context['request'], 'with_snapshots', False)):
|
||||
if bool(getattr(context["request"], "with_snapshots", False)):
|
||||
return obj.snapshot_set.all().distinct()
|
||||
return Snapshot.objects.none()
|
||||
|
||||
@@ -61,17 +62,19 @@ class CrawlSchema(Schema):
|
||||
class CrawlUpdateSchema(Schema):
|
||||
status: str | None = None
|
||||
retry_at: datetime | None = None
|
||||
tags: Optional[List[str]] = None
|
||||
tags: list[str] | None = None
|
||||
tags_str: str | None = None
|
||||
|
||||
|
||||
class CrawlCreateSchema(Schema):
|
||||
urls: List[str]
|
||||
urls: list[str]
|
||||
max_depth: int = 0
|
||||
tags: Optional[List[str]] = None
|
||||
tags_str: str = ''
|
||||
label: str = ''
|
||||
notes: str = ''
|
||||
max_urls: int = 0
|
||||
max_size: int = 0
|
||||
tags: list[str] | None = None
|
||||
tags_str: str = ""
|
||||
label: str = ""
|
||||
notes: str = ""
|
||||
config: dict = {}
|
||||
|
||||
|
||||
@@ -82,13 +85,13 @@ class CrawlDeleteResponseSchema(Schema):
|
||||
deleted_snapshots: int
|
||||
|
||||
|
||||
def normalize_tag_list(tags: Optional[List[str]] = None, tags_str: str = '') -> List[str]:
|
||||
def normalize_tag_list(tags: list[str] | None = None, tags_str: str = "") -> list[str]:
|
||||
if tags is not None:
|
||||
return [tag.strip() for tag in tags if tag and tag.strip()]
|
||||
return [tag.strip() for tag in tags_str.split(',') if tag.strip()]
|
||||
return [tag.strip() for tag in tags_str.split(",") if tag.strip()]
|
||||
|
||||
|
||||
@router.get("/crawls", response=List[CrawlSchema], url_name="get_crawls")
|
||||
@router.get("/crawls", response=list[CrawlSchema], url_name="get_crawls")
|
||||
def get_crawls(request: HttpRequest):
|
||||
return Crawl.objects.all().distinct()
|
||||
|
||||
@@ -97,15 +100,21 @@ def get_crawls(request: HttpRequest):
|
||||
def create_crawl(request: HttpRequest, data: CrawlCreateSchema):
|
||||
urls = [url.strip() for url in data.urls if url and url.strip()]
|
||||
if not urls:
|
||||
raise HttpError(400, 'At least one URL is required')
|
||||
raise HttpError(400, "At least one URL is required")
|
||||
if data.max_depth not in (0, 1, 2, 3, 4):
|
||||
raise HttpError(400, 'max_depth must be between 0 and 4')
|
||||
raise HttpError(400, "max_depth must be between 0 and 4")
|
||||
if data.max_urls < 0:
|
||||
raise HttpError(400, "max_urls must be >= 0")
|
||||
if data.max_size < 0:
|
||||
raise HttpError(400, "max_size must be >= 0")
|
||||
|
||||
tags = normalize_tag_list(data.tags, data.tags_str)
|
||||
crawl = Crawl.objects.create(
|
||||
urls='\n'.join(urls),
|
||||
urls="\n".join(urls),
|
||||
max_depth=data.max_depth,
|
||||
tags_str=','.join(tags),
|
||||
max_urls=data.max_urls,
|
||||
max_size=data.max_size,
|
||||
tags_str=",".join(tags),
|
||||
label=data.label,
|
||||
notes=data.notes,
|
||||
config=data.config,
|
||||
@@ -116,25 +125,26 @@ def create_crawl(request: HttpRequest, data: CrawlCreateSchema):
|
||||
crawl.create_snapshots_from_urls()
|
||||
return crawl
|
||||
|
||||
|
||||
@router.get("/crawl/{crawl_id}", response=CrawlSchema | str, url_name="get_crawl")
|
||||
def get_crawl(request: HttpRequest, crawl_id: str, as_rss: bool=False, with_snapshots: bool=False, with_archiveresults: bool=False):
|
||||
def get_crawl(request: HttpRequest, crawl_id: str, as_rss: bool = False, with_snapshots: bool = False, with_archiveresults: bool = False):
|
||||
"""Get a specific Crawl by id."""
|
||||
setattr(request, 'with_snapshots', with_snapshots)
|
||||
setattr(request, 'with_archiveresults', with_archiveresults)
|
||||
setattr(request, "with_snapshots", with_snapshots)
|
||||
setattr(request, "with_archiveresults", with_archiveresults)
|
||||
crawl = Crawl.objects.get(id__icontains=crawl_id)
|
||||
|
||||
|
||||
if crawl and as_rss:
|
||||
# return snapshots as XML rss feed
|
||||
urls = [
|
||||
{'url': snapshot.url, 'title': snapshot.title, 'bookmarked_at': snapshot.bookmarked_at, 'tags': snapshot.tags_str}
|
||||
{"url": snapshot.url, "title": snapshot.title, "bookmarked_at": snapshot.bookmarked_at, "tags": snapshot.tags_str}
|
||||
for snapshot in crawl.snapshot_set.all()
|
||||
]
|
||||
xml = '<rss version="2.0"><channel>'
|
||||
for url in urls:
|
||||
xml += f'<item><url>{url["url"]}</url><title>{url["title"]}</title><bookmarked_at>{url["bookmarked_at"]}</bookmarked_at><tags>{url["tags"]}</tags></item>'
|
||||
xml += '</channel></rss>'
|
||||
xml += f"<item><url>{url['url']}</url><title>{url['title']}</title><bookmarked_at>{url['bookmarked_at']}</bookmarked_at><tags>{url['tags']}</tags></item>"
|
||||
xml += "</channel></rss>"
|
||||
return xml
|
||||
|
||||
|
||||
return crawl
|
||||
|
||||
|
||||
@@ -143,29 +153,29 @@ def patch_crawl(request: HttpRequest, crawl_id: str, data: CrawlUpdateSchema):
|
||||
"""Update a crawl (e.g., set status=sealed to cancel queued work)."""
|
||||
crawl = Crawl.objects.get(id__icontains=crawl_id)
|
||||
payload = data.dict(exclude_unset=True)
|
||||
update_fields = ['modified_at']
|
||||
update_fields = ["modified_at"]
|
||||
|
||||
tags = payload.pop('tags', None)
|
||||
tags_str = payload.pop('tags_str', None)
|
||||
tags = payload.pop("tags", None)
|
||||
tags_str = payload.pop("tags_str", None)
|
||||
if tags is not None or tags_str is not None:
|
||||
crawl.tags_str = ','.join(normalize_tag_list(tags, tags_str or ''))
|
||||
update_fields.append('tags_str')
|
||||
crawl.tags_str = ",".join(normalize_tag_list(tags, tags_str or ""))
|
||||
update_fields.append("tags_str")
|
||||
|
||||
if 'status' in payload:
|
||||
if payload['status'] not in Crawl.StatusChoices.values:
|
||||
raise HttpError(400, f'Invalid status: {payload["status"]}')
|
||||
crawl.status = payload['status']
|
||||
if crawl.status == Crawl.StatusChoices.SEALED and 'retry_at' not in payload:
|
||||
if "status" in payload:
|
||||
if payload["status"] not in Crawl.StatusChoices.values:
|
||||
raise HttpError(400, f"Invalid status: {payload['status']}")
|
||||
crawl.status = payload["status"]
|
||||
if crawl.status == Crawl.StatusChoices.SEALED and "retry_at" not in payload:
|
||||
crawl.retry_at = None
|
||||
update_fields.append('status')
|
||||
update_fields.append("status")
|
||||
|
||||
if 'retry_at' in payload:
|
||||
crawl.retry_at = payload['retry_at']
|
||||
update_fields.append('retry_at')
|
||||
if "retry_at" in payload:
|
||||
crawl.retry_at = payload["retry_at"]
|
||||
update_fields.append("retry_at")
|
||||
|
||||
crawl.save(update_fields=update_fields)
|
||||
|
||||
if payload.get('status') == Crawl.StatusChoices.SEALED:
|
||||
if payload.get("status") == Crawl.StatusChoices.SEALED:
|
||||
Snapshot.objects.filter(
|
||||
crawl=crawl,
|
||||
status__in=[Snapshot.StatusChoices.QUEUED, Snapshot.StatusChoices.STARTED],
|
||||
@@ -184,8 +194,8 @@ def delete_crawl(request: HttpRequest, crawl_id: str):
|
||||
snapshot_count = crawl.snapshot_set.count()
|
||||
deleted_count, _ = crawl.delete()
|
||||
return {
|
||||
'success': True,
|
||||
'crawl_id': crawl_id_str,
|
||||
'deleted_count': deleted_count,
|
||||
'deleted_snapshots': snapshot_count,
|
||||
"success": True,
|
||||
"crawl_id": crawl_id_str,
|
||||
"deleted_count": deleted_count,
|
||||
"deleted_snapshots": snapshot_count,
|
||||
}
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
__package__ = 'archivebox.api'
|
||||
__package__ = "archivebox.api"
|
||||
|
||||
from uuid import UUID
|
||||
from typing import Annotated, List, Optional
|
||||
from typing import Annotated
|
||||
from datetime import datetime
|
||||
|
||||
from django.http import HttpRequest
|
||||
@@ -12,16 +12,18 @@ from ninja.pagination import paginate
|
||||
from archivebox.api.v1_core import CustomPagination
|
||||
|
||||
|
||||
router = Router(tags=['Machine and Dependencies'])
|
||||
router = Router(tags=["Machine and Dependencies"])
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# Machine Schemas
|
||||
# ============================================================================
|
||||
|
||||
|
||||
class MachineSchema(Schema):
|
||||
"""Schema for Machine model."""
|
||||
TYPE: str = 'machine.Machine'
|
||||
|
||||
TYPE: str = "machine.Machine"
|
||||
id: UUID
|
||||
created_at: datetime
|
||||
modified_at: datetime
|
||||
@@ -43,22 +45,24 @@ class MachineSchema(Schema):
|
||||
|
||||
|
||||
class MachineFilterSchema(FilterSchema):
|
||||
id: Annotated[Optional[str], FilterLookup('id__startswith')] = None
|
||||
hostname: Annotated[Optional[str], FilterLookup('hostname__icontains')] = None
|
||||
os_platform: Annotated[Optional[str], FilterLookup('os_platform__icontains')] = None
|
||||
os_arch: Annotated[Optional[str], FilterLookup('os_arch')] = None
|
||||
hw_in_docker: Annotated[Optional[bool], FilterLookup('hw_in_docker')] = None
|
||||
hw_in_vm: Annotated[Optional[bool], FilterLookup('hw_in_vm')] = None
|
||||
bin_providers: Annotated[Optional[str], FilterLookup('bin_providers__icontains')] = None
|
||||
id: Annotated[str | None, FilterLookup("id__startswith")] = None
|
||||
hostname: Annotated[str | None, FilterLookup("hostname__icontains")] = None
|
||||
os_platform: Annotated[str | None, FilterLookup("os_platform__icontains")] = None
|
||||
os_arch: Annotated[str | None, FilterLookup("os_arch")] = None
|
||||
hw_in_docker: Annotated[bool | None, FilterLookup("hw_in_docker")] = None
|
||||
hw_in_vm: Annotated[bool | None, FilterLookup("hw_in_vm")] = None
|
||||
bin_providers: Annotated[str | None, FilterLookup("bin_providers__icontains")] = None
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# Binary Schemas
|
||||
# ============================================================================
|
||||
|
||||
|
||||
class BinarySchema(Schema):
|
||||
"""Schema for Binary model."""
|
||||
TYPE: str = 'machine.Binary'
|
||||
|
||||
TYPE: str = "machine.Binary"
|
||||
id: UUID
|
||||
created_at: datetime
|
||||
modified_at: datetime
|
||||
@@ -85,23 +89,25 @@ class BinarySchema(Schema):
|
||||
|
||||
|
||||
class BinaryFilterSchema(FilterSchema):
|
||||
id: Annotated[Optional[str], FilterLookup('id__startswith')] = None
|
||||
name: Annotated[Optional[str], FilterLookup('name__icontains')] = None
|
||||
binprovider: Annotated[Optional[str], FilterLookup('binprovider')] = None
|
||||
status: Annotated[Optional[str], FilterLookup('status')] = None
|
||||
machine_id: Annotated[Optional[str], FilterLookup('machine_id__startswith')] = None
|
||||
version: Annotated[Optional[str], FilterLookup('version__icontains')] = None
|
||||
id: Annotated[str | None, FilterLookup("id__startswith")] = None
|
||||
name: Annotated[str | None, FilterLookup("name__icontains")] = None
|
||||
binprovider: Annotated[str | None, FilterLookup("binprovider")] = None
|
||||
status: Annotated[str | None, FilterLookup("status")] = None
|
||||
machine_id: Annotated[str | None, FilterLookup("machine_id__startswith")] = None
|
||||
version: Annotated[str | None, FilterLookup("version__icontains")] = None
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# Machine Endpoints
|
||||
# ============================================================================
|
||||
|
||||
@router.get("/machines", response=List[MachineSchema], url_name="get_machines")
|
||||
|
||||
@router.get("/machines", response=list[MachineSchema], url_name="get_machines")
|
||||
@paginate(CustomPagination)
|
||||
def get_machines(request: HttpRequest, filters: Query[MachineFilterSchema]):
|
||||
"""List all machines."""
|
||||
from archivebox.machine.models import Machine
|
||||
|
||||
return filters.filter(Machine.objects.all()).distinct()
|
||||
|
||||
|
||||
@@ -109,6 +115,7 @@ def get_machines(request: HttpRequest, filters: Query[MachineFilterSchema]):
|
||||
def get_current_machine(request: HttpRequest):
|
||||
"""Get the current machine."""
|
||||
from archivebox.machine.models import Machine
|
||||
|
||||
return Machine.current()
|
||||
|
||||
|
||||
@@ -117,6 +124,7 @@ def get_machine(request: HttpRequest, machine_id: str):
|
||||
"""Get a specific machine by ID."""
|
||||
from archivebox.machine.models import Machine
|
||||
from django.db.models import Q
|
||||
|
||||
return Machine.objects.get(Q(id__startswith=machine_id) | Q(hostname__iexact=machine_id))
|
||||
|
||||
|
||||
@@ -127,23 +135,27 @@ def get_machine(request: HttpRequest, machine_id: str):
|
||||
# Binary Endpoints
|
||||
# ============================================================================
|
||||
|
||||
@router.get("/binaries", response=List[BinarySchema], url_name="get_binaries")
|
||||
|
||||
@router.get("/binaries", response=list[BinarySchema], url_name="get_binaries")
|
||||
@paginate(CustomPagination)
|
||||
def get_binaries(request: HttpRequest, filters: Query[BinaryFilterSchema]):
|
||||
"""List all binaries."""
|
||||
from archivebox.machine.models import Binary
|
||||
return filters.filter(Binary.objects.all().select_related('machine')).distinct()
|
||||
|
||||
return filters.filter(Binary.objects.all().select_related("machine")).distinct()
|
||||
|
||||
|
||||
@router.get("/binary/{binary_id}", response=BinarySchema, url_name="get_binary")
|
||||
def get_binary(request: HttpRequest, binary_id: str):
|
||||
"""Get a specific binary by ID."""
|
||||
from archivebox.machine.models import Binary
|
||||
return Binary.objects.select_related('machine').get(id__startswith=binary_id)
|
||||
|
||||
return Binary.objects.select_related("machine").get(id__startswith=binary_id)
|
||||
|
||||
|
||||
@router.get("/binary/by-name/{name}", response=List[BinarySchema], url_name="get_binaries_by_name")
|
||||
@router.get("/binary/by-name/{name}", response=list[BinarySchema], url_name="get_binaries_by_name")
|
||||
def get_binaries_by_name(request: HttpRequest, name: str):
|
||||
"""Get all binaries with the given name."""
|
||||
from archivebox.machine.models import Binary
|
||||
return list(Binary.objects.filter(name__iexact=name).select_related('machine'))
|
||||
|
||||
return list(Binary.objects.filter(name__iexact=name).select_related("machine"))
|
||||
|
||||
Reference in New Issue
Block a user