mirror of
https://github.com/ArchiveBox/ArchiveBox.git
synced 2026-04-05 07:17:52 +10:00
New Snapshot detail page UI (#1429)
This commit is contained in:
1
.gitignore
vendored
1
.gitignore
vendored
@@ -29,6 +29,7 @@ dist/
|
|||||||
data/
|
data/
|
||||||
data*/
|
data*/
|
||||||
output/
|
output/
|
||||||
|
index.sqlite3
|
||||||
|
|
||||||
# vim
|
# vim
|
||||||
*.sw?
|
*.sw?
|
||||||
|
|||||||
@@ -1,7 +1,4 @@
|
|||||||
__package__ = 'archivebox'
|
__package__ = 'archivebox'
|
||||||
|
|
||||||
|
|
||||||
# monkey patch django timezone to add back utc (it was removed in Django 5.0)
|
from .monkey_patches import *
|
||||||
import datetime
|
|
||||||
from django.utils import timezone
|
|
||||||
timezone.utc = datetime.timezone.utc
|
|
||||||
|
|||||||
1
archivebox/abid_utils/__init__.py
Normal file
1
archivebox/abid_utils/__init__.py
Normal file
@@ -0,0 +1 @@
|
|||||||
|
__package__ = 'abid_utils'
|
||||||
191
archivebox/abid_utils/abid.py
Normal file
191
archivebox/abid_utils/abid.py
Normal file
@@ -0,0 +1,191 @@
|
|||||||
|
from typing import NamedTuple, Any, Union, Optional
|
||||||
|
|
||||||
|
import ulid
|
||||||
|
import uuid6
|
||||||
|
import hashlib
|
||||||
|
from urllib.parse import urlparse
|
||||||
|
|
||||||
|
from uuid import UUID
|
||||||
|
from typeid import TypeID # type: ignore[import-untyped]
|
||||||
|
from datetime import datetime
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
# ABID layout constants: an ABID string is `<prefix><ts><uri><subtype><rand>`,
# e.g. 'obj_' + '01HX9FPYTR' + 'E4A5CCD9' + '01' + 'ZYEBQE' (30 chars total).
ABID_PREFIX_LEN = 4     # model prefix incl. trailing underscore, e.g. 'snp_'
ABID_SUFFIX_LEN = 26    # everything after the prefix (ULID-compatible length)
ABID_LEN = 30           # ABID_PREFIX_LEN + ABID_SUFFIX_LEN
ABID_TS_LEN = 10        # ULID timestamp section
ABID_URI_LEN = 8        # first 8 hex chars of sha256(uri/domain)
ABID_SUBTYPE_LEN = 2    # 2-char subtype discriminator, e.g. '01'
ABID_RAND_LEN = 6       # trailing randomness section

# fallback prefix used when an object's model doesn't declare its own
DEFAULT_ABID_PREFIX = 'obj_'
|
||||||
|
|
||||||
|
|
||||||
|
class ABID(NamedTuple):
    """
    The ArchiveBox ID (ABID) as a 5-part tuple: prefix + ts + uri + subtype + rand.

    e.g. ABID('obj_01HX9FPYTRE4A5CCD901ZYEBQE')
    """
    prefix: str            # e.g. obj_
    ts: str                # e.g. 01HX9FPYTR
    uri: str               # e.g. E4A5CCD9
    subtype: str           # e.g. 01
    rand: str              # e.g. ZYEBQE

    def __getattr__(self, attr: str) -> Any:
        # delegate any unknown attribute lookups to the underlying ULID object
        return getattr(self.ulid, attr)

    def __eq__(self, other: Any) -> bool:
        # two ABIDs are equal when their ULID values match (prefix is not compared)
        try:
            return self.ulid == other.ulid
        except AttributeError:
            return NotImplemented

    def __hash__(self) -> int:
        # BUGFIX: defining __eq__ implicitly sets __hash__ = None, which made ABID
        # instances unhashable (unusable as dict keys / set members). Hash by the
        # suffix, which is exactly what __eq__'s ULID comparison is derived from,
        # so equal ABIDs always hash equal.
        return hash(self.suffix)

    def __str__(self) -> str:
        return self.prefix + self.suffix

    def __len__(self) -> int:
        return len(self.prefix + self.suffix)

    @classmethod
    def parse(cls, buffer: Union[str, UUID, ulid.ULID, TypeID, 'ABID'], prefix=DEFAULT_ABID_PREFIX) -> 'ABID':
        """
        Parse any supported ID representation into an ABID.

        Raises AssertionError when the input is empty or malformed.
        """
        assert buffer, f'Attempted to create ABID from null value {buffer}'

        buffer = str(buffer)
        if '_' in buffer:
            # BUGFIX: split on the first underscore only, so malformed input with
            # extra underscores fails the clear length assert below instead of
            # raising an opaque tuple-unpacking ValueError here
            prefix, suffix = buffer.split('_', 1)
        else:
            prefix, suffix = prefix.strip('_'), buffer

        assert len(prefix) == ABID_PREFIX_LEN - 1    # length without trailing _
        assert len(suffix) == ABID_SUFFIX_LEN, f'Suffix {suffix} from {buffer} was not {ABID_SUFFIX_LEN} chars long'

        return cls(
            prefix=abid_part_from_prefix(prefix),
            ts=suffix[0:10].upper(),
            uri=suffix[10:18].upper(),
            subtype=suffix[18:20].upper(),
            rand=suffix[20:26].upper(),
        )

    @property
    def suffix(self):
        # the 26-char portion after the prefix (ULID-compatible)
        return ''.join((self.ts, self.uri, self.subtype, self.rand))

    @property
    def ulid(self) -> ulid.ULID:
        return ulid.parse(self.suffix)

    @property
    def uuid(self) -> UUID:
        return self.ulid.uuid

    @property
    def uuid6(self) -> uuid6.UUID:
        return uuid6.UUID(hex=self.uuid.hex)

    @property
    def typeid(self) -> TypeID:
        return TypeID.from_uuid(prefix=self.prefix.strip('_'), suffix=self.uuid6)

    @property
    def datetime(self) -> datetime:
        return self.ulid.timestamp().datetime
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
####################################################
|
||||||
|
|
||||||
|
|
||||||
|
def uri_hash(uri: Union[str, bytes]) -> str:
    """
    Return the uppercase SHA-256 hex digest of a URI.

    Values that look like URLs (contain '://') are hashed by their domain
    (netloc) only, when one can be extracted.

    e.g. 'E4A5CCD9AF4ED2A6E0954DF19FD274E9CDDB4853051F033FD518BFC90AA1AC25'
    """
    text = uri.decode() if isinstance(uri, bytes) else uri

    # only hash the domain part of URLs, so all pages on a site share one hash
    if '://' in text:
        try:
            netloc = urlparse(text).netloc
            if netloc:
                text = netloc
        except AttributeError:
            pass

    return hashlib.sha256(text.encode('utf-8')).hexdigest().upper()
|
||||||
|
|
||||||
|
def abid_part_from_prefix(prefix: Optional[str]) -> str:
    """
    Normalize a model prefix into its 4-char ABID form, e.g. 'snp_'.

    Falls back to the generic 'obj_' prefix when no prefix is given.
    """
    if prefix is None:
        return 'obj_'

    cleaned = prefix.strip('_').lower()
    assert len(cleaned) == 3
    return f'{cleaned}_'
|
||||||
|
|
||||||
|
def abid_part_from_uri(uri: str) -> str:
    """
    Derive the URI section of an ABID.

    'E4A5CCD9'  # takes first 8 characters of sha256(url)
    """
    return uri_hash(str(uri))[:ABID_URI_LEN]
|
||||||
|
|
||||||
|
def abid_part_from_ts(ts: Optional[datetime]) -> str:
    """
    Derive the timestamp section of an ABID from an added date.

    '01HX9FPYTR'  # the 10-character ULID timestamp section
    """
    derived_ulid = ulid.from_timestamp(ts) if ts else ulid.new()
    return str(derived_ulid)[:ABID_TS_LEN]
|
||||||
|
|
||||||
|
def abid_part_from_subtype(subtype: str) -> str:
    """
    Derive the 2-char subtype section of an ABID.

    Snapshots have 01 type, other objects have other subtypes like wget/media/etc.
    Also allows us to change the ulid spec later by putting special sigil values here.
    """
    subtype_str = str(subtype)

    # anything not already exactly 2 chars gets condensed via sha256
    if len(subtype_str) != ABID_SUBTYPE_LEN:
        digest = hashlib.sha256(subtype_str.encode('utf-8')).hexdigest()
        return digest[:ABID_SUBTYPE_LEN].upper()

    return subtype_str
|
||||||
|
|
||||||
|
def abid_part_from_rand(rand: Union[str, UUID, None, int]) -> str:
    """
    Derive the 6-character randomness section of an ABID.

    'ZYEBQE'  # takes last 6 characters of randomness from existing legacy uuid db field

    Accepts several legacy ID representations:
    - None: generate fresh randomness from a new ULID
    - UUID: last 6 chars of the ULID representation of the UUID
    - int:  last 6 digits, left-padded with zeros (legacy BigAutoField pks)
    - str:  last 6 chars verbatim, uppercased
    """
    if rand is None:
        # generate fresh randomness: take the tail of a brand-new ULID
        # (NOTE: ULID chars are Crockford base32, not hex)
        return str(ulid.new())[-ABID_RAND_LEN:]
    elif isinstance(rand, UUID):
        # take the last 6 characters of the ULID representation of the uuid
        return str(ulid.from_uuid(rand))[-ABID_RAND_LEN:]
    elif isinstance(rand, int):
        # BigAutoInteger pk: keep the last 6 digits, left-padded with zeros
        # (rjust matches the original manual '0'*padding_needed behavior exactly)
        return str(rand)[-ABID_RAND_LEN:].rjust(ABID_RAND_LEN, '0')

    # otherwise treat it as a string, take the last 6 characters of it verbatim
    return str(rand)[-ABID_RAND_LEN:].upper()
|
||||||
|
|
||||||
|
|
||||||
|
def abid_from_values(prefix, ts, uri, subtype, rand) -> ABID:
    """
    Return a freshly derived ABID (assembled from attrs defined in ABIDModel.abid_*_src).
    """
    derived = ABID(
        prefix=abid_part_from_prefix(prefix),
        ts=abid_part_from_ts(ts),
        uri=abid_part_from_uri(uri),
        subtype=abid_part_from_subtype(subtype),
        rand=abid_part_from_rand(rand),
    )

    # sanity-check that every derived representation can be computed
    assert derived.ulid and derived.uuid and derived.typeid, f'Failed to calculate {prefix}_ABID for ts={ts} uri={uri} subtyp={subtype} rand={rand}'
    return derived
|
||||||
7
archivebox/abid_utils/apps.py
Normal file
7
archivebox/abid_utils/apps.py
Normal file
@@ -0,0 +1,7 @@
|
|||||||
|
from django.apps import AppConfig
|
||||||
|
|
||||||
|
|
||||||
|
class AbidUtilsConfig(AppConfig):
    """Django AppConfig for the abid_utils app (ABID identifier helpers)."""

    # use 64-bit integer PKs for any auto-created models in this app
    default_auto_field = 'django.db.models.BigAutoField'

    name = 'abid_utils'
|
||||||
314
archivebox/abid_utils/models.py
Normal file
314
archivebox/abid_utils/models.py
Normal file
@@ -0,0 +1,314 @@
|
|||||||
|
"""
|
||||||
|
This file provides the Django ABIDField and ABIDModel base model to inherit from.
|
||||||
|
|
||||||
|
It implements the ArchiveBox ID (ABID) interfaces including abid_values, get_abid, .abid, .uuid, .id.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from typing import Any, Dict, Union, List, Set, NamedTuple, cast
|
||||||
|
|
||||||
|
from ulid import ULID
|
||||||
|
from uuid import uuid4, UUID
|
||||||
|
from typeid import TypeID # type: ignore[import-untyped]
|
||||||
|
from datetime import datetime
|
||||||
|
from functools import partial
|
||||||
|
from charidfield import CharIDField # type: ignore[import-untyped]
|
||||||
|
|
||||||
|
from django.conf import settings
|
||||||
|
from django.db import models
|
||||||
|
from django.db.utils import OperationalError
|
||||||
|
from django.contrib.auth import get_user_model
|
||||||
|
|
||||||
|
from django_stubs_ext.db.models import TypedModelMeta
|
||||||
|
|
||||||
|
from .abid import (
|
||||||
|
ABID,
|
||||||
|
ABID_LEN,
|
||||||
|
ABID_RAND_LEN,
|
||||||
|
ABID_SUFFIX_LEN,
|
||||||
|
DEFAULT_ABID_PREFIX,
|
||||||
|
abid_part_from_prefix,
|
||||||
|
abid_from_values
|
||||||
|
)
|
||||||
|
|
||||||
|
####################################################
|
||||||
|
|
||||||
|
|
||||||
|
# Database Field for typeid/ulid style IDs with a prefix, e.g. snp_01BJQMF54D093DXEAWZ6JYRPAQ
# Pre-configured CharIDField factory: callers only need to supply prefix=...
# The field is nullable/blank with default=None so ABIDs can be backfilled lazily on save().
ABIDField = partial(
    CharIDField,
    max_length=ABID_LEN,
    help_text="ABID-format identifier for this entity (e.g. snp_01BJQMF54D093DXEAWZ6JYRPAQ)",
    default=None,
    null=True,
    blank=True,
    db_index=True,
    unique=True,
)
|
||||||
|
|
||||||
|
def get_or_create_system_user_pk(username='system'):
    """
    Get or create a system user with is_superuser=True to be the default owner for new DB rows.

    Returns the user's primary key (not the user object) so it can be used as a
    Django field default.
    """

    User = get_user_model()

    # if exactly one superuser exists, reuse it as the system user
    if User.objects.filter(is_superuser=True).count() == 1:
        return User.objects.filter(is_superuser=True).values_list('pk', flat=True)[0]

    # otherwise, create a dedicated "system" user
    # NOTE(review): is_staff/is_superuser are lookup kwargs here, not defaults —
    # an existing non-staff user with this username would trigger a create and a
    # unique-username collision; confirm this is intended
    user, created = User.objects.get_or_create(username=username, is_staff=True, is_superuser=True, defaults={'email': '', 'password': ''})
    return user.pk
|
||||||
|
|
||||||
|
|
||||||
|
class ABIDModel(models.Model):
    """
    Abstract Base Model for other models to depend on. Provides ArchiveBox ID (ABID) interface.

    Subclasses declare the abid_*_src attributes as Python expression strings
    (evaluated against `self`, e.g. 'self.created') that feed each section of
    the derived ABID.
    """
    abid_prefix: str = DEFAULT_ABID_PREFIX  # e.g. 'tag_'
    abid_ts_src = 'None'                    # e.g. 'self.created'
    abid_uri_src = 'None'                   # e.g. 'self.uri'
    abid_subtype_src = 'None'               # e.g. 'self.extractor'
    abid_rand_src = 'None'                  # e.g. 'self.uuid' or 'self.id'

    id = models.UUIDField(primary_key=True, default=uuid4, editable=True)
    uuid = models.UUIDField(blank=True, null=True, editable=True, unique=True)
    abid = ABIDField(prefix=abid_prefix)

    created_by = models.ForeignKey(settings.AUTH_USER_MODEL, on_delete=models.CASCADE, default=get_or_create_system_user_pk)
    created = models.DateTimeField(auto_now_add=True)
    modified = models.DateTimeField(auto_now=True)

    class Meta(TypedModelMeta):
        abstract = True

    def save(self, *args: Any, **kwargs: Any) -> None:
        """Recompute self.abid from the abid_*_src attrs on every save."""
        if hasattr(self, 'abid'):
            # self.abid = ABID.parse(self.abid) if self.abid else self.get_abid()
            self.abid = self.get_abid()
        else:
            # subclass removed/renamed the abid field: warn but still set the attr in memory
            print(f'[!] WARNING: {self.__class__.__name__}.abid is not a DB field so ABID will not be persisted!')
            self.abid = self.get_abid()

        super().save(*args, **kwargs)

    @property
    def abid_values(self) -> Dict[str, Any]:
        # NOTE: eval() runs the abid_*_src expression strings declared on the
        # class itself (e.g. 'self.created'), not user input — keep these as
        # trusted class-level literals only
        return {
            'prefix': self.abid_prefix,
            'ts': eval(self.abid_ts_src),
            'uri': eval(self.abid_uri_src),
            'subtype': eval(self.abid_subtype_src),
            'rand': eval(self.abid_rand_src),
        }

    def get_abid(self) -> ABID:
        """
        Return a freshly derived ABID (assembled from attrs defined in ABIDModel.abid_*_src).

        Raises Exception if the subclass never defined its own abid_prefix.
        Missing sections fall back to placeholders with a printed warning.
        """
        prefix, ts, uri, subtype, rand = self.abid_values.values()

        if (not prefix) or prefix == DEFAULT_ABID_PREFIX:
            suggested_abid = self.__class__.__name__[:3].lower()
            raise Exception(f'{self.__class__.__name__}.abid_prefix must be defined to calculate ABIDs (suggested: {suggested_abid})')

        if not ts:
            # NOTE(review): datetime.utcfromtimestamp is deprecated since Python 3.12
            ts = datetime.utcfromtimestamp(0)
            print(f'[!] WARNING: Generating ABID with ts=0000000000 placeholder because {self.__class__.__name__}.abid_ts_src={self.abid_ts_src} is unset!', ts.isoformat())

        if not uri:
            uri = str(self)
            print(f'[!] WARNING: Generating ABID with uri=str(self) placeholder because {self.__class__.__name__}.abid_uri_src={self.abid_uri_src} is unset!', uri)

        if not subtype:
            subtype = self.__class__.__name__
            print(f'[!] WARNING: Generating ABID with subtype={subtype} placeholder because {self.__class__.__name__}.abid_subtype_src={self.abid_subtype_src} is unset!', subtype)

        if not rand:
            rand = getattr(self, 'uuid', None) or getattr(self, 'id', None) or getattr(self, 'pk')
            print(f'[!] WARNING: Generating ABID with rand=self.id placeholder because {self.__class__.__name__}.abid_rand_src={self.abid_rand_src} is unset!', rand)

        abid = abid_from_values(
            prefix=prefix,
            ts=ts,
            uri=uri,
            subtype=subtype,
            rand=rand,
        )
        assert abid.ulid and abid.uuid and abid.typeid, f'Failed to calculate {prefix}_ABID for {self.__class__.__name__}'
        return abid

    @property
    def ABID(self) -> ABID:
        """
        Parsed ABID for this object (derived fresh when the DB field is unset).

        ULIDParts(timestamp='01HX9FPYTR', url='E4A5CCD9', subtype='00', randomness='ZYEBQE')
        """
        return ABID.parse(self.abid) if getattr(self, 'abid', None) else self.get_abid()

    @property
    def ULID(self) -> ULID:
        """
        Get a ulid.ULID representation of the object's ABID.
        """
        return self.ABID.ulid

    @property
    def UUID(self) -> UUID:
        """
        Get a uuid.UUID (v4) representation of the object's ABID.
        """
        return self.ABID.uuid

    @property
    def TypeID(self) -> TypeID:
        """
        Get a typeid.TypeID (stripe-style) representation of the object's ABID.
        """
        return self.ABID.typeid
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
####################################################
|
||||||
|
|
||||||
|
# Django helpers
|
||||||
|
def find_all_abid_prefixes() -> Dict[str, type[models.Model]]:
    """
    Return the mapping of all ABID prefixes to their models.
    e.g. {'tag_': core.models.Tag, 'snp_': core.models.Snapshot, ...}
    """
    import django.apps

    return {
        registered_model.abid_prefix: registered_model
        for registered_model in django.apps.apps.get_models()
        if getattr(registered_model, 'abid_prefix', None)
    }
|
||||||
|
|
||||||
|
def find_prefix_for_abid(abid: ABID) -> str:
    """
    Find the correct prefix for a given ABID that may be missing its prefix (slow).
    e.g. ABID('obj_01BJQMF54D093DXEAWZ6JYRPAQ') -> 'snp_'
    """
    # fast path: the ABID's existing prefix already maps to a registered model
    matching_model = find_model_from_abid(abid)
    if matching_model:
        assert issubclass(matching_model, ABIDModel)
        return matching_model.abid_prefix

    # slow path: prefix may be the generic obj_ or missing entirely,
    # fuzzy-search every model by the random suffix to find a match
    return find_obj_from_abid_rand(abid)[0].abid_prefix
|
||||||
|
|
||||||
|
def find_model_from_abid_prefix(prefix: str) -> type[ABIDModel] | None:
    """
    Return the Django Model that corresponds to a given ABID prefix.
    e.g. 'tag_' -> core.models.Tag
    """
    normalized_prefix = abid_part_from_prefix(prefix)

    import django.apps

    for candidate in django.apps.apps.get_models():
        if not issubclass(candidate, ABIDModel):
            continue  # skip non-ABID-enabled models
        if not hasattr(candidate, 'objects'):
            continue  # skip abstract models
        if candidate.abid_prefix == normalized_prefix:
            return candidate

    return None
|
||||||
|
|
||||||
|
def find_model_from_abid(abid: ABID) -> type[models.Model] | None:
    """
    Resolve an ABID to its registered Django model class, if any.

    Convenience wrapper: equivalent to find_model_from_abid_prefix(abid.prefix).
    """
    model_prefix = abid.prefix
    return find_model_from_abid_prefix(model_prefix)
|
||||||
|
|
||||||
|
def find_obj_from_abid_rand(rand: Union[ABID, str], model=None) -> List[ABIDModel]:
    """
    Find an object corresponding to an ABID by exhaustively searching using its random suffix (slow).
    e.g. 'obj_....................JYRPAQ' -> Snapshot('snp_01BJQMF54D093DXEAWZ6JYRPAQ')

    Returns a single-element list on an exact ABID match, otherwise a list of
    partial matches (objects whose id/uuid/abid merely end with the same chars).
    """

    # convert str to ABID if necessary (left-pad short fragments with zeros
    # so they parse as a full 26-char suffix)
    if isinstance(rand, ABID):
        abid: ABID = rand
    else:
        rand = str(rand)
        if len(rand) < ABID_SUFFIX_LEN:
            padding_needed = ABID_SUFFIX_LEN - len(rand)
            rand = ('0'*padding_needed) + rand
        abid = ABID.parse(rand)

    import django.apps

    partial_matches: List[ABIDModel] = []

    # search the caller-suggested model and the prefix-matched model first,
    # then every other registered model (set() dedupes, filter(bool) drops Nones)
    models_to_try = cast(Set[type[models.Model]], set(filter(bool, (
        model,
        find_model_from_abid(abid),
        *django.apps.apps.get_models(),
    ))))
    # print(abid, abid.rand, abid.uuid, models_to_try)

    for model in models_to_try:
        if not issubclass(model, ABIDModel): continue   # skip Models that arent ABID-enabled
        if not hasattr(model, 'objects'): continue      # skip abstract Models
        assert hasattr(model, 'objects')  # force-fix for type hint nit about missing manager https://github.com/typeddjango/django-stubs/issues/1684

        # continue on to try fuzzy searching by randomness portion derived from uuid field
        try:
            qs = []
            if hasattr(model, 'abid'):
                qs = model.objects.filter(abid__endswith=abid.rand)
            elif hasattr(model, 'uuid'):
                qs = model.objects.filter(uuid__endswith=str(abid.uuid)[-ABID_RAND_LEN:])
            elif hasattr(model, 'id'):
                # NOTE: this only works on SQLite where every column is a string
                # other DB backends like postgres dont let you do __endswith if this is a BigAutoInteger field

                # try to search for uuid=...-2354352
                # try to search for id=...2354352
                # try to search for id=2354352
                qs = model.objects.filter(
                    models.Q(id__endswith=str(abid.uuid)[-ABID_RAND_LEN:])
                    | models.Q(id__endswith=abid.rand)
                    | models.Q(id__startswith=str(int(abid.rand)) if abid.rand.isdigit() else abid.rand)
                )

            # verify each candidate by recomputing its full ABID
            for obj in qs:
                if obj.get_abid() == abid:
                    # found exact match, no need to keep iterating
                    return [obj]
                partial_matches.append(obj)
        except OperationalError as err:
            # e.g. DB backend rejected the string lookup on a non-string column
            print(f'[!] WARNING: Got error while trying to iterate through QuerySet for {model}:', err, '\n')

    return partial_matches
|
||||||
|
|
||||||
|
def find_obj_from_abid(abid: ABID, model=None, fuzzy=False) -> Any:
    """
    Find an object with a given ABID by filtering possible models for a matching abid/uuid/id (fast).
    e.g. 'snp_01BJQMF54D093DXEAWZ6JYRPAQ' -> Snapshot('snp_01BJQMF54D093DXEAWZ6JYRPAQ')

    Raises model.DoesNotExist when no match is found.
    NOTE: on the exact-match path a single object is returned, but on the fuzzy
    fallback a List of candidates is returned — callers must handle both shapes.
    """

    model = model or find_model_from_abid(abid)
    assert model, f'Could not find model that could match this ABID type: {abid}'

    try:
        # prefer the dedicated abid column, then uuid, then the pk itself
        if hasattr(model, 'abid'):
            return model.objects.get(abid__endswith=abid.suffix)
        if hasattr(model, 'uuid'):
            return model.objects.get(uuid=abid.uuid)
        return model.objects.get(id=abid.uuid)
    except model.DoesNotExist:
        # if the model has an abid field then it shouldve matched, pointless to fuzzy search in that case
        if hasattr(model, 'abid') or (not fuzzy):
            raise

    # continue on to try fuzzy searching by randomness portion derived from uuid field
    match_by_rand = find_obj_from_abid_rand(abid, model=model)
    if match_by_rand:
        if match_by_rand[0].abid_prefix != abid.prefix:
            print(f'[!] WARNING: fetched object {match_by_rand} even though prefix {abid.prefix} doesnt match!', abid, '\n')
        return match_by_rand

    raise model.DoesNotExist
|
||||||
|
|
||||||
3
archivebox/abid_utils/tests.py
Normal file
3
archivebox/abid_utils/tests.py
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
from django.test import TestCase
|
||||||
|
|
||||||
|
# Create your tests here.
|
||||||
@@ -3,5 +3,9 @@ __package__ = 'archivebox.api'
|
|||||||
from django.apps import AppConfig
|
from django.apps import AppConfig
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
class APIConfig(AppConfig):
|
class APIConfig(AppConfig):
|
||||||
name = 'api'
|
name = 'api'
|
||||||
|
|
||||||
|
def ready(self):
|
||||||
|
pass
|
||||||
|
|||||||
@@ -0,0 +1,60 @@
|
|||||||
|
# Generated by Django 5.0.6 on 2024-05-13 10:58
|
||||||
|
|
||||||
|
import charidfield.fields
|
||||||
|
import signal_webhooks.fields
|
||||||
|
import signal_webhooks.utils
|
||||||
|
import uuid
|
||||||
|
from django.db import migrations, models
|
||||||
|
|
||||||
|
|
||||||
|
# Auto-generated Django migration: creates the OutboundWebhook model and
# retrofits abid/uuid fields onto APIToken. Avoid hand-editing applied
# migrations — generate a follow-up migration instead.
class Migration(migrations.Migration):

    dependencies = [
        ('api', '0002_alter_apitoken_options'),
    ]

    operations = [
        migrations.CreateModel(
            name='OutboundWebhook',
            fields=[
                ('name', models.CharField(db_index=True, help_text='Give your webhook a descriptive name (e.g. Notify ACME Slack channel of any new ArchiveResults).', max_length=255, unique=True, verbose_name='name')),
                ('signal', models.CharField(choices=[('CREATE', 'Create'), ('UPDATE', 'Update'), ('DELETE', 'Delete'), ('M2M', 'M2M changed'), ('CREATE_OR_UPDATE', 'Create or Update'), ('CREATE_OR_DELETE', 'Create or Delete'), ('CREATE_OR_M2M', 'Create or M2M changed'), ('UPDATE_OR_DELETE', 'Update or Delete'), ('UPDATE_OR_M2M', 'Update or M2M changed'), ('DELETE_OR_M2M', 'Delete or M2M changed'), ('CREATE_UPDATE_OR_DELETE', 'Create, Update or Delete'), ('CREATE_UPDATE_OR_M2M', 'Create, Update or M2M changed'), ('CREATE_DELETE_OR_M2M', 'Create, Delete or M2M changed'), ('UPDATE_DELETE_OR_M2M', 'Update, Delete or M2M changed'), ('CREATE_UPDATE_DELETE_OR_M2M', 'Create, Update or Delete, or M2M changed')], help_text='The type of event the webhook should fire for (e.g. Create, Update, Delete).', max_length=255, verbose_name='signal')),
                ('ref', models.CharField(db_index=True, help_text='Dot import notation of the model the webhook should fire for (e.g. core.models.Snapshot or core.models.ArchiveResult).', max_length=1023, validators=[signal_webhooks.utils.model_from_reference], verbose_name='referenced model')),
                ('endpoint', models.URLField(help_text='External URL to POST the webhook notification to (e.g. https://someapp.example.com/webhook/some-webhook-receiver).', max_length=2047, verbose_name='endpoint')),
                ('headers', models.JSONField(blank=True, default=dict, help_text='Headers to send with the webhook request.', validators=[signal_webhooks.utils.is_dict], verbose_name='headers')),
                ('auth_token', signal_webhooks.fields.TokenField(blank=True, default='', help_text='Authentication token to use in an Authorization header.', max_length=8000, validators=[signal_webhooks.utils.decode_cipher_key], verbose_name='authentication token')),
                ('enabled', models.BooleanField(default=True, help_text='Is this webhook enabled?', verbose_name='enabled')),
                ('keep_last_response', models.BooleanField(default=False, help_text='Should the webhook keep a log of the latest response it got?', verbose_name='keep last response')),
                ('created', models.DateTimeField(auto_now_add=True, help_text='When the webhook was created.', verbose_name='created')),
                ('updated', models.DateTimeField(auto_now=True, help_text='When the webhook was last updated.', verbose_name='updated')),
                ('last_response', models.CharField(blank=True, default='', help_text='Latest response to this webhook.', max_length=8000, verbose_name='last response')),
                ('last_success', models.DateTimeField(default=None, help_text='When the webhook last succeeded.', null=True, verbose_name='last success')),
                ('last_failure', models.DateTimeField(default=None, help_text='When the webhook last failed.', null=True, verbose_name='last failure')),
                ('uuid', models.UUIDField(default=uuid.uuid4, primary_key=True, serialize=False)),
                ('abid', charidfield.fields.CharIDField(blank=True, db_index=True, default=None, help_text='ABID-format identifier for this entity (e.g. snp_01BJQMF54D093DXEAWZ6JYRPAQ)', max_length=30, null=True, prefix='whk', unique=True)),
            ],
            options={
                'verbose_name': 'API Outbound Webhook',
                'abstract': False,
            },
        ),
        migrations.AddField(
            model_name='apitoken',
            name='abid',
            field=charidfield.fields.CharIDField(blank=True, db_index=True, default=None, help_text='ABID-format identifier for this entity (e.g. snp_01BJQMF54D093DXEAWZ6JYRPAQ)', max_length=30, null=True, prefix='apt', unique=True),
        ),
        migrations.AddField(
            model_name='apitoken',
            name='uuid',
            field=models.UUIDField(blank=True, null=True, unique=True),
        ),
        migrations.AlterField(
            model_name='apitoken',
            name='id',
            field=models.UUIDField(default=uuid.uuid4, primary_key=True, serialize=False),
        ),
        migrations.AddConstraint(
            model_name='outboundwebhook',
            constraint=models.UniqueConstraint(fields=('ref', 'endpoint'), name='prevent_duplicate_hooks_api_outboundwebhook'),
        ),
    ]
|
||||||
@@ -0,0 +1,58 @@
|
|||||||
|
# Generated by Django 5.0.6 on 2024-05-13 14:36
|
||||||
|
|
||||||
|
import abid_utils.models
|
||||||
|
import charidfield.fields
|
||||||
|
import django.db.models.deletion
|
||||||
|
from django.conf import settings
|
||||||
|
from django.db import migrations, models
|
||||||
|
|
||||||
|
|
||||||
|
# Auto-generated Django migration: renames APIToken.user -> created_by and adds
# the ABIDModel bookkeeping fields (modified, created_by, id/uuid, normalized
# abid prefixes). Avoid hand-editing applied migrations.
class Migration(migrations.Migration):

    dependencies = [
        ('api', '0003_outboundwebhook_apitoken_abid_apitoken_uuid_and_more'),
        migrations.swappable_dependency(settings.AUTH_USER_MODEL),
    ]

    operations = [
        migrations.RenameField(
            model_name='apitoken',
            old_name='user',
            new_name='created_by',
        ),
        migrations.AddField(
            model_name='apitoken',
            name='modified',
            field=models.DateTimeField(auto_now=True),
        ),
        migrations.AddField(
            model_name='outboundwebhook',
            name='created_by',
            field=models.ForeignKey(default=abid_utils.models.get_or_create_system_user_pk, on_delete=django.db.models.deletion.CASCADE, to=settings.AUTH_USER_MODEL),
        ),
        migrations.AddField(
            model_name='outboundwebhook',
            name='id',
            field=models.UUIDField(blank=True, null=True, unique=True),
        ),
        migrations.AddField(
            model_name='outboundwebhook',
            name='modified',
            field=models.DateTimeField(auto_now=True),
        ),
        migrations.AlterField(
            model_name='apitoken',
            name='abid',
            field=charidfield.fields.CharIDField(blank=True, db_index=True, default=None, help_text='ABID-format identifier for this entity (e.g. snp_01BJQMF54D093DXEAWZ6JYRPAQ)', max_length=30, null=True, prefix='apt_', unique=True),
        ),
        migrations.AlterField(
            model_name='outboundwebhook',
            name='abid',
            field=charidfield.fields.CharIDField(blank=True, db_index=True, default=None, help_text='ABID-format identifier for this entity (e.g. snp_01BJQMF54D093DXEAWZ6JYRPAQ)', max_length=30, null=True, prefix='whk_', unique=True),
        ),
        migrations.AlterField(
            model_name='outboundwebhook',
            name='created',
            field=models.DateTimeField(auto_now_add=True),
        ),
    ]
|
||||||
@@ -8,22 +8,39 @@ from django.conf import settings
|
|||||||
from django.db import models
|
from django.db import models
|
||||||
from django.utils import timezone
|
from django.utils import timezone
|
||||||
|
|
||||||
|
from signal_webhooks.models import WebhookBase
|
||||||
|
|
||||||
from django_stubs_ext.db.models import TypedModelMeta
|
from django_stubs_ext.db.models import TypedModelMeta
|
||||||
|
|
||||||
|
from abid_utils.models import ABIDModel, ABIDField
|
||||||
|
|
||||||
|
|
||||||
def generate_secret_token() -> str:
|
def generate_secret_token() -> str:
|
||||||
# returns cryptographically secure string with len() == 32
|
# returns cryptographically secure string with len() == 32
|
||||||
return secrets.token_hex(16)
|
return secrets.token_hex(16)
|
||||||
|
|
||||||
|
|
||||||
class APIToken(models.Model):
|
class APIToken(ABIDModel):
|
||||||
id = models.UUIDField(primary_key=True, default=uuid.uuid4, editable=False)
|
"""
|
||||||
|
A secret key generated by a User that's used to authenticate REST API requests to ArchiveBox.
|
||||||
|
"""
|
||||||
|
# ABID: apt_<created_ts>_<token_hash>_<user_id_hash>_<uuid_rand>
|
||||||
|
abid_prefix = 'apt_'
|
||||||
|
abid_ts_src = 'self.created'
|
||||||
|
abid_uri_src = 'self.token'
|
||||||
|
abid_subtype_src = 'self.user_id'
|
||||||
|
abid_rand_src = 'self.id'
|
||||||
|
|
||||||
user = models.ForeignKey(settings.AUTH_USER_MODEL, on_delete=models.CASCADE)
|
id = models.UUIDField(primary_key=True, default=uuid.uuid4, editable=True)
|
||||||
|
uuid = models.UUIDField(blank=True, null=True, editable=True, unique=True)
|
||||||
|
abid = ABIDField(prefix=abid_prefix)
|
||||||
|
|
||||||
|
created_by = models.ForeignKey(settings.AUTH_USER_MODEL, on_delete=models.CASCADE)
|
||||||
token = models.CharField(max_length=32, default=generate_secret_token, unique=True)
|
token = models.CharField(max_length=32, default=generate_secret_token, unique=True)
|
||||||
|
|
||||||
created = models.DateTimeField(auto_now_add=True)
|
created = models.DateTimeField(auto_now_add=True)
|
||||||
expires = models.DateTimeField(null=True, blank=True)
|
expires = models.DateTimeField(null=True, blank=True)
|
||||||
|
|
||||||
|
|
||||||
class Meta(TypedModelMeta):
|
class Meta(TypedModelMeta):
|
||||||
verbose_name = "API Key"
|
verbose_name = "API Key"
|
||||||
@@ -38,7 +55,8 @@ class APIToken(models.Model):
|
|||||||
def __json__(self) -> dict:
|
def __json__(self) -> dict:
|
||||||
return {
|
return {
|
||||||
"TYPE": "APIToken",
|
"TYPE": "APIToken",
|
||||||
"id": str(self.id),
|
"uuid": str(self.id),
|
||||||
|
"abid": str(self.get_abid()),
|
||||||
"user_id": str(self.user.id),
|
"user_id": str(self.user.id),
|
||||||
"user_username": self.user.username,
|
"user_username": self.user.username,
|
||||||
"token": self.token,
|
"token": self.token,
|
||||||
@@ -61,3 +79,37 @@ class APIToken(models.Model):
|
|||||||
|
|
||||||
return True
|
return True
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
# monkey patch django-signals-webhooks to change how it shows up in Admin UI
|
||||||
|
|
||||||
|
class OutboundWebhook(ABIDModel, WebhookBase):
|
||||||
|
"""
|
||||||
|
Model used in place of (extending) signals_webhooks.models.WebhookModel. Swapped using:
|
||||||
|
settings.SIGNAL_WEBHOOKS_CUSTOM_MODEL = 'api.models.OutboundWebhook'
|
||||||
|
"""
|
||||||
|
abid_prefix = 'whk_'
|
||||||
|
abid_ts_src = 'self.created'
|
||||||
|
abid_uri_src = 'self.endpoint'
|
||||||
|
abid_subtype_src = 'self.ref'
|
||||||
|
abid_rand_src = 'self.id'
|
||||||
|
|
||||||
|
id = models.UUIDField(blank=True, null=True, unique=True, editable=True)
|
||||||
|
uuid = models.UUIDField(primary_key=True, default=uuid.uuid4, editable=True)
|
||||||
|
abid = ABIDField(prefix=abid_prefix)
|
||||||
|
|
||||||
|
WebhookBase._meta.get_field('name').help_text = (
|
||||||
|
'Give your webhook a descriptive name (e.g. Notify ACME Slack channel of any new ArchiveResults).')
|
||||||
|
WebhookBase._meta.get_field('signal').help_text = (
|
||||||
|
'The type of event the webhook should fire for (e.g. Create, Update, Delete).')
|
||||||
|
WebhookBase._meta.get_field('ref').help_text = (
|
||||||
|
'Dot import notation of the model the webhook should fire for (e.g. core.models.Snapshot or core.models.ArchiveResult).')
|
||||||
|
WebhookBase._meta.get_field('endpoint').help_text = (
|
||||||
|
'External URL to POST the webhook notification to (e.g. https://someapp.example.com/webhook/some-webhook-receiver).')
|
||||||
|
|
||||||
|
class Meta(WebhookBase.Meta):
|
||||||
|
verbose_name = 'API Outbound Webhook'
|
||||||
|
|
||||||
|
|||||||
@@ -47,6 +47,6 @@ def check_api_token(request, token_data: TokenAuthSchema):
|
|||||||
request=request,
|
request=request,
|
||||||
)
|
)
|
||||||
if user:
|
if user:
|
||||||
return {"success": True, "user_id": str(user.id)}
|
return {"success": True, "user_id": str(user.pk)}
|
||||||
|
|
||||||
return {"success": False, "user_id": None}
|
return {"success": False, "user_id": None}
|
||||||
|
|||||||
@@ -4,13 +4,14 @@ from uuid import UUID
|
|||||||
from typing import List, Optional
|
from typing import List, Optional
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
|
|
||||||
|
from django.db.models import Q
|
||||||
from django.shortcuts import get_object_or_404
|
from django.shortcuts import get_object_or_404
|
||||||
|
|
||||||
from ninja import Router, Schema, FilterSchema, Field, Query
|
from ninja import Router, Schema, FilterSchema, Field, Query
|
||||||
from ninja.pagination import paginate
|
from ninja.pagination import paginate
|
||||||
|
|
||||||
from core.models import Snapshot, ArchiveResult, Tag
|
from core.models import Snapshot, ArchiveResult, Tag
|
||||||
|
from abid_utils.abid import ABID
|
||||||
|
|
||||||
router = Router(tags=['Core Models'])
|
router = Router(tags=['Core Models'])
|
||||||
|
|
||||||
@@ -20,24 +21,39 @@ router = Router(tags=['Core Models'])
|
|||||||
### ArchiveResult #########################################################################
|
### ArchiveResult #########################################################################
|
||||||
|
|
||||||
class ArchiveResultSchema(Schema):
|
class ArchiveResultSchema(Schema):
|
||||||
id: UUID
|
abid: str
|
||||||
|
uuid: UUID
|
||||||
|
pk: str
|
||||||
|
modified: datetime
|
||||||
|
created: datetime
|
||||||
|
created_by_id: str
|
||||||
|
|
||||||
snapshot_id: UUID
|
snapshot_abid: str
|
||||||
snapshot_url: str
|
snapshot_url: str
|
||||||
snapshot_tags: str
|
snapshot_tags: str
|
||||||
|
|
||||||
extractor: str
|
extractor: str
|
||||||
|
cmd_version: str
|
||||||
cmd: List[str]
|
cmd: List[str]
|
||||||
pwd: str
|
pwd: str
|
||||||
cmd_version: str
|
|
||||||
output: str
|
|
||||||
status: str
|
status: str
|
||||||
|
output: str
|
||||||
created: datetime
|
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def resolve_id(obj):
|
def resolve_created_by_id(obj):
|
||||||
return obj.uuid
|
return str(obj.created_by_id)
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def resolve_pk(obj):
|
||||||
|
return str(obj.pk)
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def resolve_uuid(obj):
|
||||||
|
return str(obj.uuid)
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def resolve_abid(obj):
|
||||||
|
return str(obj.ABID)
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def resolve_created(obj):
|
def resolve_created(obj):
|
||||||
@@ -47,18 +63,23 @@ class ArchiveResultSchema(Schema):
|
|||||||
def resolve_snapshot_url(obj):
|
def resolve_snapshot_url(obj):
|
||||||
return obj.snapshot.url
|
return obj.snapshot.url
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def resolve_snapshot_abid(obj):
|
||||||
|
return str(obj.snapshot.ABID)
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def resolve_snapshot_tags(obj):
|
def resolve_snapshot_tags(obj):
|
||||||
return obj.snapshot.tags_str()
|
return obj.snapshot.tags_str()
|
||||||
|
|
||||||
|
|
||||||
class ArchiveResultFilterSchema(FilterSchema):
|
class ArchiveResultFilterSchema(FilterSchema):
|
||||||
id: Optional[UUID] = Field(None, q='uuid')
|
uuid: Optional[UUID] = Field(None, q='uuid')
|
||||||
|
# abid: Optional[str] = Field(None, q='abid')
|
||||||
|
|
||||||
search: Optional[str] = Field(None, q=['snapshot__url__icontains', 'snapshot__title__icontains', 'snapshot__tags__name__icontains', 'extractor', 'output__icontains'])
|
search: Optional[str] = Field(None, q=['snapshot__url__icontains', 'snapshot__title__icontains', 'snapshot__tags__name__icontains', 'extractor', 'output__icontains'])
|
||||||
snapshot_id: Optional[UUID] = Field(None, q='snapshot_id')
|
snapshot_uuid: Optional[UUID] = Field(None, q='snapshot_uuid__icontains')
|
||||||
snapshot_url: Optional[str] = Field(None, q='snapshot__url')
|
snapshot_url: Optional[str] = Field(None, q='snapshot__url__icontains')
|
||||||
snapshot_tag: Optional[str] = Field(None, q='snapshot__tags__name')
|
snapshot_tag: Optional[str] = Field(None, q='snapshot__tags__name__icontains')
|
||||||
|
|
||||||
status: Optional[str] = Field(None, q='status')
|
status: Optional[str] = Field(None, q='status')
|
||||||
output: Optional[str] = Field(None, q='output__icontains')
|
output: Optional[str] = Field(None, q='output__icontains')
|
||||||
@@ -75,6 +96,7 @@ class ArchiveResultFilterSchema(FilterSchema):
|
|||||||
@router.get("/archiveresults", response=List[ArchiveResultSchema])
|
@router.get("/archiveresults", response=List[ArchiveResultSchema])
|
||||||
@paginate
|
@paginate
|
||||||
def list_archiveresults(request, filters: ArchiveResultFilterSchema = Query(...)):
|
def list_archiveresults(request, filters: ArchiveResultFilterSchema = Query(...)):
|
||||||
|
"""List all ArchiveResult entries matching these filters."""
|
||||||
qs = ArchiveResult.objects.all()
|
qs = ArchiveResult.objects.all()
|
||||||
results = filters.filter(qs)
|
results = filters.filter(qs)
|
||||||
return results
|
return results
|
||||||
@@ -82,8 +104,8 @@ def list_archiveresults(request, filters: ArchiveResultFilterSchema = Query(...)
|
|||||||
|
|
||||||
@router.get("/archiveresult/{archiveresult_id}", response=ArchiveResultSchema)
|
@router.get("/archiveresult/{archiveresult_id}", response=ArchiveResultSchema)
|
||||||
def get_archiveresult(request, archiveresult_id: str):
|
def get_archiveresult(request, archiveresult_id: str):
|
||||||
archiveresult = get_object_or_404(ArchiveResult, id=archiveresult_id)
|
"""Get a specific ArchiveResult by abid, uuid, or pk."""
|
||||||
return archiveresult
|
return ArchiveResult.objects.get(Q(pk__icontains=archiveresult_id) | Q(abid__icontains=archiveresult_id) | Q(uuid__icontains=archiveresult_id))
|
||||||
|
|
||||||
|
|
||||||
# @router.post("/archiveresult", response=ArchiveResultSchema)
|
# @router.post("/archiveresult", response=ArchiveResultSchema)
|
||||||
@@ -115,27 +137,50 @@ def get_archiveresult(request, archiveresult_id: str):
|
|||||||
|
|
||||||
|
|
||||||
class SnapshotSchema(Schema):
|
class SnapshotSchema(Schema):
|
||||||
id: UUID
|
abid: str
|
||||||
|
uuid: UUID
|
||||||
|
pk: str
|
||||||
|
modified: datetime
|
||||||
|
created: datetime
|
||||||
|
created_by_id: str
|
||||||
|
|
||||||
url: str
|
url: str
|
||||||
tags: str
|
tags: str
|
||||||
title: Optional[str]
|
title: Optional[str]
|
||||||
timestamp: str
|
timestamp: str
|
||||||
bookmarked: datetime
|
|
||||||
added: datetime
|
|
||||||
updated: datetime
|
|
||||||
archive_path: str
|
archive_path: str
|
||||||
|
|
||||||
|
bookmarked: datetime
|
||||||
|
added: datetime
|
||||||
|
updated: Optional[datetime]
|
||||||
|
|
||||||
|
num_archiveresults: int
|
||||||
archiveresults: List[ArchiveResultSchema]
|
archiveresults: List[ArchiveResultSchema]
|
||||||
|
|
||||||
# @staticmethod
|
@staticmethod
|
||||||
# def resolve_id(obj):
|
def resolve_created_by_id(obj):
|
||||||
# return str(obj.id)
|
return str(obj.created_by_id)
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def resolve_pk(obj):
|
||||||
|
return str(obj.pk)
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def resolve_uuid(obj):
|
||||||
|
return str(obj.uuid)
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def resolve_abid(obj):
|
||||||
|
return str(obj.ABID)
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def resolve_tags(obj):
|
def resolve_tags(obj):
|
||||||
return obj.tags_str()
|
return obj.tags_str()
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def resolve_num_archiveresults(obj, context):
|
||||||
|
return obj.archiveresult_set.all().distinct().count()
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def resolve_archiveresults(obj, context):
|
def resolve_archiveresults(obj, context):
|
||||||
if context['request'].with_archiveresults:
|
if context['request'].with_archiveresults:
|
||||||
@@ -144,23 +189,32 @@ class SnapshotSchema(Schema):
|
|||||||
|
|
||||||
|
|
||||||
class SnapshotFilterSchema(FilterSchema):
|
class SnapshotFilterSchema(FilterSchema):
|
||||||
id: Optional[UUID] = Field(None, q='id')
|
abid: Optional[str] = Field(None, q='abid__icontains')
|
||||||
|
uuid: Optional[str] = Field(None, q='uuid__icontains')
|
||||||
|
pk: Optional[str] = Field(None, q='pk__icontains')
|
||||||
|
created_by_id: str = Field(None, q='created_by_id__icontains')
|
||||||
|
created__gte: datetime = Field(None, q='created__gte')
|
||||||
|
created__lt: datetime = Field(None, q='created__lt')
|
||||||
|
created: datetime = Field(None, q='created')
|
||||||
|
modified: datetime = Field(None, q='modified')
|
||||||
|
modified__gte: datetime = Field(None, q='modified__gte')
|
||||||
|
modified__lt: datetime = Field(None, q='modified__lt')
|
||||||
|
|
||||||
search: Optional[str] = Field(None, q=['url__icontains', 'title__icontains', 'tags__name__icontains'])
|
search: Optional[str] = Field(None, q=['url__icontains', 'title__icontains', 'tags__name__icontains', 'abid__icontains', 'uuid__icontains'])
|
||||||
url: Optional[str] = Field(None, q='url')
|
url: Optional[str] = Field(None, q='url')
|
||||||
tag: Optional[str] = Field(None, q='tags__name')
|
tag: Optional[str] = Field(None, q='tags__name')
|
||||||
title: Optional[str] = Field(None, q='title__icontains')
|
title: Optional[str] = Field(None, q='title__icontains')
|
||||||
|
|
||||||
timestamp: Optional[str] = Field(None, q='timestamp__startswith')
|
timestamp: Optional[str] = Field(None, q='timestamp__startswith')
|
||||||
|
|
||||||
added: Optional[datetime] = Field(None, q='added')
|
|
||||||
added__gte: Optional[datetime] = Field(None, q='added__gte')
|
added__gte: Optional[datetime] = Field(None, q='added__gte')
|
||||||
added__lt: Optional[datetime] = Field(None, q='added__lt')
|
added__lt: Optional[datetime] = Field(None, q='added__lt')
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
@router.get("/snapshots", response=List[SnapshotSchema])
|
@router.get("/snapshots", response=List[SnapshotSchema])
|
||||||
@paginate
|
@paginate
|
||||||
def list_snapshots(request, filters: SnapshotFilterSchema = Query(...), with_archiveresults: bool=True):
|
def list_snapshots(request, filters: SnapshotFilterSchema = Query(...), with_archiveresults: bool=True):
|
||||||
|
"""List all Snapshot entries matching these filters."""
|
||||||
request.with_archiveresults = with_archiveresults
|
request.with_archiveresults = with_archiveresults
|
||||||
|
|
||||||
qs = Snapshot.objects.all()
|
qs = Snapshot.objects.all()
|
||||||
@@ -169,8 +223,24 @@ def list_snapshots(request, filters: SnapshotFilterSchema = Query(...), with_arc
|
|||||||
|
|
||||||
@router.get("/snapshot/{snapshot_id}", response=SnapshotSchema)
|
@router.get("/snapshot/{snapshot_id}", response=SnapshotSchema)
|
||||||
def get_snapshot(request, snapshot_id: str, with_archiveresults: bool=True):
|
def get_snapshot(request, snapshot_id: str, with_archiveresults: bool=True):
|
||||||
|
"""Get a specific Snapshot by abid, uuid, or pk."""
|
||||||
request.with_archiveresults = with_archiveresults
|
request.with_archiveresults = with_archiveresults
|
||||||
snapshot = get_object_or_404(Snapshot, id=snapshot_id)
|
snapshot = None
|
||||||
|
try:
|
||||||
|
snapshot = Snapshot.objects.get(Q(uuid__startswith=snapshot_id) | Q(abid__startswith=snapshot_id)| Q(pk__startswith=snapshot_id))
|
||||||
|
except Snapshot.DoesNotExist:
|
||||||
|
pass
|
||||||
|
|
||||||
|
try:
|
||||||
|
snapshot = snapshot or Snapshot.objects.get()
|
||||||
|
except Snapshot.DoesNotExist:
|
||||||
|
pass
|
||||||
|
|
||||||
|
try:
|
||||||
|
snapshot = snapshot or Snapshot.objects.get(Q(uuid__icontains=snapshot_id) | Q(abid__icontains=snapshot_id))
|
||||||
|
except Snapshot.DoesNotExist:
|
||||||
|
pass
|
||||||
|
|
||||||
return snapshot
|
return snapshot
|
||||||
|
|
||||||
|
|
||||||
@@ -179,9 +249,9 @@ def get_snapshot(request, snapshot_id: str, with_archiveresults: bool=True):
|
|||||||
# snapshot = Snapshot.objects.create(**payload.dict())
|
# snapshot = Snapshot.objects.create(**payload.dict())
|
||||||
# return snapshot
|
# return snapshot
|
||||||
#
|
#
|
||||||
# @router.put("/snapshot/{snapshot_id}", response=SnapshotSchema)
|
# @router.put("/snapshot/{snapshot_uuid}", response=SnapshotSchema)
|
||||||
# def update_snapshot(request, snapshot_id: str, payload: SnapshotSchema):
|
# def update_snapshot(request, snapshot_uuid: str, payload: SnapshotSchema):
|
||||||
# snapshot = get_object_or_404(Snapshot, id=snapshot_id)
|
# snapshot = get_object_or_404(Snapshot, uuid=snapshot_uuid)
|
||||||
#
|
#
|
||||||
# for attr, value in payload.dict().items():
|
# for attr, value in payload.dict().items():
|
||||||
# setattr(snapshot, attr, value)
|
# setattr(snapshot, attr, value)
|
||||||
@@ -189,9 +259,9 @@ def get_snapshot(request, snapshot_id: str, with_archiveresults: bool=True):
|
|||||||
#
|
#
|
||||||
# return snapshot
|
# return snapshot
|
||||||
#
|
#
|
||||||
# @router.delete("/snapshot/{snapshot_id}")
|
# @router.delete("/snapshot/{snapshot_uuid}")
|
||||||
# def delete_snapshot(request, snapshot_id: str):
|
# def delete_snapshot(request, snapshot_uuid: str):
|
||||||
# snapshot = get_object_or_404(Snapshot, id=snapshot_id)
|
# snapshot = get_object_or_404(Snapshot, uuid=snapshot_uuid)
|
||||||
# snapshot.delete()
|
# snapshot.delete()
|
||||||
# return {"success": True}
|
# return {"success": True}
|
||||||
|
|
||||||
@@ -201,10 +271,21 @@ def get_snapshot(request, snapshot_id: str, with_archiveresults: bool=True):
|
|||||||
|
|
||||||
|
|
||||||
class TagSchema(Schema):
|
class TagSchema(Schema):
|
||||||
|
abid: Optional[UUID] = Field(None, q='abid')
|
||||||
|
uuid: Optional[UUID] = Field(None, q='uuid')
|
||||||
|
pk: Optional[UUID] = Field(None, q='pk')
|
||||||
|
modified: datetime
|
||||||
|
created: datetime
|
||||||
|
created_by_id: str
|
||||||
|
|
||||||
name: str
|
name: str
|
||||||
slug: str
|
slug: str
|
||||||
|
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def resolve_created_by_id(obj):
|
||||||
|
return str(obj.created_by_id)
|
||||||
|
|
||||||
@router.get("/tags", response=List[TagSchema])
|
@router.get("/tags", response=List[TagSchema])
|
||||||
def list_tags(request):
|
def list_tags(request):
|
||||||
return Tag.objects.all()
|
return Tag.objects.all()
|
||||||
|
|||||||
@@ -4,14 +4,18 @@ __command__ = 'archivebox'
|
|||||||
import os
|
import os
|
||||||
import sys
|
import sys
|
||||||
import argparse
|
import argparse
|
||||||
|
import threading
|
||||||
|
from time import sleep
|
||||||
|
|
||||||
from typing import Optional, Dict, List, IO, Union
|
from typing import Optional, Dict, List, IO, Union, Iterable
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
from ..config import OUTPUT_DIR, check_data_folder, check_migrations
|
from ..config import OUTPUT_DIR, check_data_folder, check_migrations, stderr
|
||||||
|
|
||||||
from importlib import import_module
|
from importlib import import_module
|
||||||
|
|
||||||
|
BUILTIN_LIST = list
|
||||||
|
|
||||||
CLI_DIR = Path(__file__).resolve().parent
|
CLI_DIR = Path(__file__).resolve().parent
|
||||||
|
|
||||||
# these common commands will appear sorted before any others for ease-of-use
|
# these common commands will appear sorted before any others for ease-of-use
|
||||||
@@ -33,6 +37,40 @@ is_valid_cli_module = lambda module, subcommand: (
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
IGNORED_BG_THREADS = ('MainThread', 'ThreadPoolExecutor', 'IPythonHistorySavingThread') # threads we dont have to wait for before exiting
|
||||||
|
|
||||||
|
|
||||||
|
def wait_for_bg_threads_to_exit(thread_names: Iterable[str]=(), ignore_names: Iterable[str]=IGNORED_BG_THREADS, timeout: int=60) -> int:
|
||||||
|
"""
|
||||||
|
Block until the specified threads exit. e.g. pass thread_names=('default_hook_handler',) to wait for webhooks.
|
||||||
|
Useful for waiting for signal handlers, webhooks, etc. to finish running after a mgmt command completes.
|
||||||
|
"""
|
||||||
|
|
||||||
|
wait_for_all: bool = thread_names == ()
|
||||||
|
|
||||||
|
thread_matches = lambda thread, ptns: any(ptn in repr(thread) for ptn in ptns)
|
||||||
|
|
||||||
|
should_wait = lambda thread: (
|
||||||
|
not thread_matches(thread, ignore_names)
|
||||||
|
and (wait_for_all or thread_matches(thread, thread_names)))
|
||||||
|
|
||||||
|
for tries in range(timeout):
|
||||||
|
all_threads = [*threading.enumerate()]
|
||||||
|
blocking_threads = [*filter(should_wait, all_threads)]
|
||||||
|
threads_summary = ', '.join(repr(t) for t in blocking_threads)
|
||||||
|
if blocking_threads:
|
||||||
|
sleep(1)
|
||||||
|
if tries == 5: # only show stderr message if we need to wait more than 5s
|
||||||
|
stderr(
|
||||||
|
f'[…] Waiting up to {timeout}s for background jobs (e.g. webhooks) to finish...',
|
||||||
|
threads_summary,
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
return tries
|
||||||
|
|
||||||
|
raise Exception('Background threads failed to exit after {tries}s: {threads_summary}')
|
||||||
|
|
||||||
|
|
||||||
def list_subcommands() -> Dict[str, str]:
|
def list_subcommands() -> Dict[str, str]:
|
||||||
"""find and import all valid archivebox_<subcommand>.py files in CLI_DIR"""
|
"""find and import all valid archivebox_<subcommand>.py files in CLI_DIR"""
|
||||||
|
|
||||||
@@ -79,6 +117,9 @@ def run_subcommand(subcommand: str,
|
|||||||
module = import_module('.archivebox_{}'.format(subcommand), __package__)
|
module = import_module('.archivebox_{}'.format(subcommand), __package__)
|
||||||
module.main(args=subcommand_args, stdin=stdin, pwd=pwd) # type: ignore
|
module.main(args=subcommand_args, stdin=stdin, pwd=pwd) # type: ignore
|
||||||
|
|
||||||
|
# wait for webhooks, signals, and other background jobs to finish before exit
|
||||||
|
wait_for_bg_threads_to_exit(timeout=60)
|
||||||
|
|
||||||
|
|
||||||
SUBCOMMANDS = list_subcommands()
|
SUBCOMMANDS = list_subcommands()
|
||||||
|
|
||||||
|
|||||||
@@ -37,7 +37,7 @@ from sqlite3 import dbapi2 as sqlite3
|
|||||||
from hashlib import md5
|
from hashlib import md5
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from datetime import datetime, timezone
|
from datetime import datetime, timezone
|
||||||
from typing import Optional, Type, Tuple, Dict, Union, List
|
from typing import Optional, Type, Tuple, Dict, Union, List, Any
|
||||||
from subprocess import run, PIPE, DEVNULL
|
from subprocess import run, PIPE, DEVNULL
|
||||||
from configparser import ConfigParser
|
from configparser import ConfigParser
|
||||||
from collections import defaultdict
|
from collections import defaultdict
|
||||||
@@ -281,6 +281,7 @@ TEMPLATES_DIR_NAME = 'templates'
|
|||||||
ARCHIVE_DIR_NAME = 'archive'
|
ARCHIVE_DIR_NAME = 'archive'
|
||||||
SOURCES_DIR_NAME = 'sources'
|
SOURCES_DIR_NAME = 'sources'
|
||||||
LOGS_DIR_NAME = 'logs'
|
LOGS_DIR_NAME = 'logs'
|
||||||
|
CACHE_DIR_NAME = 'cache'
|
||||||
PERSONAS_DIR_NAME = 'personas'
|
PERSONAS_DIR_NAME = 'personas'
|
||||||
CRONTABS_DIR_NAME = 'crontabs'
|
CRONTABS_DIR_NAME = 'crontabs'
|
||||||
SQL_INDEX_FILENAME = 'index.sqlite3'
|
SQL_INDEX_FILENAME = 'index.sqlite3'
|
||||||
@@ -360,6 +361,7 @@ ALLOWED_IN_OUTPUT_DIR = {
|
|||||||
ARCHIVE_DIR_NAME,
|
ARCHIVE_DIR_NAME,
|
||||||
SOURCES_DIR_NAME,
|
SOURCES_DIR_NAME,
|
||||||
LOGS_DIR_NAME,
|
LOGS_DIR_NAME,
|
||||||
|
CACHE_DIR_NAME,
|
||||||
PERSONAS_DIR_NAME,
|
PERSONAS_DIR_NAME,
|
||||||
SQL_INDEX_FILENAME,
|
SQL_INDEX_FILENAME,
|
||||||
f'{SQL_INDEX_FILENAME}-wal',
|
f'{SQL_INDEX_FILENAME}-wal',
|
||||||
@@ -511,6 +513,7 @@ DYNAMIC_CONFIG_SCHEMA: ConfigDefaultDict = {
|
|||||||
'ARCHIVE_DIR': {'default': lambda c: c['OUTPUT_DIR'] / ARCHIVE_DIR_NAME},
|
'ARCHIVE_DIR': {'default': lambda c: c['OUTPUT_DIR'] / ARCHIVE_DIR_NAME},
|
||||||
'SOURCES_DIR': {'default': lambda c: c['OUTPUT_DIR'] / SOURCES_DIR_NAME},
|
'SOURCES_DIR': {'default': lambda c: c['OUTPUT_DIR'] / SOURCES_DIR_NAME},
|
||||||
'LOGS_DIR': {'default': lambda c: c['OUTPUT_DIR'] / LOGS_DIR_NAME},
|
'LOGS_DIR': {'default': lambda c: c['OUTPUT_DIR'] / LOGS_DIR_NAME},
|
||||||
|
'CACHE_DIR': {'default': lambda c: c['OUTPUT_DIR'] / CACHE_DIR_NAME},
|
||||||
'PERSONAS_DIR': {'default': lambda c: c['OUTPUT_DIR'] / PERSONAS_DIR_NAME},
|
'PERSONAS_DIR': {'default': lambda c: c['OUTPUT_DIR'] / PERSONAS_DIR_NAME},
|
||||||
'CONFIG_FILE': {'default': lambda c: Path(c['CONFIG_FILE']).resolve() if c['CONFIG_FILE'] else c['OUTPUT_DIR'] / CONFIG_FILENAME},
|
'CONFIG_FILE': {'default': lambda c: Path(c['CONFIG_FILE']).resolve() if c['CONFIG_FILE'] else c['OUTPUT_DIR'] / CONFIG_FILENAME},
|
||||||
'COOKIES_FILE': {'default': lambda c: c['COOKIES_FILE'] and Path(c['COOKIES_FILE']).resolve()},
|
'COOKIES_FILE': {'default': lambda c: c['COOKIES_FILE'] and Path(c['COOKIES_FILE']).resolve()},
|
||||||
@@ -1038,6 +1041,11 @@ def get_data_locations(config: ConfigDict) -> ConfigValue:
|
|||||||
'enabled': True,
|
'enabled': True,
|
||||||
'is_valid': config['LOGS_DIR'].exists(),
|
'is_valid': config['LOGS_DIR'].exists(),
|
||||||
},
|
},
|
||||||
|
'CACHE_DIR': {
|
||||||
|
'path': config['CACHE_DIR'].resolve(),
|
||||||
|
'enabled': True,
|
||||||
|
'is_valid': config['CACHE_DIR'].exists(),
|
||||||
|
},
|
||||||
'CUSTOM_TEMPLATES_DIR': {
|
'CUSTOM_TEMPLATES_DIR': {
|
||||||
'path': config['CUSTOM_TEMPLATES_DIR'] and Path(config['CUSTOM_TEMPLATES_DIR']).resolve(),
|
'path': config['CUSTOM_TEMPLATES_DIR'] and Path(config['CUSTOM_TEMPLATES_DIR']).resolve(),
|
||||||
'enabled': bool(config['CUSTOM_TEMPLATES_DIR']),
|
'enabled': bool(config['CUSTOM_TEMPLATES_DIR']),
|
||||||
@@ -1299,7 +1307,10 @@ def check_system_config(config: ConfigDict=CONFIG) -> None:
|
|||||||
stderr()
|
stderr()
|
||||||
stderr(' Try removing /Default from the end e.g.:')
|
stderr(' Try removing /Default from the end e.g.:')
|
||||||
stderr(' CHROME_USER_DATA_DIR="{}"'.format(config['CHROME_USER_DATA_DIR'].split('/Default')[0]))
|
stderr(' CHROME_USER_DATA_DIR="{}"'.format(config['CHROME_USER_DATA_DIR'].split('/Default')[0]))
|
||||||
raise SystemExit(2)
|
|
||||||
|
# hard error is too annoying here, instead just set it to nothing
|
||||||
|
# raise SystemExit(2)
|
||||||
|
config['CHROME_USER_DATA_DIR'] = None
|
||||||
|
|
||||||
|
|
||||||
def check_dependencies(config: ConfigDict=CONFIG, show_help: bool=True) -> None:
|
def check_dependencies(config: ConfigDict=CONFIG, show_help: bool=True) -> None:
|
||||||
@@ -1385,6 +1396,7 @@ def check_migrations(out_dir: Union[str, Path, None]=None, config: ConfigDict=CO
|
|||||||
|
|
||||||
(Path(output_dir) / SOURCES_DIR_NAME).mkdir(exist_ok=True)
|
(Path(output_dir) / SOURCES_DIR_NAME).mkdir(exist_ok=True)
|
||||||
(Path(output_dir) / LOGS_DIR_NAME).mkdir(exist_ok=True)
|
(Path(output_dir) / LOGS_DIR_NAME).mkdir(exist_ok=True)
|
||||||
|
(Path(output_dir) / CACHE_DIR_NAME).mkdir(exist_ok=True)
|
||||||
(Path(output_dir) / PERSONAS_DIR_NAME).mkdir(exist_ok=True)
|
(Path(output_dir) / PERSONAS_DIR_NAME).mkdir(exist_ok=True)
|
||||||
(Path(output_dir) / PERSONAS_DIR_NAME / 'Default').mkdir(exist_ok=True)
|
(Path(output_dir) / PERSONAS_DIR_NAME / 'Default').mkdir(exist_ok=True)
|
||||||
|
|
||||||
|
|||||||
@@ -15,8 +15,8 @@ from django.contrib.auth import get_user_model
|
|||||||
from django import forms
|
from django import forms
|
||||||
|
|
||||||
|
|
||||||
from signal_webhooks.apps import DjangoSignalWebhooksConfig
|
from signal_webhooks.admin import WebhookAdmin, get_webhook_model
|
||||||
from signal_webhooks.admin import WebhookAdmin, WebhookModel
|
# from plugantic.admin import CustomPlugin
|
||||||
|
|
||||||
from ..util import htmldecode, urldecode, ansi_to_html
|
from ..util import htmldecode, urldecode, ansi_to_html
|
||||||
|
|
||||||
@@ -38,6 +38,7 @@ from config import (
|
|||||||
CAN_UPGRADE
|
CAN_UPGRADE
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
GLOBAL_CONTEXT = {'VERSION': VERSION, 'VERSIONS_AVAILABLE': VERSIONS_AVAILABLE, 'CAN_UPGRADE': CAN_UPGRADE}
|
GLOBAL_CONTEXT = {'VERSION': VERSION, 'VERSIONS_AVAILABLE': VERSIONS_AVAILABLE, 'CAN_UPGRADE': CAN_UPGRADE}
|
||||||
|
|
||||||
# Admin URLs
|
# Admin URLs
|
||||||
@@ -104,23 +105,16 @@ class ArchiveBoxAdmin(admin.AdminSite):
|
|||||||
return render(template_name='add.html', request=request, context=context)
|
return render(template_name='add.html', request=request, context=context)
|
||||||
|
|
||||||
|
|
||||||
# monkey patch django-signals-webhooks to change how it shows up in Admin UI
|
|
||||||
DjangoSignalWebhooksConfig.verbose_name = 'API'
|
|
||||||
WebhookModel._meta.get_field('name').help_text = 'Give your webhook a descriptive name (e.g. Notify ACME Slack channel of any new ArchiveResults).'
|
|
||||||
WebhookModel._meta.get_field('signal').help_text = 'The type of event the webhook should fire for (e.g. Create, Update, Delete).'
|
|
||||||
WebhookModel._meta.get_field('ref').help_text = 'Dot import notation of the model the webhook should fire for (e.g. core.models.Snapshot or core.models.ArchiveResult).'
|
|
||||||
WebhookModel._meta.get_field('endpoint').help_text = 'External URL to POST the webhook notification to (e.g. https://someapp.example.com/webhook/some-webhook-receiver).'
|
|
||||||
WebhookModel._meta.app_label = 'api'
|
|
||||||
|
|
||||||
|
|
||||||
archivebox_admin = ArchiveBoxAdmin()
|
archivebox_admin = ArchiveBoxAdmin()
|
||||||
archivebox_admin.register(get_user_model())
|
archivebox_admin.register(get_user_model())
|
||||||
archivebox_admin.register(APIToken)
|
archivebox_admin.register(APIToken)
|
||||||
archivebox_admin.register(WebhookModel, WebhookAdmin)
|
archivebox_admin.register(get_webhook_model(), WebhookAdmin)
|
||||||
archivebox_admin.disable_action('delete_selected')
|
archivebox_admin.disable_action('delete_selected')
|
||||||
|
|
||||||
|
# archivebox_admin.register(CustomPlugin)
|
||||||
|
|
||||||
# patch admin with methods to add data views
|
# patch admin with methods to add data views (implemented by admin_data_views package)
|
||||||
|
############### Additional sections are defined in settings.ADMIN_DATA_VIEWS #########
|
||||||
from admin_data_views.admin import get_app_list, admin_data_index_view, get_admin_data_urls, get_urls
|
from admin_data_views.admin import get_app_list, admin_data_index_view, get_admin_data_urls, get_urls
|
||||||
|
|
||||||
archivebox_admin.get_app_list = get_app_list.__get__(archivebox_admin, ArchiveBoxAdmin)
|
archivebox_admin.get_app_list = get_app_list.__get__(archivebox_admin, ArchiveBoxAdmin)
|
||||||
@@ -170,14 +164,41 @@ class SnapshotActionForm(ActionForm):
|
|||||||
# )
|
# )
|
||||||
|
|
||||||
|
|
||||||
|
def get_abid_info(self, obj):
|
||||||
|
return format_html(
|
||||||
|
# URL Hash: <code style="font-size: 10px; user-select: all">{}</code><br/>
|
||||||
|
'''
|
||||||
|
ABID: <code style="font-size: 16px; user-select: all"><b>{}</b></code><br/>
|
||||||
|
TS: <code style="font-size: 10px; user-select: all"><b>{}</b></code> ({})<br/>
|
||||||
|
URI: <code style="font-size: 10px; user-select: all"><b>{}</b></code> ({})<br/>
|
||||||
|
SUBTYPE: <code style="font-size: 10px; user-select: all"><b>{}</b></code> ({})<br/>
|
||||||
|
RAND: <code style="font-size: 10px; user-select: all"><b>{}</b></code> ({})<br/><br/>
|
||||||
|
ABID AS UUID: <code style="font-size: 10px; user-select: all">{}</code> <br/><br/>
|
||||||
|
|
||||||
|
.uuid: <code style="font-size: 10px; user-select: all">{}</code> <br/>
|
||||||
|
.id: <code style="font-size: 10px; user-select: all">{}</code> <br/>
|
||||||
|
.pk: <code style="font-size: 10px; user-select: all">{}</code> <br/><br/>
|
||||||
|
''',
|
||||||
|
obj.abid,
|
||||||
|
obj.ABID.ts, obj.abid_values['ts'].isoformat() if isinstance(obj.abid_values['ts'], datetime) else obj.abid_values['ts'],
|
||||||
|
obj.ABID.uri, str(obj.abid_values['uri']),
|
||||||
|
obj.ABID.subtype, str(obj.abid_values['subtype']),
|
||||||
|
obj.ABID.rand, str(obj.abid_values['rand'])[-7:],
|
||||||
|
obj.ABID.uuid,
|
||||||
|
obj.uuid,
|
||||||
|
obj.id,
|
||||||
|
obj.pk,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
@admin.register(Snapshot, site=archivebox_admin)
|
@admin.register(Snapshot, site=archivebox_admin)
|
||||||
class SnapshotAdmin(SearchResultsAdminMixin, admin.ModelAdmin):
|
class SnapshotAdmin(SearchResultsAdminMixin, admin.ModelAdmin):
|
||||||
list_display = ('added', 'title_str', 'files', 'size', 'url_str')
|
list_display = ('added', 'title_str', 'files', 'size', 'url_str')
|
||||||
sort_fields = ('title_str', 'url_str', 'added', 'files')
|
sort_fields = ('title_str', 'url_str', 'added', 'files')
|
||||||
readonly_fields = ('info', 'bookmarked', 'added', 'updated')
|
readonly_fields = ('admin_actions', 'status_info', 'bookmarked', 'added', 'updated', 'created', 'modified', 'identifiers')
|
||||||
search_fields = ('id', 'url', 'timestamp', 'title', 'tags__name')
|
search_fields = ('id', 'url', 'abid', 'uuid', 'timestamp', 'title', 'tags__name')
|
||||||
fields = ('timestamp', 'url', 'title', 'tags', *readonly_fields)
|
fields = ('url', 'timestamp', 'created_by', 'tags', 'title', *readonly_fields)
|
||||||
list_filter = ('added', 'updated', 'tags', 'archiveresult__status')
|
list_filter = ('added', 'updated', 'tags', 'archiveresult__status', 'created_by')
|
||||||
ordering = ['-added']
|
ordering = ['-added']
|
||||||
actions = ['add_tags', 'remove_tags', 'update_titles', 'update_snapshots', 'resnapshot_snapshot', 'overwrite_snapshots', 'delete_snapshots']
|
actions = ['add_tags', 'remove_tags', 'update_titles', 'update_snapshots', 'resnapshot_snapshot', 'overwrite_snapshots', 'delete_snapshots']
|
||||||
autocomplete_fields = ['tags']
|
autocomplete_fields = ['tags']
|
||||||
@@ -223,40 +244,46 @@ class SnapshotAdmin(SearchResultsAdminMixin, admin.ModelAdmin):
|
|||||||
# </form>
|
# </form>
|
||||||
# ''',
|
# ''',
|
||||||
# csrf.get_token(self.request),
|
# csrf.get_token(self.request),
|
||||||
# obj.id,
|
# obj.pk,
|
||||||
# )
|
# )
|
||||||
|
|
||||||
def info(self, obj):
|
def admin_actions(self, obj):
|
||||||
return format_html(
|
return format_html(
|
||||||
|
# URL Hash: <code style="font-size: 10px; user-select: all">{}</code><br/>
|
||||||
|
'''
|
||||||
|
<a class="btn" style="font-size: 18px; display: inline-block; border-radius: 10px; border: 3px solid #eee; padding: 4px 8px" href="/archive/{}">Summary page ➡️</a>
|
||||||
|
<a class="btn" style="font-size: 18px; display: inline-block; border-radius: 10px; border: 3px solid #eee; padding: 4px 8px" href="/archive/{}/index.html#all">Result files 📑</a>
|
||||||
|
<a class="btn" style="font-size: 18px; display: inline-block; border-radius: 10px; border: 3px solid #eee; padding: 4px 8px" href="/admin/core/snapshot/?id__exact={}">Admin actions ⚙️</a>
|
||||||
|
''',
|
||||||
|
obj.timestamp,
|
||||||
|
obj.timestamp,
|
||||||
|
obj.pk,
|
||||||
|
)
|
||||||
|
|
||||||
|
def status_info(self, obj):
|
||||||
|
return format_html(
|
||||||
|
# URL Hash: <code style="font-size: 10px; user-select: all">{}</code><br/>
|
||||||
'''
|
'''
|
||||||
UUID: <code style="font-size: 10px; user-select: all">{}</code>
|
|
||||||
Timestamp: <code style="font-size: 10px; user-select: all">{}</code>
|
|
||||||
URL Hash: <code style="font-size: 10px; user-select: all">{}</code><br/>
|
|
||||||
Archived: {} ({} files {})
|
Archived: {} ({} files {})
|
||||||
Favicon: <img src="{}" style="height: 20px"/>
|
Favicon: <img src="{}" style="height: 20px"/>
|
||||||
Status code: {}
|
Status code: {} <br/>
|
||||||
Server: {}
|
Server: {}
|
||||||
Content type: {}
|
Content type: {}
|
||||||
Extension: {}
|
Extension: {}
|
||||||
<br/><br/>
|
|
||||||
<a href="/archive/{}">View Snapshot index ➡️</a>
|
|
||||||
<a href="/admin/core/snapshot/?id__exact={}">View actions ⚙️</a>
|
|
||||||
''',
|
''',
|
||||||
obj.id,
|
|
||||||
obj.timestamp,
|
|
||||||
obj.url_hash,
|
|
||||||
'✅' if obj.is_archived else '❌',
|
'✅' if obj.is_archived else '❌',
|
||||||
obj.num_outputs,
|
obj.num_outputs,
|
||||||
self.size(obj),
|
self.size(obj) or '0kb',
|
||||||
f'/archive/{obj.timestamp}/favicon.ico',
|
f'/archive/{obj.timestamp}/favicon.ico',
|
||||||
obj.status_code or '?',
|
obj.status_code or '-',
|
||||||
obj.headers and obj.headers.get('Server') or '?',
|
obj.headers and obj.headers.get('Server') or '-',
|
||||||
obj.headers and obj.headers.get('Content-Type') or '?',
|
obj.headers and obj.headers.get('Content-Type') or '-',
|
||||||
obj.extension or '?',
|
obj.extension or '-',
|
||||||
obj.timestamp,
|
|
||||||
obj.id,
|
|
||||||
)
|
)
|
||||||
|
|
||||||
|
def identifiers(self, obj):
|
||||||
|
return get_abid_info(self, obj)
|
||||||
|
|
||||||
@admin.display(
|
@admin.display(
|
||||||
description='Title',
|
description='Title',
|
||||||
ordering='title',
|
ordering='title',
|
||||||
@@ -316,7 +343,7 @@ class SnapshotAdmin(SearchResultsAdminMixin, admin.ModelAdmin):
|
|||||||
return format_html(
|
return format_html(
|
||||||
'<a href="{}"><code style="user-select: all;">{}</code></a>',
|
'<a href="{}"><code style="user-select: all;">{}</code></a>',
|
||||||
obj.url,
|
obj.url,
|
||||||
obj.url,
|
obj.url[:128],
|
||||||
)
|
)
|
||||||
|
|
||||||
def grid_view(self, request, extra_context=None):
|
def grid_view(self, request, extra_context=None):
|
||||||
@@ -419,42 +446,45 @@ class SnapshotAdmin(SearchResultsAdminMixin, admin.ModelAdmin):
|
|||||||
|
|
||||||
@admin.register(Tag, site=archivebox_admin)
|
@admin.register(Tag, site=archivebox_admin)
|
||||||
class TagAdmin(admin.ModelAdmin):
|
class TagAdmin(admin.ModelAdmin):
|
||||||
list_display = ('slug', 'name', 'num_snapshots', 'snapshots', 'id')
|
list_display = ('slug', 'name', 'num_snapshots', 'snapshots', 'abid')
|
||||||
sort_fields = ('id', 'name', 'slug')
|
sort_fields = ('id', 'name', 'slug', 'abid')
|
||||||
readonly_fields = ('id', 'num_snapshots', 'snapshots')
|
readonly_fields = ('created', 'modified', 'identifiers', 'num_snapshots', 'snapshots')
|
||||||
search_fields = ('id', 'name', 'slug')
|
search_fields = ('id', 'abid', 'uuid', 'name', 'slug')
|
||||||
fields = (*readonly_fields, 'name', 'slug')
|
fields = ('name', 'slug', 'created_by', *readonly_fields, )
|
||||||
actions = ['delete_selected']
|
actions = ['delete_selected']
|
||||||
ordering = ['-id']
|
ordering = ['-id']
|
||||||
|
|
||||||
def num_snapshots(self, obj):
|
def identifiers(self, obj):
|
||||||
|
return get_abid_info(self, obj)
|
||||||
|
|
||||||
|
def num_snapshots(self, tag):
|
||||||
return format_html(
|
return format_html(
|
||||||
'<a href="/admin/core/snapshot/?tags__id__exact={}">{} total</a>',
|
'<a href="/admin/core/snapshot/?tags__id__exact={}">{} total</a>',
|
||||||
obj.id,
|
tag.id,
|
||||||
obj.snapshot_set.count(),
|
tag.snapshot_set.count(),
|
||||||
)
|
)
|
||||||
|
|
||||||
def snapshots(self, obj):
|
def snapshots(self, tag):
|
||||||
total_count = obj.snapshot_set.count()
|
total_count = tag.snapshot_set.count()
|
||||||
return mark_safe('<br/>'.join(
|
return mark_safe('<br/>'.join(
|
||||||
format_html(
|
format_html(
|
||||||
'{} <code><a href="/admin/core/snapshot/{}/change"><b>[{}]</b></a> {}</code>',
|
'{} <code><a href="/admin/core/snapshot/{}/change"><b>[{}]</b></a> {}</code>',
|
||||||
snap.updated.strftime('%Y-%m-%d %H:%M') if snap.updated else 'pending...',
|
snap.updated.strftime('%Y-%m-%d %H:%M') if snap.updated else 'pending...',
|
||||||
snap.id,
|
snap.pk,
|
||||||
snap.timestamp,
|
snap.abid,
|
||||||
snap.url,
|
snap.url,
|
||||||
)
|
)
|
||||||
for snap in obj.snapshot_set.order_by('-updated')[:10]
|
for snap in tag.snapshot_set.order_by('-updated')[:10]
|
||||||
) + (f'<br/><a href="/admin/core/snapshot/?tags__id__exact={obj.id}">and {total_count-10} more...<a>' if obj.snapshot_set.count() > 10 else ''))
|
) + (f'<br/><a href="/admin/core/snapshot/?tags__id__exact={tag.id}">and {total_count-10} more...<a>' if tag.snapshot_set.count() > 10 else ''))
|
||||||
|
|
||||||
|
|
||||||
@admin.register(ArchiveResult, site=archivebox_admin)
|
@admin.register(ArchiveResult, site=archivebox_admin)
|
||||||
class ArchiveResultAdmin(admin.ModelAdmin):
|
class ArchiveResultAdmin(admin.ModelAdmin):
|
||||||
list_display = ('id', 'start_ts', 'extractor', 'snapshot_str', 'tags_str', 'cmd_str', 'status', 'output_str')
|
list_display = ('start_ts', 'snapshot_info', 'tags_str', 'extractor', 'cmd_str', 'status', 'output_str')
|
||||||
sort_fields = ('start_ts', 'extractor', 'status')
|
sort_fields = ('start_ts', 'extractor', 'status')
|
||||||
readonly_fields = ('id', 'uuid', 'snapshot_str', 'tags_str')
|
readonly_fields = ('snapshot_info', 'tags_str', 'created_by', 'created', 'modified', 'identifiers')
|
||||||
search_fields = ('id', 'uuid', 'snapshot__url', 'extractor', 'output', 'cmd_version', 'cmd', 'snapshot__timestamp')
|
search_fields = ('id', 'uuid', 'abid', 'snapshot__url', 'extractor', 'output', 'cmd_version', 'cmd', 'snapshot__timestamp')
|
||||||
fields = (*readonly_fields, 'snapshot', 'extractor', 'status', 'start_ts', 'end_ts', 'output', 'pwd', 'cmd', 'cmd_version')
|
fields = ('snapshot', 'extractor', 'status', 'output', 'pwd', 'cmd', 'start_ts', 'end_ts', 'cmd_version', *readonly_fields)
|
||||||
autocomplete_fields = ['snapshot']
|
autocomplete_fields = ['snapshot']
|
||||||
|
|
||||||
list_filter = ('status', 'extractor', 'start_ts', 'cmd_version')
|
list_filter = ('status', 'extractor', 'start_ts', 'cmd_version')
|
||||||
@@ -462,33 +492,36 @@ class ArchiveResultAdmin(admin.ModelAdmin):
|
|||||||
list_per_page = SNAPSHOTS_PER_PAGE
|
list_per_page = SNAPSHOTS_PER_PAGE
|
||||||
|
|
||||||
@admin.display(
|
@admin.display(
|
||||||
description='snapshot'
|
description='Snapshot Info'
|
||||||
)
|
)
|
||||||
def snapshot_str(self, obj):
|
def snapshot_info(self, result):
|
||||||
return format_html(
|
return format_html(
|
||||||
'<a href="/archive/{}/index.html"><b><code>[{}]</code></b></a><br/>'
|
'<a href="/archive/{}/index.html"><b><code>[{}]</code></b> {} {}</a><br/>',
|
||||||
'<small>{}</small>',
|
result.snapshot.timestamp,
|
||||||
obj.snapshot.timestamp,
|
result.snapshot.abid,
|
||||||
obj.snapshot.timestamp,
|
result.snapshot.added.strftime('%Y-%m-%d %H:%M'),
|
||||||
obj.snapshot.url[:128],
|
result.snapshot.url[:128],
|
||||||
)
|
)
|
||||||
|
|
||||||
|
def identifiers(self, obj):
|
||||||
|
return get_abid_info(self, obj)
|
||||||
|
|
||||||
@admin.display(
|
@admin.display(
|
||||||
description='tags'
|
description='Snapshot Tags'
|
||||||
)
|
)
|
||||||
def tags_str(self, obj):
|
def tags_str(self, result):
|
||||||
return obj.snapshot.tags_str()
|
return result.snapshot.tags_str()
|
||||||
|
|
||||||
def cmd_str(self, obj):
|
def cmd_str(self, result):
|
||||||
return format_html(
|
return format_html(
|
||||||
'<pre>{}</pre>',
|
'<pre>{}</pre>',
|
||||||
' '.join(obj.cmd) if isinstance(obj.cmd, list) else str(obj.cmd),
|
' '.join(result.cmd) if isinstance(result.cmd, list) else str(result.cmd),
|
||||||
)
|
)
|
||||||
|
|
||||||
def output_str(self, obj):
|
def output_str(self, result):
|
||||||
return format_html(
|
return format_html(
|
||||||
'<a href="/archive/{}/{}" class="output-link">↗️</a><pre>{}</pre>',
|
'<a href="/archive/{}/{}" class="output-link">↗️</a><pre>{}</pre>',
|
||||||
obj.snapshot.timestamp,
|
result.snapshot.timestamp,
|
||||||
obj.output if (obj.status == 'succeeded') and obj.extractor not in ('title', 'archive_org') else 'index.html',
|
result.output if (result.status == 'succeeded') and result.extractor not in ('title', 'archive_org') else 'index.html',
|
||||||
obj.output,
|
result.output,
|
||||||
)
|
)
|
||||||
|
|||||||
@@ -17,8 +17,6 @@ except AttributeError:
|
|||||||
|
|
||||||
|
|
||||||
def forwards_func(apps, schema_editor):
|
def forwards_func(apps, schema_editor):
|
||||||
from core.models import EXTRACTORS
|
|
||||||
|
|
||||||
Snapshot = apps.get_model("core", "Snapshot")
|
Snapshot = apps.get_model("core", "Snapshot")
|
||||||
ArchiveResult = apps.get_model("core", "ArchiveResult")
|
ArchiveResult = apps.get_model("core", "ArchiveResult")
|
||||||
|
|
||||||
|
|||||||
@@ -0,0 +1,43 @@
|
|||||||
|
# Generated by Django 5.0.6 on 2024-05-13 10:56
|
||||||
|
|
||||||
|
import charidfield.fields
|
||||||
|
from django.db import migrations, models
|
||||||
|
|
||||||
|
|
||||||
|
class Migration(migrations.Migration):
|
||||||
|
|
||||||
|
dependencies = [
|
||||||
|
('core', '0022_auto_20231023_2008'),
|
||||||
|
]
|
||||||
|
|
||||||
|
operations = [
|
||||||
|
migrations.AlterModelOptions(
|
||||||
|
name='archiveresult',
|
||||||
|
options={'verbose_name': 'Result'},
|
||||||
|
),
|
||||||
|
migrations.AddField(
|
||||||
|
model_name='archiveresult',
|
||||||
|
name='abid',
|
||||||
|
field=charidfield.fields.CharIDField(blank=True, db_index=True, default=None, help_text='ABID-format identifier for this entity (e.g. snp_01BJQMF54D093DXEAWZ6JYRPAQ)', max_length=30, null=True, prefix='res_', unique=True),
|
||||||
|
),
|
||||||
|
migrations.AddField(
|
||||||
|
model_name='snapshot',
|
||||||
|
name='abid',
|
||||||
|
field=charidfield.fields.CharIDField(blank=True, db_index=True, default=None, help_text='ABID-format identifier for this entity (e.g. snp_01BJQMF54D093DXEAWZ6JYRPAQ)', max_length=30, null=True, prefix='snp_', unique=True),
|
||||||
|
),
|
||||||
|
migrations.AddField(
|
||||||
|
model_name='snapshot',
|
||||||
|
name='uuid',
|
||||||
|
field=models.UUIDField(blank=True, null=True, unique=True),
|
||||||
|
),
|
||||||
|
migrations.AddField(
|
||||||
|
model_name='tag',
|
||||||
|
name='abid',
|
||||||
|
field=charidfield.fields.CharIDField(blank=True, db_index=True, default=None, help_text='ABID-format identifier for this entity (e.g. snp_01BJQMF54D093DXEAWZ6JYRPAQ)', max_length=30, null=True, prefix='tag_', unique=True),
|
||||||
|
),
|
||||||
|
migrations.AlterField(
|
||||||
|
model_name='archiveresult',
|
||||||
|
name='extractor',
|
||||||
|
field=models.CharField(choices=[('htmltotext', 'htmltotext'), ('git', 'git'), ('singlefile', 'singlefile'), ('media', 'media'), ('archive_org', 'archive_org'), ('readability', 'readability'), ('mercury', 'mercury'), ('favicon', 'favicon'), ('pdf', 'pdf'), ('headers', 'headers'), ('screenshot', 'screenshot'), ('dom', 'dom'), ('title', 'title'), ('wget', 'wget')], max_length=32),
|
||||||
|
),
|
||||||
|
]
|
||||||
95
archivebox/core/migrations/0024_auto_20240513_1143.py
Normal file
95
archivebox/core/migrations/0024_auto_20240513_1143.py
Normal file
@@ -0,0 +1,95 @@
|
|||||||
|
# Generated by Django 5.0.6 on 2024-05-13 11:43
|
||||||
|
|
||||||
|
from django.db import migrations
|
||||||
|
from datetime import datetime
|
||||||
|
from abid_utils.abid import abid_from_values
|
||||||
|
|
||||||
|
|
||||||
|
def calculate_abid(self):
|
||||||
|
"""
|
||||||
|
Return a freshly derived ABID (assembled from attrs defined in ABIDModel.abid_*_src).
|
||||||
|
"""
|
||||||
|
prefix = self.abid_prefix
|
||||||
|
ts = eval(self.abid_ts_src)
|
||||||
|
uri = eval(self.abid_uri_src)
|
||||||
|
subtype = eval(self.abid_subtype_src)
|
||||||
|
rand = eval(self.abid_rand_src)
|
||||||
|
|
||||||
|
if (not prefix) or prefix == 'obj_':
|
||||||
|
suggested_abid = self.__class__.__name__[:3].lower()
|
||||||
|
raise Exception(f'{self.__class__.__name__}.abid_prefix must be defined to calculate ABIDs (suggested: {suggested_abid})')
|
||||||
|
|
||||||
|
if not ts:
|
||||||
|
ts = datetime.utcfromtimestamp(0)
|
||||||
|
print(f'[!] WARNING: Generating ABID with ts=0000000000 placeholder because {self.__class__.__name__}.abid_ts_src={self.abid_ts_src} is unset!', ts.isoformat())
|
||||||
|
|
||||||
|
if not uri:
|
||||||
|
uri = str(self)
|
||||||
|
print(f'[!] WARNING: Generating ABID with uri=str(self) placeholder because {self.__class__.__name__}.abid_uri_src={self.abid_uri_src} is unset!', uri)
|
||||||
|
|
||||||
|
if not subtype:
|
||||||
|
subtype = self.__class__.__name__
|
||||||
|
print(f'[!] WARNING: Generating ABID with subtype={subtype} placeholder because {self.__class__.__name__}.abid_subtype_src={self.abid_subtype_src} is unset!', subtype)
|
||||||
|
|
||||||
|
if not rand:
|
||||||
|
rand = getattr(self, 'uuid', None) or getattr(self, 'id', None) or getattr(self, 'pk')
|
||||||
|
print(f'[!] WARNING: Generating ABID with rand=self.id placeholder because {self.__class__.__name__}.abid_rand_src={self.abid_rand_src} is unset!', rand)
|
||||||
|
|
||||||
|
abid = abid_from_values(
|
||||||
|
prefix=prefix,
|
||||||
|
ts=ts,
|
||||||
|
uri=uri,
|
||||||
|
subtype=subtype,
|
||||||
|
rand=rand,
|
||||||
|
)
|
||||||
|
assert abid.ulid and abid.uuid and abid.typeid, f'Failed to calculate {prefix}_ABID for {self.__class__.__name__}'
|
||||||
|
return abid
|
||||||
|
|
||||||
|
|
||||||
|
def copy_snapshot_uuids(apps, schema_editor):
|
||||||
|
Snapshot = apps.get_model("core", "Snapshot")
|
||||||
|
for snapshot in Snapshot.objects.all():
|
||||||
|
snapshot.uuid = snapshot.id
|
||||||
|
snapshot.save(update_fields=["uuid"])
|
||||||
|
|
||||||
|
def generate_snapshot_abids(apps, schema_editor):
|
||||||
|
Snapshot = apps.get_model("core", "Snapshot")
|
||||||
|
for snapshot in Snapshot.objects.all():
|
||||||
|
snapshot.abid_prefix = 'snp_'
|
||||||
|
snapshot.abid_ts_src = 'self.added'
|
||||||
|
snapshot.abid_uri_src = 'self.url'
|
||||||
|
snapshot.abid_subtype_src = '"01"'
|
||||||
|
snapshot.abid_rand_src = 'self.uuid'
|
||||||
|
|
||||||
|
snapshot.abid = calculate_abid(snapshot)
|
||||||
|
snapshot.save(update_fields=["abid"])
|
||||||
|
|
||||||
|
def generate_archiveresult_abids(apps, schema_editor):
|
||||||
|
ArchiveResult = apps.get_model("core", "ArchiveResult")
|
||||||
|
Snapshot = apps.get_model("core", "Snapshot")
|
||||||
|
for result in ArchiveResult.objects.all():
|
||||||
|
result.abid_prefix = 'res_'
|
||||||
|
result.snapshot = Snapshot.objects.get(pk=result.snapshot_id)
|
||||||
|
result.snapshot_added = result.snapshot.added
|
||||||
|
result.snapshot_url = result.snapshot.url
|
||||||
|
result.abid_ts_src = 'self.snapshot_added'
|
||||||
|
result.abid_uri_src = 'self.snapshot_url'
|
||||||
|
result.abid_subtype_src = 'self.extractor'
|
||||||
|
result.abid_rand_src = 'self.id'
|
||||||
|
|
||||||
|
result.abid = calculate_abid(result)
|
||||||
|
result.uuid = result.abid.uuid
|
||||||
|
result.save(update_fields=["abid", "uuid"])
|
||||||
|
|
||||||
|
|
||||||
|
class Migration(migrations.Migration):
|
||||||
|
|
||||||
|
dependencies = [
|
||||||
|
('core', '0023_alter_archiveresult_options_archiveresult_abid_and_more'),
|
||||||
|
]
|
||||||
|
|
||||||
|
operations = [
|
||||||
|
migrations.RunPython(copy_snapshot_uuids, reverse_code=migrations.RunPython.noop),
|
||||||
|
migrations.RunPython(generate_snapshot_abids, reverse_code=migrations.RunPython.noop),
|
||||||
|
migrations.RunPython(generate_archiveresult_abids, reverse_code=migrations.RunPython.noop),
|
||||||
|
]
|
||||||
19
archivebox/core/migrations/0025_alter_archiveresult_uuid.py
Normal file
19
archivebox/core/migrations/0025_alter_archiveresult_uuid.py
Normal file
@@ -0,0 +1,19 @@
|
|||||||
|
# Generated by Django 5.0.6 on 2024-05-13 12:08
|
||||||
|
|
||||||
|
import uuid
|
||||||
|
from django.db import migrations, models
|
||||||
|
|
||||||
|
|
||||||
|
class Migration(migrations.Migration):
|
||||||
|
|
||||||
|
dependencies = [
|
||||||
|
('core', '0024_auto_20240513_1143'),
|
||||||
|
]
|
||||||
|
|
||||||
|
operations = [
|
||||||
|
migrations.AlterField(
|
||||||
|
model_name='archiveresult',
|
||||||
|
name='uuid',
|
||||||
|
field=models.UUIDField(default=uuid.uuid4, editable=False, unique=True),
|
||||||
|
),
|
||||||
|
]
|
||||||
@@ -0,0 +1,76 @@
|
|||||||
|
# Generated by Django 5.0.6 on 2024-05-13 13:01
|
||||||
|
|
||||||
|
import abid_utils.models
|
||||||
|
import django.db.models.deletion
|
||||||
|
import django.utils.timezone
|
||||||
|
from django.conf import settings
|
||||||
|
from django.db import migrations, models
|
||||||
|
|
||||||
|
|
||||||
|
class Migration(migrations.Migration):
|
||||||
|
|
||||||
|
dependencies = [
|
||||||
|
('core', '0025_alter_archiveresult_uuid'),
|
||||||
|
migrations.swappable_dependency(settings.AUTH_USER_MODEL),
|
||||||
|
]
|
||||||
|
|
||||||
|
operations = [
|
||||||
|
migrations.AddField(
|
||||||
|
model_name='archiveresult',
|
||||||
|
name='created',
|
||||||
|
field=models.DateTimeField(auto_now_add=True, default=django.utils.timezone.now),
|
||||||
|
preserve_default=False,
|
||||||
|
),
|
||||||
|
migrations.AddField(
|
||||||
|
model_name='archiveresult',
|
||||||
|
name='created_by',
|
||||||
|
field=models.ForeignKey(default=abid_utils.models.get_or_create_system_user_pk, on_delete=django.db.models.deletion.CASCADE, to=settings.AUTH_USER_MODEL),
|
||||||
|
),
|
||||||
|
migrations.AddField(
|
||||||
|
model_name='archiveresult',
|
||||||
|
name='modified',
|
||||||
|
field=models.DateTimeField(auto_now=True),
|
||||||
|
),
|
||||||
|
migrations.AddField(
|
||||||
|
model_name='snapshot',
|
||||||
|
name='created',
|
||||||
|
field=models.DateTimeField(auto_now_add=True, default=django.utils.timezone.now),
|
||||||
|
preserve_default=False,
|
||||||
|
),
|
||||||
|
migrations.AddField(
|
||||||
|
model_name='snapshot',
|
||||||
|
name='created_by',
|
||||||
|
field=models.ForeignKey(default=abid_utils.models.get_or_create_system_user_pk, on_delete=django.db.models.deletion.CASCADE, to=settings.AUTH_USER_MODEL),
|
||||||
|
),
|
||||||
|
migrations.AddField(
|
||||||
|
model_name='snapshot',
|
||||||
|
name='modified',
|
||||||
|
field=models.DateTimeField(auto_now=True),
|
||||||
|
),
|
||||||
|
migrations.AddField(
|
||||||
|
model_name='tag',
|
||||||
|
name='created',
|
||||||
|
field=models.DateTimeField(auto_now_add=True, default=django.utils.timezone.now),
|
||||||
|
preserve_default=False,
|
||||||
|
),
|
||||||
|
migrations.AddField(
|
||||||
|
model_name='tag',
|
||||||
|
name='created_by',
|
||||||
|
field=models.ForeignKey(default=abid_utils.models.get_or_create_system_user_pk, on_delete=django.db.models.deletion.CASCADE, to=settings.AUTH_USER_MODEL),
|
||||||
|
),
|
||||||
|
migrations.AddField(
|
||||||
|
model_name='tag',
|
||||||
|
name='modified',
|
||||||
|
field=models.DateTimeField(auto_now=True),
|
||||||
|
),
|
||||||
|
migrations.AddField(
|
||||||
|
model_name='tag',
|
||||||
|
name='uuid',
|
||||||
|
field=models.UUIDField(blank=True, null=True, unique=True),
|
||||||
|
),
|
||||||
|
migrations.AlterField(
|
||||||
|
model_name='archiveresult',
|
||||||
|
name='uuid',
|
||||||
|
field=models.UUIDField(blank=True, null=True, unique=True),
|
||||||
|
),
|
||||||
|
]
|
||||||
@@ -1,11 +1,14 @@
|
|||||||
__package__ = 'archivebox.core'
|
__package__ = 'archivebox.core'
|
||||||
|
|
||||||
|
|
||||||
import uuid
|
from typing import Optional, List, Dict
|
||||||
|
from django_stubs_ext.db.models import TypedModelMeta
|
||||||
|
|
||||||
import json
|
import json
|
||||||
|
|
||||||
|
import uuid
|
||||||
|
from uuid import uuid4
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import Optional, List
|
|
||||||
|
|
||||||
from django.db import models
|
from django.db import models
|
||||||
from django.utils.functional import cached_property
|
from django.utils.functional import cached_property
|
||||||
@@ -15,40 +18,58 @@ from django.urls import reverse
|
|||||||
from django.db.models import Case, When, Value, IntegerField
|
from django.db.models import Case, When, Value, IntegerField
|
||||||
from django.contrib.auth.models import User # noqa
|
from django.contrib.auth.models import User # noqa
|
||||||
|
|
||||||
|
from abid_utils.models import ABIDModel, ABIDField
|
||||||
|
|
||||||
from ..config import ARCHIVE_DIR, ARCHIVE_DIR_NAME
|
from ..config import ARCHIVE_DIR, ARCHIVE_DIR_NAME
|
||||||
from ..system import get_dir_size
|
from ..system import get_dir_size
|
||||||
from ..util import parse_date, base_url, hashurl
|
from ..util import parse_date, base_url
|
||||||
from ..index.schema import Link
|
from ..index.schema import Link
|
||||||
from ..index.html import snapshot_icons
|
from ..index.html import snapshot_icons
|
||||||
from ..extractors import get_default_archive_methods, ARCHIVE_METHODS_INDEXING_PRECEDENCE
|
from ..extractors import ARCHIVE_METHODS_INDEXING_PRECEDENCE, EXTRACTORS
|
||||||
|
|
||||||
EXTRACTORS = [(extractor[0], extractor[0]) for extractor in get_default_archive_methods()]
|
|
||||||
|
EXTRACTOR_CHOICES = [(extractor_name, extractor_name) for extractor_name in EXTRACTORS.keys()]
|
||||||
STATUS_CHOICES = [
|
STATUS_CHOICES = [
|
||||||
("succeeded", "succeeded"),
|
("succeeded", "succeeded"),
|
||||||
("failed", "failed"),
|
("failed", "failed"),
|
||||||
("skipped", "skipped")
|
("skipped", "skipped")
|
||||||
]
|
]
|
||||||
|
|
||||||
try:
|
|
||||||
JSONField = models.JSONField
|
|
||||||
except AttributeError:
|
|
||||||
import jsonfield
|
|
||||||
JSONField = jsonfield.JSONField
|
|
||||||
|
|
||||||
|
|
||||||
class Tag(models.Model):
|
# class BaseModel(models.Model):
|
||||||
|
# # TODO: migrate all models to a shared base class with all our standard fields and helpers:
|
||||||
|
# # ulid/created/modified/owner/is_deleted/as_json/from_json/etc.
|
||||||
|
# #
|
||||||
|
# # id = models.AutoField(primary_key=True, serialize=False, verbose_name='ID')
|
||||||
|
# # ulid = models.CharField(max_length=26, null=True, blank=True, db_index=True, unique=True)
|
||||||
|
|
||||||
|
# class Meta(TypedModelMeta):
|
||||||
|
# abstract = True
|
||||||
|
|
||||||
|
|
||||||
|
class Tag(ABIDModel):
|
||||||
"""
|
"""
|
||||||
Based on django-taggit model
|
Based on django-taggit model + ABID base.
|
||||||
"""
|
"""
|
||||||
|
abid_prefix = 'tag_'
|
||||||
|
abid_ts_src = 'self.created' # TODO: add created/modified time
|
||||||
|
abid_uri_src = 'self.name'
|
||||||
|
abid_subtype_src = '"03"'
|
||||||
|
abid_rand_src = 'self.id'
|
||||||
|
|
||||||
|
# id = models.UUIDField(primary_key=True, default=uuid.uuid4, editable=True)
|
||||||
id = models.AutoField(primary_key=True, serialize=False, verbose_name='ID')
|
id = models.AutoField(primary_key=True, serialize=False, verbose_name='ID')
|
||||||
|
uuid = models.UUIDField(blank=True, null=True, editable=True, unique=True)
|
||||||
|
abid = ABIDField(prefix=abid_prefix)
|
||||||
|
|
||||||
|
|
||||||
name = models.CharField(unique=True, blank=False, max_length=100)
|
name = models.CharField(unique=True, blank=False, max_length=100)
|
||||||
|
|
||||||
# slug is autoset on save from name, never set it manually
|
|
||||||
slug = models.SlugField(unique=True, blank=True, max_length=100)
|
slug = models.SlugField(unique=True, blank=True, max_length=100)
|
||||||
|
# slug is autoset on save from name, never set it manually
|
||||||
|
|
||||||
|
|
||||||
class Meta:
|
class Meta(TypedModelMeta):
|
||||||
verbose_name = "Tag"
|
verbose_name = "Tag"
|
||||||
verbose_name_plural = "Tags"
|
verbose_name_plural = "Tags"
|
||||||
|
|
||||||
@@ -84,8 +105,16 @@ class Tag(models.Model):
|
|||||||
return super().save(*args, **kwargs)
|
return super().save(*args, **kwargs)
|
||||||
|
|
||||||
|
|
||||||
class Snapshot(models.Model):
|
class Snapshot(ABIDModel):
|
||||||
id = models.UUIDField(primary_key=True, default=uuid.uuid4, editable=False)
|
abid_prefix = 'snp_'
|
||||||
|
abid_ts_src = 'self.added'
|
||||||
|
abid_uri_src = 'self.url'
|
||||||
|
abid_subtype_src = '"01"'
|
||||||
|
abid_rand_src = 'self.id'
|
||||||
|
|
||||||
|
id = models.UUIDField(primary_key=True, default=uuid.uuid4, editable=False) # legacy pk
|
||||||
|
uuid = models.UUIDField(blank=True, null=True, editable=True, unique=True)
|
||||||
|
abid = ABIDField(prefix=abid_prefix)
|
||||||
|
|
||||||
url = models.URLField(unique=True, db_index=True)
|
url = models.URLField(unique=True, db_index=True)
|
||||||
timestamp = models.CharField(max_length=32, unique=True, db_index=True)
|
timestamp = models.CharField(max_length=32, unique=True, db_index=True)
|
||||||
@@ -98,6 +127,7 @@ class Snapshot(models.Model):
|
|||||||
|
|
||||||
keys = ('url', 'timestamp', 'title', 'tags', 'updated')
|
keys = ('url', 'timestamp', 'title', 'tags', 'updated')
|
||||||
|
|
||||||
|
|
||||||
def __repr__(self) -> str:
|
def __repr__(self) -> str:
|
||||||
title = self.title or '-'
|
title = self.title or '-'
|
||||||
return f'[{self.timestamp}] {self.url[:64]} ({title[:64]})'
|
return f'[{self.timestamp}] {self.url[:64]} ({title[:64]})'
|
||||||
@@ -126,8 +156,8 @@ class Snapshot(models.Model):
|
|||||||
from ..index import load_link_details
|
from ..index import load_link_details
|
||||||
return load_link_details(self.as_link())
|
return load_link_details(self.as_link())
|
||||||
|
|
||||||
def tags_str(self, nocache=True) -> str:
|
def tags_str(self, nocache=True) -> str | None:
|
||||||
cache_key = f'{self.id}-{(self.updated or self.added).timestamp()}-tags'
|
cache_key = f'{self.pk}-{(self.updated or self.added).timestamp()}-tags'
|
||||||
calc_tags_str = lambda: ','.join(self.tags.order_by('name').values_list('name', flat=True))
|
calc_tags_str = lambda: ','.join(self.tags.order_by('name').values_list('name', flat=True))
|
||||||
if nocache:
|
if nocache:
|
||||||
tags_str = calc_tags_str()
|
tags_str = calc_tags_str()
|
||||||
@@ -157,13 +187,9 @@ class Snapshot(models.Model):
|
|||||||
return self.as_link().is_archived
|
return self.as_link().is_archived
|
||||||
|
|
||||||
@cached_property
|
@cached_property
|
||||||
def num_outputs(self):
|
def num_outputs(self) -> int:
|
||||||
return self.archiveresult_set.filter(status='succeeded').count()
|
return self.archiveresult_set.filter(status='succeeded').count()
|
||||||
|
|
||||||
@cached_property
|
|
||||||
def url_hash(self):
|
|
||||||
return hashurl(self.url)
|
|
||||||
|
|
||||||
@cached_property
|
@cached_property
|
||||||
def base_url(self):
|
def base_url(self):
|
||||||
return base_url(self.url)
|
return base_url(self.url)
|
||||||
@@ -178,7 +204,7 @@ class Snapshot(models.Model):
|
|||||||
|
|
||||||
@cached_property
|
@cached_property
|
||||||
def archive_size(self):
|
def archive_size(self):
|
||||||
cache_key = f'{str(self.id)[:12]}-{(self.updated or self.added).timestamp()}-size'
|
cache_key = f'{str(self.pk)[:12]}-{(self.updated or self.added).timestamp()}-size'
|
||||||
|
|
||||||
def calc_dir_size():
|
def calc_dir_size():
|
||||||
try:
|
try:
|
||||||
@@ -199,7 +225,7 @@ class Snapshot(models.Model):
|
|||||||
return None
|
return None
|
||||||
|
|
||||||
@cached_property
|
@cached_property
|
||||||
def headers(self) -> Optional[dict]:
|
def headers(self) -> Optional[Dict[str, str]]:
|
||||||
try:
|
try:
|
||||||
return json.loads((Path(self.link_dir) / 'headers.json').read_text(encoding='utf-8').strip())
|
return json.loads((Path(self.link_dir) / 'headers.json').read_text(encoding='utf-8').strip())
|
||||||
except Exception:
|
except Exception:
|
||||||
@@ -250,11 +276,37 @@ class Snapshot(models.Model):
|
|||||||
tags_id = []
|
tags_id = []
|
||||||
for tag in tags:
|
for tag in tags:
|
||||||
if tag.strip():
|
if tag.strip():
|
||||||
tags_id.append(Tag.objects.get_or_create(name=tag)[0].id)
|
tags_id.append(Tag.objects.get_or_create(name=tag)[0].pk)
|
||||||
self.tags.clear()
|
self.tags.clear()
|
||||||
self.tags.add(*tags_id)
|
self.tags.add(*tags_id)
|
||||||
|
|
||||||
|
|
||||||
|
# def get_storage_dir(self, create=True, symlink=True) -> Path:
|
||||||
|
# date_str = self.added.strftime('%Y%m%d')
|
||||||
|
# domain_str = domain(self.url)
|
||||||
|
# abs_storage_dir = Path(ARCHIVE_DIR) / 'snapshots' / date_str / domain_str / str(self.ulid)
|
||||||
|
|
||||||
|
# if create and not abs_storage_dir.is_dir():
|
||||||
|
# abs_storage_dir.mkdir(parents=True, exist_ok=True)
|
||||||
|
|
||||||
|
# if symlink:
|
||||||
|
# LINK_PATHS = [
|
||||||
|
# Path(ARCHIVE_DIR).parent / 'index' / 'all_by_id' / str(self.ulid),
|
||||||
|
# # Path(ARCHIVE_DIR).parent / 'index' / 'snapshots_by_id' / str(self.ulid),
|
||||||
|
# Path(ARCHIVE_DIR).parent / 'index' / 'snapshots_by_date' / date_str / domain_str / str(self.ulid),
|
||||||
|
# Path(ARCHIVE_DIR).parent / 'index' / 'snapshots_by_domain' / domain_str / date_str / str(self.ulid),
|
||||||
|
# ]
|
||||||
|
# for link_path in LINK_PATHS:
|
||||||
|
# link_path.parent.mkdir(parents=True, exist_ok=True)
|
||||||
|
# try:
|
||||||
|
# link_path.symlink_to(abs_storage_dir)
|
||||||
|
# except FileExistsError:
|
||||||
|
# link_path.unlink()
|
||||||
|
# link_path.symlink_to(abs_storage_dir)
|
||||||
|
|
||||||
|
# return abs_storage_dir
|
||||||
|
|
||||||
|
|
||||||
class ArchiveResultManager(models.Manager):
|
class ArchiveResultManager(models.Manager):
|
||||||
def indexable(self, sorted: bool = True):
|
def indexable(self, sorted: bool = True):
|
||||||
INDEXABLE_METHODS = [ r[0] for r in ARCHIVE_METHODS_INDEXING_PRECEDENCE ]
|
INDEXABLE_METHODS = [ r[0] for r in ARCHIVE_METHODS_INDEXING_PRECEDENCE ]
|
||||||
@@ -266,13 +318,22 @@ class ArchiveResultManager(models.Manager):
|
|||||||
return qs
|
return qs
|
||||||
|
|
||||||
|
|
||||||
class ArchiveResult(models.Model):
|
class ArchiveResult(ABIDModel):
|
||||||
id = models.AutoField(primary_key=True, serialize=False, verbose_name='ID')
|
abid_prefix = 'res_'
|
||||||
uuid = models.UUIDField(default=uuid.uuid4, editable=False)
|
abid_ts_src = 'self.snapshot.added'
|
||||||
|
abid_uri_src = 'self.snapshot.url'
|
||||||
|
abid_subtype_src = 'self.extractor'
|
||||||
|
abid_rand_src = 'self.uuid'
|
||||||
|
EXTRACTOR_CHOICES = EXTRACTOR_CHOICES
|
||||||
|
|
||||||
|
# id = models.UUIDField(primary_key=True, default=uuid.uuid4, editable=False)
|
||||||
|
id = models.AutoField(primary_key=True, serialize=False, verbose_name='ID') # legacy pk
|
||||||
|
uuid = models.UUIDField(blank=True, null=True, editable=True, unique=True)
|
||||||
|
abid = ABIDField(prefix=abid_prefix)
|
||||||
|
|
||||||
snapshot = models.ForeignKey(Snapshot, on_delete=models.CASCADE)
|
snapshot = models.ForeignKey(Snapshot, on_delete=models.CASCADE)
|
||||||
extractor = models.CharField(choices=EXTRACTORS, max_length=32)
|
extractor = models.CharField(choices=EXTRACTOR_CHOICES, max_length=32)
|
||||||
cmd = JSONField()
|
cmd = models.JSONField()
|
||||||
pwd = models.CharField(max_length=256)
|
pwd = models.CharField(max_length=256)
|
||||||
cmd_version = models.CharField(max_length=128, default=None, null=True, blank=True)
|
cmd_version = models.CharField(max_length=128, default=None, null=True, blank=True)
|
||||||
output = models.CharField(max_length=1024)
|
output = models.CharField(max_length=1024)
|
||||||
@@ -282,5 +343,69 @@ class ArchiveResult(models.Model):
|
|||||||
|
|
||||||
objects = ArchiveResultManager()
|
objects = ArchiveResultManager()
|
||||||
|
|
||||||
|
class Meta(TypedModelMeta):
|
||||||
|
verbose_name = 'Result'
|
||||||
|
|
||||||
def __str__(self):
|
def __str__(self):
|
||||||
return self.extractor
|
return self.extractor
|
||||||
|
|
||||||
|
@cached_property
|
||||||
|
def snapshot_dir(self):
|
||||||
|
return Path(self.snapshot.link_dir)
|
||||||
|
|
||||||
|
|
||||||
|
@property
|
||||||
|
def extractor_module(self):
|
||||||
|
return EXTRACTORS[self.extractor]
|
||||||
|
|
||||||
|
def output_path(self) -> str:
|
||||||
|
"""return the canonical output filename or directory name within the snapshot dir"""
|
||||||
|
return self.extractor_module.get_output_path()
|
||||||
|
|
||||||
|
def embed_path(self) -> str:
|
||||||
|
"""
|
||||||
|
return the actual runtime-calculated path to the file on-disk that
|
||||||
|
should be used for user-facing iframe embeds of this result
|
||||||
|
"""
|
||||||
|
|
||||||
|
if hasattr(self.extractor_module, 'get_embed_path'):
|
||||||
|
return self.extractor_module.get_embed_path(self)
|
||||||
|
|
||||||
|
return self.extractor_module.get_output_path()
|
||||||
|
|
||||||
|
def legacy_output_path(self):
|
||||||
|
link = self.snapshot.as_link()
|
||||||
|
return link.canonical_outputs().get(f'{self.extractor}_path')
|
||||||
|
|
||||||
|
def output_exists(self) -> bool:
|
||||||
|
return Path(self.output_path()).exists()
|
||||||
|
|
||||||
|
|
||||||
|
# def get_storage_dir(self, create=True, symlink=True):
|
||||||
|
# date_str = self.snapshot.added.strftime('%Y%m%d')
|
||||||
|
# domain_str = domain(self.snapshot.url)
|
||||||
|
# abs_storage_dir = Path(ARCHIVE_DIR) / 'results' / date_str / domain_str / self.extractor / str(self.ulid)
|
||||||
|
|
||||||
|
# if create and not abs_storage_dir.is_dir():
|
||||||
|
# abs_storage_dir.mkdir(parents=True, exist_ok=True)
|
||||||
|
|
||||||
|
# if symlink:
|
||||||
|
# LINK_PATHS = [
|
||||||
|
# Path(ARCHIVE_DIR).parent / 'index' / 'all_by_id' / str(self.ulid),
|
||||||
|
# # Path(ARCHIVE_DIR).parent / 'index' / 'results_by_id' / str(self.ulid),
|
||||||
|
# # Path(ARCHIVE_DIR).parent / 'index' / 'results_by_date' / date_str / domain_str / self.extractor / str(self.ulid),
|
||||||
|
# Path(ARCHIVE_DIR).parent / 'index' / 'results_by_domain' / domain_str / date_str / self.extractor / str(self.ulid),
|
||||||
|
# Path(ARCHIVE_DIR).parent / 'index' / 'results_by_type' / self.extractor / date_str / domain_str / str(self.ulid),
|
||||||
|
# ]
|
||||||
|
# for link_path in LINK_PATHS:
|
||||||
|
# link_path.parent.mkdir(parents=True, exist_ok=True)
|
||||||
|
# try:
|
||||||
|
# link_path.symlink_to(abs_storage_dir)
|
||||||
|
# except FileExistsError:
|
||||||
|
# link_path.unlink()
|
||||||
|
# link_path.symlink_to(abs_storage_dir)
|
||||||
|
|
||||||
|
# return abs_storage_dir
|
||||||
|
|
||||||
|
# def symlink_index(self, create=True):
|
||||||
|
# abs_result_dir = self.get_storage_dir(create=create)
|
||||||
|
|||||||
@@ -10,6 +10,7 @@ from pathlib import Path
|
|||||||
from django.utils.crypto import get_random_string
|
from django.utils.crypto import get_random_string
|
||||||
|
|
||||||
from ..config import (
|
from ..config import (
|
||||||
|
CONFIG,
|
||||||
DEBUG,
|
DEBUG,
|
||||||
SECRET_KEY,
|
SECRET_KEY,
|
||||||
ALLOWED_HOSTS,
|
ALLOWED_HOSTS,
|
||||||
@@ -20,6 +21,7 @@ from ..config import (
|
|||||||
OUTPUT_DIR,
|
OUTPUT_DIR,
|
||||||
ARCHIVE_DIR,
|
ARCHIVE_DIR,
|
||||||
LOGS_DIR,
|
LOGS_DIR,
|
||||||
|
CACHE_DIR,
|
||||||
TIMEZONE,
|
TIMEZONE,
|
||||||
|
|
||||||
LDAP,
|
LDAP,
|
||||||
@@ -53,6 +55,26 @@ APPEND_SLASH = True
|
|||||||
|
|
||||||
DEBUG = DEBUG or ('--debug' in sys.argv)
|
DEBUG = DEBUG or ('--debug' in sys.argv)
|
||||||
|
|
||||||
|
|
||||||
|
# add plugins folders to system path, and load plugins in installed_apps
|
||||||
|
BUILTIN_PLUGINS_DIR = PACKAGE_DIR / 'plugins'
|
||||||
|
USER_PLUGINS_DIR = OUTPUT_DIR / 'plugins'
|
||||||
|
sys.path.insert(0, str(BUILTIN_PLUGINS_DIR))
|
||||||
|
sys.path.insert(0, str(USER_PLUGINS_DIR))
|
||||||
|
|
||||||
|
def find_plugins(plugins_dir):
|
||||||
|
return {
|
||||||
|
# plugin_entrypoint.parent.name: import_module(plugin_entrypoint.parent.name).METADATA
|
||||||
|
plugin_entrypoint.parent.name: plugin_entrypoint.parent
|
||||||
|
for plugin_entrypoint in plugins_dir.glob('*/apps.py')
|
||||||
|
}
|
||||||
|
|
||||||
|
INSTALLED_PLUGINS = {
|
||||||
|
**find_plugins(BUILTIN_PLUGINS_DIR),
|
||||||
|
**find_plugins(USER_PLUGINS_DIR),
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
INSTALLED_APPS = [
|
INSTALLED_APPS = [
|
||||||
'django.contrib.auth',
|
'django.contrib.auth',
|
||||||
'django.contrib.contenttypes',
|
'django.contrib.contenttypes',
|
||||||
@@ -60,13 +82,18 @@ INSTALLED_APPS = [
|
|||||||
'django.contrib.messages',
|
'django.contrib.messages',
|
||||||
'django.contrib.staticfiles',
|
'django.contrib.staticfiles',
|
||||||
'django.contrib.admin',
|
'django.contrib.admin',
|
||||||
|
'django_jsonform',
|
||||||
|
|
||||||
|
'signal_webhooks',
|
||||||
|
'abid_utils',
|
||||||
|
'plugantic',
|
||||||
'core',
|
'core',
|
||||||
'api',
|
'api',
|
||||||
|
|
||||||
|
*INSTALLED_PLUGINS.keys(),
|
||||||
|
|
||||||
'admin_data_views',
|
'admin_data_views',
|
||||||
|
|
||||||
'signal_webhooks',
|
|
||||||
'django_extensions',
|
'django_extensions',
|
||||||
]
|
]
|
||||||
|
|
||||||
@@ -227,6 +254,11 @@ TEMPLATES = [
|
|||||||
### External Service Settings
|
### External Service Settings
|
||||||
################################################################################
|
################################################################################
|
||||||
|
|
||||||
|
|
||||||
|
CACHE_DB_FILENAME = 'cache.sqlite3'
|
||||||
|
CACHE_DB_PATH = CACHE_DIR / CACHE_DB_FILENAME
|
||||||
|
CACHE_DB_TABLE = 'django_cache'
|
||||||
|
|
||||||
DATABASE_FILE = Path(OUTPUT_DIR) / SQL_INDEX_FILENAME
|
DATABASE_FILE = Path(OUTPUT_DIR) / SQL_INDEX_FILENAME
|
||||||
DATABASE_NAME = os.environ.get("ARCHIVEBOX_DATABASE_NAME", str(DATABASE_FILE))
|
DATABASE_NAME = os.environ.get("ARCHIVEBOX_DATABASE_NAME", str(DATABASE_FILE))
|
||||||
|
|
||||||
@@ -240,18 +272,28 @@ DATABASES = {
|
|||||||
},
|
},
|
||||||
'TIME_ZONE': TIMEZONE,
|
'TIME_ZONE': TIMEZONE,
|
||||||
# DB setup is sometimes modified at runtime by setup_django() in config.py
|
# DB setup is sometimes modified at runtime by setup_django() in config.py
|
||||||
}
|
},
|
||||||
|
# 'cache': {
|
||||||
|
# 'ENGINE': 'django.db.backends.sqlite3',
|
||||||
|
# 'NAME': CACHE_DB_PATH,
|
||||||
|
# 'OPTIONS': {
|
||||||
|
# 'timeout': 60,
|
||||||
|
# 'check_same_thread': False,
|
||||||
|
# },
|
||||||
|
# 'TIME_ZONE': TIMEZONE,
|
||||||
|
# },
|
||||||
}
|
}
|
||||||
|
MIGRATION_MODULES = {'signal_webhooks': None}
|
||||||
|
|
||||||
|
# as much as I'd love this to be a UUID or ULID field, it's not supported yet as of Django 5.0
|
||||||
|
DEFAULT_AUTO_FIELD = 'django.db.models.BigAutoField'
|
||||||
|
|
||||||
CACHE_BACKEND = 'django.core.cache.backends.locmem.LocMemCache'
|
|
||||||
# CACHE_BACKEND = 'django.core.cache.backends.db.DatabaseCache'
|
|
||||||
# CACHE_BACKEND = 'django.core.cache.backends.dummy.DummyCache'
|
|
||||||
|
|
||||||
CACHES = {
|
CACHES = {
|
||||||
'default': {
|
'default': {'BACKEND': 'django.core.cache.backends.locmem.LocMemCache'},
|
||||||
'BACKEND': CACHE_BACKEND,
|
# 'sqlite': {'BACKEND': 'django.core.cache.backends.db.DatabaseCache', 'LOCATION': 'cache'},
|
||||||
'LOCATION': 'django_cache_default',
|
# 'dummy': {'BACKEND': 'django.core.cache.backends.dummy.DummyCache'},
|
||||||
}
|
# 'filebased': {"BACKEND": "django.core.cache.backends.filebased.FileBasedCache", "LOCATION": CACHE_DIR / 'cache_filebased'},
|
||||||
}
|
}
|
||||||
|
|
||||||
EMAIL_BACKEND = 'django.core.mail.backends.console.EmailBackend'
|
EMAIL_BACKEND = 'django.core.mail.backends.console.EmailBackend'
|
||||||
@@ -409,9 +451,11 @@ LOGGING = {
|
|||||||
|
|
||||||
|
|
||||||
# Add default webhook configuration to the User model
|
# Add default webhook configuration to the User model
|
||||||
|
SIGNAL_WEBHOOKS_CUSTOM_MODEL = 'api.models.OutboundWebhook'
|
||||||
SIGNAL_WEBHOOKS = {
|
SIGNAL_WEBHOOKS = {
|
||||||
"HOOKS": {
|
"HOOKS": {
|
||||||
"django.contrib.auth.models.User": ..., # ... is a special value that means "use the default autogenerated hooks"
|
# ... is a special sigil value that means "use the default autogenerated hooks"
|
||||||
|
"django.contrib.auth.models.User": ...,
|
||||||
"core.models.Snapshot": ...,
|
"core.models.Snapshot": ...,
|
||||||
"core.models.ArchiveResult": ...,
|
"core.models.ArchiveResult": ...,
|
||||||
"core.models.Tag": ...,
|
"core.models.Tag": ...,
|
||||||
@@ -421,16 +465,36 @@ SIGNAL_WEBHOOKS = {
|
|||||||
|
|
||||||
|
|
||||||
ADMIN_DATA_VIEWS = {
|
ADMIN_DATA_VIEWS = {
|
||||||
"NAME": "configuration",
|
"NAME": "Environment",
|
||||||
"URLS": [
|
"URLS": [
|
||||||
{
|
{
|
||||||
"route": "live/",
|
"route": "config/",
|
||||||
"view": "core.views.live_config_list_view",
|
"view": "core.views.live_config_list_view",
|
||||||
"name": "live",
|
"name": "Configuration",
|
||||||
"items": {
|
"items": {
|
||||||
"route": "<str:key>/",
|
"route": "<str:key>/",
|
||||||
"view": "core.views.live_config_value_view",
|
"view": "core.views.live_config_value_view",
|
||||||
"name": "live_config_value",
|
"name": "config_val",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"route": "binaries/",
|
||||||
|
"view": "plugantic.views.binaries_list_view",
|
||||||
|
"name": "Binaries",
|
||||||
|
"items": {
|
||||||
|
"route": "<str:key>/",
|
||||||
|
"view": "plugantic.views.binary_detail_view",
|
||||||
|
"name": "binary",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"route": "plugins/",
|
||||||
|
"view": "plugantic.views.plugins_list_view",
|
||||||
|
"name": "Plugins",
|
||||||
|
"items": {
|
||||||
|
"route": "<str:key>/",
|
||||||
|
"view": "plugantic.views.plugin_detail_view",
|
||||||
|
"name": "plugin",
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
],
|
],
|
||||||
|
|||||||
@@ -3,6 +3,7 @@ __package__ = 'archivebox.core'
|
|||||||
from typing import Callable
|
from typing import Callable
|
||||||
|
|
||||||
from io import StringIO
|
from io import StringIO
|
||||||
|
from pathlib import Path
|
||||||
from contextlib import redirect_stdout
|
from contextlib import redirect_stdout
|
||||||
|
|
||||||
from django.shortcuts import render, redirect
|
from django.shortcuts import render, redirect
|
||||||
@@ -36,10 +37,14 @@ from ..config import (
|
|||||||
CONFIG_SCHEMA,
|
CONFIG_SCHEMA,
|
||||||
DYNAMIC_CONFIG_SCHEMA,
|
DYNAMIC_CONFIG_SCHEMA,
|
||||||
USER_CONFIG,
|
USER_CONFIG,
|
||||||
|
SAVE_ARCHIVE_DOT_ORG,
|
||||||
|
PREVIEW_ORIGINALS,
|
||||||
)
|
)
|
||||||
|
from ..logging_util import printable_filesize
|
||||||
from ..main import add
|
from ..main import add
|
||||||
from ..util import base_url, ansi_to_html
|
from ..util import base_url, ansi_to_html, htmlencode, urldecode, urlencode, ts_to_date_str
|
||||||
from ..search import query_search_index
|
from ..search import query_search_index
|
||||||
|
from ..extractors.wget import wget_output_path
|
||||||
|
|
||||||
|
|
||||||
class HomepageView(View):
|
class HomepageView(View):
|
||||||
@@ -56,10 +61,80 @@ class HomepageView(View):
|
|||||||
class SnapshotView(View):
|
class SnapshotView(View):
|
||||||
# render static html index from filesystem archive/<timestamp>/index.html
|
# render static html index from filesystem archive/<timestamp>/index.html
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def render_live_index(request, snapshot):
|
||||||
|
TITLE_LOADING_MSG = 'Not yet archived...'
|
||||||
|
HIDDEN_RESULTS = ('favicon', 'headers', 'title', 'htmltotext', 'warc', 'archive_org')
|
||||||
|
|
||||||
|
archiveresults = {}
|
||||||
|
|
||||||
|
results = snapshot.archiveresult_set.all()
|
||||||
|
|
||||||
|
for result in results:
|
||||||
|
embed_path = result.embed_path()
|
||||||
|
abs_path = result.snapshot_dir / (embed_path or 'None')
|
||||||
|
|
||||||
|
if (result.status == 'succeeded'
|
||||||
|
and (result.extractor not in HIDDEN_RESULTS)
|
||||||
|
and embed_path
|
||||||
|
and abs_path.exists()):
|
||||||
|
if abs_path.is_dir() and not any(abs_path.glob('*.*')):
|
||||||
|
continue
|
||||||
|
|
||||||
|
result_info = {
|
||||||
|
'name': result.extractor,
|
||||||
|
'path': embed_path,
|
||||||
|
'ts': ts_to_date_str(result.end_ts),
|
||||||
|
}
|
||||||
|
archiveresults[result.extractor] = result_info
|
||||||
|
|
||||||
|
preferred_types = ('singlefile', 'wget', 'screenshot', 'dom', 'media', 'pdf', 'readability', 'mercury')
|
||||||
|
all_types = preferred_types + tuple(result_type for result_type in archiveresults.keys() if result_type not in preferred_types)
|
||||||
|
|
||||||
|
best_result = {'path': 'None'}
|
||||||
|
for result_type in preferred_types:
|
||||||
|
if result_type in archiveresults:
|
||||||
|
best_result = archiveresults[result_type]
|
||||||
|
break
|
||||||
|
|
||||||
|
link = snapshot.as_link()
|
||||||
|
|
||||||
|
link_info = link._asdict(extended=True)
|
||||||
|
|
||||||
|
try:
|
||||||
|
warc_path = 'warc/' + list(Path(snapshot.link_dir).glob('warc/*.warc.*'))[0].name
|
||||||
|
except IndexError:
|
||||||
|
warc_path = 'warc/'
|
||||||
|
|
||||||
|
context = {
|
||||||
|
**link_info,
|
||||||
|
**link_info['canonical'],
|
||||||
|
'title': htmlencode(
|
||||||
|
link.title
|
||||||
|
or (link.base_url if link.is_archived else TITLE_LOADING_MSG)
|
||||||
|
),
|
||||||
|
'extension': link.extension or 'html',
|
||||||
|
'tags': link.tags or 'untagged',
|
||||||
|
'size': printable_filesize(link.archive_size) if link.archive_size else 'pending',
|
||||||
|
'status': 'archived' if link.is_archived else 'not yet archived',
|
||||||
|
'status_color': 'success' if link.is_archived else 'danger',
|
||||||
|
'oldest_archive_date': ts_to_date_str(link.oldest_archive_date),
|
||||||
|
'warc_path': warc_path,
|
||||||
|
'SAVE_ARCHIVE_DOT_ORG': SAVE_ARCHIVE_DOT_ORG,
|
||||||
|
'PREVIEW_ORIGINALS': PREVIEW_ORIGINALS,
|
||||||
|
'archiveresults': sorted(archiveresults.values(), key=lambda r: all_types.index(r['name'])),
|
||||||
|
'best_result': best_result,
|
||||||
|
# 'tags_str': 'somealskejrewlkrjwer,werlmwrwlekrjewlkrjwer324m532l,4m32,23m324234',
|
||||||
|
}
|
||||||
|
return render(template_name='core/snapshot_live.html', request=request, context=context)
|
||||||
|
|
||||||
|
|
||||||
def get(self, request, path):
|
def get(self, request, path):
|
||||||
if not request.user.is_authenticated and not PUBLIC_SNAPSHOTS:
|
if not request.user.is_authenticated and not PUBLIC_SNAPSHOTS:
|
||||||
return redirect(f'/admin/login/?next={request.path}')
|
return redirect(f'/admin/login/?next={request.path}')
|
||||||
|
|
||||||
|
snapshot = None
|
||||||
|
|
||||||
try:
|
try:
|
||||||
slug, archivefile = path.split('/', 1)
|
slug, archivefile = path.split('/', 1)
|
||||||
except (IndexError, ValueError):
|
except (IndexError, ValueError):
|
||||||
@@ -75,7 +150,11 @@ class SnapshotView(View):
|
|||||||
try:
|
try:
|
||||||
try:
|
try:
|
||||||
snapshot = Snapshot.objects.get(Q(timestamp=slug) | Q(id__startswith=slug))
|
snapshot = Snapshot.objects.get(Q(timestamp=slug) | Q(id__startswith=slug))
|
||||||
response = static.serve(request, archivefile, document_root=snapshot.link_dir, show_indexes=True)
|
if archivefile == 'index.html':
|
||||||
|
# if they requested snapshot index, serve live rendered template instead of static html
|
||||||
|
response = self.render_live_index(request, snapshot)
|
||||||
|
else:
|
||||||
|
response = static.serve(request, archivefile, document_root=snapshot.link_dir, show_indexes=True)
|
||||||
response["Link"] = f'<{snapshot.url}>; rel="canonical"'
|
response["Link"] = f'<{snapshot.url}>; rel="canonical"'
|
||||||
return response
|
return response
|
||||||
except Snapshot.DoesNotExist:
|
except Snapshot.DoesNotExist:
|
||||||
@@ -127,26 +206,33 @@ class SnapshotView(View):
|
|||||||
status=404,
|
status=404,
|
||||||
)
|
)
|
||||||
except Http404:
|
except Http404:
|
||||||
|
assert snapshot # (Snapshot.DoesNotExist is already handled above)
|
||||||
|
|
||||||
# Snapshot dir exists but file within does not e.g. 124235.324234/screenshot.png
|
# Snapshot dir exists but file within does not e.g. 124235.324234/screenshot.png
|
||||||
return HttpResponse(
|
return HttpResponse(
|
||||||
format_html(
|
format_html(
|
||||||
(
|
(
|
||||||
'<center><br/><br/><br/>'
|
'<center><br/><br/><br/>'
|
||||||
f'Snapshot <a href="/archive/{snapshot.timestamp}/index.html" target="_top"><b><code>[{snapshot.timestamp}]</code></b></a> exists in DB, but resource <b><code>{snapshot.timestamp}/'
|
f'Snapshot <a href="/archive/{snapshot.timestamp}/index.html" target="_top"><b><code>[{snapshot.timestamp}]</code></b></a>: <a href="{snapshot.url}" target="_blank" rel="noreferrer">{snapshot.url}</a><br/>'
|
||||||
|
f'was queued on {str(snapshot.added).split(".")[0]}, '
|
||||||
|
f'but no files have been saved yet in:<br/><b><a href="/archive/{snapshot.timestamp}/" target="_top"><code>{snapshot.timestamp}</code></a><code>/'
|
||||||
'{}'
|
'{}'
|
||||||
f'</code></b> does not exist in the <a href="/archive/{snapshot.timestamp}/" target="_top">snapshot dir</a> yet.<br/><br/>'
|
f'</code></b><br/><br/>'
|
||||||
'It\'s possible that this resource type is not available for the Snapshot,<br/>or that the archiving process has not completed yet.<br/>'
|
'It\'s possible {} '
|
||||||
f'<pre><code># if interrupted, run this cmd to finish archiving this Snapshot<br/>archivebox update -t timestamp {snapshot.timestamp}</code></pre><br/><br/>'
|
f'during the last capture on {str(snapshot.added).split(".")[0]},<br/>or that the archiving process has not completed yet.<br/>'
|
||||||
|
f'<pre><code># run this cmd to finish/retry archiving this Snapshot</code><br/>'
|
||||||
|
f'<code style="user-select: all; color: #333">archivebox update -t timestamp {snapshot.timestamp}</code></pre><br/><br/>'
|
||||||
'<div class="text-align: left; width: 100%; max-width: 400px">'
|
'<div class="text-align: left; width: 100%; max-width: 400px">'
|
||||||
'<i><b>Next steps:</i></b><br/>'
|
'<i><b>Next steps:</i></b><br/>'
|
||||||
f'- list all the <a href="/archive/{snapshot.timestamp}/" target="_top">Snapshot files <code>.*</code></a><br/>'
|
f'- list all the <a href="/archive/{snapshot.timestamp}/" target="_top">Snapshot files <code>.*</code></a><br/>'
|
||||||
f'- view the <a href="/archive/{snapshot.timestamp}/index.html" target="_top">Snapshot <code>./index.html</code></a><br/>'
|
f'- view the <a href="/archive/{snapshot.timestamp}/index.html" target="_top">Snapshot <code>./index.html</code></a><br/>'
|
||||||
f'- go to the <a href="/admin/core/snapshot/{snapshot.id}/change/" target="_top">Snapshot admin</a> to edit<br/>'
|
f'- go to the <a href="/admin/core/snapshot/{snapshot.pk}/change/" target="_top">Snapshot admin</a> to edit<br/>'
|
||||||
f'- go to the <a href="/admin/core/snapshot/?id__startswith={snapshot.id}" target="_top">Snapshot actions</a> to re-archive<br/>'
|
f'- go to the <a href="/admin/core/snapshot/?uuid__startswith={snapshot.uuid}" target="_top">Snapshot actions</a> to re-archive<br/>'
|
||||||
'- or return to <a href="/" target="_top">the main index...</a></div>'
|
'- or return to <a href="/" target="_top">the main index...</a></div>'
|
||||||
'</center>'
|
'</center>'
|
||||||
),
|
),
|
||||||
archivefile,
|
archivefile if str(archivefile) != 'None' else '',
|
||||||
|
f'the {archivefile} resource could not be fetched' if str(archivefile) != 'None' else 'the original site was not available',
|
||||||
),
|
),
|
||||||
content_type="text/html",
|
content_type="text/html",
|
||||||
status=404,
|
status=404,
|
||||||
@@ -369,21 +455,21 @@ def live_config_list_view(request: HttpRequest, **kwargs) -> TableContext:
|
|||||||
|
|
||||||
for section in CONFIG_SCHEMA.keys():
|
for section in CONFIG_SCHEMA.keys():
|
||||||
for key in CONFIG_SCHEMA[section].keys():
|
for key in CONFIG_SCHEMA[section].keys():
|
||||||
rows['Section'].append(section.replace('_', ' ').title().replace(' Config', ''))
|
rows['Section'].append(section) # section.replace('_', ' ').title().replace(' Config', '')
|
||||||
rows['Key'].append(ItemLink(key, key=key))
|
rows['Key'].append(ItemLink(key, key=key))
|
||||||
rows['Type'].append(mark_safe(f'<code>{find_config_type(key)}</code>'))
|
rows['Type'].append(mark_safe(f'<code>{find_config_type(key)}</code>'))
|
||||||
rows['Value'].append(mark_safe(f'<code>{CONFIG[key]}</code>') if key_is_safe(key) else '******** (redacted)')
|
rows['Value'].append(mark_safe(f'<code>{CONFIG[key]}</code>') if key_is_safe(key) else '******** (redacted)')
|
||||||
rows['Default'].append(mark_safe(f'<a href="https://github.com/search?q=repo%3AArchiveBox%2FArchiveBox+path%3Aconfig.py+%27{key}%27&type=code"><code style="text-decoration: underline">{find_config_default(key) or 'See here...'}</code></a>'))
|
rows['Default'].append(mark_safe(f'<a href="https://github.com/search?q=repo%3AArchiveBox%2FArchiveBox+path%3Aconfig.py+%27{key}%27&type=code"><code style="text-decoration: underline">{find_config_default(key) or "See here..."}</code></a>'))
|
||||||
# rows['Documentation'].append(mark_safe(f'Wiki: <a href="https://github.com/ArchiveBox/ArchiveBox/wiki/Configuration#{key.lower()}">{key}</a>'))
|
# rows['Documentation'].append(mark_safe(f'Wiki: <a href="https://github.com/ArchiveBox/ArchiveBox/wiki/Configuration#{key.lower()}">{key}</a>'))
|
||||||
rows['Aliases'].append(', '.join(CONFIG_SCHEMA[section][key].get('aliases', [])))
|
rows['Aliases'].append(', '.join(CONFIG_SCHEMA[section][key].get('aliases', [])))
|
||||||
|
|
||||||
section = 'DYNAMIC'
|
section = 'DYNAMIC'
|
||||||
for key in DYNAMIC_CONFIG_SCHEMA.keys():
|
for key in DYNAMIC_CONFIG_SCHEMA.keys():
|
||||||
rows['Section'].append(section.replace('_', ' ').title().replace(' Config', ''))
|
rows['Section'].append(section) # section.replace('_', ' ').title().replace(' Config', '')
|
||||||
rows['Key'].append(ItemLink(key, key=key))
|
rows['Key'].append(ItemLink(key, key=key))
|
||||||
rows['Type'].append(mark_safe(f'<code>{find_config_type(key)}</code>'))
|
rows['Type'].append(mark_safe(f'<code>{find_config_type(key)}</code>'))
|
||||||
rows['Value'].append(mark_safe(f'<code>{CONFIG[key]}</code>') if key_is_safe(key) else '******** (redacted)')
|
rows['Value'].append(mark_safe(f'<code>{CONFIG[key]}</code>') if key_is_safe(key) else '******** (redacted)')
|
||||||
rows['Default'].append(mark_safe(f'<a href="https://github.com/search?q=repo%3AArchiveBox%2FArchiveBox+path%3Aconfig.py+%27{key}%27&type=code"><code style="text-decoration: underline">{find_config_default(key) or 'See here...'}</code></a>'))
|
rows['Default'].append(mark_safe(f'<a href="https://github.com/search?q=repo%3AArchiveBox%2FArchiveBox+path%3Aconfig.py+%27{key}%27&type=code"><code style="text-decoration: underline">{find_config_default(key) or "See here..."}</code></a>'))
|
||||||
# rows['Documentation'].append(mark_safe(f'Wiki: <a href="https://github.com/ArchiveBox/ArchiveBox/wiki/Configuration#{key.lower()}">{key}</a>'))
|
# rows['Documentation'].append(mark_safe(f'Wiki: <a href="https://github.com/ArchiveBox/ArchiveBox/wiki/Configuration#{key.lower()}">{key}</a>'))
|
||||||
rows['Aliases'].append(ItemLink(key, key=key) if key in USER_CONFIG else '')
|
rows['Aliases'].append(ItemLink(key, key=key) if key in USER_CONFIG else '')
|
||||||
|
|
||||||
|
|||||||
@@ -1,11 +1,13 @@
|
|||||||
__package__ = 'archivebox.extractors'
|
__package__ = 'archivebox.extractors'
|
||||||
|
|
||||||
|
from typing import Callable, Optional, Dict, List, Iterable, Union, Protocol, cast
|
||||||
|
|
||||||
import os
|
import os
|
||||||
import sys
|
import sys
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
from importlib import import_module
|
||||||
from typing import Callable, Optional, List, Iterable, Union
|
|
||||||
from datetime import datetime, timezone
|
from datetime import datetime, timezone
|
||||||
|
|
||||||
from django.db.models import QuerySet
|
from django.db.models import QuerySet
|
||||||
|
|
||||||
from ..config import (
|
from ..config import (
|
||||||
@@ -158,7 +160,7 @@ def archive_link(link: Link, overwrite: bool=False, methods: Optional[Iterable[s
|
|||||||
# bump the updated time on the main Snapshot here, this is critical
|
# bump the updated time on the main Snapshot here, this is critical
|
||||||
# to be able to cache summaries of the ArchiveResults for a given
|
# to be able to cache summaries of the ArchiveResults for a given
|
||||||
# snapshot without having to load all the results from the DB each time.
|
# snapshot without having to load all the results from the DB each time.
|
||||||
# (we use {Snapshot.id}-{Snapshot.updated} as the cache key and assume
|
# (we use {Snapshot.pk}-{Snapshot.updated} as the cache key and assume
|
||||||
# ArchiveResults are unchanged as long as the updated timestamp is unchanged)
|
# ArchiveResults are unchanged as long as the updated timestamp is unchanged)
|
||||||
snapshot.save()
|
snapshot.save()
|
||||||
else:
|
else:
|
||||||
@@ -240,3 +242,37 @@ def archive_links(all_links: Union[Iterable[Link], QuerySet], overwrite: bool=Fa
|
|||||||
|
|
||||||
log_archiving_finished(num_links)
|
log_archiving_finished(num_links)
|
||||||
return all_links
|
return all_links
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
EXTRACTORS_DIR = Path(__file__).parent
|
||||||
|
|
||||||
|
class ExtractorModuleProtocol(Protocol):
|
||||||
|
"""Type interface for an Extractor Module (WIP)"""
|
||||||
|
|
||||||
|
get_output_path: Callable
|
||||||
|
|
||||||
|
# TODO:
|
||||||
|
# get_embed_path: Callable | None
|
||||||
|
# should_extract(Snapshot)
|
||||||
|
# extract(Snapshot)
|
||||||
|
|
||||||
|
|
||||||
|
def get_extractors(dir: Path=EXTRACTORS_DIR) -> Dict[str, ExtractorModuleProtocol]:
|
||||||
|
"""iterate through archivebox/extractors/*.py and load extractor modules"""
|
||||||
|
EXTRACTORS = {}
|
||||||
|
|
||||||
|
for filename in EXTRACTORS_DIR.glob('*.py'):
|
||||||
|
if filename.name.startswith('__'):
|
||||||
|
continue
|
||||||
|
|
||||||
|
extractor_name = filename.name.replace('.py', '')
|
||||||
|
|
||||||
|
extractor_module = cast(ExtractorModuleProtocol, import_module(f'.{extractor_name}', package=__package__))
|
||||||
|
|
||||||
|
assert getattr(extractor_module, 'get_output_path')
|
||||||
|
EXTRACTORS[extractor_name] = extractor_module
|
||||||
|
|
||||||
|
return EXTRACTORS
|
||||||
|
|
||||||
|
EXTRACTORS = get_extractors(EXTRACTORS_DIR)
|
||||||
|
|||||||
@@ -24,6 +24,8 @@ from ..config import (
|
|||||||
)
|
)
|
||||||
from ..logging_util import TimedProgress
|
from ..logging_util import TimedProgress
|
||||||
|
|
||||||
|
def get_output_path():
|
||||||
|
return 'archive.org.txt'
|
||||||
|
|
||||||
|
|
||||||
@enforce_types
|
@enforce_types
|
||||||
@@ -32,7 +34,7 @@ def should_save_archive_dot_org(link: Link, out_dir: Optional[Path]=None, overwr
|
|||||||
return False
|
return False
|
||||||
|
|
||||||
out_dir = out_dir or Path(link.link_dir)
|
out_dir = out_dir or Path(link.link_dir)
|
||||||
if not overwrite and (out_dir / 'archive.org.txt').exists():
|
if not overwrite and (out_dir / get_output_path()).exists():
|
||||||
# if open(path, 'r', encoding='utf-8').read().strip() != 'None':
|
# if open(path, 'r', encoding='utf-8').read().strip() != 'None':
|
||||||
return False
|
return False
|
||||||
|
|
||||||
@@ -43,7 +45,7 @@ def save_archive_dot_org(link: Link, out_dir: Optional[Path]=None, timeout: int=
|
|||||||
"""submit site to archive.org for archiving via their service, save returned archive url"""
|
"""submit site to archive.org for archiving via their service, save returned archive url"""
|
||||||
|
|
||||||
out_dir = out_dir or Path(link.link_dir)
|
out_dir = out_dir or Path(link.link_dir)
|
||||||
output: ArchiveOutput = 'archive.org.txt'
|
output: ArchiveOutput = get_output_path()
|
||||||
archive_org_url = None
|
archive_org_url = None
|
||||||
submit_url = 'https://web.archive.org/save/{}'.format(link.url)
|
submit_url = 'https://web.archive.org/save/{}'.format(link.url)
|
||||||
# later options take precedence
|
# later options take precedence
|
||||||
@@ -88,7 +90,7 @@ def save_archive_dot_org(link: Link, out_dir: Optional[Path]=None, timeout: int=
|
|||||||
archive_org_url = archive_org_url or submit_url
|
archive_org_url = archive_org_url or submit_url
|
||||||
with open(str(out_dir / output), 'w', encoding='utf-8') as f:
|
with open(str(out_dir / output), 'w', encoding='utf-8') as f:
|
||||||
f.write(archive_org_url)
|
f.write(archive_org_url)
|
||||||
chmod_file('archive.org.txt', cwd=str(out_dir))
|
chmod_file(str(out_dir / output), cwd=str(out_dir))
|
||||||
output = archive_org_url
|
output = archive_org_url
|
||||||
|
|
||||||
return ArchiveResult(
|
return ArchiveResult(
|
||||||
|
|||||||
@@ -19,6 +19,9 @@ from ..config import (
|
|||||||
from ..logging_util import TimedProgress
|
from ..logging_util import TimedProgress
|
||||||
|
|
||||||
|
|
||||||
|
def get_output_path():
|
||||||
|
return 'output.html'
|
||||||
|
|
||||||
|
|
||||||
@enforce_types
|
@enforce_types
|
||||||
def should_save_dom(link: Link, out_dir: Optional[Path]=None, overwrite: Optional[bool]=False) -> bool:
|
def should_save_dom(link: Link, out_dir: Optional[Path]=None, overwrite: Optional[bool]=False) -> bool:
|
||||||
@@ -26,8 +29,8 @@ def should_save_dom(link: Link, out_dir: Optional[Path]=None, overwrite: Optiona
|
|||||||
return False
|
return False
|
||||||
|
|
||||||
out_dir = out_dir or Path(link.link_dir)
|
out_dir = out_dir or Path(link.link_dir)
|
||||||
if not overwrite and (out_dir / 'output.html').exists():
|
if not overwrite and (out_dir / get_output_path()).exists():
|
||||||
if (out_dir / 'output.html').stat().st_size > 1:
|
if (out_dir / get_output_path()).stat().st_size > 1:
|
||||||
return False
|
return False
|
||||||
|
|
||||||
return SAVE_DOM
|
return SAVE_DOM
|
||||||
@@ -37,7 +40,7 @@ def save_dom(link: Link, out_dir: Optional[Path]=None, timeout: int=TIMEOUT) ->
|
|||||||
"""print HTML of site to file using chrome --dump-html"""
|
"""print HTML of site to file using chrome --dump-html"""
|
||||||
|
|
||||||
out_dir = out_dir or Path(link.link_dir)
|
out_dir = out_dir or Path(link.link_dir)
|
||||||
output: ArchiveOutput = 'output.html'
|
output: ArchiveOutput = get_output_path()
|
||||||
output_path = out_dir / output
|
output_path = out_dir / output
|
||||||
cmd = [
|
cmd = [
|
||||||
*chrome_args(),
|
*chrome_args(),
|
||||||
|
|||||||
@@ -8,8 +8,8 @@ from ..index.schema import Link, ArchiveResult, ArchiveOutput
|
|||||||
from ..system import chmod_file, run
|
from ..system import chmod_file, run
|
||||||
from ..util import (
|
from ..util import (
|
||||||
enforce_types,
|
enforce_types,
|
||||||
domain,
|
domain,
|
||||||
dedupe,
|
dedupe,
|
||||||
)
|
)
|
||||||
from ..config import (
|
from ..config import (
|
||||||
TIMEOUT,
|
TIMEOUT,
|
||||||
@@ -33,6 +33,11 @@ def should_save_favicon(link: Link, out_dir: Optional[str]=None, overwrite: Opti
|
|||||||
|
|
||||||
return SAVE_FAVICON
|
return SAVE_FAVICON
|
||||||
|
|
||||||
|
@enforce_types
|
||||||
|
def get_output_path():
|
||||||
|
return 'favicon.ico'
|
||||||
|
|
||||||
|
|
||||||
@enforce_types
|
@enforce_types
|
||||||
def save_favicon(link: Link, out_dir: Optional[Path]=None, timeout: int=TIMEOUT) -> ArchiveResult:
|
def save_favicon(link: Link, out_dir: Optional[Path]=None, timeout: int=TIMEOUT) -> ArchiveResult:
|
||||||
"""download site favicon from google's favicon api"""
|
"""download site favicon from google's favicon api"""
|
||||||
|
|||||||
@@ -26,6 +26,19 @@ from ..config import (
|
|||||||
from ..logging_util import TimedProgress
|
from ..logging_util import TimedProgress
|
||||||
|
|
||||||
|
|
||||||
|
def get_output_path():
|
||||||
|
return 'git/'
|
||||||
|
|
||||||
|
def get_embed_path(archiveresult=None):
|
||||||
|
if not archiveresult:
|
||||||
|
return get_output_path()
|
||||||
|
|
||||||
|
try:
|
||||||
|
return get_output_path() + list((archiveresult.snapshot_dir / get_output_path()).glob('*'))[0].name + '/'
|
||||||
|
except IndexError:
|
||||||
|
pass
|
||||||
|
|
||||||
|
return get_output_path()
|
||||||
|
|
||||||
@enforce_types
|
@enforce_types
|
||||||
def should_save_git(link: Link, out_dir: Optional[Path]=None, overwrite: Optional[bool]=False) -> bool:
|
def should_save_git(link: Link, out_dir: Optional[Path]=None, overwrite: Optional[bool]=False) -> bool:
|
||||||
@@ -33,7 +46,7 @@ def should_save_git(link: Link, out_dir: Optional[Path]=None, overwrite: Optiona
|
|||||||
return False
|
return False
|
||||||
|
|
||||||
out_dir = out_dir or Path(link.link_dir)
|
out_dir = out_dir or Path(link.link_dir)
|
||||||
if not overwrite and (out_dir / 'git').exists():
|
if not overwrite and (out_dir / get_output_path()).exists():
|
||||||
return False
|
return False
|
||||||
|
|
||||||
is_clonable_url = (
|
is_clonable_url = (
|
||||||
@@ -51,7 +64,7 @@ def save_git(link: Link, out_dir: Optional[Path]=None, timeout: int=TIMEOUT) ->
|
|||||||
"""download full site using git"""
|
"""download full site using git"""
|
||||||
|
|
||||||
out_dir = out_dir or Path(link.link_dir)
|
out_dir = out_dir or Path(link.link_dir)
|
||||||
output: ArchiveOutput = 'git'
|
output: ArchiveOutput = get_output_path()
|
||||||
output_path = out_dir / output
|
output_path = out_dir / output
|
||||||
output_path.mkdir(exist_ok=True)
|
output_path.mkdir(exist_ok=True)
|
||||||
cmd = [
|
cmd = [
|
||||||
|
|||||||
@@ -23,10 +23,14 @@ from ..config import (
|
|||||||
)
|
)
|
||||||
from ..logging_util import TimedProgress
|
from ..logging_util import TimedProgress
|
||||||
|
|
||||||
|
def get_output_path():
|
||||||
|
return 'headers.json'
|
||||||
|
|
||||||
|
|
||||||
@enforce_types
|
@enforce_types
|
||||||
def should_save_headers(link: Link, out_dir: Optional[str]=None, overwrite: Optional[bool]=False) -> bool:
|
def should_save_headers(link: Link, out_dir: Optional[str]=None, overwrite: Optional[bool]=False) -> bool:
|
||||||
out_dir = out_dir or Path(link.link_dir)
|
out_dir = out_dir or Path(link.link_dir)
|
||||||
if not overwrite and (out_dir / 'headers.json').exists():
|
if not overwrite and (out_dir / get_output_path()).exists():
|
||||||
return False
|
return False
|
||||||
|
|
||||||
return SAVE_HEADERS
|
return SAVE_HEADERS
|
||||||
@@ -38,7 +42,7 @@ def save_headers(link: Link, out_dir: Optional[str]=None, timeout: int=TIMEOUT)
|
|||||||
|
|
||||||
out_dir = Path(out_dir or link.link_dir)
|
out_dir = Path(out_dir or link.link_dir)
|
||||||
output_folder = out_dir.absolute()
|
output_folder = out_dir.absolute()
|
||||||
output: ArchiveOutput = 'headers.json'
|
output: ArchiveOutput = get_output_path()
|
||||||
|
|
||||||
status = 'succeeded'
|
status = 'succeeded'
|
||||||
timer = TimedProgress(timeout, prefix=' ')
|
timer = TimedProgress(timeout, prefix=' ')
|
||||||
@@ -59,7 +63,7 @@ def save_headers(link: Link, out_dir: Optional[str]=None, timeout: int=TIMEOUT)
|
|||||||
try:
|
try:
|
||||||
json_headers = get_headers(link.url, timeout=timeout)
|
json_headers = get_headers(link.url, timeout=timeout)
|
||||||
output_folder.mkdir(exist_ok=True)
|
output_folder.mkdir(exist_ok=True)
|
||||||
atomic_write(str(output_folder / "headers.json"), json_headers)
|
atomic_write(str(output_folder / get_output_path()), json_headers)
|
||||||
except (Exception, OSError) as err:
|
except (Exception, OSError) as err:
|
||||||
status = 'failed'
|
status = 'failed'
|
||||||
output = err
|
output = err
|
||||||
|
|||||||
@@ -19,6 +19,12 @@ from ..util import (
|
|||||||
)
|
)
|
||||||
from .title import get_html
|
from .title import get_html
|
||||||
|
|
||||||
|
|
||||||
|
def get_output_path():
|
||||||
|
return "htmltotext.txt"
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
class HTMLTextExtractor(HTMLParser):
|
class HTMLTextExtractor(HTMLParser):
|
||||||
TEXT_ATTRS = [
|
TEXT_ATTRS = [
|
||||||
"alt", "cite", "href", "label",
|
"alt", "cite", "href", "label",
|
||||||
@@ -109,7 +115,7 @@ def should_save_htmltotext(link: Link, out_dir: Optional[Path]=None, overwrite:
|
|||||||
return False
|
return False
|
||||||
|
|
||||||
out_dir = out_dir or Path(link.link_dir)
|
out_dir = out_dir or Path(link.link_dir)
|
||||||
if not overwrite and (out_dir / 'htmltotext.txt').exists():
|
if not overwrite and (out_dir / get_output_path()).exists():
|
||||||
return False
|
return False
|
||||||
|
|
||||||
return SAVE_HTMLTOTEXT
|
return SAVE_HTMLTOTEXT
|
||||||
@@ -120,7 +126,7 @@ def save_htmltotext(link: Link, out_dir: Optional[Path]=None, timeout: int=TIMEO
|
|||||||
"""extract search-indexing-friendly text from an HTML document"""
|
"""extract search-indexing-friendly text from an HTML document"""
|
||||||
|
|
||||||
out_dir = Path(out_dir or link.link_dir)
|
out_dir = Path(out_dir or link.link_dir)
|
||||||
output = "htmltotext.txt"
|
output = get_output_path()
|
||||||
cmd = ['(internal) archivebox.extractors.htmltotext', './{singlefile,dom}.html']
|
cmd = ['(internal) archivebox.extractors.htmltotext', './{singlefile,dom}.html']
|
||||||
|
|
||||||
timer = TimedProgress(timeout, prefix=' ')
|
timer = TimedProgress(timeout, prefix=' ')
|
||||||
|
|||||||
@@ -22,13 +22,27 @@ from ..config import (
|
|||||||
from ..logging_util import TimedProgress
|
from ..logging_util import TimedProgress
|
||||||
|
|
||||||
|
|
||||||
|
def get_output_path():
|
||||||
|
return 'media/'
|
||||||
|
|
||||||
|
def get_embed_path(archiveresult=None):
|
||||||
|
if not archiveresult:
|
||||||
|
return get_output_path()
|
||||||
|
|
||||||
|
out_dir = archiveresult.snapshot_dir / get_output_path()
|
||||||
|
try:
|
||||||
|
return get_output_path() + list(out_dir.glob('*.mp4'))[0].name
|
||||||
|
except IndexError:
|
||||||
|
return get_output_path()
|
||||||
|
|
||||||
|
|
||||||
@enforce_types
|
@enforce_types
|
||||||
def should_save_media(link: Link, out_dir: Optional[Path]=None, overwrite: Optional[bool]=False) -> bool:
|
def should_save_media(link: Link, out_dir: Optional[Path]=None, overwrite: Optional[bool]=False) -> bool:
|
||||||
if is_static_file(link.url):
|
if is_static_file(link.url):
|
||||||
return False
|
return False
|
||||||
|
|
||||||
out_dir = out_dir or Path(link.link_dir)
|
out_dir = out_dir or Path(link.link_dir)
|
||||||
if not overwrite and (out_dir / 'media').exists():
|
if not overwrite and (out_dir / get_output_path()).exists():
|
||||||
return False
|
return False
|
||||||
|
|
||||||
return SAVE_MEDIA
|
return SAVE_MEDIA
|
||||||
@@ -38,7 +52,7 @@ def save_media(link: Link, out_dir: Optional[Path]=None, timeout: int=MEDIA_TIME
|
|||||||
"""Download playlists or individual video, audio, and subtitles using youtube-dl or yt-dlp"""
|
"""Download playlists or individual video, audio, and subtitles using youtube-dl or yt-dlp"""
|
||||||
|
|
||||||
out_dir = out_dir or Path(link.link_dir)
|
out_dir = out_dir or Path(link.link_dir)
|
||||||
output: ArchiveOutput = 'media'
|
output: ArchiveOutput = get_output_path()
|
||||||
output_path = out_dir / output
|
output_path = out_dir / output
|
||||||
output_path.mkdir(exist_ok=True)
|
output_path.mkdir(exist_ok=True)
|
||||||
# later options take precedence
|
# later options take precedence
|
||||||
|
|||||||
@@ -24,6 +24,12 @@ from ..config import (
|
|||||||
from ..logging_util import TimedProgress
|
from ..logging_util import TimedProgress
|
||||||
|
|
||||||
|
|
||||||
|
def get_output_path():
|
||||||
|
return 'mercury/'
|
||||||
|
|
||||||
|
def get_embed_path(archiveresult=None):
|
||||||
|
return get_output_path() + 'content.html'
|
||||||
|
|
||||||
|
|
||||||
@enforce_types
|
@enforce_types
|
||||||
def ShellError(cmd: List[str], result: CompletedProcess, lines: int=20) -> ArchiveError:
|
def ShellError(cmd: List[str], result: CompletedProcess, lines: int=20) -> ArchiveError:
|
||||||
@@ -44,7 +50,7 @@ def should_save_mercury(link: Link, out_dir: Optional[str]=None, overwrite: Opti
|
|||||||
return False
|
return False
|
||||||
|
|
||||||
out_dir = out_dir or Path(link.link_dir)
|
out_dir = out_dir or Path(link.link_dir)
|
||||||
if not overwrite and (out_dir / 'mercury').exists():
|
if not overwrite and (out_dir / get_output_path()).exists():
|
||||||
return False
|
return False
|
||||||
|
|
||||||
return SAVE_MERCURY
|
return SAVE_MERCURY
|
||||||
@@ -55,8 +61,8 @@ def save_mercury(link: Link, out_dir: Optional[Path]=None, timeout: int=TIMEOUT)
|
|||||||
"""download reader friendly version using @postlight/mercury-parser"""
|
"""download reader friendly version using @postlight/mercury-parser"""
|
||||||
|
|
||||||
out_dir = Path(out_dir or link.link_dir)
|
out_dir = Path(out_dir or link.link_dir)
|
||||||
output_folder = out_dir.absolute() / "mercury"
|
output_folder = out_dir.absolute() / get_output_path()
|
||||||
output = "mercury"
|
output = get_output_path()
|
||||||
|
|
||||||
status = 'succeeded'
|
status = 'succeeded'
|
||||||
timer = TimedProgress(timeout, prefix=' ')
|
timer = TimedProgress(timeout, prefix=' ')
|
||||||
|
|||||||
@@ -19,13 +19,17 @@ from ..config import (
|
|||||||
from ..logging_util import TimedProgress
|
from ..logging_util import TimedProgress
|
||||||
|
|
||||||
|
|
||||||
|
def get_output_path():
|
||||||
|
return 'output.pdf'
|
||||||
|
|
||||||
|
|
||||||
@enforce_types
|
@enforce_types
|
||||||
def should_save_pdf(link: Link, out_dir: Optional[Path]=None, overwrite: Optional[bool]=False) -> bool:
|
def should_save_pdf(link: Link, out_dir: Optional[Path]=None, overwrite: Optional[bool]=False) -> bool:
|
||||||
if is_static_file(link.url):
|
if is_static_file(link.url):
|
||||||
return False
|
return False
|
||||||
|
|
||||||
out_dir = out_dir or Path(link.link_dir)
|
out_dir = out_dir or Path(link.link_dir)
|
||||||
if not overwrite and (out_dir / 'output.pdf').exists():
|
if not overwrite and (out_dir / get_output_path()).exists():
|
||||||
return False
|
return False
|
||||||
|
|
||||||
return SAVE_PDF
|
return SAVE_PDF
|
||||||
@@ -36,7 +40,7 @@ def save_pdf(link: Link, out_dir: Optional[Path]=None, timeout: int=TIMEOUT) ->
|
|||||||
"""print PDF of site to file using chrome --headless"""
|
"""print PDF of site to file using chrome --headless"""
|
||||||
|
|
||||||
out_dir = out_dir or Path(link.link_dir)
|
out_dir = out_dir or Path(link.link_dir)
|
||||||
output: ArchiveOutput = 'output.pdf'
|
output: ArchiveOutput = get_output_path()
|
||||||
cmd = [
|
cmd = [
|
||||||
*chrome_args(),
|
*chrome_args(),
|
||||||
'--print-to-pdf',
|
'--print-to-pdf',
|
||||||
@@ -51,7 +55,7 @@ def save_pdf(link: Link, out_dir: Optional[Path]=None, timeout: int=TIMEOUT) ->
|
|||||||
hints = (result.stderr or result.stdout).decode()
|
hints = (result.stderr or result.stdout).decode()
|
||||||
raise ArchiveError('Failed to save PDF', hints)
|
raise ArchiveError('Failed to save PDF', hints)
|
||||||
|
|
||||||
chmod_file('output.pdf', cwd=str(out_dir))
|
chmod_file(get_output_path(), cwd=str(out_dir))
|
||||||
except Exception as err:
|
except Exception as err:
|
||||||
status = 'failed'
|
status = 'failed'
|
||||||
output = err
|
output = err
|
||||||
|
|||||||
@@ -22,6 +22,12 @@ from ..config import (
|
|||||||
from ..logging_util import TimedProgress
|
from ..logging_util import TimedProgress
|
||||||
from .title import get_html
|
from .title import get_html
|
||||||
|
|
||||||
|
def get_output_path():
|
||||||
|
return 'readability/'
|
||||||
|
|
||||||
|
def get_embed_path(archiveresult=None):
|
||||||
|
return get_output_path() + 'content.html'
|
||||||
|
|
||||||
|
|
||||||
@enforce_types
|
@enforce_types
|
||||||
def should_save_readability(link: Link, out_dir: Optional[str]=None, overwrite: Optional[bool]=False) -> bool:
|
def should_save_readability(link: Link, out_dir: Optional[str]=None, overwrite: Optional[bool]=False) -> bool:
|
||||||
@@ -29,7 +35,7 @@ def should_save_readability(link: Link, out_dir: Optional[str]=None, overwrite:
|
|||||||
return False
|
return False
|
||||||
|
|
||||||
out_dir = out_dir or Path(link.link_dir)
|
out_dir = out_dir or Path(link.link_dir)
|
||||||
if not overwrite and (out_dir / 'readability').exists():
|
if not overwrite and (out_dir / get_output_path()).exists():
|
||||||
return False
|
return False
|
||||||
|
|
||||||
return SAVE_READABILITY
|
return SAVE_READABILITY
|
||||||
@@ -40,8 +46,8 @@ def save_readability(link: Link, out_dir: Optional[str]=None, timeout: int=TIMEO
|
|||||||
"""download reader friendly version using @mozilla/readability"""
|
"""download reader friendly version using @mozilla/readability"""
|
||||||
|
|
||||||
out_dir = Path(out_dir or link.link_dir)
|
out_dir = Path(out_dir or link.link_dir)
|
||||||
output_folder = out_dir.absolute() / "readability"
|
output_folder = out_dir.absolute() / get_output_path()
|
||||||
output = "readability"
|
output = get_output_path()
|
||||||
|
|
||||||
# Readability Docs: https://github.com/mozilla/readability
|
# Readability Docs: https://github.com/mozilla/readability
|
||||||
|
|
||||||
|
|||||||
@@ -19,6 +19,9 @@ from ..config import (
|
|||||||
from ..logging_util import TimedProgress
|
from ..logging_util import TimedProgress
|
||||||
|
|
||||||
|
|
||||||
|
def get_output_path():
|
||||||
|
return 'screenshot.png'
|
||||||
|
|
||||||
|
|
||||||
@enforce_types
|
@enforce_types
|
||||||
def should_save_screenshot(link: Link, out_dir: Optional[Path]=None, overwrite: Optional[bool]=False) -> bool:
|
def should_save_screenshot(link: Link, out_dir: Optional[Path]=None, overwrite: Optional[bool]=False) -> bool:
|
||||||
@@ -26,7 +29,7 @@ def should_save_screenshot(link: Link, out_dir: Optional[Path]=None, overwrite:
|
|||||||
return False
|
return False
|
||||||
|
|
||||||
out_dir = out_dir or Path(link.link_dir)
|
out_dir = out_dir or Path(link.link_dir)
|
||||||
if not overwrite and (out_dir / 'screenshot.png').exists():
|
if not overwrite and (out_dir / get_output_path()).exists():
|
||||||
return False
|
return False
|
||||||
|
|
||||||
return SAVE_SCREENSHOT
|
return SAVE_SCREENSHOT
|
||||||
@@ -36,7 +39,7 @@ def save_screenshot(link: Link, out_dir: Optional[Path]=None, timeout: int=TIMEO
|
|||||||
"""take screenshot of site using chrome --headless"""
|
"""take screenshot of site using chrome --headless"""
|
||||||
|
|
||||||
out_dir = out_dir or Path(link.link_dir)
|
out_dir = out_dir or Path(link.link_dir)
|
||||||
output: ArchiveOutput = 'screenshot.png'
|
output: ArchiveOutput = get_output_path()
|
||||||
cmd = [
|
cmd = [
|
||||||
*chrome_args(),
|
*chrome_args(),
|
||||||
'--screenshot',
|
'--screenshot',
|
||||||
|
|||||||
@@ -26,13 +26,17 @@ from ..config import (
|
|||||||
from ..logging_util import TimedProgress
|
from ..logging_util import TimedProgress
|
||||||
|
|
||||||
|
|
||||||
|
def get_output_path():
|
||||||
|
return 'singlefile.html'
|
||||||
|
|
||||||
|
|
||||||
@enforce_types
|
@enforce_types
|
||||||
def should_save_singlefile(link: Link, out_dir: Optional[Path]=None, overwrite: Optional[bool]=False) -> bool:
|
def should_save_singlefile(link: Link, out_dir: Optional[Path]=None, overwrite: Optional[bool]=False) -> bool:
|
||||||
if is_static_file(link.url):
|
if is_static_file(link.url):
|
||||||
return False
|
return False
|
||||||
|
|
||||||
out_dir = out_dir or Path(link.link_dir)
|
out_dir = out_dir or Path(link.link_dir)
|
||||||
if not overwrite and (out_dir / 'singlefile.html').exists():
|
if not overwrite and (out_dir / get_output_path()).exists():
|
||||||
return False
|
return False
|
||||||
|
|
||||||
return SAVE_SINGLEFILE
|
return SAVE_SINGLEFILE
|
||||||
@@ -43,7 +47,7 @@ def save_singlefile(link: Link, out_dir: Optional[Path]=None, timeout: int=TIMEO
|
|||||||
"""download full site using single-file"""
|
"""download full site using single-file"""
|
||||||
|
|
||||||
out_dir = out_dir or Path(link.link_dir)
|
out_dir = out_dir or Path(link.link_dir)
|
||||||
output = "singlefile.html"
|
output = get_output_path()
|
||||||
|
|
||||||
browser_args = chrome_args(CHROME_TIMEOUT=0)
|
browser_args = chrome_args(CHROME_TIMEOUT=0)
|
||||||
|
|
||||||
@@ -90,7 +94,8 @@ def save_singlefile(link: Link, out_dir: Optional[Path]=None, timeout: int=TIMEO
|
|||||||
status = 'failed'
|
status = 'failed'
|
||||||
# TODO: Make this prettier. This is necessary to run the command (escape JSON internal quotes).
|
# TODO: Make this prettier. This is necessary to run the command (escape JSON internal quotes).
|
||||||
cmd[2] = browser_args.replace('"', "\\\"")
|
cmd[2] = browser_args.replace('"', "\\\"")
|
||||||
err.hints = (result.stdout + result.stderr).decode().split('\n')
|
if result:
|
||||||
|
err.hints = (result.stdout + result.stderr).decode().split('\n')
|
||||||
output = err
|
output = err
|
||||||
finally:
|
finally:
|
||||||
timer.end()
|
timer.end()
|
||||||
|
|||||||
@@ -60,6 +60,7 @@ class TitleParser(HTMLParser):
|
|||||||
if tag.lower() == "title":
|
if tag.lower() == "title":
|
||||||
self.inside_title_tag = False
|
self.inside_title_tag = False
|
||||||
|
|
||||||
|
|
||||||
@enforce_types
|
@enforce_types
|
||||||
def get_html(link: Link, path: Path, timeout: int=TIMEOUT) -> str:
|
def get_html(link: Link, path: Path, timeout: int=TIMEOUT) -> str:
|
||||||
"""
|
"""
|
||||||
@@ -84,6 +85,13 @@ def get_html(link: Link, path: Path, timeout: int=TIMEOUT) -> str:
|
|||||||
else:
|
else:
|
||||||
return document
|
return document
|
||||||
|
|
||||||
|
|
||||||
|
def get_output_path():
|
||||||
|
# TODO: actually save title to this file
|
||||||
|
# (currently only saved in ArchiveResult.output as charfield value, not saved to filesystem)
|
||||||
|
return 'title.json'
|
||||||
|
|
||||||
|
|
||||||
@enforce_types
|
@enforce_types
|
||||||
def should_save_title(link: Link, out_dir: Optional[str]=None, overwrite: Optional[bool]=False) -> bool:
|
def should_save_title(link: Link, out_dir: Optional[str]=None, overwrite: Optional[bool]=False) -> bool:
|
||||||
# if link already has valid title, skip it
|
# if link already has valid title, skip it
|
||||||
|
|||||||
@@ -35,6 +35,18 @@ from ..config import (
|
|||||||
from ..logging_util import TimedProgress
|
from ..logging_util import TimedProgress
|
||||||
|
|
||||||
|
|
||||||
|
def get_output_path():
|
||||||
|
# TODO: actually save output into this folder, instead of do {domain}/**/index.html
|
||||||
|
return 'wget/'
|
||||||
|
|
||||||
|
def get_embed_path(archiveresult=None):
|
||||||
|
if not archiveresult:
|
||||||
|
return get_output_path()
|
||||||
|
|
||||||
|
link = archiveresult.snapshot.as_link()
|
||||||
|
return wget_output_path(link)
|
||||||
|
|
||||||
|
|
||||||
@enforce_types
|
@enforce_types
|
||||||
def should_save_wget(link: Link, out_dir: Optional[Path]=None, overwrite: Optional[bool]=False) -> bool:
|
def should_save_wget(link: Link, out_dir: Optional[Path]=None, overwrite: Optional[bool]=False) -> bool:
|
||||||
output_path = wget_output_path(link)
|
output_path = wget_output_path(link)
|
||||||
|
|||||||
@@ -118,10 +118,10 @@ def render_django_template(template: str, context: Mapping[str, str]) -> str:
|
|||||||
|
|
||||||
|
|
||||||
def snapshot_icons(snapshot) -> str:
|
def snapshot_icons(snapshot) -> str:
|
||||||
cache_key = f'{snapshot.id}-{(snapshot.updated or snapshot.added).timestamp()}-snapshot-icons'
|
cache_key = f'{snapshot.pk}-{(snapshot.updated or snapshot.added).timestamp()}-snapshot-icons'
|
||||||
|
|
||||||
def calc_snapshot_icons():
|
def calc_snapshot_icons():
|
||||||
from core.models import EXTRACTORS
|
from core.models import EXTRACTOR_CHOICES
|
||||||
# start = datetime.now(timezone.utc)
|
# start = datetime.now(timezone.utc)
|
||||||
|
|
||||||
archive_results = snapshot.archiveresult_set.filter(status="succeeded", output__isnull=False)
|
archive_results = snapshot.archiveresult_set.filter(status="succeeded", output__isnull=False)
|
||||||
@@ -147,12 +147,12 @@ def snapshot_icons(snapshot) -> str:
|
|||||||
# Missing specific entry for WARC
|
# Missing specific entry for WARC
|
||||||
|
|
||||||
extractor_outputs = defaultdict(lambda: None)
|
extractor_outputs = defaultdict(lambda: None)
|
||||||
for extractor, _ in EXTRACTORS:
|
for extractor, _ in EXTRACTOR_CHOICES:
|
||||||
for result in archive_results:
|
for result in archive_results:
|
||||||
if result.extractor == extractor and result:
|
if result.extractor == extractor and result:
|
||||||
extractor_outputs[extractor] = result
|
extractor_outputs[extractor] = result
|
||||||
|
|
||||||
for extractor, _ in EXTRACTORS:
|
for extractor, _ in EXTRACTOR_CHOICES:
|
||||||
if extractor not in exclude:
|
if extractor not in exclude:
|
||||||
existing = extractor_outputs[extractor] and extractor_outputs[extractor].status == 'succeeded' and extractor_outputs[extractor].output
|
existing = extractor_outputs[extractor] and extractor_outputs[extractor].status == 'succeeded' and extractor_outputs[extractor].output
|
||||||
# Check filesystsem to see if anything is actually present (too slow, needs optimization/caching)
|
# Check filesystsem to see if anything is actually present (too slow, needs optimization/caching)
|
||||||
|
|||||||
@@ -192,6 +192,9 @@ class Link:
|
|||||||
if extended:
|
if extended:
|
||||||
info.update({
|
info.update({
|
||||||
'snapshot_id': self.snapshot_id,
|
'snapshot_id': self.snapshot_id,
|
||||||
|
'snapshot_uuid': self.snapshot_uuid,
|
||||||
|
'snapshot_abid': self.snapshot_abid,
|
||||||
|
|
||||||
'link_dir': self.link_dir,
|
'link_dir': self.link_dir,
|
||||||
'archive_path': self.archive_path,
|
'archive_path': self.archive_path,
|
||||||
|
|
||||||
@@ -261,9 +264,21 @@ class Link:
|
|||||||
return to_csv(self, cols=cols or self.field_names(), separator=separator, ljust=ljust)
|
return to_csv(self, cols=cols or self.field_names(), separator=separator, ljust=ljust)
|
||||||
|
|
||||||
@cached_property
|
@cached_property
|
||||||
def snapshot_id(self):
|
def snapshot(self):
|
||||||
from core.models import Snapshot
|
from core.models import Snapshot
|
||||||
return str(Snapshot.objects.only('id').get(url=self.url).id)
|
return Snapshot.objects.only('uuid').get(url=self.url)
|
||||||
|
|
||||||
|
@cached_property
|
||||||
|
def snapshot_id(self):
|
||||||
|
return str(self.snapshot.pk)
|
||||||
|
|
||||||
|
@cached_property
|
||||||
|
def snapshot_uuid(self):
|
||||||
|
return str(self.snapshot.uuid)
|
||||||
|
|
||||||
|
@cached_property
|
||||||
|
def snapshot_abid(self):
|
||||||
|
return str(self.snapshot.ABID)
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def field_names(cls):
|
def field_names(cls):
|
||||||
|
|||||||
@@ -45,7 +45,8 @@ def write_link_to_sql_index(link: Link):
|
|||||||
info.pop('tags')
|
info.pop('tags')
|
||||||
|
|
||||||
try:
|
try:
|
||||||
info["timestamp"] = Snapshot.objects.get(url=link.url).timestamp
|
snapshot = Snapshot.objects.get(url=link.url)
|
||||||
|
info["timestamp"] = snapshot.timestamp
|
||||||
except Snapshot.DoesNotExist:
|
except Snapshot.DoesNotExist:
|
||||||
while Snapshot.objects.filter(timestamp=info["timestamp"]).exists():
|
while Snapshot.objects.filter(timestamp=info["timestamp"]).exists():
|
||||||
info["timestamp"] = str(float(info["timestamp"]) + 1.0)
|
info["timestamp"] = str(float(info["timestamp"]) + 1.0)
|
||||||
@@ -57,7 +58,7 @@ def write_link_to_sql_index(link: Link):
|
|||||||
for entry in entries:
|
for entry in entries:
|
||||||
if isinstance(entry, dict):
|
if isinstance(entry, dict):
|
||||||
result, _ = ArchiveResult.objects.get_or_create(
|
result, _ = ArchiveResult.objects.get_or_create(
|
||||||
snapshot_id=snapshot.id,
|
snapshot_id=snapshot.pk,
|
||||||
extractor=extractor,
|
extractor=extractor,
|
||||||
start_ts=parse_date(entry['start_ts']),
|
start_ts=parse_date(entry['start_ts']),
|
||||||
defaults={
|
defaults={
|
||||||
@@ -71,7 +72,7 @@ def write_link_to_sql_index(link: Link):
|
|||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
result, _ = ArchiveResult.objects.update_or_create(
|
result, _ = ArchiveResult.objects.update_or_create(
|
||||||
snapshot_id=snapshot.id,
|
snapshot_id=snapshot.pk,
|
||||||
extractor=extractor,
|
extractor=extractor,
|
||||||
start_ts=parse_date(entry.start_ts),
|
start_ts=parse_date(entry.start_ts),
|
||||||
defaults={
|
defaults={
|
||||||
|
|||||||
16
archivebox/monkey_patches.py
Normal file
16
archivebox/monkey_patches.py
Normal file
@@ -0,0 +1,16 @@
|
|||||||
|
__package__ = 'archivebox'
|
||||||
|
|
||||||
|
import django_stubs_ext
|
||||||
|
|
||||||
|
django_stubs_ext.monkeypatch()
|
||||||
|
|
||||||
|
|
||||||
|
# monkey patch django timezone to add back utc (it was removed in Django 5.0)
|
||||||
|
import datetime
|
||||||
|
from django.utils import timezone
|
||||||
|
timezone.utc = datetime.timezone.utc
|
||||||
|
|
||||||
|
|
||||||
|
# monkey patch django-signals-webhooks to change how it shows up in Admin UI
|
||||||
|
# from signal_webhooks.apps import DjangoSignalWebhooksConfig
|
||||||
|
# DjangoSignalWebhooksConfig.verbose_name = 'API'
|
||||||
17
archivebox/plugantic/__init__.py
Normal file
17
archivebox/plugantic/__init__.py
Normal file
@@ -0,0 +1,17 @@
|
|||||||
|
__package__ = 'archivebox.plugantic'
|
||||||
|
|
||||||
|
from .binproviders import BinProvider
|
||||||
|
from .binaries import Binary
|
||||||
|
from .extractors import Extractor
|
||||||
|
from .replayers import Replayer
|
||||||
|
from .configs import ConfigSet
|
||||||
|
from .plugins import Plugin
|
||||||
|
|
||||||
|
# __all__ = [
|
||||||
|
# 'BinProvider',
|
||||||
|
# 'Binary',
|
||||||
|
# 'Extractor',
|
||||||
|
# 'Replayer',
|
||||||
|
# 'ConfigSet',
|
||||||
|
# 'Plugin',
|
||||||
|
# ]
|
||||||
26
archivebox/plugantic/admin.py
Normal file
26
archivebox/plugantic/admin.py
Normal file
@@ -0,0 +1,26 @@
|
|||||||
|
# from django.contrib import admin
|
||||||
|
# from django import forms
|
||||||
|
|
||||||
|
# from django_jsonform.widgets import JSONFormWidget
|
||||||
|
|
||||||
|
# from django_pydantic_field.v2.fields import PydanticSchemaField
|
||||||
|
|
||||||
|
# from .models import CustomPlugin
|
||||||
|
|
||||||
|
|
||||||
|
# class PluginForm(forms.ModelForm):
|
||||||
|
# class Meta:
|
||||||
|
# model = CustomPlugin
|
||||||
|
# fields = '__all__'
|
||||||
|
# widgets = {
|
||||||
|
# 'items': JSONFormWidget(schema=PluginSchema),
|
||||||
|
# }
|
||||||
|
|
||||||
|
|
||||||
|
# class PluginAdmin(admin.ModelAdmin):
|
||||||
|
# formfield_overrides = {
|
||||||
|
# PydanticSchemaField: {"widget": JSONFormWidget},
|
||||||
|
# }
|
||||||
|
# form = PluginForm
|
||||||
|
|
||||||
|
|
||||||
6
archivebox/plugantic/apps.py
Normal file
6
archivebox/plugantic/apps.py
Normal file
@@ -0,0 +1,6 @@
|
|||||||
|
from django.apps import AppConfig
|
||||||
|
|
||||||
|
|
||||||
|
class PluganticConfig(AppConfig):
|
||||||
|
default_auto_field = 'django.db.models.BigAutoField'
|
||||||
|
name = 'plugantic'
|
||||||
323
archivebox/plugantic/binaries.py
Normal file
323
archivebox/plugantic/binaries.py
Normal file
@@ -0,0 +1,323 @@
|
|||||||
|
__package__ = 'archivebox.plugantic'
|
||||||
|
|
||||||
|
import sys
|
||||||
|
import inspect
|
||||||
|
import importlib
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
|
||||||
|
from typing import Any, Optional, Dict, List
|
||||||
|
from typing_extensions import Self
|
||||||
|
from subprocess import run, PIPE
|
||||||
|
|
||||||
|
|
||||||
|
from pydantic_core import ValidationError
|
||||||
|
|
||||||
|
from pydantic import BaseModel, Field, model_validator, computed_field, field_validator, validate_call, field_serializer
|
||||||
|
|
||||||
|
from .binproviders import (
|
||||||
|
SemVer,
|
||||||
|
BinName,
|
||||||
|
BinProviderName,
|
||||||
|
HostBinPath,
|
||||||
|
BinProvider,
|
||||||
|
EnvProvider,
|
||||||
|
AptProvider,
|
||||||
|
BrewProvider,
|
||||||
|
PipProvider,
|
||||||
|
ProviderLookupDict,
|
||||||
|
bin_name,
|
||||||
|
bin_abspath,
|
||||||
|
path_is_script,
|
||||||
|
path_is_executable,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class Binary(BaseModel):
|
||||||
|
name: BinName
|
||||||
|
description: str = Field(default='')
|
||||||
|
|
||||||
|
providers_supported: List[BinProvider] = Field(default=[EnvProvider()], alias='providers')
|
||||||
|
provider_overrides: Dict[BinProviderName, ProviderLookupDict] = Field(default={}, alias='overrides')
|
||||||
|
|
||||||
|
loaded_provider: Optional[BinProviderName] = Field(default=None, alias='provider')
|
||||||
|
loaded_abspath: Optional[HostBinPath] = Field(default=None, alias='abspath')
|
||||||
|
loaded_version: Optional[SemVer] = Field(default=None, alias='version')
|
||||||
|
|
||||||
|
# bin_filename: see below
|
||||||
|
# is_executable: see below
|
||||||
|
# is_script
|
||||||
|
# is_valid: see below
|
||||||
|
|
||||||
|
|
||||||
|
@model_validator(mode='after')
|
||||||
|
def validate(self):
|
||||||
|
self.loaded_abspath = bin_abspath(self.name) or self.name
|
||||||
|
self.description = self.description or self.name
|
||||||
|
|
||||||
|
assert self.providers_supported, f'No providers were given for package {self.name}'
|
||||||
|
|
||||||
|
# pull in any overrides from the binproviders
|
||||||
|
for provider in self.providers_supported:
|
||||||
|
overrides_by_provider = provider.get_providers_for_bin(self.name)
|
||||||
|
if overrides_by_provider:
|
||||||
|
self.provider_overrides[provider.name] = {
|
||||||
|
**overrides_by_provider,
|
||||||
|
**self.provider_overrides.get(provider.name, {}),
|
||||||
|
}
|
||||||
|
return self
|
||||||
|
|
||||||
|
@field_validator('loaded_abspath', mode='before')
|
||||||
|
def parse_abspath(cls, value: Any):
|
||||||
|
return bin_abspath(value)
|
||||||
|
|
||||||
|
@field_validator('loaded_version', mode='before')
|
||||||
|
def parse_version(cls, value: Any):
|
||||||
|
return value and SemVer(value)
|
||||||
|
|
||||||
|
@field_serializer('provider_overrides', when_used='json')
|
||||||
|
def serialize_overrides(self, provider_overrides: Dict[BinProviderName, ProviderLookupDict]) -> Dict[BinProviderName, Dict[str, str]]:
|
||||||
|
return {
|
||||||
|
provider_name: {
|
||||||
|
key: str(val)
|
||||||
|
for key, val in overrides.items()
|
||||||
|
}
|
||||||
|
for provider_name, overrides in provider_overrides.items()
|
||||||
|
}
|
||||||
|
|
||||||
|
@computed_field # type: ignore[misc] # see mypy issue #1362
|
||||||
|
@property
|
||||||
|
def bin_filename(self) -> BinName:
|
||||||
|
if self.is_script:
|
||||||
|
# e.g. '.../Python.framework/Versions/3.11/lib/python3.11/sqlite3/__init__.py' -> sqlite
|
||||||
|
name = self.name
|
||||||
|
elif self.loaded_abspath:
|
||||||
|
# e.g. '/opt/homebrew/bin/wget' -> wget
|
||||||
|
name = bin_name(self.loaded_abspath)
|
||||||
|
else:
|
||||||
|
# e.g. 'ytdlp' -> 'yt-dlp'
|
||||||
|
name = bin_name(self.name)
|
||||||
|
return name
|
||||||
|
|
||||||
|
@computed_field # type: ignore[misc] # see mypy issue #1362
|
||||||
|
@property
|
||||||
|
def is_executable(self) -> bool:
|
||||||
|
try:
|
||||||
|
assert self.loaded_abspath and path_is_executable(self.loaded_abspath)
|
||||||
|
return True
|
||||||
|
except (ValidationError, AssertionError):
|
||||||
|
return False
|
||||||
|
|
||||||
|
@computed_field # type: ignore[misc] # see mypy issue #1362
|
||||||
|
@property
|
||||||
|
def is_script(self) -> bool:
|
||||||
|
try:
|
||||||
|
assert self.loaded_abspath and path_is_script(self.loaded_abspath)
|
||||||
|
return True
|
||||||
|
except (ValidationError, AssertionError):
|
||||||
|
return False
|
||||||
|
|
||||||
|
@computed_field # type: ignore[misc] # see mypy issue #1362
|
||||||
|
@property
|
||||||
|
def is_valid(self) -> bool:
|
||||||
|
return bool(
|
||||||
|
self.name
|
||||||
|
and self.loaded_abspath
|
||||||
|
and self.loaded_version
|
||||||
|
and (self.is_executable or self.is_script)
|
||||||
|
)
|
||||||
|
|
||||||
|
@validate_call
def install(self) -> Self:
    """Try each supported provider in order until one installs this binary.

    Returns a copy of self with loaded_* fields filled in on success; raises the
    last provider error (or a generic Exception) if every provider fails.
    """
    if not self.providers_supported:
        return self

    last_err: Exception = Exception('No providers were able to install binary', self.name, self.providers_supported)
    for provider in self.providers_supported:
        try:
            installed_bin = provider.install(self.name, overrides=self.provider_overrides.get(provider.name))
            if installed_bin:
                # print('INSTALLED', self.name, installed_bin)
                return self.model_copy(update={
                    'loaded_provider': provider.name,
                    'loaded_abspath': installed_bin.abspath,
                    'loaded_version': installed_bin.version,
                })
        except Exception as err:
            print(err)
            last_err = err
    raise last_err
|
||||||
|
|
||||||
|
@validate_call
def load(self, cache=True) -> Self:
    """Resolve this binary's abspath/version via the first provider that can find it.

    Does not install anything. Returns self unchanged if already valid or if no
    providers are configured; raises the last provider error if all providers fail.
    """
    if self.is_valid:
        return self

    if not self.providers_supported:
        return self

    # BUGFIX: message previously said 'install' (copy-pasted from install()) even
    # though this method only loads
    exc = Exception('No providers were able to load binary', self.name, self.providers_supported)
    for provider in self.providers_supported:
        try:
            installed_bin = provider.load(self.name, cache=cache, overrides=self.provider_overrides.get(provider.name))
            if installed_bin:
                # print('LOADED', provider, self.name, installed_bin)
                return self.model_copy(update={
                    'loaded_provider': provider.name,
                    'loaded_abspath': installed_bin.abspath,
                    'loaded_version': installed_bin.version,
                })
        except Exception as err:
            print(err)
            exc = err
    raise exc
|
||||||
|
|
||||||
|
@validate_call
def load_or_install(self, cache=True) -> Self:
    """Resolve this binary by loading it if present, installing it otherwise.

    Tries each provider's load_or_install in order; returns self unchanged if
    already valid or if no providers are configured; raises the last error if all fail.
    """
    if self.is_valid:
        return self

    if not self.providers_supported:
        return self

    # BUGFIX: message previously said only 'install'; this path loads OR installs
    exc = Exception('No providers were able to load or install binary', self.name, self.providers_supported)
    for provider in self.providers_supported:
        try:
            installed_bin = provider.load_or_install(self.name, overrides=self.provider_overrides.get(provider.name), cache=cache)
            if installed_bin:
                # print('LOADED_OR_INSTALLED', self.name, installed_bin)
                return self.model_copy(update={
                    'loaded_provider': provider.name,
                    'loaded_abspath': installed_bin.abspath,
                    'loaded_version': installed_bin.version,
                })
        except Exception as err:
            print(err)
            exc = err
    raise exc
|
||||||
|
|
||||||
|
@validate_call
def exec(self, args=(), pwd='.'):
    """Run the resolved binary with the given args in directory `pwd`.

    Returns the subprocess.CompletedProcess with captured stdout/stderr.
    Raises AssertionError if the binary has not been loaded/installed yet.
    """
    assert self.loaded_abspath
    assert self.loaded_version
    # BUGFIX: subprocess.run takes cwd=, not pwd= (pwd= raised TypeError at call time)
    return run([self.loaded_abspath, *args], stdout=PIPE, stderr=PIPE, cwd=pwd)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class SystemPythonHelpers:
    """Provider callbacks describing the Python interpreter currently running."""

    @staticmethod
    def get_subdeps() -> str:
        # apt package names needed for a working system python
        return 'python3 python3-minimal python3-pip python3-virtualenv'

    @staticmethod
    def get_abspath() -> str:
        # path of the running interpreter binary
        return sys.executable

    @staticmethod
    def get_version() -> str:
        major, minor, patch = sys.version_info[:3]
        return f'{major}.{minor}.{patch}'
|
||||||
|
|
||||||
|
|
||||||
|
class SqliteHelpers:
    """Provider callbacks for the sqlite3 module bundled with the running interpreter."""

    @staticmethod
    def get_abspath() -> Path:
        import sqlite3
        importlib.reload(sqlite3)
        # module file path stands in for the 'binary' location of the bundled sqlite
        return Path(inspect.getfile(sqlite3))

    @staticmethod
    def get_version() -> SemVer:
        import sqlite3
        importlib.reload(sqlite3)
        # BUGFIX: sqlite3.version is the deprecated pysqlite *module* version ('2.6.0',
        # removed in Python 3.14); sqlite_version is the actual SQLite library version.
        version = sqlite3.sqlite_version
        assert version
        return SemVer(version)
|
||||||
|
|
||||||
|
class DjangoHelpers:
    """Provider callbacks for the installed django package."""

    @staticmethod
    def get_django_abspath() -> str:
        import django
        # file path of the django package itself
        return inspect.getfile(django)

    @staticmethod
    def get_django_version() -> str:
        import django
        # django.VERSION is a 5-tuple: (major, minor, micro, releaselevel, serial)
        major, minor, micro, releaselevel, serial = django.VERSION
        return f'{major}.{minor}.{micro} {releaselevel} ({serial})'
|
||||||
|
|
||||||
|
class YtdlpHelpers:
    """Provider callbacks for the yt-dlp downloader."""

    @staticmethod
    def get_ytdlp_subdeps() -> str:
        # ffmpeg is needed alongside yt-dlp for stream muxing
        return 'yt-dlp ffmpeg'

    @staticmethod
    def get_ytdlp_version() -> str:
        import yt_dlp
        importlib.reload(yt_dlp)

        detected = yt_dlp.version.__version__
        assert detected
        return detected
|
||||||
|
|
||||||
|
class PythonBinary(Binary):
    # Binary spec for the python interpreter, resolved via the host environment
    # using the SystemPythonHelpers callbacks (dotted import strings, resolved lazily)
    name: BinName = 'python'

    providers_supported: List[BinProvider] = [
        EnvProvider(
            subdeps_provider={'python': 'plugantic.binaries.SystemPythonHelpers.get_subdeps'},
            abspath_provider={'python': 'plugantic.binaries.SystemPythonHelpers.get_abspath'},
            version_provider={'python': 'plugantic.binaries.SystemPythonHelpers.get_version'},
        ),
    ]
|
||||||
|
|
||||||
|
class SqliteBinary(Binary):
    # Binary spec for the sqlite3 library bundled with the running interpreter
    name: BinName = 'sqlite'
    providers_supported: List[BinProvider] = [
        EnvProvider(
            version_provider={'sqlite': 'plugantic.binaries.SqliteHelpers.get_version'},
            abspath_provider={'sqlite': 'plugantic.binaries.SqliteHelpers.get_abspath'},
        ),
    ]
|
||||||
|
|
||||||
|
class DjangoBinary(Binary):
    # Binary spec for the installed django package
    name: BinName = 'django'
    providers_supported: List[BinProvider] = [
        EnvProvider(
            abspath_provider={'django': 'plugantic.binaries.DjangoHelpers.get_django_abspath'},
            version_provider={'django': 'plugantic.binaries.DjangoHelpers.get_django_version'},
        ),
    ]
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class YtdlpBinary(Binary):
    # Binary spec for yt-dlp, installable via pip or brew (apt support stubbed out)
    name: BinName = 'yt-dlp'
    providers_supported: List[BinProvider] = [
        # EnvProvider(),
        PipProvider(version_provider={'yt-dlp': 'plugantic.binaries.YtdlpHelpers.get_ytdlp_version'}),
        BrewProvider(subdeps_provider={'yt-dlp': 'plugantic.binaries.YtdlpHelpers.get_ytdlp_subdeps'}),
        # AptProvider(subdeps_provider={'yt-dlp': lambda: 'yt-dlp ffmpeg'}),
    ]
|
||||||
|
|
||||||
|
|
||||||
|
class WgetBinary(Binary):
    # Binary spec for wget: use whatever is on $PATH, fall back to apt install
    name: BinName = 'wget'
    providers_supported: List[BinProvider] = [EnvProvider(), AptProvider()]
|
||||||
|
|
||||||
|
|
||||||
|
# if __name__ == '__main__':
|
||||||
|
# PYTHON_BINARY = PythonBinary()
|
||||||
|
# SQLITE_BINARY = SqliteBinary()
|
||||||
|
# DJANGO_BINARY = DjangoBinary()
|
||||||
|
# WGET_BINARY = WgetBinary()
|
||||||
|
# YTDLP_BINARY = YtdlpBinary()
|
||||||
|
|
||||||
|
# print('-------------------------------------DEFINING BINARIES---------------------------------')
|
||||||
|
# print(PYTHON_BINARY)
|
||||||
|
# print(SQLITE_BINARY)
|
||||||
|
# print(DJANGO_BINARY)
|
||||||
|
# print(WGET_BINARY)
|
||||||
|
# print(YTDLP_BINARY)
|
||||||
561
archivebox/plugantic/binproviders.py
Normal file
561
archivebox/plugantic/binproviders.py
Normal file
@@ -0,0 +1,561 @@
|
|||||||
|
__package__ = 'archivebox.plugantic'
|
||||||
|
|
||||||
|
import os
|
||||||
|
import shutil
|
||||||
|
import operator
|
||||||
|
|
||||||
|
from typing import Callable, Any, Optional, Type, Dict, Annotated, ClassVar, Literal, cast, TYPE_CHECKING
|
||||||
|
from typing_extensions import Self
|
||||||
|
from abc import ABC, abstractmethod
|
||||||
|
from collections import namedtuple
|
||||||
|
from pathlib import Path
|
||||||
|
from subprocess import run, PIPE
|
||||||
|
|
||||||
|
from pydantic_core import core_schema, ValidationError
|
||||||
|
from pydantic import BaseModel, Field, TypeAdapter, AfterValidator, validate_call, GetCoreSchemaHandler
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
def func_takes_args_or_kwargs(lambda_func: Callable[..., Any]) -> bool:
    """returns True if a lambda func takes args/kwargs of any kind, otherwise false if it's pure/argless"""
    code = lambda_func.__code__
    accepts_positional = bool(code.co_argcount)
    accepts_star_args = bool(code.co_flags & 0x04)   # CO_VARARGS
    accepts_star_kwargs = bool(code.co_flags & 0x08)  # CO_VARKEYWORDS
    return accepts_positional or accepts_star_args or accepts_star_kwargs
|
||||||
|
|
||||||
|
|
||||||
|
def is_semver_str(semver: Any) -> bool:
    """True only for strings of exactly three dot-separated numeric chunks, e.g. '1.2.3'."""
    if not isinstance(semver, str):
        return False
    return semver.count('.') == 2 and semver.replace('.', '').isdigit()
|
||||||
|
|
||||||
|
def semver_to_str(semver: tuple[int, int, int] | str) -> str:
    """Normalize a (major, minor, patch) tuple/list or an 'x.y.z' string to 'x.y.z'.

    Raises ValueError for anything else.
    """
    if isinstance(semver, (list, tuple)):
        return '.'.join(str(chunk) for chunk in semver)
    if is_semver_str(semver):
        return semver
    # BUGFIX: pydantic_core.ValidationError cannot be constructed from a plain
    # message string (doing so raised TypeError, masking the real error); raise
    # a plain ValueError instead.
    raise ValueError('Tried to convert invalid SemVer: {}'.format(semver))
|
||||||
|
|
||||||
|
|
||||||
|
# (major, minor, patch) base tuple; all three components default to 0
SemVerTuple = namedtuple('SemVerTuple', ('major', 'minor', 'patch'), defaults=(0, 0, 0))
# input shapes SemVer.parse understands: version strings or sequences of chunks
SemVerParsableTypes = str | tuple[str | int, ...] | list[str | int]
|
||||||
|
|
||||||
|
class SemVer(SemVerTuple):
    """A (major, minor, patch) version tuple constructible from many input shapes.

    Accepts 'x.y.z...' strings, tuples/lists of chunks, or plain ints; entirely
    falsy input yields None instead of an instance (note: __new__ returning None
    is unusual -- `SemVer(...)` may evaluate to None, callers must check).
    The raw first line of the version-command output is kept on `full_text`.
    """
    major: int
    minor: int = 0
    patch: int = 0

    if TYPE_CHECKING:
        # set dynamically in __new__, declared here only for type checkers
        full_text: str | None = ''

    def __new__(cls, *args, full_text=None, **kwargs):
        # '1.1.1'
        if len(args) == 1 and is_semver_str(args[0]):
            result = SemVer.parse(args[0])

        # ('1', '2', '3')
        elif len(args) == 1 and isinstance(args[0], (tuple, list)):
            result = SemVer.parse(args[0])

        # (1, '2', None)
        elif not all(isinstance(arg, (int, type(None))) for arg in args):
            result = SemVer.parse(args)

        # (None)
        elif all(chunk in ('', 0, None) for chunk in (*args, *kwargs.values())):
            result = None

        # 1, 2, 3
        else:
            result = SemVerTuple.__new__(cls, *args, **kwargs)

        if result is not None:
            # add first line as extra hidden metadata so it can be logged without having to re-run version cmd
            # (attribute assignment works because this namedtuple subclass defines no __slots__)
            result.full_text = full_text or str(result)
        return result

    @classmethod
    def parse(cls, version_stdout: SemVerParsableTypes) -> Self | None:
        """
        parses a version tag string formatted like the examples below into (major, minor, patch) ints
        'Google Chrome 124.0.6367.208'                         -> (124, 0, 6367)
        'GNU Wget 1.24.5 built on darwin23.2.0.'               -> (1, 24, 5)
        'curl 8.4.0 (x86_64-apple-darwin23.0) ...'             -> (8, 4, 0)
        '2024.04.09'                                           -> (2024, 4, 9)
        Returns None when no version-looking column can be found.
        """
        # print('INITIAL_VALUE', type(version_stdout).__name__, version_stdout)

        # normalize any accepted input shape down to a plain str
        if isinstance(version_stdout, (tuple, list)):
            version_stdout = '.'.join(str(chunk) for chunk in version_stdout)
        elif isinstance(version_stdout, bytes):
            version_stdout = version_stdout.decode()
        elif not isinstance(version_stdout, str):
            version_stdout = str(version_stdout)

        # no text to work with, return None immediately
        if not version_stdout.strip():
            # raise Exception('Tried to parse semver from empty version output (is binary installed and available?)')
            return None

        # strip leading 'v' and trailing +build/-prerelease/_suffix metadata from a column
        just_numbers = lambda col: col.lower().strip('v').split('+')[0].split('-')[0].split('_')[0]
        contains_semver = lambda col: (
            col.count('.') in (1, 2, 3)
            and all(chunk.isdigit() for chunk in col.split('.')[:3])  # first 3 chunks can only be nums
        )

        full_text = version_stdout.split('\n')[0].strip()
        first_line_columns = full_text.split()[:4]
        version_columns = list(filter(contains_semver, map(just_numbers, first_line_columns)))

        # could not find any column of first line that looks like a version number, despite there being some text
        if not version_columns:
            # raise Exception('Failed to parse semver from version command output: {}'.format(' '.join(first_line_columns)))
            return None

        # take first col containing a semver, and truncate it to 3 chunks (e.g. 2024.04.09.91) -> (2024, 04, 09)
        first_version_tuple = version_columns[0].split('.', 3)[:3]

        # print('FINAL_VALUE', first_version_tuple)

        return cls(*(int(chunk) for chunk in first_version_tuple), full_text=full_text)

    def __str__(self):
        # canonical dotted form, e.g. '1.24.5'
        return '.'.join(str(chunk) for chunk in self)

    # @classmethod
    # def __get_pydantic_core_schema__(cls, source: Type[Any], handler: GetCoreSchemaHandler) -> core_schema.CoreSchema:
    #     default_schema = handler(source)
    #     return core_schema.no_info_after_validator_function(
    #         cls.parse,
    #         default_schema,
    #         serialization=core_schema.plain_serializer_function_ser_schema(
    #             lambda semver: str(semver),
    #             info_arg=False,
    #             return_schema=core_schema.str_schema(),
    #         ),
    #     )
|
||||||
|
|
||||||
|
# Inline smoke tests for SemVer construction & parsing, executed once at import time.
# NOTE(review): `assert` statements are stripped under `python -O`; consider moving
# these into a proper test suite if -O deployments are expected.
assert SemVer(None) == None
assert SemVer('') == None
assert SemVer.parse('') == None
assert SemVer(1) == (1, 0, 0)
assert SemVer(1, 2) == (1, 2, 0)
assert SemVer('1.2+234234') == (1, 2, 0)
assert SemVer((1, 2, 3)) == (1, 2, 3)
assert getattr(SemVer((1, 2, 3)), 'full_text') == '1.2.3'
assert SemVer(('1', '2', '3')) == (1, 2, 3)
assert SemVer.parse('5.6.7') == (5, 6, 7)
assert SemVer.parse('124.0.6367.208') == (124, 0, 6367)
assert SemVer.parse('Google Chrome 124.1+234.234') == (124, 1, 0)
assert SemVer.parse('Google Ch1rome 124.0.6367.208') == (124, 0, 6367)
assert SemVer.parse('Google Chrome 124.0.6367.208+beta_234. 234.234.123\n123.456.324') == (124, 0, 6367)
assert getattr(SemVer.parse('Google Chrome 124.0.6367.208+beta_234. 234.234.123\n123.456.324'), 'full_text') == 'Google Chrome 124.0.6367.208+beta_234. 234.234.123'
assert SemVer.parse('Google Chrome') == None
|
||||||
|
|
||||||
|
@validate_call
def bin_name(bin_path_or_name: str | Path) -> str:
    """Extract and validate the bare binary name from a path or name string."""
    name = Path(bin_path_or_name).name
    assert len(name) > 1
    stripped = name.replace('-', '').replace('_', '').replace('.', '')
    assert stripped.isalnum(), (
        f'Binary name can only contain a-Z0-9-_.: {name}')
    return name

# a str that has passed bin_name validation
BinName = Annotated[str, AfterValidator(bin_name)]
|
||||||
|
|
||||||
|
@validate_call
def path_is_file(path: Path | str) -> Path:
    """Coerce to Path and assert it points at an existing regular file."""
    path = path if isinstance(path, Path) else Path(path)
    assert path.is_file(), f'Path is not a file: {path}'
    return path

# a Path that has been verified to exist as a regular file
HostExistsPath = Annotated[Path, AfterValidator(path_is_file)]
|
||||||
|
|
||||||
|
@validate_call
def path_is_executable(path: HostExistsPath) -> HostExistsPath:
    """Assert the given existing file has the executable bit set for the current user."""
    assert os.access(path, os.X_OK), f'Path is not executable (fix by running chmod +x {path})'
    return path
|
||||||
|
|
||||||
|
@validate_call
def path_is_script(path: HostExistsPath) -> HostExistsPath:
    """Assert the given existing file looks like an interpreted script, judged by extension."""
    SCRIPT_EXTENSIONS = ('.py', '.js', '.sh')
    assert path.suffix.lower() in SCRIPT_EXTENSIONS, 'Path is not a script (does not end in {})'.format(', '.join(SCRIPT_EXTENSIONS))
    return path

# an existing file path that also has the executable bit set
HostExecutablePath = Annotated[HostExistsPath, AfterValidator(path_is_executable)]
|
||||||
|
|
||||||
|
@validate_call
def path_is_abspath(path: Path) -> Path:
    """Resolve a path to its canonical absolute form (symlinks and '..' removed)."""
    return path.resolve()

# an existing file path, absolutized
HostAbsPath = Annotated[HostExistsPath, AfterValidator(path_is_abspath)]
# a path that is absolutized first, then required to point at an existing file
HostBinPath = Annotated[Path, AfterValidator(path_is_abspath), AfterValidator(path_is_file)]
|
||||||
|
|
||||||
|
|
||||||
|
@validate_call
def bin_abspath(bin_path_or_name: BinName | Path) -> HostBinPath | None:
    """Resolve a binary name or path to a validated absolute file path, or None if not found."""
    assert bin_path_or_name

    if str(bin_path_or_name).startswith('/'):
        # already an absolute-style path: just canonicalize it
        candidate = Path(bin_path_or_name).resolve()
    else:
        # bare name: look it up on $PATH
        found = shutil.which(bin_path_or_name)
        if not found:
            return None
        candidate = Path(found).resolve()

    try:
        return TypeAdapter(HostBinPath).validate_python(candidate)
    except ValidationError:
        return None
|
||||||
|
|
||||||
|
|
||||||
|
@validate_call
def bin_version(bin_path: HostBinPath, args=('--version',)) -> SemVer | None:
    """Run `<bin> --version` and parse a SemVer from the first line of its stdout."""
    # NOTE(review): stderr is not captured -- tools that print their version to
    # stderr will parse as None; confirm that is acceptable for all binaries used.
    return SemVer(run([bin_path, *args], stdout=PIPE).stdout.strip().decode())
|
||||||
|
|
||||||
|
|
||||||
|
class InstalledBin(BaseModel):
    """Result of a successful provider load/install: where the binary lives + its version."""
    abspath: HostBinPath
    version: SemVer
|
||||||
|
|
||||||
|
|
||||||
|
def is_valid_install_string(pkgs_str: str) -> str:
    """Make sure a string is a valid install string for a package manager, e.g. 'yt-dlp ffmpeg'"""
    assert pkgs_str
    for pkg in pkgs_str.split(' '):
        # every space-separated package token must be at least 2 chars
        assert len(pkg) > 1
    return pkgs_str
|
||||||
|
|
||||||
|
def is_valid_python_dotted_import(import_str: str) -> str:
    """Validate a dotted import path like 'pkg.module.attr' (alnum, dots, underscores only)."""
    cleaned = import_str.replace('.', '').replace('_', '') if import_str else ''
    assert import_str and cleaned.isalnum()
    return import_str
|
||||||
|
|
||||||
|
# space-separated package list accepted by a package manager, e.g. 'yt-dlp ffmpeg'
InstallStr = Annotated[str, AfterValidator(is_valid_install_string)]

# dotted python import path, e.g. 'plugantic.binaries.YtdlpHelpers.get_ytdlp_version'
LazyImportStr = Annotated[str, AfterValidator(is_valid_python_dotted_import)]

ProviderHandler = Callable[..., Any] | Callable[[], Any]        # must take no args [], or [bin_name: str, **kwargs]
#ProviderHandlerStr = Annotated[str, AfterValidator(lambda s: s.startswith('self.'))]
# a handler may be given as an actual callable or as a lazy dotted-import string
ProviderHandlerRef = LazyImportStr | ProviderHandler
# maps a bin name (or '*' wildcard) to the handler ref to use for it
ProviderLookupDict = Dict[str, LazyImportStr]
# the four actions a BinProvider can perform for a binary
ProviderType = Literal['abspath', 'version', 'subdeps', 'install']
|
||||||
|
|
||||||
|
|
||||||
|
# class Host(BaseModel):
|
||||||
|
# machine: str
|
||||||
|
# system: str
|
||||||
|
# platform: str
|
||||||
|
# in_docker: bool
|
||||||
|
# in_qemu: bool
|
||||||
|
# python: str
|
||||||
|
|
||||||
|
BinProviderName = Literal['env', 'pip', 'apt', 'brew', 'npm', 'vendor']
|
||||||
|
|
||||||
|
|
||||||
|
class BinProvider(ABC, BaseModel):
    """Abstract base for a package-manager backend that can locate/install binaries.

    Concrete subclasses must implement on_install (and may override the other
    on_* hooks); per-binary behavior is customized via the *_provider lookup
    dicts mapping a bin name (or '*' wildcard) to a handler ref.
    """
    name: BinProviderName

    # per-binary handler overrides; the '*' entry is the fallback for any binary
    abspath_provider: ProviderLookupDict = Field(default={'*': 'self.on_get_abspath'}, exclude=True)
    version_provider: ProviderLookupDict = Field(default={'*': 'self.on_get_version'}, exclude=True)
    subdeps_provider: ProviderLookupDict = Field(default={'*': 'self.on_get_subdeps'}, exclude=True)
    install_provider: ProviderLookupDict = Field(default={'*': 'self.on_install'}, exclude=True)

    # NOTE(review): these ClassVar caches are shared across ALL provider instances
    # and subclasses, keyed only by bin_name -- confirm cross-provider collisions
    # (e.g. pip vs apt resolving the same name) are acceptable.
    _abspath_cache: ClassVar = {}
    _version_cache: ClassVar = {}
    _install_cache: ClassVar = {}
|
||||||
|
|
||||||
|
# def provider_version(self) -> SemVer | None:
|
||||||
|
# """Version of the actual underlying package manager (e.g. pip v20.4.1)"""
|
||||||
|
# if self.name in ('env', 'vendor'):
|
||||||
|
# return SemVer('0.0.0')
|
||||||
|
# installer_binpath = Path(shutil.which(self.name)).resolve()
|
||||||
|
# return bin_version(installer_binpath)
|
||||||
|
|
||||||
|
# def provider_host(self) -> Host:
|
||||||
|
# """Information about the host env, archictecture, and OS needed to select & build packages"""
|
||||||
|
# p = platform.uname()
|
||||||
|
# return Host(
|
||||||
|
# machine=p.machine,
|
||||||
|
# system=p.system,
|
||||||
|
# platform=platform.platform(),
|
||||||
|
# python=sys.implementation.name,
|
||||||
|
# in_docker=os.environ.get('IN_DOCKER', '').lower() == 'true',
|
||||||
|
# in_qemu=os.environ.get('IN_QEMU', '').lower() == 'true',
|
||||||
|
# )
|
||||||
|
|
||||||
|
def get_default_providers(self):
    """Return the '*' wildcard handler refs registered as fallbacks for all binaries."""
    return self.get_providers_for_bin('*')
|
||||||
|
|
||||||
|
def resolve_provider_func(self, provider_func: ProviderHandlerRef | None) -> ProviderHandler | None:
    """Resolve a provider handler ref into a real callable.

    Accepts None (passed through), a 'self.method_name' string (bound method on
    this instance), a dotted import string like 'pkg.module.Class.attr', or an
    already-callable handler. Raises AssertionError if the result is not callable.
    """
    if provider_func is None:
        return None

    # if provider_func is a dotted path to a function on self, swap it for the actual function
    if isinstance(provider_func, str) and provider_func.startswith('self.'):
        provider_func = getattr(self, provider_func.split('self.', 1)[-1])

    # if provider_func is a dot-formatted import string, import the function
    if isinstance(provider_func, str):
        from django.utils.module_loading import import_string

        package_name, module_name, classname, path = provider_func.split('.', 3)   # -> abc, def, ghi.jkl

        # get .ghi.jkl nested attr present on module abc.def
        imported_module = import_string(f'{package_name}.{module_name}.{classname}')
        provider_func = operator.attrgetter(path)(imported_module)

    # BUGFIX: the old assertion message interpolated the module-level bin_name()
    # function object (no local bin_name exists here), producing a misleading error
    assert TypeAdapter(ProviderHandler).validate_python(provider_func), (
        f'{self.__class__.__name__} provider func must be a function or dotted-import path: {provider_func}')

    return provider_func
|
||||||
|
|
||||||
|
@validate_call
def get_providers_for_bin(self, bin_name: str) -> ProviderLookupDict:
    """Collect the handler refs registered for a specific binary, omitting unset actions."""
    registries = {
        'abspath': self.abspath_provider,
        'version': self.version_provider,
        'subdeps': self.subdeps_provider,
        'install': self.install_provider,
    }
    return {
        provider_type: registry.get(bin_name)
        for provider_type, registry in registries.items()
        if registry.get(bin_name) is not None
    }
|
||||||
|
|
||||||
|
@validate_call
def get_provider_for_action(self, bin_name: BinName, provider_type: ProviderType, default_provider: Optional[ProviderHandlerRef]=None, overrides: Optional[ProviderLookupDict]=None) -> ProviderHandler:
    """
    Get the provider func for a given key + Dict of provider callbacks + fallback default provider.
    e.g. get_provider_for_action(bin_name='yt-dlp', 'install', default_provider=self.on_install, ...) -> Callable

    Precedence: explicit overrides > per-binary registration > '*' wildcard > default_provider.
    Raises AssertionError if no handler can be resolved at all.
    """

    provider_func_ref = (
        (overrides or {}).get(provider_type)
        or self.get_providers_for_bin(bin_name).get(provider_type)
        or self.get_default_providers().get(provider_type)
        or default_provider
    )
    # print('getting provider for action', bin_name, provider_type, provider_func)

    # turn the ref (callable / 'self.x' string / dotted import string) into a callable
    provider_func = self.resolve_provider_func(provider_func_ref)

    assert provider_func, f'No {self.name} provider func was found for {bin_name} in: {self.__class__.__name__}.'

    return provider_func
|
||||||
|
|
||||||
|
@validate_call
def call_provider_for_action(self, bin_name: BinName, provider_type: ProviderType, default_provider: Optional[ProviderHandlerRef]=None, overrides: Optional[ProviderLookupDict]=None, **kwargs) -> Any:
    """Look up the handler for (bin_name, provider_type) and invoke it with **kwargs."""
    provider_func: ProviderHandler = self.get_provider_for_action(
        bin_name=bin_name,
        provider_type=provider_type,
        default_provider=default_provider,
        overrides=overrides,
    )
    if not func_takes_args_or_kwargs(provider_func):
        # if it's a pure argless lambdas, dont pass bin_path and other **kwargs
        provider_func_without_args = cast(Callable[[], Any], provider_func)
        return provider_func_without_args()

    provider_func = cast(Callable[..., Any], provider_func)
    return provider_func(bin_name, **kwargs)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
def on_get_abspath(self, bin_name: BinName, **_) -> HostBinPath | None:
    """Default abspath handler: search the host $PATH for bin_name; None if not found."""
    print(f'[*] {self.__class__.__name__}: Getting abspath for {bin_name}...')
    try:
        return bin_abspath(bin_name)
    except ValidationError:
        # bin_name failed validation or the found path was not a valid file
        return None
|
||||||
|
|
||||||
|
def on_get_version(self, bin_name: BinName, abspath: Optional[HostBinPath]=None, **_) -> SemVer | None:
    """Default version handler: run the binary with --version and parse a SemVer."""
    # resolve abspath if not supplied: cached value first, then a fresh lookup
    abspath = abspath or self._abspath_cache.get(bin_name) or self.get_abspath(bin_name)
    if not abspath: return None

    print(f'[*] {self.__class__.__name__}: Getting version for {bin_name}...')
    try:
        return bin_version(abspath)
    except ValidationError:
        return None
|
||||||
|
|
||||||
|
def on_get_subdeps(self, bin_name: BinName, **_) -> InstallStr:
    """Default subdeps handler: the binary's own name is the only package to install."""
    print(f'[*] {self.__class__.__name__}: Getting subdependencies for {bin_name}')
    # ... subdependency calculation logic here
    return TypeAdapter(InstallStr).validate_python(bin_name)
|
||||||
|
|
||||||
|
@abstractmethod
def on_install(self, bin_name: BinName, subdeps: Optional[InstallStr]=None, **_):
    """Install hook each concrete provider must implement; this base body is a stub."""
    subdeps = subdeps or self.get_subdeps(bin_name)
    print(f'[*] {self.__class__.__name__}: Installing subdependencies for {bin_name} ({subdeps})')
    # ... install logic here
    assert True
|
||||||
|
|
||||||
|
|
||||||
|
@validate_call
def get_abspath(self, bin_name: BinName, overrides: Optional[ProviderLookupDict]=None) -> HostBinPath | None:
    """Resolve bin_name to a validated absolute path via the configured abspath handler."""
    raw_abspath = self.call_provider_for_action(
        bin_name=bin_name,
        provider_type='abspath',
        default_provider=self.on_get_abspath,
        overrides=overrides,
    )
    if not raw_abspath:
        return None
    validated = TypeAdapter(HostBinPath).validate_python(raw_abspath)
    self._abspath_cache[bin_name] = validated
    return validated
|
||||||
|
|
||||||
|
@validate_call
def get_version(self, bin_name: BinName, abspath: Optional[HostBinPath]=None, overrides: Optional[ProviderLookupDict]=None) -> SemVer | None:
    """Detect the installed version of bin_name via the configured version handler."""
    raw_version = self.call_provider_for_action(
        bin_name=bin_name,
        provider_type='version',
        default_provider=self.on_get_version,
        overrides=overrides,
        abspath=abspath,
    )
    if not raw_version:
        return None
    parsed = SemVer(raw_version)
    self._version_cache[bin_name] = parsed
    return parsed
|
||||||
|
|
||||||
|
@validate_call
def get_subdeps(self, bin_name: BinName, overrides: Optional[ProviderLookupDict]=None) -> InstallStr:
    """Compute the package-manager install string for bin_name (defaults to bin_name itself)."""
    raw_subdeps = self.call_provider_for_action(
        bin_name=bin_name,
        provider_type='subdeps',
        default_provider=self.on_get_subdeps,
        overrides=overrides,
    ) or bin_name
    return TypeAdapter(InstallStr).validate_python(raw_subdeps)
|
||||||
|
|
||||||
|
@validate_call
def install(self, bin_name: BinName, overrides: Optional[ProviderLookupDict]=None) -> InstalledBin | None:
    """Install bin_name (and its subdeps) via this provider, then verify and cache it.

    Raises AssertionError if the binary cannot be located or versioned after install.
    """
    subdeps = self.get_subdeps(bin_name, overrides=overrides)

    self.call_provider_for_action(
        bin_name=bin_name,
        provider_type='install',
        default_provider=self.on_install,
        overrides=overrides,
        subdeps=subdeps,
    )

    # verify the install actually produced a findable, versioned binary
    installed_abspath = self.get_abspath(bin_name)
    assert installed_abspath, f'Unable to find {bin_name} abspath after installing with {self.name}'

    installed_version = self.get_version(bin_name, abspath=installed_abspath)
    assert installed_version, f'Unable to find {bin_name} version after installing with {self.name}'

    result = InstalledBin(abspath=installed_abspath, version=installed_version)
    self._install_cache[bin_name] = result
    return result
|
||||||
|
|
||||||
|
@validate_call
def load(self, bin_name: BinName, overrides: Optional[ProviderLookupDict]=None, cache: bool=False) -> InstalledBin | None:
    """Locate an already-installed bin_name without installing anything.

    With cache=True, previously resolved install/abspath/version results are reused.
    Returns None if the binary cannot be found or versioned.
    """
    installed_abspath = None
    installed_version = None

    if cache:
        installed_bin = self._install_cache.get(bin_name)
        if installed_bin:
            return installed_bin
        installed_abspath = self._abspath_cache.get(bin_name)
        installed_version = self._version_cache.get(bin_name)


    installed_abspath = installed_abspath or self.get_abspath(bin_name, overrides=overrides)
    if not installed_abspath:
        return None

    installed_version = installed_version or self.get_version(bin_name, abspath=installed_abspath, overrides=overrides)
    if not installed_version:
        return None

    return InstalledBin(abspath=installed_abspath, version=installed_version)
|
||||||
|
|
||||||
|
@validate_call
def load_or_install(self, bin_name: BinName, overrides: Optional[ProviderLookupDict]=None, cache: bool=True) -> InstalledBin | None:
    """Try loading an existing binary first; fall back to installing it."""
    loaded = self.load(bin_name, overrides=overrides, cache=cache)
    return loaded or self.install(bin_name, overrides=overrides)
|
||||||
|
|
||||||
|
|
||||||
|
class PipProvider(BinProvider):
    """Installs python packages with `pip install --upgrade`."""
    name: BinProviderName = 'pip'

    def on_install(self, bin_name: str, subdeps: Optional[InstallStr]=None, **_):
        """Install bin_name's packages via pip; raises on a nonzero pip exit code."""
        subdeps = subdeps or self.on_get_subdeps(bin_name)
        print(f'[*] {self.__class__.__name__}: Installing subdependencies for {bin_name} ({subdeps})')

        proc = run(['pip', 'install', '--upgrade', *subdeps.split(' ')], stdout=PIPE, stderr=PIPE)

        if proc.returncode != 0:
            print(proc.stdout.strip().decode())
            print(proc.stderr.strip().decode())
            # BUGFIX: the message interpolated {subdeps} twice; report bin_name + subdeps
            raise Exception(f'{self.__class__.__name__}: install got returncode {proc.returncode} while installing {bin_name}: {subdeps}')
|
||||||
|
|
||||||
|
|
||||||
|
class AptProvider(BinProvider):
    """Installs system packages with `apt-get install` (refreshes the index first)."""
    name: BinProviderName = 'apt'

    subdeps_provider: ProviderLookupDict = {
        'yt-dlp': lambda: 'yt-dlp ffmpeg',
    }

    def on_install(self, bin_name: BinName, subdeps: Optional[InstallStr]=None, **_):
        """Install bin_name's packages via apt-get; raises on a nonzero exit code."""
        subdeps = subdeps or self.on_get_subdeps(bin_name)
        print(f'[*] {self.__class__.__name__}: Installing subdependencies for {bin_name} ({subdeps})')

        run(['apt-get', 'update', '-qq'])
        proc = run(['apt-get', 'install', '-y', *subdeps.split(' ')], stdout=PIPE, stderr=PIPE)

        if proc.returncode != 0:
            print(proc.stdout.strip().decode())
            print(proc.stderr.strip().decode())
            # BUGFIX: the message interpolated {subdeps} twice; report bin_name + subdeps
            raise Exception(f'{self.__class__.__name__} install got returncode {proc.returncode} while installing {bin_name}: {subdeps}')
|
||||||
|
|
||||||
|
class BrewProvider(BinProvider):
    """Installs packages with Homebrew's `brew install`."""
    name: BinProviderName = 'brew'

    def on_install(self, bin_name: str, subdeps: Optional[InstallStr]=None, **_):
        """Install bin_name's packages via brew; raises on a nonzero exit code."""
        subdeps = subdeps or self.on_get_subdeps(bin_name)
        print(f'[*] {self.__class__.__name__}: Installing subdependencies for {bin_name} ({subdeps})')

        proc = run(['brew', 'install', *subdeps.split(' ')], stdout=PIPE, stderr=PIPE)

        if proc.returncode != 0:
            print(proc.stdout.strip().decode())
            print(proc.stderr.strip().decode())
            # BUGFIX: the message interpolated {subdeps} twice; report bin_name + subdeps
            raise Exception(f'{self.__class__.__name__} install got returncode {proc.returncode} while installing {bin_name}: {subdeps}')
|
||||||
|
|
||||||
|
|
||||||
|
class EnvProvider(BinProvider):
    """Read-only provider that just discovers binaries already present on the environment $PATH."""

    name: BinProviderName = 'env'

    # optional hardcoded abspath overrides for specific binaries, e.g.:
    abspath_provider: ProviderLookupDict = {
        # 'python': lambda: Path('/opt/homebrew/Cellar/python@3.10/3.10.14/Frameworks/Python.framework/Versions/3.10/bin/python3.10'),
    }

    # optional hardcoded version overrides for specific binaries, e.g.:
    version_provider: ProviderLookupDict = {
        # 'python': lambda: '{}.{}.{}'.format(*sys.version_info[:3]),
    }

    def on_install(self, bin_name: BinName, subdeps: Optional[InstallStr]=None, **_):
        """The env provider is read-only and does not install any packages, so this is a no-op"""
|
||||||
53
archivebox/plugantic/configs.py
Normal file
53
archivebox/plugantic/configs.py
Normal file
@@ -0,0 +1,53 @@
|
|||||||
|
__package__ = 'archivebox.plugantic'
|
||||||
|
|
||||||
|
|
||||||
|
from typing import Optional, List, Literal
|
||||||
|
from pathlib import Path
|
||||||
|
from pydantic import BaseModel, Field
|
||||||
|
|
||||||
|
|
||||||
|
ConfigSectionName = Literal['GENERAL_CONFIG', 'ARCHIVE_METHOD_TOGGLES', 'ARCHIVE_METHOD_OPTIONS', 'DEPENDENCY_CONFIG']
|
||||||
|
|
||||||
|
|
||||||
|
class ConfigSet(BaseModel):
    """Base class for a named group of config values, tagged with the config section it lives in."""
    section: ConfigSectionName = 'GENERAL_CONFIG'
||||||
|
class WgetToggleConfig(ConfigSet):
    """On/off switches controlling whether the wget extractor (and its WARC output) runs."""
    section: ConfigSectionName = 'ARCHIVE_METHOD_TOGGLES'

    SAVE_WGET: bool = True   # run the wget extractor at all
    SAVE_WARC: bool = True   # also write WARC output while running wget
|
||||||
|
class WgetDependencyConfig(ConfigSet):
    """Binary name and argv configuration for the wget dependency."""
    section: ConfigSectionName = 'DEPENDENCY_CONFIG'

    WGET_BINARY: str = Field(default='wget')
    WGET_ARGS: Optional[List[str]] = Field(default=None)   # full argv override; None = use defaults + extras
    WGET_EXTRA_ARGS: List[str] = []
    WGET_DEFAULT_ARGS: List[str] = ['--timeout={TIMEOUT-10}']
|
||||||
|
class WgetOptionsConfig(ConfigSet):
    """Behavior options for the wget archive method (aliased fields mirror the shared global config keys)."""
    section: ConfigSectionName = 'ARCHIVE_METHOD_OPTIONS'

    # loaded from shared config
    WGET_AUTO_COMPRESSION: bool = Field(default=True)
    SAVE_WGET_REQUISITES: bool = Field(default=True)
    WGET_USER_AGENT: str = Field(default='', alias='USER_AGENT')
    WGET_TIMEOUT: int = Field(default=60, alias='TIMEOUT')
    WGET_CHECK_SSL_VALIDITY: bool = Field(default=True, alias='CHECK_SSL_VALIDITY')
    WGET_RESTRICT_FILE_NAMES: str = Field(default='windows', alias='RESTRICT_FILE_NAMES')
    WGET_COOKIES_FILE: Optional[Path] = Field(default=None, alias='COOKIES_FILE')
|
|
||||||
|
# example override values, shared by all three wget config sets below
CONFIG = {
    'CHECK_SSL_VALIDITY': False,
    'SAVE_WARC': False,
    'TIMEOUT': 999,
}

# every wget-related config set, instantiated with the shared overrides applied
WGET_CONFIG = [
    WgetToggleConfig(**CONFIG),
    WgetDependencyConfig(**CONFIG),
    WgetOptionsConfig(**CONFIG),
]
||||||
118
archivebox/plugantic/extractors.py
Normal file
118
archivebox/plugantic/extractors.py
Normal file
@@ -0,0 +1,118 @@
|
|||||||
|
__package__ = 'archivebox.plugantic'
|
||||||
|
|
||||||
|
from typing import Optional, List, Literal, Annotated, Dict, Any
|
||||||
|
from typing_extensions import Self
|
||||||
|
|
||||||
|
from abc import ABC
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
from pydantic import BaseModel, model_validator, field_serializer, AfterValidator
|
||||||
|
|
||||||
|
from .binaries import (
|
||||||
|
Binary,
|
||||||
|
YtdlpBinary,
|
||||||
|
WgetBinary,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
# stubs — placeholders standing in for the real core models / helpers during development
class Snapshot:
    """Placeholder for the real core Snapshot model."""


class ArchiveResult:
    """Placeholder for the real core ArchiveResult model."""


def get_wget_output_path(*args, **kwargs) -> Path:
    """Placeholder for the real wget output-path resolver; always returns the resolved cwd."""
    return Path('.').resolve()
|
|
||||||
|
|
||||||
|
def no_empty_args(args: List[str]) -> List[str]:
    """Pydantic validator: reject argv lists containing any empty-string arguments."""
    assert all(len(arg) for arg in args)
    return args


def validate_handler_func_str(s: str) -> str:
    """Pydantic validator: handler references must be dotted method paths on self (e.g. 'self.extract').

    bugfix: this was previously `lambda s: s.startswith('self.')`, which made the
    AfterValidator *replace* the field value with a bool — an AfterValidator must
    return the (validated) value, not a truthiness check.
    """
    assert s.startswith('self.')
    return s


ExtractorName = Literal['wget', 'warc', 'media']

HandlerFuncStr = Annotated[str, AfterValidator(validate_handler_func_str)]
CmdArgsList = Annotated[List[str], AfterValidator(no_empty_args)]
||||||
|
|
||||||
|
class Extractor(ABC, BaseModel):
    """Base class for all archive extractors.

    Pairs a Binary with the handler funcs and argv needed to run it against a
    snapshot and collect its output files.
    """
    name: ExtractorName
    binary: Binary

    # dotted references to the bound methods implementing each stage (validated by HandlerFuncStr)
    output_path_func: HandlerFuncStr = 'self.get_output_path'
    should_extract_func: HandlerFuncStr = 'self.should_extract'
    extract_func: HandlerFuncStr = 'self.extract'
    exec_func: HandlerFuncStr = 'self.exec'

    default_args: CmdArgsList = []
    extra_args: CmdArgsList = []
    args: Optional[CmdArgsList] = None

    @model_validator(mode='after')
    def validate_model(self) -> Self:
        # args defaults to default_args + extra_args unless explicitly overridden
        if self.args is None:
            self.args = [*self.default_args, *self.extra_args]
        return self

    @field_serializer('binary', when_used='json')
    def dump_binary(binary) -> str:
        # serialize the binary field as just its name in JSON output
        # NOTE(review): the single param receives the field value despite the
        # method-like definition — confirm against pydantic field_serializer docs
        return binary.name

    def get_output_path(self, snapshot) -> Path:
        """Default output dir for this extractor, relative to the snapshot dir."""
        return Path(self.name)

    def should_extract(self, snapshot) -> bool:
        """Return False if the output dir already contains any output files.

        bugfix: previously tested the glob generator itself (`if output_dir.glob(...)`),
        which is always truthy, so this method always returned False.
        """
        output_dir = self.get_output_path(snapshot)
        if any(output_dir.glob('*.*')):
            return False
        return True

    def extract(self, url: str, **kwargs) -> Dict[str, Any]:
        """Run the extractor binary against url and return a summary dict of the result."""
        # NOTE(review): get_output_path() takes a single snapshot arg; passing **kwargs
        # here will TypeError if any kwargs are given — confirm intended signature
        output_dir = self.get_output_path(url, **kwargs)

        cmd = [url, *self.args] if self.args is not None else [url, *self.default_args, *self.extra_args]
        proc = self.exec(cmd, pwd=output_dir)

        return {
            'status': 'succeeded' if proc.returncode == 0 else 'failed',
            'output': proc.stdout.decode().strip().split('\n')[-1],
            'output_files': list(output_dir.glob('*.*')),

            'stdout': proc.stdout.decode().strip(),
            'stderr': proc.stderr.decode().strip(),
            'returncode': proc.returncode,
        }

    def exec(self, args: CmdArgsList, pwd: Optional[Path]=None):
        """Execute the extractor's binary with the given argv, in pwd (defaults to cwd)."""
        pwd = pwd or Path('.')
        assert self.binary.loaded_provider
        return self.binary.exec(args, pwd=pwd)
|
||||||
|
class YtdlpExtractor(Extractor):
    """Extractor that saves audio/video for a snapshot using the yt-dlp binary."""
    name: ExtractorName = 'media'
    binary: Binary = YtdlpBinary()

    def get_output_path(self, snapshot) -> Path:
        # media output lives in a subdir named after the extractor
        return Path(self.name)
||||||
|
|
||||||
|
class WgetExtractor(Extractor):
    """Extractor that saves a full static clone of the page using the wget binary."""
    name: ExtractorName = 'wget'
    binary: Binary = WgetBinary()

    def get_output_path(self, snapshot) -> Path:
        # wget's output path depends on the URL structure, so delegate to the helper
        return get_wget_output_path(snapshot)
||||||
|
|
||||||
|
class WarcExtractor(Extractor):
    """Extractor that saves a WARC capture, sharing the wget binary with WgetExtractor."""
    name: ExtractorName = 'warc'
    binary: Binary = WgetBinary()

    def get_output_path(self, snapshot) -> Path:
        # WARC output is written alongside the wget clone, so reuse its path helper
        return get_wget_output_path(snapshot)
||||||
|
|
||||||
396
archivebox/plugantic/ini_to_toml.py
Normal file
396
archivebox/plugantic/ini_to_toml.py
Normal file
@@ -0,0 +1,396 @@
|
|||||||
|
from typing import Dict, Any, List
|
||||||
|
|
||||||
|
import configparser
|
||||||
|
import json
|
||||||
|
import ast
|
||||||
|
|
||||||
|
JSONValue = str | bool | int | None | List['JSONValue']
|
||||||
|
|
||||||
|
def load_ini_value(val: str) -> JSONValue:
|
||||||
|
"""Convert lax INI values into strict TOML-compliant (JSON) values"""
|
||||||
|
if val.lower() in ('true', 'yes', '1'):
|
||||||
|
return True
|
||||||
|
if val.lower() in ('false', 'no', '0'):
|
||||||
|
return False
|
||||||
|
if val.isdigit():
|
||||||
|
return int(val)
|
||||||
|
|
||||||
|
try:
|
||||||
|
return ast.literal_eval(val)
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
|
||||||
|
try:
|
||||||
|
return json.loads(val)
|
||||||
|
except Exception as err:
|
||||||
|
pass
|
||||||
|
|
||||||
|
return val
|
||||||
|
|
||||||
|
|
||||||
|
def convert(ini_str: str) -> str:
|
||||||
|
"""Convert a string of INI config into its TOML equivalent (warning: strips comments)"""
|
||||||
|
|
||||||
|
config = configparser.ConfigParser()
|
||||||
|
config.optionxform = str # capitalize key names
|
||||||
|
config.read_string(ini_str)
|
||||||
|
|
||||||
|
# Initialize an empty dictionary to store the TOML representation
|
||||||
|
toml_dict = {}
|
||||||
|
|
||||||
|
# Iterate over each section in the INI configuration
|
||||||
|
for section in config.sections():
|
||||||
|
toml_dict[section] = {}
|
||||||
|
|
||||||
|
# Iterate over each key-value pair in the section
|
||||||
|
for key, value in config.items(section):
|
||||||
|
parsed_value = load_ini_value(value)
|
||||||
|
|
||||||
|
# Convert the parsed value to its TOML-compatible JSON representation
|
||||||
|
toml_dict[section.upper()][key.upper()] = json.dumps(parsed_value)
|
||||||
|
|
||||||
|
# Build the TOML string
|
||||||
|
toml_str = ""
|
||||||
|
for section, items in toml_dict.items():
|
||||||
|
toml_str += f"[{section}]\n"
|
||||||
|
for key, value in items.items():
|
||||||
|
toml_str += f"{key} = {value}\n"
|
||||||
|
toml_str += "\n"
|
||||||
|
|
||||||
|
return toml_str.strip()
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
### Basic Assertions
|
||||||
|
|
||||||
|
test_input = """
|
||||||
|
[SERVER_CONFIG]
|
||||||
|
IS_TTY=False
|
||||||
|
USE_COLOR=False
|
||||||
|
SHOW_PROGRESS=False
|
||||||
|
IN_DOCKER=False
|
||||||
|
IN_QEMU=False
|
||||||
|
PUID=501
|
||||||
|
PGID=20
|
||||||
|
OUTPUT_DIR=/opt/archivebox/data
|
||||||
|
CONFIG_FILE=/opt/archivebox/data/ArchiveBox.conf
|
||||||
|
ONLY_NEW=True
|
||||||
|
TIMEOUT=60
|
||||||
|
MEDIA_TIMEOUT=3600
|
||||||
|
OUTPUT_PERMISSIONS=644
|
||||||
|
RESTRICT_FILE_NAMES=windows
|
||||||
|
URL_DENYLIST=\.(css|js|otf|ttf|woff|woff2|gstatic\.com|googleapis\.com/css)(\?.*)?$
|
||||||
|
URL_ALLOWLIST=None
|
||||||
|
ADMIN_USERNAME=None
|
||||||
|
ADMIN_PASSWORD=None
|
||||||
|
ENFORCE_ATOMIC_WRITES=True
|
||||||
|
TAG_SEPARATOR_PATTERN=[,]
|
||||||
|
SECRET_KEY=xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
|
||||||
|
BIND_ADDR=127.0.0.1:8000
|
||||||
|
ALLOWED_HOSTS=*
|
||||||
|
DEBUG=False
|
||||||
|
PUBLIC_INDEX=True
|
||||||
|
PUBLIC_SNAPSHOTS=True
|
||||||
|
PUBLIC_ADD_VIEW=False
|
||||||
|
FOOTER_INFO=Content is hosted for personal archiving purposes only. Contact server owner for any takedown requests.
|
||||||
|
SNAPSHOTS_PER_PAGE=40
|
||||||
|
CUSTOM_TEMPLATES_DIR=None
|
||||||
|
TIME_ZONE=UTC
|
||||||
|
TIMEZONE=UTC
|
||||||
|
REVERSE_PROXY_USER_HEADER=Remote-User
|
||||||
|
REVERSE_PROXY_WHITELIST=
|
||||||
|
LOGOUT_REDIRECT_URL=/
|
||||||
|
PREVIEW_ORIGINALS=True
|
||||||
|
LDAP=False
|
||||||
|
LDAP_SERVER_URI=None
|
||||||
|
LDAP_BIND_DN=None
|
||||||
|
LDAP_BIND_PASSWORD=None
|
||||||
|
LDAP_USER_BASE=None
|
||||||
|
LDAP_USER_FILTER=None
|
||||||
|
LDAP_USERNAME_ATTR=None
|
||||||
|
LDAP_FIRSTNAME_ATTR=None
|
||||||
|
LDAP_LASTNAME_ATTR=None
|
||||||
|
LDAP_EMAIL_ATTR=None
|
||||||
|
LDAP_CREATE_SUPERUSER=False
|
||||||
|
SAVE_TITLE=True
|
||||||
|
SAVE_FAVICON=True
|
||||||
|
SAVE_WGET=True
|
||||||
|
SAVE_WGET_REQUISITES=True
|
||||||
|
SAVE_SINGLEFILE=True
|
||||||
|
SAVE_READABILITY=True
|
||||||
|
SAVE_MERCURY=True
|
||||||
|
SAVE_HTMLTOTEXT=True
|
||||||
|
SAVE_PDF=True
|
||||||
|
SAVE_SCREENSHOT=True
|
||||||
|
SAVE_DOM=True
|
||||||
|
SAVE_HEADERS=True
|
||||||
|
SAVE_WARC=True
|
||||||
|
SAVE_GIT=True
|
||||||
|
SAVE_MEDIA=True
|
||||||
|
SAVE_ARCHIVE_DOT_ORG=True
|
||||||
|
RESOLUTION=1440,2000
|
||||||
|
GIT_DOMAINS=github.com,bitbucket.org,gitlab.com,gist.github.com,codeberg.org,gitea.com,git.sr.ht
|
||||||
|
CHECK_SSL_VALIDITY=True
|
||||||
|
MEDIA_MAX_SIZE=750m
|
||||||
|
USER_AGENT=None
|
||||||
|
CURL_USER_AGENT=Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36 ArchiveBox/0.8.0 (+https://github.com/ArchiveBox/ArchiveBox/) curl/curl 8.4.0 (x86_64-apple-darwin23.0)
|
||||||
|
WGET_USER_AGENT=Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36 ArchiveBox/0.8.0 (+https://github.com/ArchiveBox/ArchiveBox/) wget/GNU Wget 1.24.5
|
||||||
|
CHROME_USER_AGENT=Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36 ArchiveBox/0.8.0 (+https://github.com/ArchiveBox/ArchiveBox/)
|
||||||
|
COOKIES_FILE=None
|
||||||
|
CHROME_USER_DATA_DIR=None
|
||||||
|
CHROME_TIMEOUT=0
|
||||||
|
CHROME_HEADLESS=True
|
||||||
|
CHROME_SANDBOX=True
|
||||||
|
CHROME_EXTRA_ARGS=[]
|
||||||
|
YOUTUBEDL_ARGS=['--restrict-filenames', '--trim-filenames', '128', '--write-description', '--write-info-json', '--write-annotations', '--write-thumbnail', '--no-call-home', '--write-sub', '--write-auto-subs', '--convert-subs=srt', '--yes-playlist', '--continue', '--no-abort-on-error', '--ignore-errors', '--geo-bypass', '--add-metadata', '--format=(bv*+ba/b)[filesize<=750m][filesize_approx<=?750m]/(bv*+ba/b)']
|
||||||
|
YOUTUBEDL_EXTRA_ARGS=[]
|
||||||
|
WGET_ARGS=['--no-verbose', '--adjust-extension', '--convert-links', '--force-directories', '--backup-converted', '--span-hosts', '--no-parent', '-e', 'robots=off']
|
||||||
|
WGET_EXTRA_ARGS=[]
|
||||||
|
CURL_ARGS=['--silent', '--location', '--compressed']
|
||||||
|
CURL_EXTRA_ARGS=[]
|
||||||
|
GIT_ARGS=['--recursive']
|
||||||
|
SINGLEFILE_ARGS=[]
|
||||||
|
SINGLEFILE_EXTRA_ARGS=[]
|
||||||
|
MERCURY_ARGS=['--format=text']
|
||||||
|
MERCURY_EXTRA_ARGS=[]
|
||||||
|
FAVICON_PROVIDER=https://www.google.com/s2/favicons?domain={}
|
||||||
|
USE_INDEXING_BACKEND=True
|
||||||
|
USE_SEARCHING_BACKEND=True
|
||||||
|
SEARCH_BACKEND_ENGINE=ripgrep
|
||||||
|
SEARCH_BACKEND_HOST_NAME=localhost
|
||||||
|
SEARCH_BACKEND_PORT=1491
|
||||||
|
SEARCH_BACKEND_PASSWORD=SecretPassword
|
||||||
|
SEARCH_PROCESS_HTML=True
|
||||||
|
SONIC_COLLECTION=archivebox
|
||||||
|
SONIC_BUCKET=snapshots
|
||||||
|
SEARCH_BACKEND_TIMEOUT=90
|
||||||
|
FTS_SEPARATE_DATABASE=True
|
||||||
|
FTS_TOKENIZERS=porter unicode61 remove_diacritics 2
|
||||||
|
FTS_SQLITE_MAX_LENGTH=1000000000
|
||||||
|
USE_CURL=True
|
||||||
|
USE_WGET=True
|
||||||
|
USE_SINGLEFILE=True
|
||||||
|
USE_READABILITY=True
|
||||||
|
USE_MERCURY=True
|
||||||
|
USE_GIT=True
|
||||||
|
USE_CHROME=True
|
||||||
|
USE_NODE=True
|
||||||
|
USE_YOUTUBEDL=True
|
||||||
|
USE_RIPGREP=True
|
||||||
|
CURL_BINARY=curl
|
||||||
|
GIT_BINARY=git
|
||||||
|
WGET_BINARY=wget
|
||||||
|
SINGLEFILE_BINARY=single-file
|
||||||
|
READABILITY_BINARY=readability-extractor
|
||||||
|
MERCURY_BINARY=postlight-parser
|
||||||
|
YOUTUBEDL_BINARY=yt-dlp
|
||||||
|
NODE_BINARY=node
|
||||||
|
RIPGREP_BINARY=rg
|
||||||
|
CHROME_BINARY=chrome
|
||||||
|
POCKET_CONSUMER_KEY=None
|
||||||
|
USER=squash
|
||||||
|
PACKAGE_DIR=/opt/archivebox/archivebox
|
||||||
|
TEMPLATES_DIR=/opt/archivebox/archivebox/templates
|
||||||
|
ARCHIVE_DIR=/opt/archivebox/data/archive
|
||||||
|
SOURCES_DIR=/opt/archivebox/data/sources
|
||||||
|
LOGS_DIR=/opt/archivebox/data/logs
|
||||||
|
PERSONAS_DIR=/opt/archivebox/data/personas
|
||||||
|
URL_DENYLIST_PTN=re.compile('\\.(css|js|otf|ttf|woff|woff2|gstatic\\.com|googleapis\\.com/css)(\\?.*)?$', re.IGNORECASE|re.MULTILINE)
|
||||||
|
URL_ALLOWLIST_PTN=None
|
||||||
|
DIR_OUTPUT_PERMISSIONS=755
|
||||||
|
ARCHIVEBOX_BINARY=/opt/archivebox/.venv/bin/archivebox
|
||||||
|
VERSION=0.8.0
|
||||||
|
COMMIT_HASH=102e87578c6036bb0132dd1ebd17f8f05ffc880f
|
||||||
|
BUILD_TIME=2024-05-15 03:28:05 1715768885
|
||||||
|
VERSIONS_AVAILABLE=None
|
||||||
|
CAN_UPGRADE=False
|
||||||
|
PYTHON_BINARY=/opt/archivebox/.venv/bin/python3.10
|
||||||
|
PYTHON_ENCODING=UTF-8
|
||||||
|
PYTHON_VERSION=3.10.14
|
||||||
|
DJANGO_BINARY=/opt/archivebox/.venv/lib/python3.10/site-packages/django/__init__.py
|
||||||
|
DJANGO_VERSION=5.0.6 final (0)
|
||||||
|
SQLITE_BINARY=/opt/homebrew/Cellar/python@3.10/3.10.14/Frameworks/Python.framework/Versions/3.10/lib/python3.10/sqlite3/dbapi2.py
|
||||||
|
SQLITE_VERSION=2.6.0
|
||||||
|
CURL_VERSION=curl 8.4.0 (x86_64-apple-darwin23.0)
|
||||||
|
WGET_VERSION=GNU Wget 1.24.5
|
||||||
|
WGET_AUTO_COMPRESSION=True
|
||||||
|
RIPGREP_VERSION=ripgrep 14.1.0
|
||||||
|
SINGLEFILE_VERSION=None
|
||||||
|
READABILITY_VERSION=None
|
||||||
|
MERCURY_VERSION=None
|
||||||
|
GIT_VERSION=git version 2.44.0
|
||||||
|
YOUTUBEDL_VERSION=2024.04.09
|
||||||
|
CHROME_VERSION=Google Chrome 124.0.6367.207
|
||||||
|
NODE_VERSION=v21.7.3
|
||||||
|
"""
|
||||||
|
|
||||||
|
|
||||||
|
expected_output = '''[SERVER_CONFIG]
|
||||||
|
IS_TTY = false
|
||||||
|
USE_COLOR = false
|
||||||
|
SHOW_PROGRESS = false
|
||||||
|
IN_DOCKER = false
|
||||||
|
IN_QEMU = false
|
||||||
|
PUID = 501
|
||||||
|
PGID = 20
|
||||||
|
OUTPUT_DIR = "/opt/archivebox/data"
|
||||||
|
CONFIG_FILE = "/opt/archivebox/data/ArchiveBox.conf"
|
||||||
|
ONLY_NEW = true
|
||||||
|
TIMEOUT = 60
|
||||||
|
MEDIA_TIMEOUT = 3600
|
||||||
|
OUTPUT_PERMISSIONS = 644
|
||||||
|
RESTRICT_FILE_NAMES = "windows"
|
||||||
|
URL_DENYLIST = "\\\\.(css|js|otf|ttf|woff|woff2|gstatic\\\\.com|googleapis\\\\.com/css)(\\\\?.*)?$"
|
||||||
|
URL_ALLOWLIST = null
|
||||||
|
ADMIN_USERNAME = null
|
||||||
|
ADMIN_PASSWORD = null
|
||||||
|
ENFORCE_ATOMIC_WRITES = true
|
||||||
|
TAG_SEPARATOR_PATTERN = "[,]"
|
||||||
|
SECRET_KEY = "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"
|
||||||
|
BIND_ADDR = "127.0.0.1:8000"
|
||||||
|
ALLOWED_HOSTS = "*"
|
||||||
|
DEBUG = false
|
||||||
|
PUBLIC_INDEX = true
|
||||||
|
PUBLIC_SNAPSHOTS = true
|
||||||
|
PUBLIC_ADD_VIEW = false
|
||||||
|
FOOTER_INFO = "Content is hosted for personal archiving purposes only. Contact server owner for any takedown requests."
|
||||||
|
SNAPSHOTS_PER_PAGE = 40
|
||||||
|
CUSTOM_TEMPLATES_DIR = null
|
||||||
|
TIME_ZONE = "UTC"
|
||||||
|
TIMEZONE = "UTC"
|
||||||
|
REVERSE_PROXY_USER_HEADER = "Remote-User"
|
||||||
|
REVERSE_PROXY_WHITELIST = ""
|
||||||
|
LOGOUT_REDIRECT_URL = "/"
|
||||||
|
PREVIEW_ORIGINALS = true
|
||||||
|
LDAP = false
|
||||||
|
LDAP_SERVER_URI = null
|
||||||
|
LDAP_BIND_DN = null
|
||||||
|
LDAP_BIND_PASSWORD = null
|
||||||
|
LDAP_USER_BASE = null
|
||||||
|
LDAP_USER_FILTER = null
|
||||||
|
LDAP_USERNAME_ATTR = null
|
||||||
|
LDAP_FIRSTNAME_ATTR = null
|
||||||
|
LDAP_LASTNAME_ATTR = null
|
||||||
|
LDAP_EMAIL_ATTR = null
|
||||||
|
LDAP_CREATE_SUPERUSER = false
|
||||||
|
SAVE_TITLE = true
|
||||||
|
SAVE_FAVICON = true
|
||||||
|
SAVE_WGET = true
|
||||||
|
SAVE_WGET_REQUISITES = true
|
||||||
|
SAVE_SINGLEFILE = true
|
||||||
|
SAVE_READABILITY = true
|
||||||
|
SAVE_MERCURY = true
|
||||||
|
SAVE_HTMLTOTEXT = true
|
||||||
|
SAVE_PDF = true
|
||||||
|
SAVE_SCREENSHOT = true
|
||||||
|
SAVE_DOM = true
|
||||||
|
SAVE_HEADERS = true
|
||||||
|
SAVE_WARC = true
|
||||||
|
SAVE_GIT = true
|
||||||
|
SAVE_MEDIA = true
|
||||||
|
SAVE_ARCHIVE_DOT_ORG = true
|
||||||
|
RESOLUTION = [1440, 2000]
|
||||||
|
GIT_DOMAINS = "github.com,bitbucket.org,gitlab.com,gist.github.com,codeberg.org,gitea.com,git.sr.ht"
|
||||||
|
CHECK_SSL_VALIDITY = true
|
||||||
|
MEDIA_MAX_SIZE = "750m"
|
||||||
|
USER_AGENT = null
|
||||||
|
CURL_USER_AGENT = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36 ArchiveBox/0.8.0 (+https://github.com/ArchiveBox/ArchiveBox/) curl/curl 8.4.0 (x86_64-apple-darwin23.0)"
|
||||||
|
WGET_USER_AGENT = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36 ArchiveBox/0.8.0 (+https://github.com/ArchiveBox/ArchiveBox/) wget/GNU Wget 1.24.5"
|
||||||
|
CHROME_USER_AGENT = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36 ArchiveBox/0.8.0 (+https://github.com/ArchiveBox/ArchiveBox/)"
|
||||||
|
COOKIES_FILE = null
|
||||||
|
CHROME_USER_DATA_DIR = null
|
||||||
|
CHROME_TIMEOUT = false
|
||||||
|
CHROME_HEADLESS = true
|
||||||
|
CHROME_SANDBOX = true
|
||||||
|
CHROME_EXTRA_ARGS = []
|
||||||
|
YOUTUBEDL_ARGS = ["--restrict-filenames", "--trim-filenames", "128", "--write-description", "--write-info-json", "--write-annotations", "--write-thumbnail", "--no-call-home", "--write-sub", "--write-auto-subs", "--convert-subs=srt", "--yes-playlist", "--continue", "--no-abort-on-error", "--ignore-errors", "--geo-bypass", "--add-metadata", "--format=(bv*+ba/b)[filesize<=750m][filesize_approx<=?750m]/(bv*+ba/b)"]
|
||||||
|
YOUTUBEDL_EXTRA_ARGS = []
|
||||||
|
WGET_ARGS = ["--no-verbose", "--adjust-extension", "--convert-links", "--force-directories", "--backup-converted", "--span-hosts", "--no-parent", "-e", "robots=off"]
|
||||||
|
WGET_EXTRA_ARGS = []
|
||||||
|
CURL_ARGS = ["--silent", "--location", "--compressed"]
|
||||||
|
CURL_EXTRA_ARGS = []
|
||||||
|
GIT_ARGS = ["--recursive"]
|
||||||
|
SINGLEFILE_ARGS = []
|
||||||
|
SINGLEFILE_EXTRA_ARGS = []
|
||||||
|
MERCURY_ARGS = ["--format=text"]
|
||||||
|
MERCURY_EXTRA_ARGS = []
|
||||||
|
FAVICON_PROVIDER = "https://www.google.com/s2/favicons?domain={}"
|
||||||
|
USE_INDEXING_BACKEND = true
|
||||||
|
USE_SEARCHING_BACKEND = true
|
||||||
|
SEARCH_BACKEND_ENGINE = "ripgrep"
|
||||||
|
SEARCH_BACKEND_HOST_NAME = "localhost"
|
||||||
|
SEARCH_BACKEND_PORT = 1491
|
||||||
|
SEARCH_BACKEND_PASSWORD = "SecretPassword"
|
||||||
|
SEARCH_PROCESS_HTML = true
|
||||||
|
SONIC_COLLECTION = "archivebox"
|
||||||
|
SONIC_BUCKET = "snapshots"
|
||||||
|
SEARCH_BACKEND_TIMEOUT = 90
|
||||||
|
FTS_SEPARATE_DATABASE = true
|
||||||
|
FTS_TOKENIZERS = "porter unicode61 remove_diacritics 2"
|
||||||
|
FTS_SQLITE_MAX_LENGTH = 1000000000
|
||||||
|
USE_CURL = true
|
||||||
|
USE_WGET = true
|
||||||
|
USE_SINGLEFILE = true
|
||||||
|
USE_READABILITY = true
|
||||||
|
USE_MERCURY = true
|
||||||
|
USE_GIT = true
|
||||||
|
USE_CHROME = true
|
||||||
|
USE_NODE = true
|
||||||
|
USE_YOUTUBEDL = true
|
||||||
|
USE_RIPGREP = true
|
||||||
|
CURL_BINARY = "curl"
|
||||||
|
GIT_BINARY = "git"
|
||||||
|
WGET_BINARY = "wget"
|
||||||
|
SINGLEFILE_BINARY = "single-file"
|
||||||
|
READABILITY_BINARY = "readability-extractor"
|
||||||
|
MERCURY_BINARY = "postlight-parser"
|
||||||
|
YOUTUBEDL_BINARY = "yt-dlp"
|
||||||
|
NODE_BINARY = "node"
|
||||||
|
RIPGREP_BINARY = "rg"
|
||||||
|
CHROME_BINARY = "chrome"
|
||||||
|
POCKET_CONSUMER_KEY = null
|
||||||
|
USER = "squash"
|
||||||
|
PACKAGE_DIR = "/opt/archivebox/archivebox"
|
||||||
|
TEMPLATES_DIR = "/opt/archivebox/archivebox/templates"
|
||||||
|
ARCHIVE_DIR = "/opt/archivebox/data/archive"
|
||||||
|
SOURCES_DIR = "/opt/archivebox/data/sources"
|
||||||
|
LOGS_DIR = "/opt/archivebox/data/logs"
|
||||||
|
PERSONAS_DIR = "/opt/archivebox/data/personas"
|
||||||
|
URL_DENYLIST_PTN = "re.compile(\'\\\\.(css|js|otf|ttf|woff|woff2|gstatic\\\\.com|googleapis\\\\.com/css)(\\\\?.*)?$\', re.IGNORECASE|re.MULTILINE)"
|
||||||
|
URL_ALLOWLIST_PTN = null
|
||||||
|
DIR_OUTPUT_PERMISSIONS = 755
|
||||||
|
ARCHIVEBOX_BINARY = "/opt/archivebox/.venv/bin/archivebox"
|
||||||
|
VERSION = "0.8.0"
|
||||||
|
COMMIT_HASH = "102e87578c6036bb0132dd1ebd17f8f05ffc880f"
|
||||||
|
BUILD_TIME = "2024-05-15 03:28:05 1715768885"
|
||||||
|
VERSIONS_AVAILABLE = null
|
||||||
|
CAN_UPGRADE = false
|
||||||
|
PYTHON_BINARY = "/opt/archivebox/.venv/bin/python3.10"
|
||||||
|
PYTHON_ENCODING = "UTF-8"
|
||||||
|
PYTHON_VERSION = "3.10.14"
|
||||||
|
DJANGO_BINARY = "/opt/archivebox/.venv/lib/python3.10/site-packages/django/__init__.py"
|
||||||
|
DJANGO_VERSION = "5.0.6 final (0)"
|
||||||
|
SQLITE_BINARY = "/opt/homebrew/Cellar/python@3.10/3.10.14/Frameworks/Python.framework/Versions/3.10/lib/python3.10/sqlite3/dbapi2.py"
|
||||||
|
SQLITE_VERSION = "2.6.0"
|
||||||
|
CURL_VERSION = "curl 8.4.0 (x86_64-apple-darwin23.0)"
|
||||||
|
WGET_VERSION = "GNU Wget 1.24.5"
|
||||||
|
WGET_AUTO_COMPRESSION = true
|
||||||
|
RIPGREP_VERSION = "ripgrep 14.1.0"
|
||||||
|
SINGLEFILE_VERSION = null
|
||||||
|
READABILITY_VERSION = null
|
||||||
|
MERCURY_VERSION = null
|
||||||
|
GIT_VERSION = "git version 2.44.0"
|
||||||
|
YOUTUBEDL_VERSION = "2024.04.09"
|
||||||
|
CHROME_VERSION = "Google Chrome 124.0.6367.207"
|
||||||
|
NODE_VERSION = "v21.7.3"'''
|
||||||
|
|
||||||
|
|
||||||
|
# sanity checks run at import time:
first_output = convert(test_input)     # make sure ini -> toml parses correctly
second_output = convert(first_output)  # make sure toml -> toml parses/dumps consistently
assert first_output == second_output == expected_output  # make sure parsing is idempotent
||||||
|
|
||||||
|
# # DEBUGGING
|
||||||
|
# import sys
|
||||||
|
# import difflib
|
||||||
|
# sys.stdout.writelines(difflib.context_diff(first_output, second_output, fromfile='first', tofile='second'))
|
||||||
|
# print(repr(second_output))
|
||||||
38
archivebox/plugantic/migrations/0001_initial.py
Normal file
38
archivebox/plugantic/migrations/0001_initial.py
Normal file
@@ -0,0 +1,38 @@
|
|||||||
|
# Generated by Django 5.0.6 on 2024-05-18 00:16
|
||||||
|
|
||||||
|
import abid_utils.models
|
||||||
|
import archivebox.plugantic.plugins
|
||||||
|
import charidfield.fields
|
||||||
|
import django.core.serializers.json
|
||||||
|
import django.db.models.deletion
|
||||||
|
import django_pydantic_field.fields
|
||||||
|
import uuid
|
||||||
|
from django.conf import settings
|
||||||
|
from django.db import migrations, models
|
||||||
|
|
||||||
|
|
||||||
|
class Migration(migrations.Migration):
|
||||||
|
|
||||||
|
initial = True
|
||||||
|
|
||||||
|
dependencies = [
|
||||||
|
migrations.swappable_dependency(settings.AUTH_USER_MODEL),
|
||||||
|
]
|
||||||
|
|
||||||
|
operations = [
|
||||||
|
migrations.CreateModel(
|
||||||
|
name='Plugin',
|
||||||
|
fields=[
|
||||||
|
('created', models.DateTimeField(auto_now_add=True)),
|
||||||
|
('modified', models.DateTimeField(auto_now=True)),
|
||||||
|
('id', models.UUIDField(default=uuid.uuid4, editable=False, primary_key=True, serialize=False)),
|
||||||
|
('uuid', models.UUIDField(blank=True, null=True, unique=True)),
|
||||||
|
('abid', charidfield.fields.CharIDField(blank=True, db_index=True, default=None, help_text='ABID-format identifier for this entity (e.g. snp_01BJQMF54D093DXEAWZ6JYRPAQ)', max_length=30, null=True, prefix='plg_', unique=True)),
|
||||||
|
('schema', django_pydantic_field.fields.PydanticSchemaField(config=None, encoder=django.core.serializers.json.DjangoJSONEncoder, schema=archivebox.plugantic.plugins.Plugin)),
|
||||||
|
('created_by', models.ForeignKey(default=abid_utils.models.get_or_create_system_user_pk, on_delete=django.db.models.deletion.CASCADE, to=settings.AUTH_USER_MODEL)),
|
||||||
|
],
|
||||||
|
options={
|
||||||
|
'abstract': False,
|
||||||
|
},
|
||||||
|
),
|
||||||
|
]
|
||||||
21
archivebox/plugantic/migrations/0002_alter_plugin_schema.py
Normal file
21
archivebox/plugantic/migrations/0002_alter_plugin_schema.py
Normal file
@@ -0,0 +1,21 @@
|
|||||||
|
# Generated by Django 5.0.6 on 2024-05-18 01:16
|
||||||
|
|
||||||
|
import archivebox.plugantic.plugins
|
||||||
|
import django.core.serializers.json
|
||||||
|
import django_pydantic_field.fields
|
||||||
|
from django.db import migrations
|
||||||
|
|
||||||
|
|
||||||
|
class Migration(migrations.Migration):
|
||||||
|
|
||||||
|
dependencies = [
|
||||||
|
('plugantic', '0001_initial'),
|
||||||
|
]
|
||||||
|
|
||||||
|
operations = [
|
||||||
|
migrations.AlterField(
|
||||||
|
model_name='plugin',
|
||||||
|
name='schema',
|
||||||
|
field=django_pydantic_field.fields.PydanticSchemaField(config=None, default=None, encoder=django.core.serializers.json.DjangoJSONEncoder, schema=archivebox.plugantic.plugins.Plugin),
|
||||||
|
),
|
||||||
|
]
|
||||||
21
archivebox/plugantic/migrations/0003_alter_plugin_schema.py
Normal file
21
archivebox/plugantic/migrations/0003_alter_plugin_schema.py
Normal file
@@ -0,0 +1,21 @@
|
|||||||
|
# Generated by Django 5.0.6 on 2024-05-18 01:25
|
||||||
|
|
||||||
|
import archivebox.plugantic.replayers
|
||||||
|
import django.core.serializers.json
|
||||||
|
import django_pydantic_field.fields
|
||||||
|
from django.db import migrations
|
||||||
|
|
||||||
|
|
||||||
|
class Migration(migrations.Migration):
|
||||||
|
|
||||||
|
dependencies = [
|
||||||
|
('plugantic', '0002_alter_plugin_schema'),
|
||||||
|
]
|
||||||
|
|
||||||
|
operations = [
|
||||||
|
migrations.AlterField(
|
||||||
|
model_name='plugin',
|
||||||
|
name='schema',
|
||||||
|
field=django_pydantic_field.fields.PydanticSchemaField(config=None, default={'embed_template': 'plugins/generic_replayer/templates/embed.html', 'fullpage_template': 'plugins/generic_replayer/templates/fullpage.html', 'name': 'GenericReplayer', 'row_template': 'plugins/generic_replayer/templates/row.html', 'url_pattern': '*'}, encoder=django.core.serializers.json.DjangoJSONEncoder, schema=archivebox.plugantic.replayers.Replayer),
|
||||||
|
),
|
||||||
|
]
|
||||||
@@ -0,0 +1,32 @@
|
|||||||
|
# Generated by Django 5.0.6 on 2024-05-18 01:28
|
||||||
|
|
||||||
|
import archivebox.plugantic.configs
|
||||||
|
import django.core.serializers.json
|
||||||
|
import django_pydantic_field.compat.django
|
||||||
|
import django_pydantic_field.fields
|
||||||
|
from django.db import migrations, models
|
||||||
|
|
||||||
|
|
||||||
|
class Migration(migrations.Migration):
|
||||||
|
|
||||||
|
dependencies = [
|
||||||
|
('plugantic', '0003_alter_plugin_schema'),
|
||||||
|
]
|
||||||
|
|
||||||
|
operations = [
|
||||||
|
migrations.RemoveField(
|
||||||
|
model_name='plugin',
|
||||||
|
name='schema',
|
||||||
|
),
|
||||||
|
migrations.AddField(
|
||||||
|
model_name='plugin',
|
||||||
|
name='configs',
|
||||||
|
field=django_pydantic_field.fields.PydanticSchemaField(config=None, default=[], encoder=django.core.serializers.json.DjangoJSONEncoder, schema=django_pydantic_field.compat.django.GenericContainer(list, (archivebox.plugantic.configs.ConfigSet,))),
|
||||||
|
),
|
||||||
|
migrations.AddField(
|
||||||
|
model_name='plugin',
|
||||||
|
name='name',
|
||||||
|
field=models.CharField(default='name', max_length=64, unique=True),
|
||||||
|
preserve_default=False,
|
||||||
|
),
|
||||||
|
]
|
||||||
@@ -0,0 +1,39 @@
|
|||||||
|
# Generated by Django 5.0.6 on 2024-05-18 01:42
|
||||||
|
|
||||||
|
import abid_utils.models
|
||||||
|
import charidfield.fields
|
||||||
|
import django.db.models.deletion
|
||||||
|
import pathlib
|
||||||
|
import uuid
|
||||||
|
from django.conf import settings
|
||||||
|
from django.db import migrations, models
|
||||||
|
|
||||||
|
|
||||||
|
class Migration(migrations.Migration):
|
||||||
|
|
||||||
|
dependencies = [
|
||||||
|
('plugantic', '0004_remove_plugin_schema_plugin_configs_plugin_name'),
|
||||||
|
migrations.swappable_dependency(settings.AUTH_USER_MODEL),
|
||||||
|
]
|
||||||
|
|
||||||
|
operations = [
|
||||||
|
migrations.CreateModel(
|
||||||
|
name='CustomPlugin',
|
||||||
|
fields=[
|
||||||
|
('created', models.DateTimeField(auto_now_add=True)),
|
||||||
|
('modified', models.DateTimeField(auto_now=True)),
|
||||||
|
('id', models.UUIDField(default=uuid.uuid4, editable=False, primary_key=True, serialize=False)),
|
||||||
|
('uuid', models.UUIDField(blank=True, null=True, unique=True)),
|
||||||
|
('abid', charidfield.fields.CharIDField(blank=True, db_index=True, default=None, help_text='ABID-format identifier for this entity (e.g. snp_01BJQMF54D093DXEAWZ6JYRPAQ)', max_length=30, null=True, prefix='plg_', unique=True)),
|
||||||
|
('name', models.CharField(max_length=64, unique=True)),
|
||||||
|
('path', models.FilePathField(path=pathlib.PurePosixPath('/Volumes/NVME/Users/squash/Local/Code/archiveboxes/ArchiveBox/archivebox/plugins'))),
|
||||||
|
('created_by', models.ForeignKey(default=abid_utils.models.get_or_create_system_user_pk, on_delete=django.db.models.deletion.CASCADE, to=settings.AUTH_USER_MODEL)),
|
||||||
|
],
|
||||||
|
options={
|
||||||
|
'abstract': False,
|
||||||
|
},
|
||||||
|
),
|
||||||
|
migrations.DeleteModel(
|
||||||
|
name='Plugin',
|
||||||
|
),
|
||||||
|
]
|
||||||
@@ -0,0 +1,19 @@
|
|||||||
|
# Generated by Django 5.0.6 on 2024-05-18 01:45
|
||||||
|
|
||||||
|
import pathlib
|
||||||
|
from django.db import migrations, models
|
||||||
|
|
||||||
|
|
||||||
|
class Migration(migrations.Migration):
|
||||||
|
|
||||||
|
dependencies = [
|
||||||
|
('plugantic', '0005_customplugin_delete_plugin'),
|
||||||
|
]
|
||||||
|
|
||||||
|
operations = [
|
||||||
|
migrations.AlterField(
|
||||||
|
model_name='customplugin',
|
||||||
|
name='path',
|
||||||
|
field=models.FilePathField(allow_files=False, allow_folders=True, path=pathlib.PurePosixPath('/Volumes/NVME/Users/squash/Local/Code/archiveboxes/ArchiveBox/archivebox/plugins'), recursive=True),
|
||||||
|
),
|
||||||
|
]
|
||||||
@@ -0,0 +1,19 @@
|
|||||||
|
# Generated by Django 5.0.6 on 2024-05-18 01:46
|
||||||
|
|
||||||
|
import pathlib
|
||||||
|
from django.db import migrations, models
|
||||||
|
|
||||||
|
|
||||||
|
class Migration(migrations.Migration):
|
||||||
|
|
||||||
|
dependencies = [
|
||||||
|
('plugantic', '0006_alter_customplugin_path'),
|
||||||
|
]
|
||||||
|
|
||||||
|
operations = [
|
||||||
|
migrations.AlterField(
|
||||||
|
model_name='customplugin',
|
||||||
|
name='path',
|
||||||
|
field=models.FilePathField(allow_files=False, allow_folders=True, path=pathlib.PurePosixPath('/Volumes/NVME/Users/squash/Local/Code/archiveboxes/ArchiveBox/data/plugins'), recursive=True),
|
||||||
|
),
|
||||||
|
]
|
||||||
@@ -0,0 +1,19 @@
|
|||||||
|
# Generated by Django 5.0.6 on 2024-05-18 01:47
|
||||||
|
|
||||||
|
import pathlib
|
||||||
|
from django.db import migrations, models
|
||||||
|
|
||||||
|
|
||||||
|
class Migration(migrations.Migration):
|
||||||
|
|
||||||
|
dependencies = [
|
||||||
|
('plugantic', '0007_alter_customplugin_path'),
|
||||||
|
]
|
||||||
|
|
||||||
|
operations = [
|
||||||
|
migrations.AlterField(
|
||||||
|
model_name='customplugin',
|
||||||
|
name='path',
|
||||||
|
field=models.FilePathField(allow_files=False, allow_folders=True, path=pathlib.PurePosixPath('/Volumes/NVME/Users/squash/Local/Code/archiveboxes/ArchiveBox/data'), recursive=True),
|
||||||
|
),
|
||||||
|
]
|
||||||
@@ -0,0 +1,18 @@
|
|||||||
|
# Generated by Django 5.0.6 on 2024-05-18 01:48
|
||||||
|
|
||||||
|
from django.db import migrations, models
|
||||||
|
|
||||||
|
|
||||||
|
class Migration(migrations.Migration):
|
||||||
|
|
||||||
|
dependencies = [
|
||||||
|
('plugantic', '0008_alter_customplugin_path'),
|
||||||
|
]
|
||||||
|
|
||||||
|
operations = [
|
||||||
|
migrations.AlterField(
|
||||||
|
model_name='customplugin',
|
||||||
|
name='path',
|
||||||
|
field=models.FilePathField(allow_files=False, allow_folders=True, path='/Volumes/NVME/Users/squash/Local/Code/archiveboxes/ArchiveBox/data', recursive=True),
|
||||||
|
),
|
||||||
|
]
|
||||||
@@ -0,0 +1,18 @@
|
|||||||
|
# Generated by Django 5.0.6 on 2024-05-18 01:48
|
||||||
|
|
||||||
|
from django.db import migrations, models
|
||||||
|
|
||||||
|
|
||||||
|
class Migration(migrations.Migration):
|
||||||
|
|
||||||
|
dependencies = [
|
||||||
|
('plugantic', '0009_alter_customplugin_path'),
|
||||||
|
]
|
||||||
|
|
||||||
|
operations = [
|
||||||
|
migrations.AlterField(
|
||||||
|
model_name='customplugin',
|
||||||
|
name='path',
|
||||||
|
field=models.FilePathField(allow_files=False, allow_folders=True, match='/plugins/*', path='/Volumes/NVME/Users/squash/Local/Code/archiveboxes/ArchiveBox/data', recursive=True),
|
||||||
|
),
|
||||||
|
]
|
||||||
@@ -0,0 +1,18 @@
|
|||||||
|
# Generated by Django 5.0.6 on 2024-05-18 01:48
|
||||||
|
|
||||||
|
from django.db import migrations, models
|
||||||
|
|
||||||
|
|
||||||
|
class Migration(migrations.Migration):
|
||||||
|
|
||||||
|
dependencies = [
|
||||||
|
('plugantic', '0010_alter_customplugin_path'),
|
||||||
|
]
|
||||||
|
|
||||||
|
operations = [
|
||||||
|
migrations.AlterField(
|
||||||
|
model_name='customplugin',
|
||||||
|
name='path',
|
||||||
|
field=models.FilePathField(allow_files=False, allow_folders=True, match='plugins/*', path='/Volumes/NVME/Users/squash/Local/Code/archiveboxes/ArchiveBox/data', recursive=True),
|
||||||
|
),
|
||||||
|
]
|
||||||
@@ -0,0 +1,18 @@
|
|||||||
|
# Generated by Django 5.0.6 on 2024-05-18 01:49
|
||||||
|
|
||||||
|
from django.db import migrations, models
|
||||||
|
|
||||||
|
|
||||||
|
class Migration(migrations.Migration):
|
||||||
|
|
||||||
|
dependencies = [
|
||||||
|
('plugantic', '0011_alter_customplugin_path'),
|
||||||
|
]
|
||||||
|
|
||||||
|
operations = [
|
||||||
|
migrations.AlterField(
|
||||||
|
model_name='customplugin',
|
||||||
|
name='path',
|
||||||
|
field=models.FilePathField(allow_files=False, allow_folders=True, default='example_plugin', match='plugins/*', path='/Volumes/NVME/Users/squash/Local/Code/archiveboxes/ArchiveBox/data', recursive=True),
|
||||||
|
),
|
||||||
|
]
|
||||||
@@ -0,0 +1,18 @@
|
|||||||
|
# Generated by Django 5.0.6 on 2024-05-18 01:49
|
||||||
|
|
||||||
|
from django.db import migrations, models
|
||||||
|
|
||||||
|
|
||||||
|
class Migration(migrations.Migration):
|
||||||
|
|
||||||
|
dependencies = [
|
||||||
|
('plugantic', '0012_alter_customplugin_path'),
|
||||||
|
]
|
||||||
|
|
||||||
|
operations = [
|
||||||
|
migrations.AlterField(
|
||||||
|
model_name='customplugin',
|
||||||
|
name='path',
|
||||||
|
field=models.FilePathField(allow_files=False, allow_folders=True, default='/plugins/example_plugin', match='plugins/*', path='/Volumes/NVME/Users/squash/Local/Code/archiveboxes/ArchiveBox/data', recursive=True),
|
||||||
|
),
|
||||||
|
]
|
||||||
@@ -0,0 +1,18 @@
|
|||||||
|
# Generated by Django 5.0.6 on 2024-05-18 01:50
|
||||||
|
|
||||||
|
from django.db import migrations, models
|
||||||
|
|
||||||
|
|
||||||
|
class Migration(migrations.Migration):
|
||||||
|
|
||||||
|
dependencies = [
|
||||||
|
('plugantic', '0013_alter_customplugin_path'),
|
||||||
|
]
|
||||||
|
|
||||||
|
operations = [
|
||||||
|
migrations.AlterField(
|
||||||
|
model_name='customplugin',
|
||||||
|
name='path',
|
||||||
|
field=models.FilePathField(allow_files=False, allow_folders=True, default='/plugins/example_plugin', match='*', path='/Volumes/NVME/Users/squash/Local/Code/archiveboxes/ArchiveBox/data/plugins', recursive=True),
|
||||||
|
),
|
||||||
|
]
|
||||||
@@ -0,0 +1,18 @@
|
|||||||
|
# Generated by Django 5.0.6 on 2024-05-18 01:51
|
||||||
|
|
||||||
|
from django.db import migrations, models
|
||||||
|
|
||||||
|
|
||||||
|
class Migration(migrations.Migration):
|
||||||
|
|
||||||
|
dependencies = [
|
||||||
|
('plugantic', '0014_alter_customplugin_path'),
|
||||||
|
]
|
||||||
|
|
||||||
|
operations = [
|
||||||
|
migrations.AlterField(
|
||||||
|
model_name='customplugin',
|
||||||
|
name='path',
|
||||||
|
field=models.FilePathField(allow_files=False, allow_folders=True, match='*', path='/Volumes/NVME/Users/squash/Local/Code/archiveboxes/ArchiveBox/data/plugins', recursive=True),
|
||||||
|
),
|
||||||
|
]
|
||||||
16
archivebox/plugantic/migrations/0016_delete_customplugin.py
Normal file
16
archivebox/plugantic/migrations/0016_delete_customplugin.py
Normal file
@@ -0,0 +1,16 @@
|
|||||||
|
# Generated by Django 5.0.6 on 2024-05-18 01:57
|
||||||
|
|
||||||
|
from django.db import migrations
|
||||||
|
|
||||||
|
|
||||||
|
class Migration(migrations.Migration):
|
||||||
|
|
||||||
|
dependencies = [
|
||||||
|
('plugantic', '0015_alter_customplugin_path'),
|
||||||
|
]
|
||||||
|
|
||||||
|
operations = [
|
||||||
|
migrations.DeleteModel(
|
||||||
|
name='CustomPlugin',
|
||||||
|
),
|
||||||
|
]
|
||||||
0
archivebox/plugantic/migrations/__init__.py
Normal file
0
archivebox/plugantic/migrations/__init__.py
Normal file
50
archivebox/plugantic/models.py
Normal file
50
archivebox/plugantic/models.py
Normal file
@@ -0,0 +1,50 @@
|
|||||||
|
__package__ = 'archivebox.plugantic'
|
||||||
|
|
||||||
|
|
||||||
|
# import uuid
|
||||||
|
# from django.db import models
|
||||||
|
# from typing_extensions import Self
|
||||||
|
|
||||||
|
# from django_pydantic_field import SchemaField
|
||||||
|
# from django.conf import settings
|
||||||
|
|
||||||
|
# from abid_utils.models import ABIDModel, ABIDField
|
||||||
|
|
||||||
|
# # from .plugins import Plugin as PluginSchema, CORE_PLUGIN
|
||||||
|
# from .binproviders import BinProvider
|
||||||
|
# from .binaries import Binary
|
||||||
|
# from .configs import WgetOptionsConfig
|
||||||
|
# from .extractors import Extractor
|
||||||
|
# from .replayers import Replayer
|
||||||
|
|
||||||
|
|
||||||
|
# PLUGINS_ROOT = settings.CONFIG['OUTPUT_DIR'] / 'plugins'
|
||||||
|
# PLUGINS_ROOT.mkdir(exist_ok=True)
|
||||||
|
|
||||||
|
|
||||||
|
# class CustomPlugin(ABIDModel):
|
||||||
|
# abid_prefix = 'plg_'
|
||||||
|
# abid_ts_src = 'self.added'
|
||||||
|
# abid_uri_src = 'self.name'
|
||||||
|
# abid_subtype_src = '"09"'
|
||||||
|
# abid_rand_src = 'self.id'
|
||||||
|
|
||||||
|
# id = models.UUIDField(primary_key=True, default=uuid.uuid4, editable=False) # legacy pk
|
||||||
|
# uuid = models.UUIDField(blank=True, null=True, editable=True, unique=True)
|
||||||
|
# abid = ABIDField(prefix=abid_prefix)
|
||||||
|
|
||||||
|
# name = models.CharField(max_length=64, blank=False, unique=True)
|
||||||
|
|
||||||
|
# path = models.FilePathField(path=str(PLUGINS_ROOT), match='*', recursive=True, allow_folders=True, allow_files=False)
|
||||||
|
|
||||||
|
# # replayers: list[Replayer] = SchemaField()
|
||||||
|
# # binaries: list[Replayer] = SchemaField()
|
||||||
|
# # extractors: list[Replayer] = SchemaField()
|
||||||
|
|
||||||
|
|
||||||
|
# # @classmethod
|
||||||
|
# # def from_loaded_plugin(cls, plugin: PluginSchema) -> Self:
|
||||||
|
# # new_obj = cls(
|
||||||
|
# # schema=plugin,
|
||||||
|
# # )
|
||||||
|
# # return new_obj
|
||||||
134
archivebox/plugantic/plugins.py
Normal file
134
archivebox/plugantic/plugins.py
Normal file
@@ -0,0 +1,134 @@
|
|||||||
|
__package__ = 'archivebox.plugantic'
|
||||||
|
|
||||||
|
from typing import List
|
||||||
|
from typing_extensions import Self
|
||||||
|
|
||||||
|
from pydantic import (
|
||||||
|
BaseModel,
|
||||||
|
ConfigDict,
|
||||||
|
Field,
|
||||||
|
model_validator,
|
||||||
|
validate_call,
|
||||||
|
SerializeAsAny,
|
||||||
|
)
|
||||||
|
|
||||||
|
from .binaries import (
|
||||||
|
Binary,
|
||||||
|
PythonBinary,
|
||||||
|
SqliteBinary,
|
||||||
|
DjangoBinary,
|
||||||
|
WgetBinary,
|
||||||
|
YtdlpBinary,
|
||||||
|
)
|
||||||
|
from .extractors import (
|
||||||
|
Extractor,
|
||||||
|
YtdlpExtractor,
|
||||||
|
WgetExtractor,
|
||||||
|
WarcExtractor,
|
||||||
|
)
|
||||||
|
from .replayers import (
|
||||||
|
Replayer,
|
||||||
|
GENERIC_REPLAYER,
|
||||||
|
MEDIA_REPLAYER,
|
||||||
|
)
|
||||||
|
from .configs import (
|
||||||
|
ConfigSet,
|
||||||
|
WGET_CONFIG,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class Plugin(BaseModel):
|
||||||
|
model_config = ConfigDict(arbitrary_types_allowed=True, extra='ignore', populate_by_name=True)
|
||||||
|
|
||||||
|
name: str = Field(default='baseplugin') # e.g. media
|
||||||
|
description: str = Field(default='') # e.g. get media using yt-dlp
|
||||||
|
|
||||||
|
configs: List[SerializeAsAny[ConfigSet]] = Field(default=[])
|
||||||
|
binaries: List[SerializeAsAny[Binary]] = Field(default=[]) # e.g. [Binary(name='yt-dlp')]
|
||||||
|
extractors: List[SerializeAsAny[Extractor]] = Field(default=[])
|
||||||
|
replayers: List[SerializeAsAny[Replayer]] = Field(default=[])
|
||||||
|
|
||||||
|
@model_validator(mode='after')
|
||||||
|
def validate(self):
|
||||||
|
self.description = self.description or self.name
|
||||||
|
|
||||||
|
@validate_call
|
||||||
|
def install(self) -> Self:
|
||||||
|
new_binaries = []
|
||||||
|
for idx, binary in enumerate(self.binaries):
|
||||||
|
new_binaries.append(binary.install() or binary)
|
||||||
|
return self.model_copy(update={
|
||||||
|
'binaries': new_binaries,
|
||||||
|
})
|
||||||
|
|
||||||
|
@validate_call
|
||||||
|
def load(self, cache=True) -> Self:
|
||||||
|
new_binaries = []
|
||||||
|
for idx, binary in enumerate(self.binaries):
|
||||||
|
new_binaries.append(binary.load(cache=cache) or binary)
|
||||||
|
return self.model_copy(update={
|
||||||
|
'binaries': new_binaries,
|
||||||
|
})
|
||||||
|
|
||||||
|
@validate_call
|
||||||
|
def load_or_install(self, cache=True) -> Self:
|
||||||
|
new_binaries = []
|
||||||
|
for idx, binary in enumerate(self.binaries):
|
||||||
|
new_binaries.append(binary.load_or_install(cache=cache) or binary)
|
||||||
|
return self.model_copy(update={
|
||||||
|
'binaries': new_binaries,
|
||||||
|
})
|
||||||
|
|
||||||
|
|
||||||
|
class CorePlugin(Plugin):
|
||||||
|
name: str = 'core'
|
||||||
|
configs: List[SerializeAsAny[ConfigSet]] = []
|
||||||
|
binaries: List[SerializeAsAny[Binary]] = [PythonBinary(), SqliteBinary(), DjangoBinary()]
|
||||||
|
extractors: List[SerializeAsAny[Extractor]] = []
|
||||||
|
replayers: List[SerializeAsAny[Replayer]] = [GENERIC_REPLAYER]
|
||||||
|
|
||||||
|
class YtdlpPlugin(Plugin):
|
||||||
|
name: str = 'ytdlp'
|
||||||
|
configs: List[SerializeAsAny[ConfigSet]] = []
|
||||||
|
binaries: List[SerializeAsAny[Binary]] = [YtdlpBinary()]
|
||||||
|
extractors: List[SerializeAsAny[Extractor]] = [YtdlpExtractor()]
|
||||||
|
replayers: List[SerializeAsAny[Replayer]] = [MEDIA_REPLAYER]
|
||||||
|
|
||||||
|
class WgetPlugin(Plugin):
|
||||||
|
name: str = 'wget'
|
||||||
|
configs: List[SerializeAsAny[ConfigSet]] = [*WGET_CONFIG]
|
||||||
|
binaries: List[SerializeAsAny[Binary]] = [WgetBinary()]
|
||||||
|
extractors: List[SerializeAsAny[Extractor]] = [WgetExtractor(), WarcExtractor()]
|
||||||
|
|
||||||
|
|
||||||
|
CORE_PLUGIN = CorePlugin()
|
||||||
|
YTDLP_PLUGIN = YtdlpPlugin()
|
||||||
|
WGET_PLUGIN = WgetPlugin()
|
||||||
|
PLUGINS = [
|
||||||
|
CORE_PLUGIN,
|
||||||
|
YTDLP_PLUGIN,
|
||||||
|
WGET_PLUGIN,
|
||||||
|
]
|
||||||
|
LOADED_PLUGINS = PLUGINS
|
||||||
|
|
||||||
|
|
||||||
|
import json
|
||||||
|
|
||||||
|
for plugin in PLUGINS:
|
||||||
|
try:
|
||||||
|
json.dumps(plugin.model_json_schema(), indent=4)
|
||||||
|
# print(json.dumps(plugin.model_json_schema(), indent=4))
|
||||||
|
except Exception as err:
|
||||||
|
print(f'Failed to generate JSON schema for {plugin.name}')
|
||||||
|
raise
|
||||||
|
|
||||||
|
# print('-------------------------------------BEFORE INSTALL---------------------------------')
|
||||||
|
# for plugin in PLUGINS:
|
||||||
|
# print(plugin.model_dump_json(indent=4))
|
||||||
|
# print('-------------------------------------DURING LOAD/INSTALL---------------------------------')
|
||||||
|
# for plugin in PLUGINS:
|
||||||
|
# LOADED_PLUGINS.append(plugin.install())
|
||||||
|
# print('-------------------------------------AFTER INSTALL---------------------------------')
|
||||||
|
# for plugin in LOADED_PLUGINS:
|
||||||
|
# print(plugin.model_dump_json(indent=4))
|
||||||
|
|
||||||
26
archivebox/plugantic/replayers.py
Normal file
26
archivebox/plugantic/replayers.py
Normal file
@@ -0,0 +1,26 @@
|
|||||||
|
__package__ = 'archivebox.plugantic'
|
||||||
|
|
||||||
|
|
||||||
|
from pydantic import BaseModel
|
||||||
|
|
||||||
|
# from .binproviders import LazyImportStr
|
||||||
|
|
||||||
|
|
||||||
|
class Replayer(BaseModel):
|
||||||
|
"""Describes how to render an ArchiveResult in several contexts"""
|
||||||
|
name: str = 'GenericReplayer'
|
||||||
|
url_pattern: str = '*'
|
||||||
|
|
||||||
|
row_template: str = 'plugins/generic_replayer/templates/row.html'
|
||||||
|
embed_template: str = 'plugins/generic_replayer/templates/embed.html'
|
||||||
|
fullpage_template: str = 'plugins/generic_replayer/templates/fullpage.html'
|
||||||
|
|
||||||
|
# row_view: LazyImportStr = 'plugins.generic_replayer.views.row_view'
|
||||||
|
# embed_view: LazyImportStr = 'plugins.generic_replayer.views.embed_view'
|
||||||
|
# fullpage_view: LazyImportStr = 'plugins.generic_replayer.views.fullpage_view'
|
||||||
|
# icon_view: LazyImportStr = 'plugins.generic_replayer.views.get_icon'
|
||||||
|
# thumbnail_view: LazyImportStr = 'plugins.generic_replayer.views.get_icon'
|
||||||
|
|
||||||
|
|
||||||
|
GENERIC_REPLAYER = Replayer(name='generic')
|
||||||
|
MEDIA_REPLAYER = Replayer(name='media')
|
||||||
3
archivebox/plugantic/tests.py
Normal file
3
archivebox/plugantic/tests.py
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
from django.test import TestCase
|
||||||
|
|
||||||
|
# Create your tests here.
|
||||||
169
archivebox/plugantic/views.py
Normal file
169
archivebox/plugantic/views.py
Normal file
@@ -0,0 +1,169 @@
|
|||||||
|
__package__ = 'archivebox.plugantic'
|
||||||
|
|
||||||
|
from django.http import HttpRequest
|
||||||
|
from django.utils.html import format_html, mark_safe
|
||||||
|
|
||||||
|
from admin_data_views.typing import TableContext, ItemContext
|
||||||
|
from admin_data_views.utils import render_with_table_view, render_with_item_view, ItemLink
|
||||||
|
|
||||||
|
|
||||||
|
from plugantic.plugins import LOADED_PLUGINS
|
||||||
|
from django.conf import settings
|
||||||
|
|
||||||
|
|
||||||
|
@render_with_table_view
|
||||||
|
def binaries_list_view(request: HttpRequest, **kwargs) -> TableContext:
|
||||||
|
|
||||||
|
assert request.user.is_superuser, 'Must be a superuser to view configuration settings.'
|
||||||
|
|
||||||
|
rows = {
|
||||||
|
"Binary": [],
|
||||||
|
"From Plugin": [],
|
||||||
|
"Found Version": [],
|
||||||
|
"Provided By": [],
|
||||||
|
"Found Abspath": [],
|
||||||
|
"Related Configuration": [],
|
||||||
|
"Overrides": [],
|
||||||
|
"Description": [],
|
||||||
|
}
|
||||||
|
|
||||||
|
relevant_configs = {
|
||||||
|
key: val
|
||||||
|
for key, val in settings.CONFIG.items()
|
||||||
|
if '_BINARY' in key or '_VERSION' in key
|
||||||
|
}
|
||||||
|
|
||||||
|
for plugin in LOADED_PLUGINS:
|
||||||
|
for binary in plugin.binaries:
|
||||||
|
binary = binary.load_or_install()
|
||||||
|
|
||||||
|
rows['Binary'].append(ItemLink(binary.name, key=binary.name))
|
||||||
|
rows['From Plugin'].append(plugin.name)
|
||||||
|
rows['Found Version'].append(binary.loaded_version)
|
||||||
|
rows['Provided By'].append(binary.loaded_provider)
|
||||||
|
rows['Found Abspath'].append(binary.loaded_abspath)
|
||||||
|
rows['Related Configuration'].append(mark_safe(', '.join(
|
||||||
|
f'<a href="/admin/environment/config/{config_key}/">{config_key}</a>'
|
||||||
|
for config_key, config_value in relevant_configs.items()
|
||||||
|
if binary.name.lower().replace('-', '').replace('_', '').replace('ytdlp', 'youtubedl') in config_key.lower()
|
||||||
|
# or binary.name.lower().replace('-', '').replace('_', '') in str(config_value).lower()
|
||||||
|
)))
|
||||||
|
rows['Overrides'].append(str(binary.provider_overrides))
|
||||||
|
rows['Description'].append(binary.description)
|
||||||
|
|
||||||
|
return TableContext(
|
||||||
|
title="Binaries",
|
||||||
|
table=rows,
|
||||||
|
)
|
||||||
|
|
||||||
|
@render_with_item_view
|
||||||
|
def binary_detail_view(request: HttpRequest, key: str, **kwargs) -> ItemContext:
|
||||||
|
|
||||||
|
assert request.user.is_superuser, 'Must be a superuser to view configuration settings.'
|
||||||
|
|
||||||
|
binary = None
|
||||||
|
plugin = None
|
||||||
|
for loaded_plugin in LOADED_PLUGINS:
|
||||||
|
for loaded_binary in loaded_plugin.binaries:
|
||||||
|
if loaded_binary.name == key:
|
||||||
|
binary = loaded_binary
|
||||||
|
plugin = loaded_plugin
|
||||||
|
|
||||||
|
assert plugin and binary, f'Could not find a binary matching the specified name: {key}'
|
||||||
|
|
||||||
|
binary = binary.load_or_install()
|
||||||
|
|
||||||
|
return ItemContext(
|
||||||
|
slug=key,
|
||||||
|
title=key,
|
||||||
|
data=[
|
||||||
|
{
|
||||||
|
"name": binary.name,
|
||||||
|
"description": binary.description,
|
||||||
|
"fields": {
|
||||||
|
'plugin': plugin.name,
|
||||||
|
'binprovider': binary.loaded_provider,
|
||||||
|
'abspath': binary.loaded_abspath,
|
||||||
|
'version': binary.loaded_version,
|
||||||
|
'overrides': str(binary.provider_overrides),
|
||||||
|
'providers': str(binary.providers_supported),
|
||||||
|
},
|
||||||
|
"help_texts": {
|
||||||
|
# TODO
|
||||||
|
},
|
||||||
|
},
|
||||||
|
],
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
@render_with_table_view
|
||||||
|
def plugins_list_view(request: HttpRequest, **kwargs) -> TableContext:
|
||||||
|
|
||||||
|
assert request.user.is_superuser, 'Must be a superuser to view configuration settings.'
|
||||||
|
|
||||||
|
rows = {
|
||||||
|
"Name": [],
|
||||||
|
"binaries": [],
|
||||||
|
"extractors": [],
|
||||||
|
"replayers": [],
|
||||||
|
"configs": [],
|
||||||
|
"description": [],
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
for plugin in LOADED_PLUGINS:
|
||||||
|
plugin = plugin.load_or_install()
|
||||||
|
|
||||||
|
rows['Name'].append(ItemLink(plugin.name, key=plugin.name))
|
||||||
|
rows['binaries'].append(mark_safe(', '.join(
|
||||||
|
f'<a href="/admin/environment/binaries/{binary.name}/">{binary.name}</a>'
|
||||||
|
for binary in plugin.binaries
|
||||||
|
)))
|
||||||
|
rows['extractors'].append(', '.join(extractor.name for extractor in plugin.extractors))
|
||||||
|
rows['replayers'].append(', '.join(replayer.name for replayer in plugin.replayers))
|
||||||
|
rows['configs'].append(mark_safe(', '.join(
|
||||||
|
f'<a href="/admin/environment/config/{config_key}/">{config_key}</a>'
|
||||||
|
for configset in plugin.configs
|
||||||
|
for config_key in configset.__fields__.keys()
|
||||||
|
if config_key != 'section' and config_key in settings.CONFIG
|
||||||
|
)))
|
||||||
|
rows['description'].append(str(plugin.description))
|
||||||
|
|
||||||
|
return TableContext(
|
||||||
|
title="Installed plugins",
|
||||||
|
table=rows,
|
||||||
|
)
|
||||||
|
|
||||||
|
@render_with_item_view
|
||||||
|
def plugin_detail_view(request: HttpRequest, key: str, **kwargs) -> ItemContext:
|
||||||
|
|
||||||
|
assert request.user.is_superuser, 'Must be a superuser to view configuration settings.'
|
||||||
|
|
||||||
|
plugin = None
|
||||||
|
for loaded_plugin in LOADED_PLUGINS:
|
||||||
|
if loaded_plugin.name == key:
|
||||||
|
plugin = loaded_plugin
|
||||||
|
|
||||||
|
assert plugin, f'Could not find a plugin matching the specified name: {key}'
|
||||||
|
|
||||||
|
plugin = plugin.load_or_install()
|
||||||
|
|
||||||
|
return ItemContext(
|
||||||
|
slug=key,
|
||||||
|
title=key,
|
||||||
|
data=[
|
||||||
|
{
|
||||||
|
"name": plugin.name,
|
||||||
|
"description": plugin.description,
|
||||||
|
"fields": {
|
||||||
|
'configs': plugin.configs,
|
||||||
|
'binaries': plugin.binaries,
|
||||||
|
'extractors': plugin.extractors,
|
||||||
|
'replayers': plugin.replayers,
|
||||||
|
},
|
||||||
|
"help_texts": {
|
||||||
|
# TODO
|
||||||
|
},
|
||||||
|
},
|
||||||
|
],
|
||||||
|
)
|
||||||
@@ -39,7 +39,7 @@ def write_search_index(link: Link, texts: Union[List[str], None]=None, out_dir:
|
|||||||
backend = import_backend()
|
backend = import_backend()
|
||||||
if snap:
|
if snap:
|
||||||
try:
|
try:
|
||||||
backend.index(snapshot_id=str(snap.id), texts=texts)
|
backend.index(snapshot_id=str(snap.pk), texts=texts)
|
||||||
except Exception as err:
|
except Exception as err:
|
||||||
stderr()
|
stderr()
|
||||||
stderr(
|
stderr(
|
||||||
@@ -54,7 +54,7 @@ def query_search_index(query: str, out_dir: Path=OUTPUT_DIR) -> QuerySet:
|
|||||||
if search_backend_enabled():
|
if search_backend_enabled():
|
||||||
backend = import_backend()
|
backend = import_backend()
|
||||||
try:
|
try:
|
||||||
snapshot_ids = backend.search(query)
|
snapshot_pks = backend.search(query)
|
||||||
except Exception as err:
|
except Exception as err:
|
||||||
stderr()
|
stderr()
|
||||||
stderr(
|
stderr(
|
||||||
@@ -64,7 +64,7 @@ def query_search_index(query: str, out_dir: Path=OUTPUT_DIR) -> QuerySet:
|
|||||||
raise
|
raise
|
||||||
else:
|
else:
|
||||||
# TODO preserve ordering from backend
|
# TODO preserve ordering from backend
|
||||||
qsearch = Snapshot.objects.filter(pk__in=snapshot_ids)
|
qsearch = Snapshot.objects.filter(pk__in=snapshot_pks)
|
||||||
return qsearch
|
return qsearch
|
||||||
|
|
||||||
return Snapshot.objects.none()
|
return Snapshot.objects.none()
|
||||||
@@ -74,9 +74,9 @@ def flush_search_index(snapshots: QuerySet):
|
|||||||
if not indexing_enabled() or not snapshots:
|
if not indexing_enabled() or not snapshots:
|
||||||
return
|
return
|
||||||
backend = import_backend()
|
backend = import_backend()
|
||||||
snapshot_ids=(str(pk) for pk in snapshots.values_list('pk',flat=True))
|
snapshot_pks = (str(pk) for pk in snapshots.values_list('pk', flat=True))
|
||||||
try:
|
try:
|
||||||
backend.flush(snapshot_ids)
|
backend.flush(snapshot_pks)
|
||||||
except Exception as err:
|
except Exception as err:
|
||||||
stderr()
|
stderr()
|
||||||
stderr(
|
stderr(
|
||||||
|
|||||||
@@ -277,10 +277,22 @@
|
|||||||
$(this).parents('.card').removeClass('selected-card')
|
$(this).parents('.card').removeClass('selected-card')
|
||||||
})
|
})
|
||||||
};
|
};
|
||||||
|
function selectSnapshotIfHotlinked() {
|
||||||
|
// if we arrive at the index with a url like ??id__startswith=...
|
||||||
|
// we were hotlinked here with the intention of making it easy for the user to perform some
|
||||||
|
// actions on the given snapshot. therefore we should preselect the snapshot to save them a click
|
||||||
|
if (window.location.search.startsWith('?id__startswith=') || window.location.search.startsWith('?id__exact=')) {
|
||||||
|
const result_checkboxes = [...document.querySelectorAll('#result_list .action-checkbox input[type=checkbox]')]
|
||||||
|
if (result_checkboxes.length === 1) {
|
||||||
|
result_checkboxes[0].click()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
$(document).ready(function() {
|
$(document).ready(function() {
|
||||||
fix_actions()
|
fix_actions()
|
||||||
setupSnapshotGridListToggle()
|
setupSnapshotGridListToggle()
|
||||||
setTimeOffset()
|
setTimeOffset()
|
||||||
|
selectSnapshotIfHotlinked()
|
||||||
})
|
})
|
||||||
</script>
|
</script>
|
||||||
</body>
|
</body>
|
||||||
|
|||||||
@@ -147,7 +147,7 @@
|
|||||||
{% for obj in results %}
|
{% for obj in results %}
|
||||||
<div class="card">
|
<div class="card">
|
||||||
<div class="card-info">
|
<div class="card-info">
|
||||||
<a href="{% url 'admin:core_snapshot_change' obj.id %}">
|
<a href="{% url 'admin:core_snapshot_change' obj.pk %}">
|
||||||
<span class="timestamp">{{obj.added}}</span>
|
<span class="timestamp">{{obj.added}}</span>
|
||||||
</a>
|
</a>
|
||||||
<label>
|
<label>
|
||||||
|
|||||||
545
archivebox/templates/core/snapshot_live.html
Normal file
545
archivebox/templates/core/snapshot_live.html
Normal file
@@ -0,0 +1,545 @@
|
|||||||
|
{% load static tz core_tags %}
|
||||||
|
|
||||||
|
<!DOCTYPE html>
|
||||||
|
<html lang="en">
|
||||||
|
<head>
|
||||||
|
<title>{{title}}</title>
|
||||||
|
<meta charset="utf-8" name="viewport" content="width=device-width, initial-scale=1">
|
||||||
|
<link href="{% static 'bootstrap.min.css' %}" rel="stylesheet">
|
||||||
|
<style>
|
||||||
|
/* Keep this inline, don't move to external css file because this template is used to generate static exports that need to be usable as-is without an accompanying staticfiles dir */
|
||||||
|
html, body {
|
||||||
|
width: 100%;
|
||||||
|
height: 100%;
|
||||||
|
background-color: #ddd;
|
||||||
|
}
|
||||||
|
header {
|
||||||
|
background-color: #aa1e55;
|
||||||
|
}
|
||||||
|
small {
|
||||||
|
font-weight: 200;
|
||||||
|
}
|
||||||
|
header a:hover {
|
||||||
|
text-decoration: none;
|
||||||
|
}
|
||||||
|
.header-top {
|
||||||
|
width: 100%;
|
||||||
|
height: auto;
|
||||||
|
min-height: 40px;
|
||||||
|
margin: 0px;
|
||||||
|
text-align: center;
|
||||||
|
color: #f6f6f6;
|
||||||
|
font-size: calc(10px + 0.84vw);
|
||||||
|
font-weight: 200;
|
||||||
|
padding: 3px 4px;
|
||||||
|
background-color: #aa1e55;
|
||||||
|
}
|
||||||
|
.header-top .nav {
|
||||||
|
width: 100%;
|
||||||
|
}
|
||||||
|
.nav > div {
|
||||||
|
min-height: 30px;
|
||||||
|
line-height: 1.2;
|
||||||
|
}
|
||||||
|
.header-top .header-url {
|
||||||
|
display: inline-block;
|
||||||
|
width: 100%;
|
||||||
|
background-color: rgb(216, 216, 235, 0.05);
|
||||||
|
text-align: center;
|
||||||
|
line-height: 1.3;
|
||||||
|
font-family: monospace;
|
||||||
|
white-space: nowrap;
|
||||||
|
font-weight: 200;
|
||||||
|
display: block;
|
||||||
|
margin-top: -1px;
|
||||||
|
font-size: 23px;
|
||||||
|
opacity: 0.8;
|
||||||
|
border-radius: 0px 0px 8px 8px;
|
||||||
|
}
|
||||||
|
.header-top .header-url a.header-url-text {
|
||||||
|
color: #f6f6f6;
|
||||||
|
user-select: all;
|
||||||
|
text-overflow: ellipsis;
|
||||||
|
}
|
||||||
|
.header-top .header-url a.header-url-text:hover {
|
||||||
|
color: rgb(144, 161, 255);
|
||||||
|
}
|
||||||
|
.header-top a {
|
||||||
|
text-decoration: none;
|
||||||
|
color: rgba(0,0,0,0.6);
|
||||||
|
}
|
||||||
|
.header-top a:hover {
|
||||||
|
text-decoration: none;
|
||||||
|
color: rgba(0,0,0,0.9);
|
||||||
|
}
|
||||||
|
.header-top .header-title {
|
||||||
|
color: rgba(0,0,0,0.6);
|
||||||
|
}
|
||||||
|
.header-top .favicon {
|
||||||
|
height: 24px;
|
||||||
|
vertical-align: -5px;
|
||||||
|
margin-right: 4px;
|
||||||
|
}
|
||||||
|
.header-top .col-lg-4 {
|
||||||
|
text-align: center;
|
||||||
|
padding-top: 4px;
|
||||||
|
padding-bottom: 4px;
|
||||||
|
}
|
||||||
|
.header-archivebox img {
|
||||||
|
display: inline-block;
|
||||||
|
margin-right: 3px;
|
||||||
|
height: 30px;
|
||||||
|
margin-left: 12px;
|
||||||
|
margin-top: -4px;
|
||||||
|
margin-bottom: 2px;
|
||||||
|
}
|
||||||
|
.header-archivebox img:hover {
|
||||||
|
opacity: 0.5;
|
||||||
|
}
|
||||||
|
header small code {
|
||||||
|
white-space: nowrap;
|
||||||
|
font-weight: 200;
|
||||||
|
display: block;
|
||||||
|
margin-top: -1px;
|
||||||
|
font-size: 13px;
|
||||||
|
opacity: 0.8;
|
||||||
|
user-select: all;
|
||||||
|
}
|
||||||
|
.header-toggle {
|
||||||
|
line-height: 12px;
|
||||||
|
font-size: 70px;
|
||||||
|
vertical-align: -12px;
|
||||||
|
margin-left: 4px;
|
||||||
|
}
|
||||||
|
|
||||||
|
.info-row {
|
||||||
|
margin-top: 2px;
|
||||||
|
margin-bottom: 5px;
|
||||||
|
}
|
||||||
|
.info-row .alert {
|
||||||
|
margin-bottom: 0px;
|
||||||
|
}
|
||||||
|
.row.header-bottom {
|
||||||
|
margin-left: -10px;
|
||||||
|
margin-right: -10px;
|
||||||
|
}
|
||||||
|
.header-bottom .col-lg-2 {
|
||||||
|
padding-left: 4px;
|
||||||
|
padding-right: 4px;
|
||||||
|
}
|
||||||
|
|
||||||
|
.header-bottom-frames .card {
|
||||||
|
box-shadow: 2px 2px 7px 0px rgba(0, 0, 0, 0.1);
|
||||||
|
margin-bottom: 5px;
|
||||||
|
border: 1px solid rgba(0, 0, 0, 0.06);
|
||||||
|
border-radius: 10px;
|
||||||
|
background-color: #efefef;
|
||||||
|
overflow: hidden;
|
||||||
|
height: 130px;
|
||||||
|
}
|
||||||
|
.card h4 {
|
||||||
|
font-size: 0.8em;
|
||||||
|
display: inline-block;
|
||||||
|
width: auto;
|
||||||
|
text-transform: uppercase;
|
||||||
|
margin-top: 0px;
|
||||||
|
margin-bottom: 5px;
|
||||||
|
color: rgb(93, 105, 110);
|
||||||
|
}
|
||||||
|
.card-body {
|
||||||
|
font-size: 14px;
|
||||||
|
padding: 4px 10px;
|
||||||
|
padding-bottom: 0px;
|
||||||
|
/* padding-left: 3px; */
|
||||||
|
/* padding-right: 3px; */
|
||||||
|
/* padding-bottom: 3px; */
|
||||||
|
line-height: 1;
|
||||||
|
word-wrap: break-word;
|
||||||
|
max-height: 102px;
|
||||||
|
overflow: hidden;
|
||||||
|
text-overflow: ellipsis;
|
||||||
|
color: #d3d3d3;
|
||||||
|
}
|
||||||
|
.card-title {
|
||||||
|
margin-bottom: 4px;
|
||||||
|
text-transform: uppercase;
|
||||||
|
}
|
||||||
|
.card-img-top {
|
||||||
|
border: 0px;
|
||||||
|
padding: 0px;
|
||||||
|
margin: 0px;
|
||||||
|
overflow: hidden;
|
||||||
|
opacity: 0.8;
|
||||||
|
border-top: 1px solid rgba(0,0,0,0);
|
||||||
|
border-radius: 4px;
|
||||||
|
border-bottom: 1px solid rgba(0,0,0,0);
|
||||||
|
height: 430px;
|
||||||
|
width: 405%;
|
||||||
|
margin-bottom: -330px;
|
||||||
|
background-color: #333;
|
||||||
|
margin-left: -1%;
|
||||||
|
margin-right: -1%;
|
||||||
|
pointer-events: none;
|
||||||
|
|
||||||
|
transform: scale(0.25);
|
||||||
|
transform-origin: 0 0;
|
||||||
|
}
|
||||||
|
#main-frame {
|
||||||
|
border-top: 1px solid #ddd;
|
||||||
|
width: 100%;
|
||||||
|
height: calc(100vh - 210px);
|
||||||
|
margin: 0px;
|
||||||
|
border: 0px;
|
||||||
|
border-top: 3px solid #aa1e55;
|
||||||
|
}
|
||||||
|
.card.selected-card {
|
||||||
|
border: 2px solid orange;
|
||||||
|
box-shadow: 0px -6px 13px 1px rgba(0,0,0,0.05);
|
||||||
|
}
|
||||||
|
.iframe-large {
|
||||||
|
height: calc(100vh - 70px);
|
||||||
|
}
|
||||||
|
img.external {
|
||||||
|
height: 30px;
|
||||||
|
margin-right: -10px;
|
||||||
|
padding: 3px;
|
||||||
|
border-radius: 4px;
|
||||||
|
vertical-align: middle;
|
||||||
|
border: 4px solid rgba(0,0,0,0);
|
||||||
|
}
|
||||||
|
img.external:hover {
|
||||||
|
border: 4px solid green;
|
||||||
|
}
|
||||||
|
.screenshot {
|
||||||
|
background-color: #333;
|
||||||
|
transform: none;
|
||||||
|
width: 100%;
|
||||||
|
min-height: 100px;
|
||||||
|
max-height: 100px;
|
||||||
|
margin-bottom: 0px;
|
||||||
|
object-fit: cover;
|
||||||
|
object-position: top center;
|
||||||
|
}
|
||||||
|
.header-bottom {
|
||||||
|
border-top: 1px solid rgba(170, 30, 85, 0.9);
|
||||||
|
padding-bottom: 1px;
|
||||||
|
border-bottom: 5px solid rgb(170, 30, 85);
|
||||||
|
margin-bottom: -1px;
|
||||||
|
|
||||||
|
border-radius: 0px;
|
||||||
|
background-color: #f4eeee;
|
||||||
|
border: 1px solid rgba(0,0,0,0.2);
|
||||||
|
box-shadow: 4px 4px 4px rgba(0,0,0,0.2);
|
||||||
|
margin-top: 0px;
|
||||||
|
}
|
||||||
|
.header-bottom-info {
|
||||||
|
color: #6f6f6f;
|
||||||
|
padding-top: 0px;
|
||||||
|
padding-bottom: 0px;
|
||||||
|
margin: 0px -15px;
|
||||||
|
}
|
||||||
|
|
||||||
|
.header-bottom-info > div {
|
||||||
|
text-align: center;
|
||||||
|
}
|
||||||
|
.header-bottom-info h5 {
|
||||||
|
font-size: 12px;
|
||||||
|
font-weight: 400;
|
||||||
|
margin-top: 3px;
|
||||||
|
margin-bottom: 3px;
|
||||||
|
}
|
||||||
|
.info-chunk {
|
||||||
|
width: auto;
|
||||||
|
display: inline-block;
|
||||||
|
text-align: center;
|
||||||
|
margin: 8px 4px;
|
||||||
|
vertical-align: top;
|
||||||
|
font-size: 14px;
|
||||||
|
}
|
||||||
|
header .badge {
|
||||||
|
margin-top: 3px;
|
||||||
|
font-size: 0.9rem;
|
||||||
|
font-weight: 200;
|
||||||
|
font-family: monospace;
|
||||||
|
}
|
||||||
|
header .internal-links {
|
||||||
|
text-align: left;
|
||||||
|
opacity: 1;
|
||||||
|
background-color: rgba(0,0,0,0.03);
|
||||||
|
padding: 1px 3px;
|
||||||
|
}
|
||||||
|
header .external-links {
|
||||||
|
text-align: center;
|
||||||
|
opacity: 0.9;
|
||||||
|
/*background-color: rgba(0,0,0,0.03);*/
|
||||||
|
margin-top: 0px;
|
||||||
|
padding: 1px 3px;
|
||||||
|
font-size: 14px;
|
||||||
|
color: #ddd;
|
||||||
|
width: 100%;
|
||||||
|
overflow: hidden;
|
||||||
|
}
|
||||||
|
.header-bottom-frames {
|
||||||
|
padding-top: 5px;
|
||||||
|
justify-content: center;
|
||||||
|
}
|
||||||
|
.header-bottom-frames .card-title {
|
||||||
|
width: 100%;
|
||||||
|
text-align: center;
|
||||||
|
font-size: 17px;
|
||||||
|
margin-bottom: 0px;
|
||||||
|
display: inline-block;
|
||||||
|
color: #d3d3d3;
|
||||||
|
font-weight: 200;
|
||||||
|
vertical-align: 3px;
|
||||||
|
}
|
||||||
|
.header-bottom-frames .card-text {
|
||||||
|
/* width: 100%;
|
||||||
|
text-align: center;*/
|
||||||
|
font-size: 0.9em;
|
||||||
|
display: inline-block;
|
||||||
|
position: relative;
|
||||||
|
/* top: -11px;*/
|
||||||
|
}
|
||||||
|
.card-text code {
|
||||||
|
padding: .1rem .2rem;
|
||||||
|
font-size: 90%;
|
||||||
|
color: #bd4147;
|
||||||
|
background-color: rgb(204, 204, 204, 0.28);
|
||||||
|
border-radius: .25rem;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*@media(max-width: 1092px) {
|
||||||
|
iframe {
|
||||||
|
display: none;
|
||||||
|
}
|
||||||
|
}*/
|
||||||
|
|
||||||
|
|
||||||
|
@media(max-width: 728px) {
|
||||||
|
.card h4 {
|
||||||
|
font-size: 5vw;
|
||||||
|
}
|
||||||
|
.card-body {
|
||||||
|
font-size: 4vw;
|
||||||
|
}
|
||||||
|
.card {
|
||||||
|
margin-bottom: 5px;
|
||||||
|
}
|
||||||
|
header > h1 > a.header-url, header > h1 > a.header-archivebox {
|
||||||
|
display: none;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
</style>
|
||||||
|
</head>
|
||||||
|
<body>
|
||||||
|
<header>
|
||||||
|
<div class="header-top container-fluid">
|
||||||
|
<div class="row nav">
|
||||||
|
<div class="col-lg-2" style="line-height: 58px; vertical-align: middle">
|
||||||
|
<a href="../../index.html" class="header-archivebox" title="Go to Main Index...">
|
||||||
|
<img src="../../static/archive.png" alt="Archive Icon">
|
||||||
|
ArchiveBox
|
||||||
|
</a>
|
||||||
|
</div>
|
||||||
|
<div class="col-lg-8">
|
||||||
|
<div class="header-url">
|
||||||
|
<a class="header-url-text" href="{{url}}" title="Open original URL in new window..." target="_blank" rel="noreferrer">
|
||||||
|
{{url}}
|
||||||
|
</a>
|
||||||
|
</div>
|
||||||
|
<div class="badge badge-{{status_color}}" style="float: left">
|
||||||
|
<a href="/admin/core/snapshot/?id__startswith={{snapshot_id}}" title="Click to see options to pull, re-snapshot, or delete this Snapshot">
|
||||||
|
{{status|upper}}
|
||||||
|
</a>
|
||||||
|
</div>
|
||||||
|
<div class="badge badge-default" style="float: left; font-weight: 200">
|
||||||
|
{{num_outputs}}
|
||||||
|
{% if num_failures %}
|
||||||
|
+ {{num_failures}} <small>errors</small>
|
||||||
|
{% endif %}
|
||||||
|
</div>
|
||||||
|
<div class="badge badge-info" style="float: right">
|
||||||
|
<a href="/admin/core/snapshot/{{snapshot_id}}/change/" title="Click to edit this Snapshot in the Admin UI">
|
||||||
|
{{size}}
|
||||||
|
</a>
|
||||||
|
</div>
|
||||||
|
<div class="badge badge-default" style="float: right">
|
||||||
|
<a href="/admin/core/snapshot/{{snapshot_id}}/change/" title="Click to edit this Snapshot in the Admin UI">
|
||||||
|
{{extension}}
|
||||||
|
</a>
|
||||||
|
</div>
|
||||||
|
<small class="header-title header-toggle-trigger">
|
||||||
|
<img src="favicon.ico" onerror="this.style.opacity=0" alt="Favicon" class="favicon"/>
|
||||||
|
{{title|truncatechars:120|safe}} <a href="#" class="header-toggle header-toggle-trigger">▾</a>
|
||||||
|
<br/>
|
||||||
|
{% for tag in tags_str|split:',' %}
|
||||||
|
<div class="badge badge-default tag" style="word-break: break-all;">{{tag}}</div>
|
||||||
|
{% endfor %}
|
||||||
|
</small>
|
||||||
|
</div>
|
||||||
|
<div class="col-lg-2" style="padding-top: 4px">
|
||||||
|
<a href="/archive/{{url}}" title="Date Added: {{bookmarked_date}} | First Archived: {{oldest_archive_date|default:updated_date}} | Last Checked: {{updated_date}} (UTC)">
|
||||||
|
{{oldest_archive_date|default:updated_date|default:bookmarked_date}}
|
||||||
|
</a>
|
||||||
|
<br/>
|
||||||
|
<div class="external-links">
|
||||||
|
↗️
|
||||||
|
<a href="https://web.archive.org/web/{{url}}" title="Search for a copy of the URL saved in Archive.org" target="_blank" rel="noreferrer">Archive.org</a> |
|
||||||
|
<a href="https://archive.md/{{url}}" title="Search for a copy of the URL saved in Archive.today" target="_blank" rel="noreferrer">Archive.today</a> |
|
||||||
|
<a href="{{warc_path}}" title="Download the ArchiveBox-generated WARC file" target="_blank">WARC</a>
|
||||||
|
<!--<a href="https://ghostarchive.org/search?term={{url|urlencode}}" title="Search for a copy of the URL saved in GhostArchive.org" target="_blank" rel="noreferrer">More...</a>-->
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
<div class="header-bottom container-fluid">
|
||||||
|
<div class="row header-bottom-frames">
|
||||||
|
{% for result in archiveresults %}
|
||||||
|
<div class="col-lg-2">
|
||||||
|
<div class="card {% if forloop.first %}selected-card{% endif %}">
|
||||||
|
<div class="card-body">
|
||||||
|
<a href="{{result.path}}" target="preview" title="./{{result.path}} (downloaded {{result.ts}})">
|
||||||
|
<h4>{{result.name}}</h4>
|
||||||
|
<!-- <p class="card-text" ><code>./{{result.path|truncatechars:30}}</code></p> -->
|
||||||
|
</a>
|
||||||
|
<!--<a href="{{result.path}}" target="preview"><h4 class="card-title">{{result.name}}</h4></a>-->
|
||||||
|
</div>
|
||||||
|
<iframe class="card-img-top" src="{{result.path}}" sandbox="allow-same-origin allow-top-navigation-by-user-activation allow-scripts allow-forms" scrolling="no" loading="lazy"></iframe>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
{% endfor %}
|
||||||
|
<div class="col-lg-2">
|
||||||
|
<div class="card">
|
||||||
|
<div class="card-body">
|
||||||
|
<a href="./" target="preview">
|
||||||
|
<h4>Headers, JSON, etc.</h4>
|
||||||
|
</a>
|
||||||
|
<!--<a href="{{result.path}}" target="preview"><h4 class="card-title">{{result.name}}</h4></a>-->
|
||||||
|
</div>
|
||||||
|
<iframe class="card-img-top" src="./" sandbox="" scrolling="no" loading="lazy"></iframe>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</header>
|
||||||
|
<iframe id="main-frame" sandbox="allow-same-origin allow-top-navigation-by-user-activation allow-scripts allow-forms" class="full-page-iframe" src="{{best_result.path}}" name="preview"></iframe>
|
||||||
|
|
||||||
|
<script src="{% static 'jquery.min.js' %}" type="text/javascript"></script>
|
||||||
|
|
||||||
|
<script>
|
||||||
|
// Un-sandbox iframes that show PDFs (the browser's built-in PDF viewer
// refuses to render inside a sandboxed frame) and hide its toolbar.
// Note: after the first rewrite the src ends in '#toolbar=0', not '.pdf',
// so the onload pass is a no-op for already-adjusted frames.
const adjustPdfFrame = function (frame) {
    if (frame.src.endsWith('.pdf')) {
        frame.removeAttribute('sandbox')
        frame.src = frame.src + '#toolbar=0'
    }
}
jQuery('iframe').each(function () {
    adjustPdfFrame(this)
    this.onload = function () {
        // reset scroll position once the frame's document is ready
        this.contentWindow.scrollTo(0, 0);
        adjustPdfFrame(this)
    }
})
|
||||||
|
|
||||||
|
function getPreviewTypeFromPath(link) {
|
||||||
|
if (link.getAttribute('href') == './') {
|
||||||
|
return 'all'
|
||||||
|
}
|
||||||
|
return link.getAttribute('href')
|
||||||
|
}
|
||||||
|
|
||||||
|
const iframe_elem = document.getElementById('main-frame')

// When a result card is clicked: highlight it, relax the main iframe's
// sandbox for PDFs (required for the built-in PDF viewer), sync the URL
// hash for deep-linking, then load the result into the preview iframe.
for (const card of document.querySelectorAll('.card')) {
    card.addEventListener('click', (event) => {
        const link = event.currentTarget.querySelector('a')
        const target = link.href

        // move the selection highlight onto the clicked card
        jQuery('.selected-card').removeClass('selected-card')
        jQuery(event.currentTarget).closest('.card').addClass('selected-card')

        const mainFrame = jQuery('#main-frame')[0]
        if (target.endsWith('.pdf')) {
            mainFrame.removeAttribute('sandbox')
        } else {
            mainFrame.sandbox = "allow-same-origin allow-top-navigation-by-user-activation allow-scripts allow-forms"
        }
        window.location.hash = getPreviewTypeFromPath(link)

        iframe_elem.src = target
    })
}
|
||||||
|
|
||||||
|
|
||||||
|
// --- Snapshot header collapse/expand controls ---------------------------
// The expanded/collapsed state is mirrored into localStorage (best-effort:
// it may be unavailable, e.g. in private browsing) so it survives reloads.

function _persistSnapshotHeaderVisible(value) {
    try {
        localStorage.setItem("archivebox-snapshot-header-visible", value)
    } catch (e) {
        console.log('Could not use localStorage to persist header collapse state', e)
    }
}

function hideSnapshotHeader() {
    console.log('Collapsing Snapshot header...')
    jQuery('.header-toggle').text('▸')
    jQuery('.header-bottom').hide()
    // let the preview iframe expand into the reclaimed space
    jQuery('#main-frame').addClass('iframe-large')
    _persistSnapshotHeaderVisible("false")
}

function showSnapshotHeader() {
    console.log('Expanding Snapshot header...')
    jQuery('.header-toggle').text('▾')
    jQuery('.header-bottom').show()
    jQuery('#main-frame').removeClass('iframe-large')
    _persistSnapshotHeaderVisible("true")
}

function loadSnapshotHeaderState() {
    // Default to collapsed unless the user previously expanded the header.
    let storedVisible = 'false'
    try {
        storedVisible = localStorage.getItem("archivebox-snapshot-header-visible") || 'false'
    } catch (e) {
        console.log('Could not use localStorage to get header collapse state', e)
    }
    if (storedVisible === 'false') {
        hideSnapshotHeader()
    }
}

function handleSnapshotHeaderToggle() {
    // The toggle glyph doubles as the state flag: '▾' means expanded.
    if (jQuery('.header-toggle').text().includes('▾')) {
        hideSnapshotHeader()
    } else {
        showSnapshotHeader()
    }
    return true
}
|
||||||
|
|
||||||
|
// Wire up the header collapse/expand toggle (arrow and its trigger area).
// NOTE(review): an element carrying BOTH .header-toggle and
// .header-toggle-trigger classes gets this handler bound twice, so one
// click toggles twice (plus again via bubbling to an outer trigger) —
// verify this double-binding is intended.
jQuery('.header-toggle').on('click', handleSnapshotHeaderToggle)
jQuery('.header-toggle-trigger').on('click', handleSnapshotHeaderToggle)

// On load: honor a deep-link hash (e.g. #git) by activating the matching
// preview card, then restore the saved header collapsed/expanded state.
jQuery(document).ready(function () {
    if (window.location.hash) {
        const fragment = window.location.hash.slice(1).toLowerCase()
        for (const link of jQuery('a[target=preview]')) {
            console.log(link.pathname)
            if (getPreviewTypeFromPath(link) == fragment) {
                jQuery(link).closest('.card').click()
                jQuery(link).click()
                link.click()
            }
        }
    }
    loadSnapshotHeaderState()
})
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
// hide all preview iframes on small screens
|
||||||
|
// if (window.innerWidth < 1091) {
|
||||||
|
// jQuery('.card a[target=preview]').attr('target', '_self')
|
||||||
|
// }
|
||||||
|
</script>
|
||||||
|
</body>
|
||||||
|
</html>
|
||||||
@@ -37,6 +37,11 @@ dependencies = [
|
|||||||
# - See Github issues for more...
|
# - See Github issues for more...
|
||||||
"django-signal-webhooks>=0.3.0",
|
"django-signal-webhooks>=0.3.0",
|
||||||
"django-admin-data-views>=0.3.1",
|
"django-admin-data-views>=0.3.1",
|
||||||
|
"ulid-py>=1.1.0",
|
||||||
|
"typeid-python>=0.3.0",
|
||||||
|
"django-charid-field>=0.4",
|
||||||
|
"django-pydantic-field>=0.3.9",
|
||||||
|
"django-jsonform>=2.22.0",
|
||||||
]
|
]
|
||||||
|
|
||||||
homepage = "https://github.com/ArchiveBox/ArchiveBox"
|
homepage = "https://github.com/ArchiveBox/ArchiveBox"
|
||||||
@@ -155,6 +160,22 @@ plugins = ["mypy_django_plugin.main"]
|
|||||||
[tool.django-stubs]
|
[tool.django-stubs]
|
||||||
django_settings_module = "core.settings"
|
django_settings_module = "core.settings"
|
||||||
|
|
||||||
|
[tool.pyright]
|
||||||
|
include = ["archivebox"]
|
||||||
|
exclude = ["**/node_modules",
|
||||||
|
"**/__pycache__",
|
||||||
|
"**/migrations",
|
||||||
|
"archivebox/vendor",
|
||||||
|
]
|
||||||
|
# ignore = ["src/oldstuff"]
|
||||||
|
# defineConstant = { DEBUG = true }
|
||||||
|
|
||||||
|
reportMissingImports = true
|
||||||
|
reportMissingTypeStubs = false
|
||||||
|
pythonVersion = "3.10"
|
||||||
|
pythonPlatform = "Linux"
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
[project.urls]
|
[project.urls]
|
||||||
Homepage = "https://github.com/ArchiveBox/ArchiveBox"
|
Homepage = "https://github.com/ArchiveBox/ArchiveBox"
|
||||||
|
|||||||
Reference in New Issue
Block a user