mirror of
https://github.com/ArchiveBox/ArchiveBox.git
synced 2026-04-05 07:17:52 +10:00
Merge branch 'master' into tags
This commit is contained in:
@@ -6,12 +6,13 @@ import sys
|
||||
import argparse
|
||||
|
||||
from typing import Optional, Dict, List, IO
|
||||
from pathlib import Path
|
||||
|
||||
from ..config import OUTPUT_DIR
|
||||
|
||||
from importlib import import_module
|
||||
|
||||
CLI_DIR = os.path.dirname(os.path.abspath(__file__))
|
||||
CLI_DIR = Path(__file__).resolve().parent
|
||||
|
||||
# these common commands will appear sorted before any others for ease-of-use
|
||||
meta_cmds = ('help', 'version')
|
||||
|
||||
@@ -7,6 +7,7 @@ import os
|
||||
import sys
|
||||
import shutil
|
||||
import unittest
|
||||
from pathlib import Path
|
||||
|
||||
from contextlib import contextmanager
|
||||
|
||||
@@ -109,13 +110,13 @@ class TestInit(unittest.TestCase):
|
||||
with output_hidden():
|
||||
archivebox_init.main([])
|
||||
|
||||
assert os.path.exists(os.path.join(OUTPUT_DIR, SQL_INDEX_FILENAME))
|
||||
assert os.path.exists(os.path.join(OUTPUT_DIR, JSON_INDEX_FILENAME))
|
||||
assert os.path.exists(os.path.join(OUTPUT_DIR, HTML_INDEX_FILENAME))
|
||||
assert (Path(OUTPUT_DIR) / SQL_INDEX_FILENAME).exists()
|
||||
assert (Path(OUTPUT_DIR) / JSON_INDEX_FILENAME).exists()
|
||||
assert (Path(OUTPUT_DIR) / HTML_INDEX_FILENAME).exists()
|
||||
assert len(load_main_index(out_dir=OUTPUT_DIR)) == 0
|
||||
|
||||
def test_conflicting_init(self):
|
||||
with open(os.path.join(OUTPUT_DIR, 'test_conflict.txt'), 'w+') as f:
|
||||
with open(Path(OUTPUT_DIR) / 'test_conflict.txt', 'w+') as f:
|
||||
f.write('test')
|
||||
|
||||
try:
|
||||
@@ -125,9 +126,9 @@ class TestInit(unittest.TestCase):
|
||||
except SystemExit:
|
||||
pass
|
||||
|
||||
assert not os.path.exists(os.path.join(OUTPUT_DIR, SQL_INDEX_FILENAME))
|
||||
assert not os.path.exists(os.path.join(OUTPUT_DIR, JSON_INDEX_FILENAME))
|
||||
assert not os.path.exists(os.path.join(OUTPUT_DIR, HTML_INDEX_FILENAME))
|
||||
assert not (Path(OUTPUT_DIR) / SQL_INDEX_FILENAME).exists()
|
||||
assert not (Path(OUTPUT_DIR) / JSON_INDEX_FILENAME).exists()
|
||||
assert not (Path(OUTPUT_DIR) / HTML_INDEX_FILENAME).exists()
|
||||
try:
|
||||
load_main_index(out_dir=OUTPUT_DIR)
|
||||
assert False, 'load_main_index should raise an exception when no index is present'
|
||||
@@ -159,7 +160,7 @@ class TestAdd(unittest.TestCase):
|
||||
assert len(all_links) == 30
|
||||
|
||||
def test_add_arg_file(self):
|
||||
test_file = os.path.join(OUTPUT_DIR, 'test.txt')
|
||||
test_file = Path(OUTPUT_DIR) / 'test.txt'
|
||||
with open(test_file, 'w+') as f:
|
||||
f.write(test_urls)
|
||||
|
||||
|
||||
@@ -431,7 +431,7 @@ def write_config_file(config: Dict[str, str], out_dir: str=None) -> ConfigDict:
|
||||
with open(f'{config_path}.bak', 'r') as old:
|
||||
atomic_write(config_path, old.read())
|
||||
|
||||
if os.path.exists(f'{config_path}.bak'):
|
||||
if Path(f'{config_path}.bak').exists():
|
||||
os.remove(f'{config_path}.bak')
|
||||
|
||||
return {}
|
||||
@@ -540,7 +540,7 @@ def bin_path(binary: Optional[str]) -> Optional[str]:
|
||||
if node_modules_bin.exists():
|
||||
return str(node_modules_bin.resolve())
|
||||
|
||||
return shutil.which(os.path.expanduser(binary)) or binary
|
||||
return shutil.which(Path(binary).expanduser()) or binary
|
||||
|
||||
def bin_hash(binary: Optional[str]) -> Optional[str]:
|
||||
if binary is None:
|
||||
@@ -634,17 +634,17 @@ def get_code_locations(config: ConfigDict) -> SimpleConfigValueDict:
|
||||
}
|
||||
|
||||
def get_external_locations(config: ConfigDict) -> ConfigValue:
|
||||
abspath = lambda path: None if path is None else os.path.abspath(path)
|
||||
abspath = lambda path: None if path is None else Path(path).resolve()
|
||||
return {
|
||||
'CHROME_USER_DATA_DIR': {
|
||||
'path': abspath(config['CHROME_USER_DATA_DIR']),
|
||||
'enabled': config['USE_CHROME'] and config['CHROME_USER_DATA_DIR'],
|
||||
'is_valid': False if config['CHROME_USER_DATA_DIR'] is None else os.path.exists(os.path.join(config['CHROME_USER_DATA_DIR'], 'Default')),
|
||||
'is_valid': False if config['CHROME_USER_DATA_DIR'] is None else (Path(config['CHROME_USER_DATA_DIR']) / 'Default').exists(),
|
||||
},
|
||||
'COOKIES_FILE': {
|
||||
'path': abspath(config['COOKIES_FILE']),
|
||||
'enabled': config['USE_WGET'] and config['COOKIES_FILE'],
|
||||
'is_valid': False if config['COOKIES_FILE'] is None else os.path.exists(config['COOKIES_FILE']),
|
||||
'is_valid': False if config['COOKIES_FILE'] is None else Path(config['COOKIES_FILE']).exists(),
|
||||
},
|
||||
}
|
||||
|
||||
@@ -828,7 +828,7 @@ def check_system_config(config: ConfigDict=CONFIG) -> None:
|
||||
# stderr('[i] Using Chrome binary: {}'.format(shutil.which(CHROME_BINARY) or CHROME_BINARY))
|
||||
# stderr('[i] Using Chrome data dir: {}'.format(os.path.abspath(CHROME_USER_DATA_DIR)))
|
||||
if config['CHROME_USER_DATA_DIR'] is not None:
|
||||
if not os.path.exists(os.path.join(config['CHROME_USER_DATA_DIR'], 'Default')):
|
||||
if not (Path(config['CHROME_USER_DATA_DIR']) / 'Default').exists():
|
||||
stderr('[X] Could not find profile "Default" in CHROME_USER_DATA_DIR.', color='red')
|
||||
stderr(f' {config["CHROME_USER_DATA_DIR"]}')
|
||||
stderr(' Make sure you set it to a Chrome user data directory containing a Default profile folder.')
|
||||
|
||||
@@ -2,6 +2,7 @@ __package__ = 'archivebox.core'
|
||||
|
||||
import os
|
||||
import sys
|
||||
from pathlib import Path
|
||||
from django.utils.crypto import get_random_string
|
||||
|
||||
|
||||
@@ -49,9 +50,9 @@ TEMPLATES = [
|
||||
{
|
||||
'BACKEND': 'django.template.backends.django.DjangoTemplates',
|
||||
'DIRS': [
|
||||
os.path.join(PYTHON_DIR, 'themes', ACTIVE_THEME),
|
||||
os.path.join(PYTHON_DIR, 'themes', 'default'),
|
||||
os.path.join(PYTHON_DIR, 'themes'),
|
||||
str(Path(PYTHON_DIR) / 'themes' / ACTIVE_THEME),
|
||||
str(Path(PYTHON_DIR) / 'themes' / 'default'),
|
||||
str(Path(PYTHON_DIR) / 'themes'),
|
||||
],
|
||||
'APP_DIRS': True,
|
||||
'OPTIONS': {
|
||||
@@ -70,7 +71,7 @@ WSGI_APPLICATION = 'core.wsgi.application'
|
||||
DATABASES = {
|
||||
'default': {
|
||||
'ENGINE': 'django.db.backends.sqlite3',
|
||||
'NAME': os.path.join(OUTPUT_DIR, SQL_INDEX_FILENAME),
|
||||
'NAME': str(Path(OUTPUT_DIR) / SQL_INDEX_FILENAME),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -105,7 +106,7 @@ SHELL_PLUS_PRINT_SQL = False
|
||||
IPYTHON_ARGUMENTS = ['--no-confirm-exit', '--no-banner']
|
||||
IPYTHON_KERNEL_DISPLAY_NAME = 'ArchiveBox Django Shell'
|
||||
if IS_SHELL:
|
||||
os.environ['PYTHONSTARTUP'] = os.path.join(PYTHON_DIR, 'core', 'welcome_message.py')
|
||||
os.environ['PYTHONSTARTUP'] = str(Path(PYTHON_DIR) / 'core' / 'welcome_message.py')
|
||||
|
||||
|
||||
LANGUAGE_CODE = 'en-us'
|
||||
@@ -122,6 +123,6 @@ EMAIL_BACKEND = 'django.core.mail.backends.console.EmailBackend'
|
||||
|
||||
STATIC_URL = '/static/'
|
||||
STATICFILES_DIRS = [
|
||||
os.path.join(PYTHON_DIR, 'themes', ACTIVE_THEME, 'static'),
|
||||
os.path.join(PYTHON_DIR, 'themes', 'default', 'static'),
|
||||
str(Path(PYTHON_DIR) / 'themes' / ACTIVE_THEME / 'static'),
|
||||
str(Path(PYTHON_DIR) / 'themes' / 'default' / 'static'),
|
||||
]
|
||||
|
||||
@@ -14,11 +14,11 @@ def get_icons(snapshot: Snapshot) -> str:
|
||||
|
||||
return format_html(
|
||||
'<span class="files-icons" style="font-size: 1.2em; opacity: 0.8">'
|
||||
'<a href="/{}/{}/" class="exists-{}" title="Wget clone">🌐 </a> '
|
||||
'<a href="/{}/{}" class="exists-{}" title="Wget clone">🌐 </a> '
|
||||
'<a href="/{}/{}" class="exists-{}" title="PDF">📄</a> '
|
||||
'<a href="/{}/{}" class="exists-{}" title="Screenshot">🖥 </a> '
|
||||
'<a href="/{}/{}" class="exists-{}" title="HTML dump">🅷 </a> '
|
||||
'<a href="/{}/{}/" class="exists-{}" title="WARC">🆆 </a> '
|
||||
'<a href="/{}/{}" class="exists-{}" title="WARC">🆆 </a> '
|
||||
'<a href="/{}/{}" class="exists-{}" title="SingleFile">🗜 </a>'
|
||||
'<a href="/{}/{}/" class="exists-{}" title="Media files">📼 </a> '
|
||||
'<a href="/{}/{}/" class="exists-{}" title="Git repos">📦 </a> '
|
||||
|
||||
@@ -114,12 +114,23 @@ class AddView(UserPassesTestMixin, FormView):
|
||||
template_name = "add_links.html"
|
||||
form_class = AddLinkForm
|
||||
|
||||
def get_initial(self):
|
||||
"""Prefill the AddLinkForm with the 'url' GET parameter"""
|
||||
if self.request.method == 'GET':
|
||||
url = self.request.GET.get('url', None)
|
||||
if url:
|
||||
return {'url': url}
|
||||
else:
|
||||
return super().get_initial()
|
||||
|
||||
def test_func(self):
|
||||
return PUBLIC_ADD_VIEW or self.request.user.is_authenticated
|
||||
|
||||
def get_context_data(self, *args, **kwargs):
|
||||
context = super().get_context_data(*args, **kwargs)
|
||||
context["title"] = "Add URLs"
|
||||
# We can't just call request.build_absolute_uri in the template, because it would include query parameters
|
||||
context["absolute_add_path"] = self.request.build_absolute_uri(self.request.path)
|
||||
return context
|
||||
|
||||
def form_valid(self, form):
|
||||
|
||||
@@ -75,7 +75,7 @@ def archive_link(link: Link, overwrite: bool=False, methods: Optional[Iterable[s
|
||||
|
||||
out_dir = out_dir or Path(link.link_dir)
|
||||
try:
|
||||
is_new = not os.path.exists(out_dir)
|
||||
is_new = not Path(out_dir).exists()
|
||||
if is_new:
|
||||
os.makedirs(out_dir)
|
||||
|
||||
|
||||
@@ -1,6 +1,5 @@
|
||||
__package__ = 'archivebox.extractors'
|
||||
|
||||
import os
|
||||
from pathlib import Path
|
||||
|
||||
from typing import Optional
|
||||
@@ -22,7 +21,7 @@ from ..logging_util import TimedProgress
|
||||
@enforce_types
|
||||
def should_save_favicon(link: Link, out_dir: Optional[str]=None) -> bool:
|
||||
out_dir = out_dir or link.link_dir
|
||||
if os.path.exists(os.path.join(out_dir, 'favicon.ico')):
|
||||
if (Path(out_dir) / 'favicon.ico').exists():
|
||||
return False
|
||||
|
||||
return SAVE_FAVICON
|
||||
|
||||
@@ -179,7 +179,7 @@ def wget_output_path(link: Link) -> Optional[str]:
|
||||
if re.search(".+\\.[Ss]?[Hh][Tt][Mm][Ll]?$", str(f), re.I | re.M)
|
||||
]
|
||||
if html_files:
|
||||
return str(html_files[0])
|
||||
return str(html_files[0].relative_to(link.link_dir))
|
||||
|
||||
# Move up one directory level
|
||||
search_dir = search_dir.parent
|
||||
|
||||
@@ -575,7 +575,7 @@ def is_archived(link: Link) -> bool:
|
||||
return is_valid(link) and link.is_archived
|
||||
|
||||
def is_unarchived(link: Link) -> bool:
|
||||
if not os.path.exists(link.link_dir):
|
||||
if not Path(link.link_dir).exists():
|
||||
return True
|
||||
return not link.is_archived
|
||||
|
||||
|
||||
@@ -1,7 +1,5 @@
|
||||
__package__ = 'archivebox.index'
|
||||
|
||||
import os
|
||||
|
||||
from string import Template
|
||||
from datetime import datetime
|
||||
from typing import List, Optional, Iterator, Mapping
|
||||
@@ -30,11 +28,10 @@ from ..config import (
|
||||
FAVICON_FILENAME,
|
||||
)
|
||||
|
||||
join = lambda *paths: os.path.join(*paths)
|
||||
MAIN_INDEX_TEMPLATE = join(TEMPLATES_DIR, 'main_index.html')
|
||||
MINIMAL_INDEX_TEMPLATE = join(TEMPLATES_DIR, 'main_index_minimal.html')
|
||||
MAIN_INDEX_ROW_TEMPLATE = join(TEMPLATES_DIR, 'main_index_row.html')
|
||||
LINK_DETAILS_TEMPLATE = join(TEMPLATES_DIR, 'link_details.html')
|
||||
MAIN_INDEX_TEMPLATE = str(Path(TEMPLATES_DIR) / 'main_index.html')
|
||||
MINIMAL_INDEX_TEMPLATE = str(Path(TEMPLATES_DIR) / 'main_index_minimal.html')
|
||||
MAIN_INDEX_ROW_TEMPLATE = str(Path(TEMPLATES_DIR) / 'main_index_row.html')
|
||||
LINK_DETAILS_TEMPLATE = str(Path(TEMPLATES_DIR) / 'link_details.html')
|
||||
TITLE_LOADING_MSG = 'Not yet archived...'
|
||||
|
||||
|
||||
@@ -44,8 +41,8 @@ TITLE_LOADING_MSG = 'Not yet archived...'
|
||||
def parse_html_main_index(out_dir: Path=OUTPUT_DIR) -> Iterator[str]:
|
||||
"""parse an archive index html file and return the list of urls"""
|
||||
|
||||
index_path = join(out_dir, HTML_INDEX_FILENAME)
|
||||
if os.path.exists(index_path):
|
||||
index_path = Path(out_dir) / HTML_INDEX_FILENAME
|
||||
if index_path.exists():
|
||||
with open(index_path, 'r', encoding='utf-8') as f:
|
||||
for line in f:
|
||||
if 'class="link-url"' in line:
|
||||
@@ -56,12 +53,12 @@ def parse_html_main_index(out_dir: Path=OUTPUT_DIR) -> Iterator[str]:
|
||||
def write_html_main_index(links: List[Link], out_dir: Path=OUTPUT_DIR, finished: bool=False) -> None:
|
||||
"""write the html link index to a given path"""
|
||||
|
||||
copy_and_overwrite(join(TEMPLATES_DIR, FAVICON_FILENAME), join(out_dir, FAVICON_FILENAME))
|
||||
copy_and_overwrite(join(TEMPLATES_DIR, ROBOTS_TXT_FILENAME), join(out_dir, ROBOTS_TXT_FILENAME))
|
||||
copy_and_overwrite(join(TEMPLATES_DIR, STATIC_DIR_NAME), join(out_dir, STATIC_DIR_NAME))
|
||||
copy_and_overwrite(str(Path(TEMPLATES_DIR) / FAVICON_FILENAME), str(out_dir / FAVICON_FILENAME))
|
||||
copy_and_overwrite(str(Path(TEMPLATES_DIR) / ROBOTS_TXT_FILENAME), str(out_dir / ROBOTS_TXT_FILENAME))
|
||||
copy_and_overwrite(str(Path(TEMPLATES_DIR) / STATIC_DIR_NAME), str(out_dir / STATIC_DIR_NAME))
|
||||
|
||||
rendered_html = main_index_template(links, finished=finished)
|
||||
atomic_write(join(out_dir, HTML_INDEX_FILENAME), rendered_html)
|
||||
atomic_write(str(out_dir / HTML_INDEX_FILENAME), rendered_html)
|
||||
|
||||
|
||||
@enforce_types
|
||||
@@ -100,7 +97,7 @@ def main_index_row_template(link: Link) -> str:
|
||||
|
||||
# before pages are finished archiving, show fallback loading favicon
|
||||
'favicon_url': (
|
||||
join(ARCHIVE_DIR_NAME, link.timestamp, 'favicon.ico')
|
||||
str(Path(ARCHIVE_DIR_NAME) / link.timestamp / 'favicon.ico')
|
||||
# if link['is_archived'] else 'data:image/gif;base64,R0lGODlhAQABAAD/ACwAAAAAAQABAAACADs='
|
||||
),
|
||||
|
||||
@@ -119,7 +116,7 @@ def write_html_link_details(link: Link, out_dir: Optional[str]=None) -> None:
|
||||
out_dir = out_dir or link.link_dir
|
||||
|
||||
rendered_html = link_details_template(link)
|
||||
atomic_write(join(out_dir, HTML_INDEX_FILENAME), rendered_html)
|
||||
atomic_write(str(Path(out_dir) / HTML_INDEX_FILENAME), rendered_html)
|
||||
|
||||
|
||||
@enforce_types
|
||||
|
||||
@@ -45,8 +45,8 @@ MAIN_INDEX_HEADER = {
|
||||
def parse_json_main_index(out_dir: Path=OUTPUT_DIR) -> Iterator[Link]:
|
||||
"""parse an archive index json file and return the list of links"""
|
||||
|
||||
index_path = os.path.join(out_dir, JSON_INDEX_FILENAME)
|
||||
if os.path.exists(index_path):
|
||||
index_path = Path(out_dir) / JSON_INDEX_FILENAME
|
||||
if index_path.exists():
|
||||
with open(index_path, 'r', encoding='utf-8') as f:
|
||||
links = pyjson.load(f)['links']
|
||||
for link_json in links:
|
||||
@@ -86,7 +86,7 @@ def write_json_main_index(links: List[Link], out_dir: Path=OUTPUT_DIR) -> None:
|
||||
'last_run_cmd': sys.argv,
|
||||
'links': links,
|
||||
}
|
||||
atomic_write(os.path.join(out_dir, JSON_INDEX_FILENAME), main_index_json)
|
||||
atomic_write(str(Path(out_dir) / JSON_INDEX_FILENAME), main_index_json)
|
||||
|
||||
|
||||
### Link Details Index
|
||||
@@ -96,15 +96,15 @@ def write_json_link_details(link: Link, out_dir: Optional[str]=None) -> None:
|
||||
"""write a json file with some info about the link"""
|
||||
|
||||
out_dir = out_dir or link.link_dir
|
||||
path = os.path.join(out_dir, JSON_INDEX_FILENAME)
|
||||
atomic_write(path, link._asdict(extended=True))
|
||||
path = Path(out_dir) / JSON_INDEX_FILENAME
|
||||
atomic_write(str(path), link._asdict(extended=True))
|
||||
|
||||
|
||||
@enforce_types
|
||||
def parse_json_link_details(out_dir: Union[Path, str], guess: Optional[bool]=False) -> Optional[Link]:
|
||||
"""load the json link index from a given directory"""
|
||||
existing_index = os.path.join(out_dir, JSON_INDEX_FILENAME)
|
||||
if os.path.exists(existing_index):
|
||||
existing_index = Path(out_dir) / JSON_INDEX_FILENAME
|
||||
if existing_index.exists():
|
||||
with open(existing_index, 'r', encoding='utf-8') as f:
|
||||
try:
|
||||
link_json = pyjson.load(f)
|
||||
@@ -118,9 +118,9 @@ def parse_json_link_details(out_dir: Union[Path, str], guess: Optional[bool]=Fal
|
||||
def parse_json_links_details(out_dir: Union[Path, str]) -> Iterator[Link]:
|
||||
"""read through all the archive data folders and return the parsed links"""
|
||||
|
||||
for entry in os.scandir(os.path.join(out_dir, ARCHIVE_DIR_NAME)):
|
||||
for entry in os.scandir(Path(out_dir) / ARCHIVE_DIR_NAME):
|
||||
if entry.is_dir(follow_symlinks=True):
|
||||
if os.path.exists(os.path.join(entry.path, 'index.json')):
|
||||
if (Path(entry.path) / 'index.json').exists():
|
||||
try:
|
||||
link = parse_json_link_details(entry.path)
|
||||
except KeyError:
|
||||
|
||||
@@ -1,6 +1,5 @@
|
||||
__package__ = 'archivebox.index'
|
||||
|
||||
import os
|
||||
from pathlib import Path
|
||||
|
||||
from datetime import datetime, timedelta
|
||||
@@ -250,7 +249,7 @@ class Link:
|
||||
@property
|
||||
def link_dir(self) -> str:
|
||||
from ..config import CONFIG
|
||||
return os.path.join(CONFIG['ARCHIVE_DIR'], self.timestamp)
|
||||
return str(Path(CONFIG['ARCHIVE_DIR']) / self.timestamp)
|
||||
|
||||
@property
|
||||
def archive_path(self) -> str:
|
||||
@@ -369,7 +368,7 @@ class Link:
|
||||
)
|
||||
|
||||
return any(
|
||||
os.path.exists(os.path.join(ARCHIVE_DIR, self.timestamp, path))
|
||||
(Path(ARCHIVE_DIR) / self.timestamp / path).exists()
|
||||
for path in output_paths
|
||||
)
|
||||
|
||||
|
||||
@@ -390,7 +390,7 @@ def log_list_finished(links):
|
||||
def log_removal_started(links: List["Link"], yes: bool, delete: bool):
|
||||
print('{lightyellow}[i] Found {} matching URLs to remove.{reset}'.format(len(links), **ANSI))
|
||||
if delete:
|
||||
file_counts = [link.num_outputs for link in links if os.path.exists(link.link_dir)]
|
||||
file_counts = [link.num_outputs for link in links if Path(link.link_dir).exists()]
|
||||
print(
|
||||
f' {len(links)} Links will be de-listed from the main index, and their archived content folders will be deleted from disk.\n'
|
||||
f' ({len(file_counts)} data folders with {sum(file_counts)} archived files will be deleted!)'
|
||||
@@ -445,9 +445,9 @@ def log_shell_welcome_msg():
|
||||
@enforce_types
|
||||
def pretty_path(path: Union[Path, str]) -> str:
|
||||
"""convert paths like .../ArchiveBox/archivebox/../output/abc into output/abc"""
|
||||
pwd = os.path.abspath('.')
|
||||
pwd = Path('.').resolve()
|
||||
# parent = os.path.abspath(os.path.join(pwd, os.path.pardir))
|
||||
return str(path).replace(pwd + '/', './')
|
||||
return str(path).replace(str(pwd) + '/', './')
|
||||
|
||||
|
||||
@enforce_types
|
||||
@@ -518,11 +518,11 @@ def printable_folder_status(name: str, folder: Dict) -> str:
|
||||
color, symbol, note, num_files = 'lightyellow', '-', 'disabled', '-'
|
||||
|
||||
if folder['path']:
|
||||
if os.path.exists(folder['path']):
|
||||
if Path(folder['path']).exists():
|
||||
num_files = (
|
||||
f'{len(os.listdir(folder["path"]))} files'
|
||||
if os.path.isdir(folder['path']) else
|
||||
printable_filesize(os.path.getsize(folder['path']))
|
||||
if Path(folder['path']).is_dir() else
|
||||
printable_filesize(Path(folder['path']).stat().st_size)
|
||||
)
|
||||
else:
|
||||
num_files = 'missing'
|
||||
|
||||
@@ -8,7 +8,6 @@ For examples of supported import formats see tests/.
|
||||
__package__ = 'archivebox.parsers'
|
||||
|
||||
import re
|
||||
import os
|
||||
from io import StringIO
|
||||
|
||||
from typing import IO, Tuple, List, Optional
|
||||
@@ -128,7 +127,7 @@ def run_parser_functions(to_parse: IO[str], timer, root_url: Optional[str]=None)
|
||||
@enforce_types
|
||||
def save_text_as_source(raw_text: str, filename: str='{ts}-stdin.txt', out_dir: Path=OUTPUT_DIR) -> str:
|
||||
ts = str(datetime.now().timestamp()).split('.', 1)[0]
|
||||
source_path = os.path.join(out_dir, SOURCES_DIR_NAME, filename.format(ts=ts))
|
||||
source_path = str(out_dir / SOURCES_DIR_NAME / filename.format(ts=ts))
|
||||
atomic_write(source_path, raw_text)
|
||||
log_source_saved(source_file=source_path)
|
||||
return source_path
|
||||
@@ -138,7 +137,7 @@ def save_text_as_source(raw_text: str, filename: str='{ts}-stdin.txt', out_dir:
|
||||
def save_file_as_source(path: str, timeout: int=TIMEOUT, filename: str='{ts}-{basename}.txt', out_dir: Path=OUTPUT_DIR) -> str:
|
||||
"""download a given url's content into output/sources/domain-<timestamp>.txt"""
|
||||
ts = str(datetime.now().timestamp()).split('.', 1)[0]
|
||||
source_path = os.path.join(OUTPUT_DIR, SOURCES_DIR_NAME, filename.format(basename=basename(path), ts=ts))
|
||||
source_path = str(OUTPUT_DIR / SOURCES_DIR_NAME / filename.format(basename=basename(path), ts=ts))
|
||||
|
||||
if any(path.startswith(s) for s in ('http://', 'https://', 'ftp://')):
|
||||
# Source is a URL that needs to be downloaded
|
||||
|
||||
@@ -64,7 +64,7 @@ def chmod_file(path: str, cwd: str='.', permissions: str=OUTPUT_PERMISSIONS) ->
|
||||
@enforce_types
|
||||
def copy_and_overwrite(from_path: str, to_path: str):
|
||||
"""copy a given file or directory to a given path, overwriting the destination"""
|
||||
if os.path.isdir(from_path):
|
||||
if Path(from_path).is_dir():
|
||||
shutil.rmtree(to_path, ignore_errors=True)
|
||||
shutil.copytree(from_path, to_path)
|
||||
else:
|
||||
|
||||
@@ -49,6 +49,12 @@
|
||||
<small>(it's safe to leave this page, adding will continue in the background)</small>
|
||||
</div>
|
||||
</center>
|
||||
{% if absolute_add_path %}
|
||||
<center id="bookmarklet">
|
||||
<p>Bookmark this link to quickly add to your archive:
|
||||
<a href="javascript:void(window.open('{{ absolute_add_path }}?url='+document.location.href));">Add to ArchiveBox</a></p>
|
||||
</center>
|
||||
{% endif %}
|
||||
<script>
|
||||
document.getElementById('add-form').addEventListener('submit', function(event) {
|
||||
setTimeout(function() {
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
<td title="$timestamp">$bookmarked_date</td>
|
||||
<td class="title-col">
|
||||
<a href="$archive_path/index.html" class="link-url"><img src="$favicon_url" class="link-favicon" decoding="async"></a>
|
||||
<a href="$wget_url" title="$title">
|
||||
<a href="$archive_path/$wget_url" title="$title">
|
||||
<span data-title-for="$url" data-archived="$is_archived">$title</span>
|
||||
<small style="float:right">$tags</small>
|
||||
</a>
|
||||
|
||||
Reference in New Issue
Block a user