mirror of
https://github.com/ArchiveBox/ArchiveBox.git
synced 2026-04-06 07:47:53 +10:00
Improve scheduling, runtime paths, and API behavior
This commit is contained in:
@@ -131,6 +131,7 @@ def check_data_dir_permissions():
|
||||
from archivebox import DATA_DIR
|
||||
from archivebox.misc.logging import STDERR
|
||||
from archivebox.config.permissions import ARCHIVEBOX_USER, ARCHIVEBOX_GROUP, DEFAULT_PUID, DEFAULT_PGID, IS_ROOT, USER
|
||||
from archivebox.config.paths import get_or_create_working_tmp_dir, get_or_create_working_lib_dir
|
||||
|
||||
data_dir_stat = Path(DATA_DIR).stat()
|
||||
data_dir_uid, data_dir_gid = data_dir_stat.st_uid, data_dir_stat.st_gid
|
||||
@@ -156,11 +157,21 @@ def check_data_dir_permissions():
|
||||
|
||||
from archivebox.config.common import STORAGE_CONFIG
|
||||
|
||||
try:
|
||||
tmp_dir = get_or_create_working_tmp_dir(autofix=True, quiet=True) or STORAGE_CONFIG.TMP_DIR
|
||||
except Exception:
|
||||
tmp_dir = STORAGE_CONFIG.TMP_DIR
|
||||
|
||||
try:
|
||||
lib_dir = get_or_create_working_lib_dir(autofix=True, quiet=True) or STORAGE_CONFIG.LIB_DIR
|
||||
except Exception:
|
||||
lib_dir = STORAGE_CONFIG.LIB_DIR
|
||||
|
||||
# Check /tmp dir permissions
|
||||
check_tmp_dir(STORAGE_CONFIG.TMP_DIR, throw=False, must_exist=True)
|
||||
check_tmp_dir(tmp_dir, throw=False, must_exist=True)
|
||||
|
||||
# Check /lib dir permissions
|
||||
check_lib_dir(STORAGE_CONFIG.LIB_DIR, throw=False, must_exist=True)
|
||||
check_lib_dir(lib_dir, throw=False, must_exist=True)
|
||||
|
||||
os.umask(0o777 - int(STORAGE_CONFIG.DIR_OUTPUT_PERMISSIONS, base=8)) # noqa: F821
|
||||
|
||||
|
||||
@@ -426,14 +426,15 @@ def log_removal_started(snapshots, yes: bool, delete: bool):
|
||||
except (KeyboardInterrupt, EOFError, AssertionError):
|
||||
raise SystemExit(0)
|
||||
|
||||
def log_removal_finished(remaining_links: int, removed_links: int):
    """Print a summary of a completed removal operation.

    Args:
        remaining_links: Number of links still in the index after removal.
        removed_links: Number of links that were removed.

    When both counts are zero nothing matched, so a "no matching links"
    notice is printed instead of the removal summary.
    """
    # NOTE(review): the [red1]...[/] tags look like console markup that is
    # presumably rendered by the file's `print` (e.g. rich) -- confirm.
    if remaining_links == 0 and removed_links == 0:
        print()
        print('[red1][X] No matching links found.[/]')
        return

    # Size of the index before anything was removed.
    total_before = remaining_links + removed_links
    print()
    print(f'[red1][√] Removed {removed_links} out of {total_before} links from the archive index.[/]')
    print(f' Index now contains {remaining_links} links.')
|
||||
|
||||
|
||||
### Search Indexing Stage
|
||||
|
||||
@@ -10,7 +10,6 @@ from pathlib import Path
|
||||
from typing import Optional, Union, Set, Tuple
|
||||
from subprocess import _mswindows, PIPE, Popen, CalledProcessError, CompletedProcess, TimeoutExpired
|
||||
|
||||
from crontab import CronTab
|
||||
from atomicwrites import atomic_write as lib_atomic_write
|
||||
|
||||
from archivebox.config.common import STORAGE_CONFIG
|
||||
@@ -170,28 +169,6 @@ def get_dir_size(path: Union[str, Path], recursive: bool=True, pattern: Optional
|
||||
pass
|
||||
return num_bytes, num_dirs, num_files
|
||||
|
||||
|
||||
# Comment string for tagging cron entries.
# NOTE(review): presumably used elsewhere to locate ArchiveBox's own
# scheduled jobs in the user's crontab -- confirm against callers.
CRON_COMMENT = 'archivebox_schedule'


@enforce_types
def dedupe_cron_jobs(cron: CronTab) -> CronTab:
    """Remove duplicate jobs from ``cron`` in place and return it.

    Two jobs count as duplicates when their schedule, command and enabled
    state are all equal. The first occurrence is left untouched, so it keeps
    its position, its comment and its enabled/disabled state; only later
    occurrences are removed.

    Removing every job and re-creating the unique ones would discard each
    job's own comment, force-enable disabled jobs and lose the original
    ordering, so surviving jobs are never rebuilt here.

    Args:
        cron: The crontab to dedupe. It is mutated in place.

    Returns:
        The same ``cron`` object, for call chaining.
    """
    seen: Set[Tuple[str, str, bool]] = set()

    # Iterate over a snapshot because jobs are removed from `cron` in the loop.
    for job in list(cron):
        identity = (str(job.slices), str(job.command), bool(job.is_enabled()))
        if identity in seen:
            cron.remove(job)
        else:
            seen.add(identity)

    return cron
|
||||
|
||||
|
||||
class suppress_output(object):
|
||||
"""
|
||||
A context manager for doing a "deep suppression" of stdout and stderr in
|
||||
|
||||
Reference in New Issue
Block a user