diff --git a/README.md b/README.md index 861ea00d..4fd5eea2 100644 --- a/README.md +++ b/README.md @@ -771,6 +771,7 @@ to show `/icons/exe.png` and `/icons/elf.gif` as the thumbnail for all `.exe` an note: * heif/heifs/heic/heics images usually require the `libvips` [optional dependency](#optional-dependencies) but this is not possible with the docker-images due to [legal reasons](docs/bad-codecs.md) +* if you do not want thumbnails to be generated on-the-fly, and instead wish to generate all of them on server startup, then see [thumbnail pregen](#thumbnail-pregen) config file example: @@ -829,6 +830,7 @@ cool trick: download a folder by appending url-params `?tar&opus` or `?tar&mp3` * and url-param `&nodot` skips dotfiles/dotfolders; they are included by default if your account has permission to see them * and url-params `&j` / `&w` produce jpeg/webm thumbnails/spectrograms instead of the original audio/video/images (`&p` for audio waveforms) * can also be used to pregenerate thumbnails; combine with `--th-maxage=9999999` or `--th-clean=0` + * but now there is also a real [thumbnail pregen](#thumbnail-pregen) so just use that ## uploading @@ -1906,6 +1908,30 @@ this is **cosmetic only!** the files are still easily accessible in many ways, f > also see the [--unlist](https://copyparty.eu/cli/#g-unlist) option which is somewhat similar -- `unlist` applies to the whole volume instead of just one folder; however, while dothidden also affects sftp and ftp, the `unlist` option is http/https-only +## thumbnail pregen + +if you want to pre-generate everything on startup (usually a bad idea); + +by default, thumbnails are created on-the-fly when a client needs them, and then cached on the server for [--th-maxage](https://copyparty.eu/cli/#g-th-maxage) seconds (default is one week), so most thumbnails only need to be created once, and are then eventually deleted from the cache to preserve disk space + +but if you need every thumbnail instantly available when a folder is 
viewed, then first increase the thumbnail expiration time to something really big, and then set global-option `th-pregen` or volflag `th_pregen` to a comma-separated list of thumbnail formats to automatically generate on server startup; + +the full list of all possible formats is: `j,jf,jf3,j3,w,wf,wf3,w3,x,xf,xf3,x3,opus,mp3,flac,wav` and I'll explain what those mean soon + +* `j` = jpeg cropped, `jf` = jpeg uncropped, `jf3` = jpeg uncropped triplesize, `j3` = jpeg cropped triplesize +* `w` = webm cropped, `wf` = webm uncropped, ..., `x` = jxl cropped, `xf` = jxl uncropped, ... +* and yes, audio-transcodes are technically thumbnails according to copyparty -- don't think too much about it ( ゚ ヮ゚) + * unlike thumbnails, the expiry time for audio-transcodes is configured with [--ac-maxage](https://copyparty.eu/cli/#g-ac-maxage) + +anyways, obviously you **do not** want to pregenerate flac/wav because they're HUGE, and everything else also gets pretty big because it all adds up; + +* each regular thumbnail ( j, jf, w, wf, x, xf ) takes about 16 KiB of disk space +* each triplesize thumb ( j3, jf3, w3, wf3, x3, xf3 ) takes about 96 KiB +* each opus / mp3 audiotranscode takes... idk, 6 MiB? 
depends on song length + +so a thousand pictures converted to every possible regular-size image format (`j,jf,w,wf,x,xf`) takes **96 MiB,** and every possible 3x-size (`jf3,j3,wf3,w3,xf3,x3`) takes **562 MiB,** alternatively **658 MiB** in total for all, so that's why the default is to *not* pregenerate on startup, but instead do on-demand with a cache + + ## database location in-volume (`.hist/up2k.db`, default) or somewhere else diff --git a/copyparty/__main__.py b/copyparty/__main__.py index f3d133a0..d95fabe6 100644 --- a/copyparty/__main__.py +++ b/copyparty/__main__.py @@ -1660,7 +1660,7 @@ def add_shutdown(ap): ap2 = ap.add_argument_group("shutdown options") ap2.add_argument("--ign-ebind", action="store_true", help="continue running even if it's impossible to listen on some of the requested endpoints") ap2.add_argument("--ign-ebind-all", action="store_true", help="continue running even if it's impossible to receive connections at all") - ap2.add_argument("--exit", metavar="WHEN", type=u, default="", help="shutdown after \033[33mWHEN\033[0m has finished; [\033[32mcfg\033[0m] config parsing, [\033[32midx\033[0m] volscan + multimedia indexing") + ap2.add_argument("--exit", metavar="WHEN", type=u, default="", help="shutdown after \033[33mWHEN\033[0m has finished; [\033[32mcfg\033[0m] config parsing, [\033[32midx\033[0m] volscan + multimedia indexing, [\033[32mthgen\033[0m] thumbnail-pregen") def add_logging(ap): @@ -1726,6 +1726,8 @@ def add_thumbnail(ap): ap2.add_argument("--th-poke", metavar="SEC", type=int, default=300, help="activity labeling cooldown -- avoids doing keepalive pokes (updating the mtime) on thumbnail folders more often than \033[33mSEC\033[0m seconds") ap2.add_argument("--th-clean", metavar="SEC", type=int, default=43200, help="cleanup interval; 0=disabled") ap2.add_argument("--th-maxage", metavar="SEC", type=int, default=604800, help="max folder age -- folders which haven't been poked for longer than \033[33m--th-poke\033[0m seconds will get 
deleted every \033[33m--th-clean\033[0m seconds") + ap2.add_argument("--th-pregen", metavar="F,F", type=u, default="", help="pregenerate thumbnails on startup; \033[33mF,F\033[0m is comma-separated list of formats; example: [\033[32mj,jf,w,w3,wf,wf3,x,xf\033[0m] NOTE: remember to set \033[33m--th-maxage 123456789\033[0m (volflag=th_pregen)") + ap2.add_argument("--th-pre-rl", metavar="SEC", type=int, default=30, help="while pregen is running, ratelimit the thumbnailer logger to one message every \033[33mSEC\033[0m seconds (only works with \033[33m-j1\033[0m); set 0 to disable ratelimit") ap2.add_argument("--th-covers", metavar="N,N", type=u, default="folder.png,folder.jpg,cover.png,cover.jpg", help="folder thumbnails to stat/look for; enabling \033[33m-e2d\033[0m will make these case-insensitive, and try them as dotfiles (.folder.jpg), and also automatically select thumbnails for all folders that contain pics, even if none match this pattern") ap2.add_argument("--th-spec-p", metavar="N", type=u, default=1, help="for music, do spectrograms or embedded coverart? 
[\033[32m0\033[0m]=only-art, [\033[32m1\033[0m]=prefer-art, [\033[32m2\033[0m]=only-spec") # https://pillow.readthedocs.io/en/stable/handbook/image-file-formats.html diff --git a/copyparty/broker_mp.py b/copyparty/broker_mp.py index 9479c707..ed01640a 100644 --- a/copyparty/broker_mp.py +++ b/copyparty/broker_mp.py @@ -160,6 +160,13 @@ class BrokerMp(object): else: raise Exception("what is " + str(dest)) + def say1(self, dest: str, *args: Any) -> None: + """ + send message to one lucky recipient + """ + p = self.procs[0] + p.q_pend.put((0, dest, list(args))) + def periodic(self) -> None: while True: time.sleep(1) diff --git a/copyparty/cfg.py b/copyparty/cfg.py index b1c71c19..d4c163bb 100644 --- a/copyparty/cfg.py +++ b/copyparty/cfg.py @@ -150,6 +150,7 @@ def vf_vmap() -> dict[str, str]: "tail_tmax", "tail_who", "tcolor", + "th_pregen", "th_qv", "th_qvx", "th_spec_p", @@ -317,6 +318,7 @@ flagcats = { "aconvt": "convert-to-audio timeout in seconds", "th_spec_p=1": "make spectrograms? 0=never 1=fallback 2=always", "ext_th=s=/b.png": "use /b.png as thumbnail for file-extension s", + "th_pregen=w,wf": "pregenerate thumbs for these formats", }, "handlers\n(better explained in --help-handlers)": { "on404=PY": "handle 404s by executing PY file", diff --git a/copyparty/httpsrv.py b/copyparty/httpsrv.py index 6d24b4f6..1c0ddfad 100644 --- a/copyparty/httpsrv.py +++ b/copyparty/httpsrv.py @@ -55,10 +55,12 @@ except SyntaxError: ) sys.exit(1) +from .authsrv import LEELOO_DALLAS from .httpconn import HttpConn from .ico import Ico from .metrics import Metrics from .mtag import HAVE_FFMPEG +from .sutil import gfilter2 from .th_cli import ThumbCli from .th_srv import HAVE_PIL, HAVE_VIPS from .u2idx import U2idx @@ -657,3 +659,73 @@ class HttpSrv(object): self.tdli = dli self.tdls = dls + + def pregen_thumbs(self) -> None: + Daemon(self._pregen_thumbs, "th_pregen") + + def _pregen_thumbs(self) -> None: + def log(msg, n): + self.log("thumb-pregen", msg, n) + + if getattr(self, 
"_pregen", False): + log("already running", 1) + return + + self._pregen = True + + for n in range(9999999): + x = self.broker.ask("up2k.is_busy") + zb, zi = x.get() + if zi: + break + if not n: + log("waiting for up2k to finish initializing", 6) + time.sleep(1 if n < 10 else 5 if n < 300 else 15) + + if not self.thumbcli: + log("no thumbcli", 1) + return + + if self.args.th_pre_rl: + try: + self.broker.hub.thumbsrv.log = self.broker.hub.thumbsrv._slog + except: + pass + + nfiles = 0 + t0 = time.time() + scandir = not self.args.no_scandir + for vn in self.asrv.vfs.all_nodes.values(): + fmts = vn.flags.get("th_pregen", "") + if not fmts: + continue + log("starting for volume /%s" % (vn.vpath,), 6) + g = vn.walk("x", "/", [], LEELOO_DALLAS, [True], 2, scandir, False, False) + g = gfilter2(g, self, vn.vpath, fmts.split(",")) + for f in g: + nfiles += 1 + if not nfiles % 256: + now = time.time() + for n in range(9999999): + x = self.broker.ask("up2k.is_busy") + zb, zi = x.get() + if not zb: + if n: + t0 += time.time() - now + break + if not n: + log("waiting for up2k to finish indexing", 6) + time.sleep(5) + + if self.args.th_pre_rl: + try: + self.broker.hub.thumbsrv.log = self.broker.hub.thumbsrv._log + except: + pass + + t = "finished; %d files in %d seconds" + log(t % (nfiles, time.time() - t0), 6) + self._pregen = False + + if self.args.exit == "thgen": + self.broker.say("sigterm") diff --git a/copyparty/sutil.py b/copyparty/sutil.py index 6cc57f6f..f64d2d02 100644 --- a/copyparty/sutil.py +++ b/copyparty/sutil.py @@ -6,11 +6,11 @@ import tempfile from datetime import datetime from .__init__ import CORES -from .authsrv import VFS, AuthSrv +from .authsrv import LEELOO_DALLAS, VFS, AuthSrv from .bos import bos from .th_cli import ThumbCli from .th_srv import TH_CH -from .util import UTC, vjoin, vol_san +from .util import UTC, sigblock, vjoin, vol_san if True: # pylint: disable=using-constant-test from typing import Any, Generator, Optional @@ -42,6 +42,17 @@ class 
StreamArc(object): self.stopped = True +_pools = {} + + +def close_pools() -> None: + for p in list(_pools): + try: + p.shutdown(wait=False, cancel_futures=True) + except: + pass + + def gfilter( fgen: Generator[dict[str, Any], None, None], thumbcli: ThumbCli, @@ -52,7 +63,8 @@ def gfilter( from concurrent.futures import ThreadPoolExecutor pend = [] - with ThreadPoolExecutor(max_workers=CORES) as tp: + with ThreadPoolExecutor(max_workers=CORES, initializer=sigblock) as tp: + _pools[tp] = 1 try: for f in fgen: task = tp.submit(enthumb, thumbcli, uname, vtop, f, fmt) @@ -79,6 +91,61 @@ def gfilter( except: pass thumbcli.log("gfilter flushed") + _pools.pop(tp, None) + + +def gfilter2( + fgen: Generator[ + tuple[ + "VFS", + str, + str, + str, + list[tuple[str, os.stat_result]], + list[tuple[str, os.stat_result]], + dict[str, "VFS"], + ], + None, + None, + ], + hsrv: "HttpSrv", + vtop: str, + fmts: list[str], +) -> Generator[dict[str, Any], None, None]: + from concurrent.futures import ThreadPoolExecutor + + pend = [] + with ThreadPoolExecutor(max_workers=CORES, initializer=sigblock) as tp: + _pools[tp] = 1 + for _, _, vpath, apath, files, rd, vd in fgen: + if "/.hist/" in vpath: + continue + fnames = [n[0] for n in files] + vpaths = [vpath + "/" + n for n in fnames] if vpath else fnames + for vp, fi in zip(vpaths, files): + for fmt in fmts: + try: + f = {"vp": vp, "st": fi[1]} + task = tp.submit( + enthumb, hsrv.thumbcli, LEELOO_DALLAS, vtop, f, fmt + ) + pend.append((task, f)) + if pend[0][0].done() or len(pend) > CORES * 4: + task, f = pend.pop(0) + try: + f = task.result(600) + except: + pass + yield f + except: + pass + for task, f in pend: + try: + f = task.result(600) + except: + pass + yield f + _pools.pop(tp, None) def enthumb( diff --git a/copyparty/svchub.py b/copyparty/svchub.py index 674ecfd7..2e744207 100644 --- a/copyparty/svchub.py +++ b/copyparty/svchub.py @@ -35,6 +35,7 @@ from .cert import ensure_cert from .fsutil import ramdisk_chk from .mtag import 
HAVE_FFMPEG, HAVE_FFPROBE, HAVE_MUTAGEN from .pwhash import HAVE_ARGON2 +from .sutil import close_pools as sutil_close_pools from .tcpsrv import TcpSrv from .th_srv import ( H_PIL_AVIF, @@ -489,6 +490,8 @@ class SvcHub(object): for nm in args.ipr_u.values(): nm.mutex = threading.Lock() + self._reload_thumbsrv() + def _db_onfail_ses(self) -> None: self.args.no_ses = True @@ -1478,8 +1481,17 @@ class SvcHub(object): self.log("root", "reload done") t += "\n\nchanges to global options (if any) require a restart of copyparty to take effect" self.broker.reload() + self._reload_thumbsrv() return t + def _reload_thumbsrv(self) -> None: + if not self.thumbsrv: + return + vols = list(self.asrv.vfs.all_nodes.values()) + if next((x for x in vols if x.flags.get("th_pregen", "")), None): + fun = getattr(self.broker, "say1", self.broker.say) + fun("httpsrv.pregen_thumbs") + def _reload_sessions(self) -> None: with self.asrv.mutex: self.asrv.load_sessions(True) @@ -1564,6 +1576,7 @@ class SvcHub(object): if self.thumbsrv: self.thumbsrv.shutdown() + sutil_close_pools() for n in range(200): # 10s time.sleep(0.05) diff --git a/copyparty/th_srv.py b/copyparty/th_srv.py index d0cd43e3..8c6275a4 100644 --- a/copyparty/th_srv.py +++ b/copyparty/th_srv.py @@ -256,6 +256,9 @@ class ThumbSrv(object): self.args = hub.args self.log_func = hub.log + self.log = self._log + self.nextlog = 0 + self.poke_cd = Cooldown(self.args.th_poke) self.mutex = threading.Lock() @@ -345,7 +348,14 @@ class ThumbSrv(object): for zss in [self.fmt_ffi, self.fmt_ffv, self.fmt_ffa]: self.thumbable |= zss - def log(self, msg: str, c: Union[int, str] = 0) -> None: + def _log(self, msg: str, c: Union[int, str] = 0) -> None: + self.log_func("thumb", msg, c) + + def _slog(self, msg: str, c: Union[int, str] = 0) -> None: + now = time.time() + if c in (0, 6) and now < self.nextlog: + return + self.nextlog = now + self.args.th_pre_rl self.log_func("thumb", msg, c) def shutdown(self) -> None: diff --git a/copyparty/up2k.py 
b/copyparty/up2k.py index 69a8986d..b6c95798 100644 --- a/copyparty/up2k.py +++ b/copyparty/up2k.py @@ -337,6 +337,10 @@ class Up2k(object): if not self.stop: self.log("uploads are now possible", 2) + def is_busy(self) -> bool: + # returns ( currently-busy , have-finished-at-least-once ) + return bool(self.pp), self.gt1 + def get_state(self, get_q: bool, uname: str) -> str: mtpq: Union[int, str] = 0 ups = [] diff --git a/tests/util.py b/tests/util.py index 64246582..802417c5 100644 --- a/tests/util.py +++ b/tests/util.py @@ -164,7 +164,7 @@ class Cfg(Namespace): ex = "ctl_re db_act forget_ip idp_cookie idp_store k304 loris no304 nosubtle qr_pin qr_wait re_maxage rproxy rsp_jtr rsp_slp s_wr_slp snap_wri theme themes turbo u2ow zipmaxn zipmaxs" ka.update(**{k: 0 for k in ex.split()}) - ex = "ah_alg bname chdir chmod_f chpw_db db_xattr doctitle df epilogues exit favico fika ipa ipar html_head html_head_d html_head_s idp_login idp_logout lg_sba lg_sbf log_date log_fk md_sba md_sbf name og_desc og_site og_th og_title og_title_a og_title_v og_title_i opds_exts preadmes prologues readmes shr shr1 shr_site site smsg tcolor textfiles txt_eol ufavico ufavico_h unlist up_site vc_url vname xff_src zipmaxt R RS SR" + ex = "ah_alg bname chdir chmod_f chpw_db db_xattr doctitle df epilogues exit favico fika ipa ipar html_head html_head_d html_head_s idp_login idp_logout lg_sba lg_sbf log_date log_fk md_sba md_sbf name og_desc og_site og_th og_title og_title_a og_title_v og_title_i opds_exts preadmes prologues readmes shr shr1 shr_site site smsg tcolor textfiles th_pregen txt_eol ufavico ufavico_h unlist up_site vc_url vname xff_src zipmaxt R RS SR" ka.update(**{k: "" for k in ex.split()}) ex = "apnd_who ban_403 ban_404 ban_422 ban_pw ban_pwc ban_url dont_ban cachectl http_vary rcm rss_fmt_d rss_fmt_t spinner"