Add -j/--workers flag and parallelize exiftool calls

Introduce a -j/--workers CLI option that defaults to the number of CPUs.
When more than one worker is requested and dry-run mode is off, exiftool
invocations are dispatched concurrently via ThreadPoolExecutor and
tracked with as_completed, updating the progress bar as each one
completes. The original sequential logic is still used when only one
worker is requested or when --dry-run is set.
This commit is contained in:
codex
2025-04-30 22:08:20 +10:00
committed by Alexander Wainwright
parent 73e6318640
commit d086603306

View File

@@ -5,6 +5,7 @@ import subprocess
import datetime
import toml
from alive_progress import alive_bar
from concurrent.futures import ThreadPoolExecutor, as_completed
CONFIG_PATH = os.path.expanduser("~/.config/emulsion/config.toml")
@@ -38,6 +39,7 @@ def parse_args(config):
parser.add_argument('--base-date', default=None, help='Base date or date/time (e.g. 2023-04-10 or 2023-04-10 12:00:00).')
parser.add_argument('--time-increment', type=int, default=None, help='Time increment in seconds between images.')
parser.add_argument('--dry-run', action='store_true', help='Show what would be changed without modifying files.')
parser.add_argument('-j', '--workers', type=int, default=os.cpu_count() or 1, help='Number of parallel workers to run exiftool; defaults to number of CPUs.')
parser.add_argument('--init-config', action='store_true', help='Create a default config file (if none exists) and exit.')
args = parser.parse_args()
@@ -236,6 +238,9 @@ def main():
try:
config = load_config()
args = parse_args(config)
# Default to number of CPUs if workers not specified
if args.workers is None:
args.workers = os.cpu_count() or 1
if args.init_config:
prompt_for_config(args)
@@ -261,41 +266,84 @@ def main():
print(f"Processing {total_files} file(s)...")
with alive_bar(total_files, title="Tagging files") as bar:
for f in files:
ext = os.path.splitext(f.lower())[1]
if ext not in ['.jpg', '.jpeg', '.tif', '.tiff']:
bar.text(f"Skipping unsupported file: {f}")
bar()
continue
if args.workers > 1 and not args.dry_run:
executor = ThreadPoolExecutor(max_workers=args.workers)
futures = {}
supported_idx = 0
for f in files:
ext = os.path.splitext(f)[1].lower()
if ext not in ['.jpg', '.jpeg', '.tif', '.tiff']:
bar.text(f"Skipping unsupported file: {f}")
bar()
continue
timestamp_str = current_dt.strftime("%Y:%m:%d %H:%M:%S")
cmd = build_exiftool_cmd(
file_path=f,
author=args.author,
lab=args.lab,
make=args.make,
model=args.model,
film=args.film,
timestamp=timestamp_str,
dry_run=args.dry_run
)
ts_dt = base_dt + datetime.timedelta(seconds=supported_idx * time_increment)
timestamp_str = ts_dt.strftime("%Y:%m:%d %H:%M:%S")
cmd = build_exiftool_cmd(
file_path=f,
author=args.author,
lab=args.lab,
make=args.make,
model=args.model,
film=args.film,
timestamp=timestamp_str,
dry_run=False
)
future = executor.submit(
subprocess.run, cmd, check=True,
stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL
)
futures[future] = (f, timestamp_str)
supported_idx += 1
if args.dry_run:
bar.text(f"DRY RUN: {cmd}")
else:
for future in as_completed(futures):
f, ts = futures[future]
try:
subprocess.run(
cmd,
check=True,
stdout=subprocess.DEVNULL,
stderr=subprocess.DEVNULL
)
bar.text(f"Updated {f} => {timestamp_str}")
future.result()
bar.text(f"Updated {f} => {ts}")
except subprocess.CalledProcessError as e:
bar.text(f"Failed to update {f}: {e}")
bar()
current_dt += datetime.timedelta(seconds=time_increment)
bar()
executor.shutdown(wait=True)
else:
supported_idx = 0
current_dt = base_dt
for f in files:
ext = os.path.splitext(f.lower())[1]
if ext not in ['.jpg', '.jpeg', '.tif', '.tiff']:
bar.text(f"Skipping unsupported file: {f}")
bar()
continue
timestamp_str = current_dt.strftime("%Y:%m:%d %H:%M:%S")
cmd = build_exiftool_cmd(
file_path=f,
author=args.author,
lab=args.lab,
make=args.make,
model=args.model,
film=args.film,
timestamp=timestamp_str,
dry_run=args.dry_run
)
if args.dry_run:
bar.text(f"DRY RUN: {cmd}")
else:
try:
subprocess.run(
cmd,
check=True,
stdout=subprocess.DEVNULL,
stderr=subprocess.DEVNULL
)
bar.text(f"Updated {f} => {timestamp_str}")
except subprocess.CalledProcessError as e:
bar.text(f"Failed to update {f}: {e}")
current_dt += datetime.timedelta(seconds=time_increment)
bar()
except KeyboardInterrupt:
print("\nInterrupted by user. Exiting.")