bump version to 0.9.3 and report subprocess exit codes instead of captured stderr in failure messages

This commit is contained in:
Nick Sweeting
2026-01-19 01:11:59 -08:00
parent c7b2217cd6
commit 1cb2d5070e
8 changed files with 43 additions and 33 deletions

View File

@@ -44,12 +44,10 @@ def main(binary_id: str, machine_id: str, name: str, binproviders: str, custom_c
result = subprocess.run(
custom_cmd,
shell=True,
capture_output=True,
text=True,
timeout=600, # 10 minute timeout for custom installs
)
if result.returncode != 0:
click.echo(f"Custom install failed: {result.stderr}", err=True)
click.echo(f"Custom install failed (exit={result.returncode})", err=True)
sys.exit(1)
except subprocess.TimeoutExpired:
click.echo("Custom install timed out", err=True)

View File

@@ -82,13 +82,12 @@ def clone_git(url: str, binary: str) -> tuple[bool, str | None, str]:
cmd = [binary, *git_args, *git_args_extra, url, OUTPUT_DIR]
try:
result = subprocess.run(cmd, capture_output=True, timeout=timeout)
result = subprocess.run(cmd, timeout=timeout)
if result.returncode == 0 and Path(OUTPUT_DIR).is_dir():
return True, OUTPUT_DIR, ''
else:
stderr = result.stderr.decode('utf-8', errors='replace')
return False, None, f'git clone failed: {stderr[:200]}'
return False, None, f'git clone failed (exit={result.returncode})'
except subprocess.TimeoutExpired:
return False, None, f'Timed out after {timeout} seconds'

View File

@@ -81,11 +81,10 @@ def extract_mercury(url: str, binary: str) -> tuple[bool, str | None, str]:
try:
# Get text version
cmd_text = [binary, *mercury_args, *mercury_args_extra, url, '--format=text']
result_text = subprocess.run(cmd_text, capture_output=True, timeout=timeout)
result_text = subprocess.run(cmd_text, stdout=subprocess.PIPE, timeout=timeout, text=True)
if result_text.returncode != 0:
stderr = result_text.stderr.decode('utf-8', errors='replace')
return False, None, f'postlight-parser failed: {stderr[:200]}'
return False, None, f'postlight-parser failed (exit={result_text.returncode})'
try:
text_json = json.loads(result_text.stdout)
@@ -101,7 +100,7 @@ def extract_mercury(url: str, binary: str) -> tuple[bool, str | None, str]:
# Get HTML version
cmd_html = [binary, *mercury_args, *mercury_args_extra, url, '--format=html']
result_html = subprocess.run(cmd_html, capture_output=True, timeout=timeout)
result_html = subprocess.run(cmd_html, stdout=subprocess.PIPE, timeout=timeout, text=True)
try:
html_json = json.loads(result_html.stdout)

View File

@@ -62,8 +62,6 @@ def main(binary_id: str, machine_id: str, name: str, binproviders: str, override
subprocess.run(
[preferred_python, '-m', 'venv', str(pip_venv_path), '--upgrade-deps'],
check=True,
capture_output=True,
text=True,
)
except Exception:
# Fall back to PipProvider-managed venv creation

View File

@@ -107,11 +107,10 @@ def extract_readability(url: str, binary: str) -> tuple[bool, str | None, str]:
try:
# Run readability-extractor (outputs JSON by default)
cmd = [binary, *readability_args, *readability_args_extra, html_source]
result = subprocess.run(cmd, capture_output=True, timeout=timeout)
result = subprocess.run(cmd, stdout=subprocess.PIPE, timeout=timeout, text=True)
if result.returncode != 0:
stderr = result.stderr.decode('utf-8', errors='replace')
return False, None, f'readability-extractor failed: {stderr[:200]}'
return False, None, f'readability-extractor failed (exit={result.returncode})'
# Parse JSON output
try:

View File

@@ -23,6 +23,7 @@ import json
import os
import subprocess
import sys
import threading
import time
from urllib.request import urlopen
from pathlib import Path
@@ -200,18 +201,44 @@ def save_singlefile(url: str, binary: str) -> tuple[bool, str | None, str]:
cmd.extend([url, str(output_path)])
try:
result = subprocess.run(cmd, capture_output=True, timeout=timeout)
output_lines: list[str] = []
process = subprocess.Popen(
cmd,
stdout=subprocess.PIPE,
stderr=subprocess.STDOUT,
text=True,
bufsize=1,
)
def _read_output() -> None:
if not process.stdout:
return
for line in process.stdout:
output_lines.append(line)
sys.stderr.write(line)
reader = threading.Thread(target=_read_output, daemon=True)
reader.start()
try:
process.wait(timeout=timeout)
except subprocess.TimeoutExpired:
process.kill()
reader.join(timeout=1)
return False, None, f'Timed out after {timeout} seconds'
reader.join(timeout=1)
combined_output = ''.join(output_lines)
if output_path.exists() and output_path.stat().st_size > 0:
return True, str(output_path), ''
else:
stderr = result.stderr.decode('utf-8', errors='replace')
stdout = result.stdout.decode('utf-8', errors='replace')
stderr = combined_output
if 'ERR_NAME_NOT_RESOLVED' in stderr:
return False, None, 'DNS resolution failed'
if 'ERR_CONNECTION_REFUSED' in stderr:
return False, None, 'Connection refused'
detail = (stderr or stdout).strip()
detail = (stderr or '').strip()
if len(detail) > 2000:
detail = detail[:2000]
cmd_preview = list(cmd)

View File

@@ -144,7 +144,6 @@ def save_wget(url: str, binary: str) -> tuple[bool, str | None, str]:
try:
result = subprocess.run(
cmd,
capture_output=True,
timeout=timeout * 2, # Allow extra time for large downloads
)
@@ -155,18 +154,9 @@ def save_wget(url: str, binary: str) -> tuple[bool, str | None, str]:
]
if not downloaded_files:
stderr = result.stderr.decode('utf-8', errors='replace')
stdout = result.stdout.decode('utf-8', errors='replace')
combined = stderr + stdout
if '403' in combined or 'Forbidden' in combined:
return False, None, '403 Forbidden (try changing USER_AGENT)'
elif '404' in combined or 'Not Found' in combined:
return False, None, '404 Not Found'
elif '500' in combined:
return False, None, '500 Internal Server Error'
else:
return False, None, f'No files downloaded: {stderr[:200]}'
if result.returncode != 0:
return False, None, f'wget failed (exit={result.returncode})'
return False, None, 'No files downloaded'
# Find main HTML file
html_files = [

View File

@@ -1,6 +1,6 @@
[project]
name = "archivebox"
version = "0.9.2"
version = "0.9.3"
requires-python = ">=3.13"
description = "Self-hosted internet archiving solution."
authors = [{name = "Nick Sweeting", email = "pyproject.toml@archivebox.io"}]