mirror of
https://github.com/ArchiveBox/ArchiveBox.git
synced 2026-04-06 07:47:53 +10:00
bump version
This commit is contained in:
@@ -44,12 +44,10 @@ def main(binary_id: str, machine_id: str, name: str, binproviders: str, custom_c
|
||||
result = subprocess.run(
|
||||
custom_cmd,
|
||||
shell=True,
|
||||
capture_output=True,
|
||||
text=True,
|
||||
timeout=600, # 10 minute timeout for custom installs
|
||||
)
|
||||
if result.returncode != 0:
|
||||
click.echo(f"Custom install failed: {result.stderr}", err=True)
|
||||
click.echo(f"Custom install failed (exit={result.returncode})", err=True)
|
||||
sys.exit(1)
|
||||
except subprocess.TimeoutExpired:
|
||||
click.echo("Custom install timed out", err=True)
|
||||
|
||||
@@ -82,13 +82,12 @@ def clone_git(url: str, binary: str) -> tuple[bool, str | None, str]:
|
||||
cmd = [binary, *git_args, *git_args_extra, url, OUTPUT_DIR]
|
||||
|
||||
try:
|
||||
result = subprocess.run(cmd, capture_output=True, timeout=timeout)
|
||||
result = subprocess.run(cmd, timeout=timeout)
|
||||
|
||||
if result.returncode == 0 and Path(OUTPUT_DIR).is_dir():
|
||||
return True, OUTPUT_DIR, ''
|
||||
else:
|
||||
stderr = result.stderr.decode('utf-8', errors='replace')
|
||||
return False, None, f'git clone failed: {stderr[:200]}'
|
||||
return False, None, f'git clone failed (exit={result.returncode})'
|
||||
|
||||
except subprocess.TimeoutExpired:
|
||||
return False, None, f'Timed out after {timeout} seconds'
|
||||
|
||||
@@ -81,11 +81,10 @@ def extract_mercury(url: str, binary: str) -> tuple[bool, str | None, str]:
|
||||
try:
|
||||
# Get text version
|
||||
cmd_text = [binary, *mercury_args, *mercury_args_extra, url, '--format=text']
|
||||
result_text = subprocess.run(cmd_text, capture_output=True, timeout=timeout)
|
||||
result_text = subprocess.run(cmd_text, stdout=subprocess.PIPE, timeout=timeout, text=True)
|
||||
|
||||
if result_text.returncode != 0:
|
||||
stderr = result_text.stderr.decode('utf-8', errors='replace')
|
||||
return False, None, f'postlight-parser failed: {stderr[:200]}'
|
||||
return False, None, f'postlight-parser failed (exit={result_text.returncode})'
|
||||
|
||||
try:
|
||||
text_json = json.loads(result_text.stdout)
|
||||
@@ -101,7 +100,7 @@ def extract_mercury(url: str, binary: str) -> tuple[bool, str | None, str]:
|
||||
|
||||
# Get HTML version
|
||||
cmd_html = [binary, *mercury_args, *mercury_args_extra, url, '--format=html']
|
||||
result_html = subprocess.run(cmd_html, capture_output=True, timeout=timeout)
|
||||
result_html = subprocess.run(cmd_html, stdout=subprocess.PIPE, timeout=timeout, text=True)
|
||||
|
||||
try:
|
||||
html_json = json.loads(result_html.stdout)
|
||||
|
||||
@@ -62,8 +62,6 @@ def main(binary_id: str, machine_id: str, name: str, binproviders: str, override
|
||||
subprocess.run(
|
||||
[preferred_python, '-m', 'venv', str(pip_venv_path), '--upgrade-deps'],
|
||||
check=True,
|
||||
capture_output=True,
|
||||
text=True,
|
||||
)
|
||||
except Exception:
|
||||
# Fall back to PipProvider-managed venv creation
|
||||
|
||||
@@ -107,11 +107,10 @@ def extract_readability(url: str, binary: str) -> tuple[bool, str | None, str]:
|
||||
try:
|
||||
# Run readability-extractor (outputs JSON by default)
|
||||
cmd = [binary, *readability_args, *readability_args_extra, html_source]
|
||||
result = subprocess.run(cmd, capture_output=True, timeout=timeout)
|
||||
result = subprocess.run(cmd, stdout=subprocess.PIPE, timeout=timeout, text=True)
|
||||
|
||||
if result.returncode != 0:
|
||||
stderr = result.stderr.decode('utf-8', errors='replace')
|
||||
return False, None, f'readability-extractor failed: {stderr[:200]}'
|
||||
return False, None, f'readability-extractor failed (exit={result.returncode})'
|
||||
|
||||
# Parse JSON output
|
||||
try:
|
||||
|
||||
@@ -23,6 +23,7 @@ import json
|
||||
import os
|
||||
import subprocess
|
||||
import sys
|
||||
import threading
|
||||
import time
|
||||
from urllib.request import urlopen
|
||||
from pathlib import Path
|
||||
@@ -200,18 +201,44 @@ def save_singlefile(url: str, binary: str) -> tuple[bool, str | None, str]:
|
||||
cmd.extend([url, str(output_path)])
|
||||
|
||||
try:
|
||||
result = subprocess.run(cmd, capture_output=True, timeout=timeout)
|
||||
output_lines: list[str] = []
|
||||
process = subprocess.Popen(
|
||||
cmd,
|
||||
stdout=subprocess.PIPE,
|
||||
stderr=subprocess.STDOUT,
|
||||
text=True,
|
||||
bufsize=1,
|
||||
)
|
||||
|
||||
def _read_output() -> None:
|
||||
if not process.stdout:
|
||||
return
|
||||
for line in process.stdout:
|
||||
output_lines.append(line)
|
||||
sys.stderr.write(line)
|
||||
|
||||
reader = threading.Thread(target=_read_output, daemon=True)
|
||||
reader.start()
|
||||
|
||||
try:
|
||||
process.wait(timeout=timeout)
|
||||
except subprocess.TimeoutExpired:
|
||||
process.kill()
|
||||
reader.join(timeout=1)
|
||||
return False, None, f'Timed out after {timeout} seconds'
|
||||
|
||||
reader.join(timeout=1)
|
||||
combined_output = ''.join(output_lines)
|
||||
|
||||
if output_path.exists() and output_path.stat().st_size > 0:
|
||||
return True, str(output_path), ''
|
||||
else:
|
||||
stderr = result.stderr.decode('utf-8', errors='replace')
|
||||
stdout = result.stdout.decode('utf-8', errors='replace')
|
||||
stderr = combined_output
|
||||
if 'ERR_NAME_NOT_RESOLVED' in stderr:
|
||||
return False, None, 'DNS resolution failed'
|
||||
if 'ERR_CONNECTION_REFUSED' in stderr:
|
||||
return False, None, 'Connection refused'
|
||||
detail = (stderr or stdout).strip()
|
||||
detail = (stderr or '').strip()
|
||||
if len(detail) > 2000:
|
||||
detail = detail[:2000]
|
||||
cmd_preview = list(cmd)
|
||||
|
||||
@@ -144,7 +144,6 @@ def save_wget(url: str, binary: str) -> tuple[bool, str | None, str]:
|
||||
try:
|
||||
result = subprocess.run(
|
||||
cmd,
|
||||
capture_output=True,
|
||||
timeout=timeout * 2, # Allow extra time for large downloads
|
||||
)
|
||||
|
||||
@@ -155,18 +154,9 @@ def save_wget(url: str, binary: str) -> tuple[bool, str | None, str]:
|
||||
]
|
||||
|
||||
if not downloaded_files:
|
||||
stderr = result.stderr.decode('utf-8', errors='replace')
|
||||
stdout = result.stdout.decode('utf-8', errors='replace')
|
||||
combined = stderr + stdout
|
||||
|
||||
if '403' in combined or 'Forbidden' in combined:
|
||||
return False, None, '403 Forbidden (try changing USER_AGENT)'
|
||||
elif '404' in combined or 'Not Found' in combined:
|
||||
return False, None, '404 Not Found'
|
||||
elif '500' in combined:
|
||||
return False, None, '500 Internal Server Error'
|
||||
else:
|
||||
return False, None, f'No files downloaded: {stderr[:200]}'
|
||||
if result.returncode != 0:
|
||||
return False, None, f'wget failed (exit={result.returncode})'
|
||||
return False, None, 'No files downloaded'
|
||||
|
||||
# Find main HTML file
|
||||
html_files = [
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
[project]
|
||||
name = "archivebox"
|
||||
version = "0.9.2"
|
||||
version = "0.9.3"
|
||||
requires-python = ">=3.13"
|
||||
description = "Self-hosted internet archiving solution."
|
||||
authors = [{name = "Nick Sweeting", email = "pyproject.toml@archivebox.io"}]
|
||||
|
||||
Reference in New Issue
Block a user