"""Shared pytest fixtures: an ``archivebox init`` run, an extractor-disabling
environment, and a small static website served from localhost."""

import os
import subprocess
from http.server import BaseHTTPRequestHandler, ThreadingHTTPServer
from threading import Thread

import pytest


@pytest.fixture
def process(tmp_path):
    """Run ``archivebox init`` with ``tmp_path`` as the working directory.

    Returns:
        The ``subprocess.CompletedProcess`` with stdout/stderr captured.
        The exit status is not checked here (no ``check=True``), so a failed
        init does not raise; callers inspect ``returncode`` themselves.
    """
    return subprocess.run(
        ['archivebox', 'init'],
        capture_output=True,
        cwd=tmp_path,
    )


@pytest.fixture
def disable_extractors_dict():
    """Return a copy of ``os.environ`` with every ``SAVE_*`` flag set to "false".

    The copy is independent of the real process environment, so tests may
    mutate it freely before handing it to a subprocess.
    """
    env = os.environ.copy()
    env.update({
        "SAVE_WGET": "false",
        "SAVE_SINGLEFILE": "false",
        "SAVE_READABILITY": "false",
        "SAVE_MERCURY": "false",
        "SAVE_HTMLTOTEXT": "false",
        "SAVE_PDF": "false",
        "SAVE_SCREENSHOT": "false",
        "SAVE_DOM": "false",
        "SAVE_HEADERS": "false",
        "SAVE_GIT": "false",
        "SAVE_YTDLP": "false",
        "SAVE_ARCHIVEDOTORG": "false",
        "SAVE_TITLE": "false",
        "SAVE_FAVICON": "false",
    })
    return env


def _recursive_site_pages():
    """Build the ``path -> response body`` table served by ``recursive_test_site``.

    Layout (three levels, so link-following can be exercised):

    * ``/``                      links to ``/about``, ``/blog``, ``/contact``
    * ``/<section>``             links to ``/deep/<section>``
    * ``/deep/<section>``        leaf page, no outgoing links
    * ``/favicon.ico``           opaque icon payload

    Returns:
        dict[str, bytes]: exact request path mapped to the bytes to send.
    """
    # NOTE(review): the anchor/heading markup below is reconstructed. The
    # pages previously carried only their visible text ("Root About Blog
    # Contact", "Deep About", ...) with no <a href>, so nothing could be
    # discovered by following links. Confirm the markup against what the
    # recursive tests expect.
    def html(inner):
        return f"<html><body>{inner}</body></html>".encode("utf-8")

    sections = ("about", "blog", "contact")

    root_links = " ".join(
        f'<a href="/{name}">{name.capitalize()}</a>' for name in sections
    )
    pages = {"/": html(f"<h1>Root</h1> {root_links}")}

    for name in sections:
        title = f"Deep {name.capitalize()}"
        pages[f"/{name}"] = html(f'<a href="/deep/{name}">{title}</a>')

    for name in sections:
        pages[f"/deep/{name}"] = html(f"<h1>Deep {name.capitalize()}</h1>")

    pages["/favicon.ico"] = b"test-icon"
    return pages


@pytest.fixture
def recursive_test_site():
    """Serve a small three-level static site on an ephemeral localhost port.

    A ``ThreadingHTTPServer`` is bound to ``127.0.0.1`` on port 0 (the OS
    picks a free port) and run in a daemon thread for the duration of the
    test.

    Yields:
        dict with:
            ``base_url``   - ``http://127.0.0.1:<port>`` (no trailing slash)
            ``root_url``   - the index page
            ``child_urls`` - the three pages linked from the index
            ``deep_urls``  - the three pages linked from the child pages

    On teardown the server is shut down, its socket closed, and the thread
    joined (waiting at most 5 seconds).
    """
    pages = _recursive_site_pages()

    class _RecursiveHandler(BaseHTTPRequestHandler):
        def do_GET(self):
            # Lookup is on the exact request path; anything unknown is a 404.
            body = pages.get(self.path)
            if body is None:
                self.send_response(404)
                self.end_headers()
                return

            if self.path.endswith(".ico"):
                content_type = "image/x-icon"
            else:
                content_type = "text/html; charset=utf-8"

            self.send_response(200)
            self.send_header("Content-Type", content_type)
            self.send_header("Content-Length", str(len(body)))
            self.end_headers()
            self.wfile.write(body)

        def log_message(self, format, *args):
            # Suppress the default per-request logging to stderr.
            return

    server = ThreadingHTTPServer(("127.0.0.1", 0), _RecursiveHandler)
    thread = Thread(target=server.serve_forever, daemon=True)
    thread.start()
    try:
        base_url = f"http://127.0.0.1:{server.server_address[1]}"
        yield {
            "base_url": base_url,
            "root_url": f"{base_url}/",
            "child_urls": [f"{base_url}/about", f"{base_url}/blog", f"{base_url}/contact"],
            "deep_urls": [f"{base_url}/deep/about", f"{base_url}/deep/blog", f"{base_url}/deep/contact"],
        }
    finally:
        # shutdown() stops serve_forever(); only then is it safe to close
        # the listening socket and wait for the thread to finish.
        server.shutdown()
        server.server_close()
        thread.join(timeout=5)