diff --git a/archivebox/core/migrations/0025_cleanup_schema.py b/archivebox/core/migrations/0025_cleanup_schema.py index cab42bbf..0753b453 100644 --- a/archivebox/core/migrations/0025_cleanup_schema.py +++ b/archivebox/core/migrations/0025_cleanup_schema.py @@ -48,12 +48,17 @@ def cleanup_extra_columns(apps, schema_editor): guid = f"{socket.gethostname()}-{platform.machine()}" hostname = socket.gethostname() - # Check if config column exists (v0.9.0+ only) + # Check schema version cursor.execute("SELECT COUNT(*) FROM pragma_table_info('machine_machine') WHERE name='config'") has_config = cursor.fetchone()[0] > 0 + cursor.execute("SELECT COUNT(*) FROM pragma_table_info('machine_machine') WHERE name='abid'") + has_abid = cursor.fetchone()[0] > 0 + cursor.execute("SELECT COUNT(*) FROM pragma_table_info('machine_machine') WHERE name='num_uses_succeeded'") + has_num_uses = cursor.fetchone()[0] > 0 # Insert directly with SQL (use INSERT OR IGNORE in case it already exists) if has_config: + # v0.9.0+ schema cursor.execute(""" INSERT OR IGNORE INTO machine_machine ( id, created_at, modified_at, @@ -65,8 +70,21 @@ def cleanup_extra_columns(apps, schema_editor): machine_id, guid, hostname, platform.machine(), platform.system(), platform.platform(), platform.release() )) + elif has_abid and has_num_uses: + # v0.8.6rc0 schema (has abid and num_uses columns) + cursor.execute(""" + INSERT OR IGNORE INTO machine_machine ( + id, abid, created_at, modified_at, + guid, hostname, hw_in_docker, hw_in_vm, hw_manufacturer, hw_product, hw_uuid, + os_arch, os_family, os_platform, os_release, os_kernel, + stats, num_uses_failed, num_uses_succeeded + ) VALUES (?, '', datetime('now'), datetime('now'), ?, ?, 0, 0, '', '', '', ?, ?, ?, ?, '', '{}', 0, 0) + """, ( + machine_id, guid, hostname, + platform.machine(), platform.system(), platform.platform(), platform.release() + )) else: - # v0.8.6rc0 schema (no config column) + # v0.7.2 or other schema cursor.execute(""" INSERT OR IGNORE INTO machine_machine ( id, created_at, modified_at, @@ -79,8 +97,13 @@ def cleanup_extra_columns(apps, schema_editor): platform.machine(), platform.system(), platform.platform(), platform.release() )) # Re-query to get the actual id (in case INSERT OR IGNORE skipped it) - machine_id = cursor.execute("SELECT id FROM machine_machine LIMIT 1").fetchone()[0] - print(f" ✓ Using/Created Machine: {machine_id}") + result = cursor.execute("SELECT id FROM machine_machine LIMIT 1").fetchone() + if result: + machine_id = result[0] + print(f" ✓ Using/Created Machine: {machine_id}") + else: + # INSERT OR IGNORE failed - try again without IGNORE to see the error + raise Exception("Failed to create Machine record - machine_machine table is empty after INSERT") for ar_id, cmd, pwd, binary_id, iface_id, start_ts, end_ts, status in archive_results: # Create Process record diff --git a/archivebox/plugins/ublock/tests/test_ublock.py b/archivebox/plugins/ublock/tests/test_ublock.py index 63aa5bb7..8636fe55 100644 --- a/archivebox/plugins/ublock/tests/test_ublock.py +++ b/archivebox/plugins/ublock/tests/test_ublock.py @@ -648,8 +648,35 @@ def test_blocks_ads_on_test_page(): loaded_exts = json.loads(extensions_file.read_text()) print(f"Extensions loaded: {[e.get('name') for e in loaded_exts]}") - # Wait for extension to initialize - time.sleep(3) + # Verify extension has ID and is initialized + if loaded_exts and loaded_exts[0].get('id'): + ext_id = loaded_exts[0]['id'] + print(f"Extension ID: {ext_id}") + + # Visit the extension dashboard to ensure it's fully loaded + print("Visiting extension dashboard to verify initialization...") + dashboard_script = f''' +const puppeteer = require('{env_base['NODE_MODULES_DIR']}/puppeteer-core'); +(async () => {{ + const browser = await puppeteer.connect({{ + browserWSEndpoint: '{ext_cdp_url}', + defaultViewport: null + }}); + const page = await browser.newPage(); + await page.goto('chrome-extension://{ext_id}/dashboard.html', {{ waitUntil: 'domcontentloaded', timeout: 10000 }}); + const title = await page.title(); + console.log('Dashboard title:', title); + await page.close(); + browser.disconnect(); +}})(); +''' + dash_script_path = tmpdir / 'check_dashboard.js' + dash_script_path.write_text(dashboard_script) + subprocess.run(['node', str(dash_script_path)], capture_output=True, timeout=15, env=env_base) + + # Wait longer for extension to fully initialize filters + print("Waiting for uBlock filter lists to initialize...") + time.sleep(8) ext_result = check_ad_blocking( ext_cdp_url, TEST_URL, env_base, tmpdir diff --git a/archivebox/workers/orchestrator.py b/archivebox/workers/orchestrator.py index 6323df8a..0da8d7ea 100644 --- a/archivebox/workers/orchestrator.py +++ b/archivebox/workers/orchestrator.py @@ -267,8 +267,8 @@ class Orchestrator: """Main orchestrator loop.""" from rich.live import Live from rich.table import Table - from rich.console import Group - from archivebox.misc.logging import IS_TTY, CONSOLE + from archivebox.misc.logging import IS_TTY + import archivebox.misc.logging as logging_module self.on_startup() @@ -305,11 +305,18 @@ class Orchestrator: return table - live = Live(make_progress_table(), console=CONSOLE, refresh_per_second=4, transient=False) if show_progress else None + live = Live(make_progress_table(), refresh_per_second=4, transient=False) if show_progress else None + + # Redirect all output through Live's console when active + original_console = logging_module.CONSOLE + original_stderr = logging_module.STDERR try: if live: live.start() + # Replace global consoles with Live's console + logging_module.CONSOLE = live.console + logging_module.STDERR = live.console while True: # Check queues and spawn workers @@ -347,7 +354,10 @@ class Orchestrator: else: self.on_shutdown() finally: + # Restore original consoles if live: + logging_module.CONSOLE = original_console + logging_module.STDERR = original_stderr live.stop() def start(self) -> int: