more progress bar and migrations fixes

This commit is contained in:
Nick Sweeting
2025-12-31 01:56:37 -08:00
parent d5c0c64dcd
commit 72f6a91b31
3 changed files with 69 additions and 9 deletions

View File

@@ -48,12 +48,17 @@ def cleanup_extra_columns(apps, schema_editor):
guid = f"{socket.gethostname()}-{platform.machine()}"
hostname = socket.gethostname()
# Check if config column exists (v0.9.0+ only)
# Check schema version
cursor.execute("SELECT COUNT(*) FROM pragma_table_info('machine_machine') WHERE name='config'")
has_config = cursor.fetchone()[0] > 0
cursor.execute("SELECT COUNT(*) FROM pragma_table_info('machine_machine') WHERE name='abid'")
has_abid = cursor.fetchone()[0] > 0
cursor.execute("SELECT COUNT(*) FROM pragma_table_info('machine_machine') WHERE name='num_uses_succeeded'")
has_num_uses = cursor.fetchone()[0] > 0
# Insert directly with SQL (use INSERT OR IGNORE in case it already exists)
if has_config:
# v0.9.0+ schema
cursor.execute("""
INSERT OR IGNORE INTO machine_machine (
id, created_at, modified_at,
@@ -65,8 +70,21 @@ def cleanup_extra_columns(apps, schema_editor):
machine_id, guid, hostname,
platform.machine(), platform.system(), platform.platform(), platform.release()
))
elif has_abid and has_num_uses:
# v0.8.6rc0 schema (has abid and num_uses columns)
cursor.execute("""
INSERT OR IGNORE INTO machine_machine (
id, abid, created_at, modified_at,
guid, hostname, hw_in_docker, hw_in_vm, hw_manufacturer, hw_product, hw_uuid,
os_arch, os_family, os_platform, os_release, os_kernel,
stats, num_uses_failed, num_uses_succeeded
) VALUES (?, '', datetime('now'), datetime('now'), ?, ?, 0, 0, '', '', '', ?, ?, ?, ?, '', '{}', 0, 0)
""", (
machine_id, guid, hostname,
platform.machine(), platform.system(), platform.platform(), platform.release()
))
else:
# v0.8.6rc0 schema (no config column)
# v0.7.2 or other schema
cursor.execute("""
INSERT OR IGNORE INTO machine_machine (
id, created_at, modified_at,
@@ -79,8 +97,13 @@ def cleanup_extra_columns(apps, schema_editor):
platform.machine(), platform.system(), platform.platform(), platform.release()
))
# Re-query to get the actual id (in case INSERT OR IGNORE skipped it)
machine_id = cursor.execute("SELECT id FROM machine_machine LIMIT 1").fetchone()[0]
print(f" ✓ Using/Created Machine: {machine_id}")
result = cursor.execute("SELECT id FROM machine_machine LIMIT 1").fetchone()
if result:
machine_id = result[0]
print(f" ✓ Using/Created Machine: {machine_id}")
else:
# INSERT OR IGNORE can skip the row without raising, so an empty table here means no Machine row exists - fail loudly
raise Exception("Failed to create Machine record - machine_machine table is empty after INSERT")
for ar_id, cmd, pwd, binary_id, iface_id, start_ts, end_ts, status in archive_results:
# Create Process record

View File

@@ -648,8 +648,35 @@ def test_blocks_ads_on_test_page():
loaded_exts = json.loads(extensions_file.read_text())
print(f"Extensions loaded: {[e.get('name') for e in loaded_exts]}")
# Wait for extension to initialize
time.sleep(3)
# Verify extension has ID and is initialized
if loaded_exts and loaded_exts[0].get('id'):
ext_id = loaded_exts[0]['id']
print(f"Extension ID: {ext_id}")
# Visit the extension dashboard to ensure it's fully loaded
print("Visiting extension dashboard to verify initialization...")
dashboard_script = f'''
const puppeteer = require('{env_base['NODE_MODULES_DIR']}/puppeteer-core');
(async () => {{
const browser = await puppeteer.connect({{
browserWSEndpoint: '{ext_cdp_url}',
defaultViewport: null
}});
const page = await browser.newPage();
await page.goto('chrome-extension://{ext_id}/dashboard.html', {{ waitUntil: 'domcontentloaded', timeout: 10000 }});
const title = await page.title();
console.log('Dashboard title:', title);
await page.close();
browser.disconnect();
}})();
'''
dash_script_path = tmpdir / 'check_dashboard.js'
dash_script_path.write_text(dashboard_script)
subprocess.run(['node', str(dash_script_path)], capture_output=True, timeout=15, env=env_base)
# Wait longer for extension to fully initialize filters
print("Waiting for uBlock filter lists to initialize...")
time.sleep(8)
ext_result = check_ad_blocking(
ext_cdp_url, TEST_URL, env_base, tmpdir

View File

@@ -267,8 +267,8 @@ class Orchestrator:
"""Main orchestrator loop."""
from rich.live import Live
from rich.table import Table
from rich.console import Group
from archivebox.misc.logging import IS_TTY, CONSOLE
from archivebox.misc.logging import IS_TTY
import archivebox.misc.logging as logging_module
self.on_startup()
@@ -305,11 +305,18 @@ class Orchestrator:
return table
live = Live(make_progress_table(), console=CONSOLE, refresh_per_second=4, transient=False) if show_progress else None
live = Live(make_progress_table(), refresh_per_second=4, transient=False) if show_progress else None
# Redirect all output through Live's console when active
original_console = logging_module.CONSOLE
original_stderr = logging_module.STDERR
try:
if live:
live.start()
# Replace global consoles with Live's console
logging_module.CONSOLE = live.console
logging_module.STDERR = live.console
while True:
# Check queues and spawn workers
@@ -347,7 +354,10 @@ class Orchestrator:
else:
self.on_shutdown()
finally:
# Restore original consoles
if live:
logging_module.CONSOLE = original_console
logging_module.STDERR = original_stderr
live.stop()
def start(self) -> int: