mirror of
https://github.com/ArchiveBox/ArchiveBox.git
synced 2026-01-04 09:55:33 +10:00
more progress bar and migrations fixes
This commit is contained in:
@@ -48,12 +48,17 @@ def cleanup_extra_columns(apps, schema_editor):
|
|||||||
guid = f"{socket.gethostname()}-{platform.machine()}"
|
guid = f"{socket.gethostname()}-{platform.machine()}"
|
||||||
hostname = socket.gethostname()
|
hostname = socket.gethostname()
|
||||||
|
|
||||||
# Check if config column exists (v0.9.0+ only)
|
# Check schema version
|
||||||
cursor.execute("SELECT COUNT(*) FROM pragma_table_info('machine_machine') WHERE name='config'")
|
cursor.execute("SELECT COUNT(*) FROM pragma_table_info('machine_machine') WHERE name='config'")
|
||||||
has_config = cursor.fetchone()[0] > 0
|
has_config = cursor.fetchone()[0] > 0
|
||||||
|
cursor.execute("SELECT COUNT(*) FROM pragma_table_info('machine_machine') WHERE name='abid'")
|
||||||
|
has_abid = cursor.fetchone()[0] > 0
|
||||||
|
cursor.execute("SELECT COUNT(*) FROM pragma_table_info('machine_machine') WHERE name='num_uses_succeeded'")
|
||||||
|
has_num_uses = cursor.fetchone()[0] > 0
|
||||||
|
|
||||||
# Insert directly with SQL (use INSERT OR IGNORE in case it already exists)
|
# Insert directly with SQL (use INSERT OR IGNORE in case it already exists)
|
||||||
if has_config:
|
if has_config:
|
||||||
|
# v0.9.0+ schema
|
||||||
cursor.execute("""
|
cursor.execute("""
|
||||||
INSERT OR IGNORE INTO machine_machine (
|
INSERT OR IGNORE INTO machine_machine (
|
||||||
id, created_at, modified_at,
|
id, created_at, modified_at,
|
||||||
@@ -65,8 +70,21 @@ def cleanup_extra_columns(apps, schema_editor):
|
|||||||
machine_id, guid, hostname,
|
machine_id, guid, hostname,
|
||||||
platform.machine(), platform.system(), platform.platform(), platform.release()
|
platform.machine(), platform.system(), platform.platform(), platform.release()
|
||||||
))
|
))
|
||||||
|
elif has_abid and has_num_uses:
|
||||||
|
# v0.8.6rc0 schema (has abid and num_uses columns)
|
||||||
|
cursor.execute("""
|
||||||
|
INSERT OR IGNORE INTO machine_machine (
|
||||||
|
id, abid, created_at, modified_at,
|
||||||
|
guid, hostname, hw_in_docker, hw_in_vm, hw_manufacturer, hw_product, hw_uuid,
|
||||||
|
os_arch, os_family, os_platform, os_release, os_kernel,
|
||||||
|
stats, num_uses_failed, num_uses_succeeded
|
||||||
|
) VALUES (?, '', datetime('now'), datetime('now'), ?, ?, 0, 0, '', '', '', ?, ?, ?, ?, '', '{}', 0, 0)
|
||||||
|
""", (
|
||||||
|
machine_id, guid, hostname,
|
||||||
|
platform.machine(), platform.system(), platform.platform(), platform.release()
|
||||||
|
))
|
||||||
else:
|
else:
|
||||||
# v0.8.6rc0 schema (no config column)
|
# v0.7.2 or other schema
|
||||||
cursor.execute("""
|
cursor.execute("""
|
||||||
INSERT OR IGNORE INTO machine_machine (
|
INSERT OR IGNORE INTO machine_machine (
|
||||||
id, created_at, modified_at,
|
id, created_at, modified_at,
|
||||||
@@ -79,8 +97,13 @@ def cleanup_extra_columns(apps, schema_editor):
|
|||||||
platform.machine(), platform.system(), platform.platform(), platform.release()
|
platform.machine(), platform.system(), platform.platform(), platform.release()
|
||||||
))
|
))
|
||||||
# Re-query to get the actual id (in case INSERT OR IGNORE skipped it)
|
# Re-query to get the actual id (in case INSERT OR IGNORE skipped it)
|
||||||
machine_id = cursor.execute("SELECT id FROM machine_machine LIMIT 1").fetchone()[0]
|
result = cursor.execute("SELECT id FROM machine_machine LIMIT 1").fetchone()
|
||||||
print(f" ✓ Using/Created Machine: {machine_id}")
|
if result:
|
||||||
|
machine_id = result[0]
|
||||||
|
print(f" ✓ Using/Created Machine: {machine_id}")
|
||||||
|
else:
|
||||||
|
# INSERT OR IGNORE inserted nothing and the table is still empty - raise an explicit error instead of continuing without a machine id
|
||||||
|
raise Exception("Failed to create Machine record - machine_machine table is empty after INSERT")
|
||||||
|
|
||||||
for ar_id, cmd, pwd, binary_id, iface_id, start_ts, end_ts, status in archive_results:
|
for ar_id, cmd, pwd, binary_id, iface_id, start_ts, end_ts, status in archive_results:
|
||||||
# Create Process record
|
# Create Process record
|
||||||
|
|||||||
@@ -648,8 +648,35 @@ def test_blocks_ads_on_test_page():
|
|||||||
loaded_exts = json.loads(extensions_file.read_text())
|
loaded_exts = json.loads(extensions_file.read_text())
|
||||||
print(f"Extensions loaded: {[e.get('name') for e in loaded_exts]}")
|
print(f"Extensions loaded: {[e.get('name') for e in loaded_exts]}")
|
||||||
|
|
||||||
# Wait for extension to initialize
|
# Verify extension has ID and is initialized
|
||||||
time.sleep(3)
|
if loaded_exts and loaded_exts[0].get('id'):
|
||||||
|
ext_id = loaded_exts[0]['id']
|
||||||
|
print(f"Extension ID: {ext_id}")
|
||||||
|
|
||||||
|
# Visit the extension dashboard to ensure it's fully loaded
|
||||||
|
print("Visiting extension dashboard to verify initialization...")
|
||||||
|
dashboard_script = f'''
|
||||||
|
const puppeteer = require('{env_base['NODE_MODULES_DIR']}/puppeteer-core');
|
||||||
|
(async () => {{
|
||||||
|
const browser = await puppeteer.connect({{
|
||||||
|
browserWSEndpoint: '{ext_cdp_url}',
|
||||||
|
defaultViewport: null
|
||||||
|
}});
|
||||||
|
const page = await browser.newPage();
|
||||||
|
await page.goto('chrome-extension://{ext_id}/dashboard.html', {{ waitUntil: 'domcontentloaded', timeout: 10000 }});
|
||||||
|
const title = await page.title();
|
||||||
|
console.log('Dashboard title:', title);
|
||||||
|
await page.close();
|
||||||
|
browser.disconnect();
|
||||||
|
}})();
|
||||||
|
'''
|
||||||
|
dash_script_path = tmpdir / 'check_dashboard.js'
|
||||||
|
dash_script_path.write_text(dashboard_script)
|
||||||
|
subprocess.run(['node', str(dash_script_path)], capture_output=True, timeout=15, env=env_base)
|
||||||
|
|
||||||
|
# Wait longer for extension to fully initialize filters
|
||||||
|
print("Waiting for uBlock filter lists to initialize...")
|
||||||
|
time.sleep(8)
|
||||||
|
|
||||||
ext_result = check_ad_blocking(
|
ext_result = check_ad_blocking(
|
||||||
ext_cdp_url, TEST_URL, env_base, tmpdir
|
ext_cdp_url, TEST_URL, env_base, tmpdir
|
||||||
|
|||||||
@@ -267,8 +267,8 @@ class Orchestrator:
|
|||||||
"""Main orchestrator loop."""
|
"""Main orchestrator loop."""
|
||||||
from rich.live import Live
|
from rich.live import Live
|
||||||
from rich.table import Table
|
from rich.table import Table
|
||||||
from rich.console import Group
|
from archivebox.misc.logging import IS_TTY
|
||||||
from archivebox.misc.logging import IS_TTY, CONSOLE
|
import archivebox.misc.logging as logging_module
|
||||||
|
|
||||||
self.on_startup()
|
self.on_startup()
|
||||||
|
|
||||||
@@ -305,11 +305,18 @@ class Orchestrator:
|
|||||||
|
|
||||||
return table
|
return table
|
||||||
|
|
||||||
live = Live(make_progress_table(), console=CONSOLE, refresh_per_second=4, transient=False) if show_progress else None
|
live = Live(make_progress_table(), refresh_per_second=4, transient=False) if show_progress else None
|
||||||
|
|
||||||
|
# Redirect all output through Live's console when active
|
||||||
|
original_console = logging_module.CONSOLE
|
||||||
|
original_stderr = logging_module.STDERR
|
||||||
|
|
||||||
try:
|
try:
|
||||||
if live:
|
if live:
|
||||||
live.start()
|
live.start()
|
||||||
|
# Replace global consoles with Live's console
|
||||||
|
logging_module.CONSOLE = live.console
|
||||||
|
logging_module.STDERR = live.console
|
||||||
|
|
||||||
while True:
|
while True:
|
||||||
# Check queues and spawn workers
|
# Check queues and spawn workers
|
||||||
@@ -347,7 +354,10 @@ class Orchestrator:
|
|||||||
else:
|
else:
|
||||||
self.on_shutdown()
|
self.on_shutdown()
|
||||||
finally:
|
finally:
|
||||||
|
# Restore original consoles
|
||||||
if live:
|
if live:
|
||||||
|
logging_module.CONSOLE = original_console
|
||||||
|
logging_module.STDERR = original_stderr
|
||||||
live.stop()
|
live.stop()
|
||||||
|
|
||||||
def start(self) -> int:
|
def start(self) -> int:
|
||||||
|
|||||||
Reference in New Issue
Block a user