add infiniscroll plugin

This commit is contained in:
Nick Sweeting
2025-12-29 13:11:20 -08:00
parent e20fdae2a5
commit 8d76b2b0c6
3 changed files with 665 additions and 0 deletions

View File

@@ -0,0 +1,46 @@
{
"$schema": "http://json-schema.org/draft-07/schema#",
"type": "object",
"additionalProperties": false,
"required_plugins": ["chrome"],
"properties": {
"INFINISCROLL_ENABLED": {
"type": "boolean",
"default": true,
"x-aliases": ["SAVE_INFINISCROLL", "USE_INFINISCROLL"],
"description": "Enable infinite scroll page expansion"
},
"INFINISCROLL_TIMEOUT": {
"type": "integer",
"default": 120,
"minimum": 10,
"x-fallback": "TIMEOUT",
"description": "Maximum timeout for scrolling in seconds"
},
"INFINISCROLL_SCROLL_DELAY": {
"type": "integer",
"default": 2000,
"minimum": 500,
"description": "Delay between scrolls in milliseconds"
},
"INFINISCROLL_SCROLL_DISTANCE": {
"type": "integer",
"default": 1600,
"minimum": 100,
"description": "Distance to scroll per step in pixels"
},
"INFINISCROLL_SCROLL_LIMIT": {
"type": "integer",
"default": 10,
"minimum": 1,
"maximum": 100,
"description": "Maximum number of scroll steps"
},
"INFINISCROLL_MIN_HEIGHT": {
"type": "integer",
"default": 16000,
"minimum": 1000,
"description": "Minimum page height to scroll to in pixels"
}
}
}

View File

@@ -0,0 +1,267 @@
#!/usr/bin/env node
/**
* Scroll the page down to trigger infinite scroll / lazy loading.
*
* Scrolls down 1 page at a time, up to INFINISCROLL_SCROLL_LIMIT times,
* ensuring at least INFINISCROLL_MIN_HEIGHT (default 16,000px) is reached.
* Stops early if no new content loads after a scroll.
*
* Usage: on_Snapshot__45_infiniscroll.js --url=<url> --snapshot-id=<uuid>
* Output: JSONL with scroll stats (no files created)
*
* Environment variables:
* INFINISCROLL_ENABLED: Enable/disable (default: true)
* INFINISCROLL_TIMEOUT: Max timeout in seconds (default: 120)
* INFINISCROLL_SCROLL_DELAY: Delay between scrolls in ms (default: 2000)
* INFINISCROLL_SCROLL_DISTANCE: Pixels per scroll (default: 1600)
* INFINISCROLL_SCROLL_LIMIT: Max scroll iterations (default: 10)
* INFINISCROLL_MIN_HEIGHT: Min page height to reach in px (default: 16000)
*/
// Read a string config value from the environment, trimmed of surrounding
// whitespace; returns `defaultValue` (trimmed) when the variable is unset/empty.
function getEnv(name, defaultValue = '') {
  const raw = process.env[name] || defaultValue;
  return raw.trim();
}
/**
 * Read a boolean config value from the environment.
 * Accepts true/1/yes/on and false/0/no/off (case-insensitive, trimmed);
 * any other value (including unset) yields `defaultValue`.
 */
function getEnvBool(name, defaultValue = false) {
  const normalized = (process.env[name] || '').trim().toLowerCase();
  switch (normalized) {
    case 'true':
    case '1':
    case 'yes':
    case 'on':
      return true;
    case 'false':
    case '0':
    case 'no':
    case 'off':
      return false;
    default:
      return defaultValue;
  }
}
/**
 * Read an integer config value from the environment.
 * Falls back to `defaultValue` when the variable is unset, empty, or not a
 * parseable base-10 integer.
 */
function getEnvInt(name, defaultValue = 0) {
  const raw = (process.env[name] || String(defaultValue)).trim();
  const parsed = Number.parseInt(raw, 10);
  // Number.isNaN avoids the coercing global isNaN (idiomatic ES2015+).
  return Number.isNaN(parsed) ? defaultValue : parsed;
}
// Check if infiniscroll is enabled BEFORE requiring puppeteer
// (keeps the disabled path fast and dependency-free; exit 0 signals a clean
// skip, and no JSONL record is written to stdout in that case).
if (!getEnvBool('INFINISCROLL_ENABLED', true)) {
  console.error('Skipping infiniscroll (INFINISCROLL_ENABLED=False)');
  process.exit(0);
}
const fs = require('fs');
const path = require('path');
const puppeteer = require('puppeteer-core');
// Plugin identifier (not referenced elsewhere in this script).
const PLUGIN_NAME = 'infiniscroll';
// Chrome session state dir written by the chrome plugin; relative path —
// presumably this hook's cwd is the snapshot's ./infiniscroll dir, a sibling
// of ./chrome (the integration tests run it that way).
const CHROME_SESSION_DIR = '../chrome';
/**
 * Parse `--key=value` CLI flags from process.argv into an object.
 * Dashes in flag names become underscores; flags with no (or empty) value
 * map to `true`. Non `--` tokens are ignored.
 */
function parseArgs() {
  const parsed = {};
  for (const token of process.argv.slice(2)) {
    if (!token.startsWith('--')) continue;
    const body = token.slice(2);
    const eq = body.indexOf('=');
    const key = (eq === -1 ? body : body.slice(0, eq)).replace(/-/g, '_');
    const rawValue = eq === -1 ? '' : body.slice(eq + 1);
    parsed[key] = rawValue || true;
  }
  return parsed;
}
// Read the Chrome DevTools Protocol websocket URL written by the chrome
// plugin into CHROME_SESSION_DIR/cdp_url.txt. Returns null if the chrome
// session has not been started yet.
function getCdpUrl() {
  const cdpFile = path.join(CHROME_SESSION_DIR, 'cdp_url.txt');
  if (!fs.existsSync(cdpFile)) return null;
  return fs.readFileSync(cdpFile, 'utf8').trim();
}
// Read the devtools target id of the tab opened for this snapshot from
// CHROME_SESSION_DIR/target_id.txt. Returns null if no tab was created yet.
function getPageId() {
  const idFile = path.join(CHROME_SESSION_DIR, 'target_id.txt');
  if (!fs.existsSync(idFile)) return null;
  return fs.readFileSync(idFile, 'utf8').trim();
}
// Poll (every 100ms, up to timeoutMs) for the navigation.json marker that
// the chrome_navigate hook writes once the page has finished loading.
// Resolves true when the marker appears, false on timeout.
async function waitForChromeTabLoaded(timeoutMs = 60000) {
  const marker = path.join(CHROME_SESSION_DIR, 'navigation.json');
  const deadline = Date.now() + timeoutMs;
  while (Date.now() < deadline) {
    if (fs.existsSync(marker)) {
      return true;
    }
    await new Promise((resolve) => setTimeout(resolve, 100));
  }
  return false;
}
// Resolve after `ms` milliseconds (Promise-based delay helper).
const sleep = (ms) => new Promise((resolve) => setTimeout(resolve, ms));
/**
 * Scroll the page down in fixed-size steps to trigger lazy loading /
 * infinite scroll, tracking document height changes between steps.
 *
 * @param {object} page - puppeteer Page connected to the target tab
 * @param {object} [options]
 * @param {number} [options.timeout=120000] - overall budget in ms; loop stops once exceeded
 * @param {number} [options.scrollDelay=2000] - wait after each scroll step in ms
 * @param {number} [options.scrollDistance=1600] - pixels advanced per step
 * @param {number} [options.scrollLimit=10] - maximum number of scroll steps
 * @param {number} [options.minHeight=16000] - page height at which scrolling may stop early
 * @returns {Promise<{scrollCount: number, finalHeight: number, startingHeight: number, elapsedMs: number}>}
 */
async function scrollDown(page, options = {}) {
  const {
    timeout = 120000,
    scrollDelay = 2000,
    scrollDistance = 1600,
    scrollLimit = 10,
    minHeight = 16000,
  } = options;
  const startTime = Date.now();
  const startingHeight = await page.evaluate(() => document.body.scrollHeight);
  let lastHeight = startingHeight;
  let scrollCount = 0;
  let scrollPosition = 0;
  // Scroll to top first
  await page.evaluate(() => {
    window.scrollTo({ top: 0, left: 0, behavior: 'smooth' });
  });
  await sleep(500);
  while (scrollCount < scrollLimit) {
    // Check timeout
    const elapsed = Date.now() - startTime;
    if (elapsed >= timeout) {
      console.error(`Timeout reached after ${scrollCount} scrolls`);
      break;
    }
    // Absolute target offset for this step (step index is 1-based here).
    scrollPosition = (scrollCount + 1) * scrollDistance;
    console.error(`Scrolling down ${scrollCount + 1}x ${scrollDistance}px... (${scrollPosition}/${lastHeight})`);
    await page.evaluate((yOffset) => {
      window.scrollTo({ top: yOffset, left: 0, behavior: 'smooth' });
    }, scrollPosition);
    scrollCount++;
    await sleep(scrollDelay);
    // Check if new content was added (infinite scroll detection)
    const newHeight = await page.evaluate(() => document.body.scrollHeight);
    const addedPx = newHeight - lastHeight;
    if (addedPx > 0) {
      console.error(`Detected infini-scrolling: ${lastHeight}+${addedPx} => ${newHeight}`);
    } else if (scrollPosition >= newHeight + scrollDistance) {
      // Reached the bottom
      // NOTE(review): "bottom" requires being a full scrollDistance PAST the
      // page end AND at least 3 completed steps — presumably slack for
      // smooth-scroll lag / late content; confirm the +scrollDistance margin
      // and the scrollCount > 2 threshold are intentional.
      if (scrollCount > 2) {
        console.error(`Reached bottom of page at ${newHeight}px`);
        break;
      }
    }
    lastHeight = newHeight;
    // Check if we've reached minimum height and can stop
    if (lastHeight >= minHeight && scrollPosition >= lastHeight) {
      console.error(`Reached minimum height target (${minHeight}px)`);
      break;
    }
  }
  // Scroll to absolute bottom (only if the stepped loop stopped short of it)
  if (scrollPosition < lastHeight) {
    await page.evaluate(() => {
      window.scrollTo({ top: document.body.scrollHeight, left: 0, behavior: 'smooth' });
    });
    await sleep(scrollDelay);
  }
  // Scroll back to top so later hooks (e.g. screenshots) start from the top.
  console.error(`Reached bottom of page at ${lastHeight}px, scrolling back to top...`);
  await page.evaluate(() => {
    window.scrollTo({ top: 0, left: 0, behavior: 'smooth' });
  });
  await sleep(scrollDelay);
  const totalElapsed = Date.now() - startTime;
  return {
    scrollCount,
    finalHeight: lastHeight,
    startingHeight,
    elapsedMs: totalElapsed,
  };
}
/**
 * Entry point: connect to the already-running Chrome session over CDP, find
 * the snapshot's tab, run scrollDown(), then emit a single ArchiveResult
 * JSONL record on stdout (all human-readable logging goes to stderr).
 * Exits 0 on success, 1 on any failure.
 */
async function main() {
  const args = parseArgs();
  const url = args.url;
  const snapshotId = args.snapshot_id;
  if (!url || !snapshotId) {
    console.error('Usage: on_Snapshot__45_infiniscroll.js --url=<url> --snapshot-id=<uuid>');
    process.exit(1);
  }
  // Config from env vars (see the plugin's JSON schema); TIMEOUT is s -> ms.
  const timeout = getEnvInt('INFINISCROLL_TIMEOUT', 120) * 1000;
  const scrollDelay = getEnvInt('INFINISCROLL_SCROLL_DELAY', 2000);
  const scrollDistance = getEnvInt('INFINISCROLL_SCROLL_DISTANCE', 1600);
  const scrollLimit = getEnvInt('INFINISCROLL_SCROLL_LIMIT', 10);
  const minHeight = getEnvInt('INFINISCROLL_MIN_HEIGHT', 16000);
  const cdpUrl = getCdpUrl();
  if (!cdpUrl) {
    console.error('ERROR: Chrome CDP URL not found (chrome plugin must run first)');
    process.exit(1);
  }
  // Wait for the navigation.json marker written by chrome_navigate.
  const pageLoaded = await waitForChromeTabLoaded(60000);
  if (!pageLoaded) {
    console.error('ERROR: Page not loaded after 60s (chrome_navigate must complete first)');
    process.exit(1);
  }
  let browser = null;
  try {
    browser = await puppeteer.connect({ browserWSEndpoint: cdpUrl });
    const pages = await browser.pages();
    if (pages.length === 0) {
      throw new Error('No pages found in browser');
    }
    // Find the right page by target ID
    // NOTE(review): _targetId is a private puppeteer field and may break
    // across puppeteer-core versions; confirm no public accessor exists.
    const targetId = getPageId();
    let page = null;
    if (targetId) {
      page = pages.find(p => {
        const target = p.target();
        return target && target._targetId === targetId;
      });
    }
    if (!page) {
      // Fall back to the last page in the list when no target id matched.
      page = pages[pages.length - 1];
    }
    console.error(`Starting infinite scroll on ${url}`);
    const result = await scrollDown(page, {
      timeout,
      scrollDelay,
      scrollDistance,
      scrollLimit,
      minHeight,
    });
    // disconnect() detaches without killing Chrome (owned by the chrome plugin).
    browser.disconnect();
    const elapsedSec = (result.elapsedMs / 1000).toFixed(1);
    const finalHeightStr = result.finalHeight.toLocaleString();
    const addedHeight = result.finalHeight - result.startingHeight;
    const addedStr = addedHeight > 0 ? `+${addedHeight.toLocaleString()}px new content` : 'no new content';
    const outputStr = `scrolled to ${finalHeightStr}px (${addedStr}) over ${elapsedSec}s`;
    console.error(`Success: ${outputStr}`);
    // The only stdout output: one ArchiveResult JSONL record (no files created).
    console.log(JSON.stringify({
      type: 'ArchiveResult',
      status: 'succeeded',
      output_str: outputStr,
    }));
    process.exit(0);
  } catch (e) {
    if (browser) browser.disconnect();
    console.error(`ERROR: ${e.name}: ${e.message}`);
    process.exit(1);
  }
}
main().catch(e => {
  console.error(`Fatal error: ${e.message}`);
  process.exit(1);
});

View File

@@ -0,0 +1,352 @@
"""
Integration tests for infiniscroll plugin
Tests verify:
1. Hook script exists
2. Dependencies installed via chrome validation hooks
3. Verify deps with abx-pkg
4. INFINISCROLL_ENABLED=False skips without JSONL
5. Fails gracefully when no chrome session exists
6. Full integration test: scrolls page and outputs stats
7. Config options work (scroll limit, min height)
"""
import json
import os
import re
import signal
import subprocess
import time
import tempfile
from pathlib import Path
import pytest
PLUGIN_DIR = Path(__file__).parent.parent
PLUGINS_ROOT = PLUGIN_DIR.parent
INFINISCROLL_HOOK = next(PLUGIN_DIR.glob('on_Snapshot__*_infiniscroll.*'), None)
CHROME_LAUNCH_HOOK = PLUGINS_ROOT / 'chrome' / 'on_Crawl__20_chrome_launch.bg.js'
CHROME_TAB_HOOK = PLUGINS_ROOT / 'chrome' / 'on_Snapshot__20_chrome_tab.bg.js'
CHROME_NAVIGATE_HOOK = next((PLUGINS_ROOT / 'chrome').glob('on_Snapshot__*_chrome_navigate.*'), None)
TEST_URL = 'https://www.singsing.movie/'
def get_node_modules_dir():
    """Resolve the node_modules dir used as NODE_PATH for test subprocesses."""
    # An explicit NODE_PATH in the environment always wins.
    node_path = os.environ.get('NODE_PATH')
    if node_path:
        return Path(node_path)
    # Fall back to the configured LIB_DIR (env override first, then settings).
    from archivebox.config.common import STORAGE_CONFIG
    lib_dir = os.environ.get('LIB_DIR') or str(STORAGE_CONFIG.LIB_DIR)
    return Path(lib_dir) / 'npm' / 'node_modules'
NODE_MODULES_DIR = get_node_modules_dir()
def get_test_env():
    """Return a copy of os.environ with NODE_PATH pointing at NODE_MODULES_DIR."""
    child_env = dict(os.environ)
    child_env['NODE_PATH'] = str(NODE_MODULES_DIR)
    return child_env
def test_hook_script_exists():
    """Verify on_Snapshot hook exists.

    INFINISCROLL_HOOK is resolved by glob at import time, so it is None when
    no matching file exists — check that first for a clearer failure message.
    """
    assert INFINISCROLL_HOOK is not None, "Infiniscroll hook not found"
    assert INFINISCROLL_HOOK.exists(), f"Hook not found: {INFINISCROLL_HOOK}"
def test_verify_deps_with_abx_pkg():
    """Verify dependencies are available via abx-pkg after hook installation."""
    from abx_pkg import Binary, EnvProvider, BinProviderOverrides
    # Finalize the pydantic model before instantiating EnvProvider.
    EnvProvider.model_rebuild()
    # Verify node is available on PATH (the hook is a node script).
    node_binary = Binary(name='node', binproviders=[EnvProvider()])
    node_loaded = node_binary.load()
    assert node_loaded and node_loaded.abspath, "Node.js required for infiniscroll plugin"
def test_config_infiniscroll_disabled_skips():
    """Disabling the plugin must exit 0, log a skip reason, and emit no JSONL."""
    with tempfile.TemporaryDirectory() as scratch:
        hook_env = get_test_env()
        hook_env['INFINISCROLL_ENABLED'] = 'False'
        proc = subprocess.run(
            ['node', str(INFINISCROLL_HOOK), f'--url={TEST_URL}', '--snapshot-id=test-disabled'],
            cwd=Path(scratch),
            capture_output=True,
            text=True,
            env=hook_env,
            timeout=30,
        )
        assert proc.returncode == 0, f"Should exit 0 when feature disabled: {proc.stderr}"
        assert 'Skipping' in proc.stderr or 'False' in proc.stderr, "Should log skip reason to stderr"
        # Should NOT emit any JSONL records on stdout when disabled.
        emitted = [ln for ln in proc.stdout.strip().split('\n') if ln.strip().startswith('{')]
        assert len(emitted) == 0, f"Should not emit JSONL when feature disabled, got: {emitted}"
def test_fails_gracefully_without_chrome_session():
    """Running the hook with no chrome session must exit non-zero with a clear error."""
    with tempfile.TemporaryDirectory() as scratch:
        proc = subprocess.run(
            ['node', str(INFINISCROLL_HOOK), f'--url={TEST_URL}', '--snapshot-id=test-no-chrome'],
            cwd=Path(scratch),
            capture_output=True,
            text=True,
            env=get_test_env(),
            timeout=30,
        )
        # Should fail (exit 1) when no chrome session
        assert proc.returncode != 0, "Should fail when no chrome session exists"
        # Error could be about chrome/CDP not found, or puppeteer module missing
        stderr_text = proc.stderr.lower()
        expected_hints = ('chrome', 'cdp', 'puppeteer', 'module')
        assert any(hint in stderr_text for hint in expected_hints), \
            f"Should mention chrome/CDP/puppeteer in error: {proc.stderr}"
def setup_chrome_session(tmpdir):
    """Helper to set up Chrome session with tab and navigation.

    Launches Chrome via the chrome plugin's crawl-level hook, waits for its
    CDP URL file to appear, then creates a tab and navigates it to TEST_URL
    using the snapshot-level chrome hooks.

    Returns (chrome_launch_process, chrome_pid, snapshot_chrome_dir) so the
    caller can run further hooks from a sibling dir and clean up afterwards.

    Raises RuntimeError if any stage (launch, tab creation, navigation) fails.
    """
    crawl_dir = Path(tmpdir) / 'crawl'
    crawl_dir.mkdir()
    chrome_dir = crawl_dir / 'chrome'
    env = get_test_env()
    env['CHROME_HEADLESS'] = 'true'
    # Launch Chrome at crawl level (stays running in the background).
    chrome_launch_process = subprocess.Popen(
        ['node', str(CHROME_LAUNCH_HOOK), '--crawl-id=test-infiniscroll'],
        cwd=str(crawl_dir),
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        text=True,
        env=env
    )
    # Wait up to 15s for Chrome to launch (cdp_url.txt appears when ready).
    for i in range(15):
        if chrome_launch_process.poll() is not None:
            # The launch hook exited early => Chrome failed to start.
            stdout, stderr = chrome_launch_process.communicate()
            raise RuntimeError(f"Chrome launch failed:\nStdout: {stdout}\nStderr: {stderr}")
        if (chrome_dir / 'cdp_url.txt').exists():
            break
        time.sleep(1)
    if not (chrome_dir / 'cdp_url.txt').exists():
        raise RuntimeError("Chrome CDP URL not found after 15s")
    chrome_pid = int((chrome_dir / 'chrome.pid').read_text().strip())
    # Create snapshot directory structure
    snapshot_dir = Path(tmpdir) / 'snapshot'
    snapshot_dir.mkdir()
    snapshot_chrome_dir = snapshot_dir / 'chrome'
    snapshot_chrome_dir.mkdir()
    # Create tab (CRAWL_OUTPUT_DIR points the tab hook at the crawl session)
    tab_env = env.copy()
    tab_env['CRAWL_OUTPUT_DIR'] = str(crawl_dir)
    result = subprocess.run(
        ['node', str(CHROME_TAB_HOOK), f'--url={TEST_URL}', '--snapshot-id=snap-infiniscroll', '--crawl-id=test-infiniscroll'],
        cwd=str(snapshot_chrome_dir),
        capture_output=True,
        text=True,
        timeout=60,
        env=tab_env
    )
    if result.returncode != 0:
        raise RuntimeError(f"Tab creation failed: {result.stderr}")
    # Navigate to URL
    result = subprocess.run(
        ['node', str(CHROME_NAVIGATE_HOOK), f'--url={TEST_URL}', '--snapshot-id=snap-infiniscroll'],
        cwd=str(snapshot_chrome_dir),
        capture_output=True,
        text=True,
        timeout=120,
        env=env
    )
    if result.returncode != 0:
        raise RuntimeError(f"Navigation failed: {result.stderr}")
    return chrome_launch_process, chrome_pid, snapshot_chrome_dir
def cleanup_chrome(chrome_launch_process, chrome_pid):
    """Terminate the chrome launch hook process and kill the Chrome browser.

    Best-effort: each step ignores errors from processes that already exited.
    """
    try:
        chrome_launch_process.send_signal(signal.SIGTERM)
        chrome_launch_process.wait(timeout=5)
    except Exception:
        # A bare `except:` here would also swallow KeyboardInterrupt/SystemExit
        # during test teardown; Exception covers dead-process and timeout errors.
        pass
    try:
        os.kill(chrome_pid, signal.SIGKILL)
    except OSError:
        # Process already gone (ProcessLookupError is a subclass of OSError).
        pass
def test_scrolls_page_and_outputs_stats():
    """Integration test: scroll page and verify JSONL output format.

    Spins up a real Chrome session, runs the infiniscroll hook with fast/small
    config, then checks the ArchiveResult record's output_str format and that
    the hook creates no files in its output dir.
    """
    with tempfile.TemporaryDirectory() as tmpdir:
        chrome_launch_process = None
        chrome_pid = None
        try:
            chrome_launch_process, chrome_pid, snapshot_chrome_dir = setup_chrome_session(tmpdir)
            # Create infiniscroll output directory (sibling to chrome) — the
            # hook resolves the chrome session via its relative ../chrome path.
            infiniscroll_dir = snapshot_chrome_dir.parent / 'infiniscroll'
            infiniscroll_dir.mkdir()
            # Run infiniscroll hook
            env = get_test_env()
            env['INFINISCROLL_SCROLL_LIMIT'] = '3'  # Limit scrolls for faster test
            env['INFINISCROLL_SCROLL_DELAY'] = '500'  # Faster scrolling
            env['INFINISCROLL_MIN_HEIGHT'] = '1000'  # Lower threshold for test
            result = subprocess.run(
                ['node', str(INFINISCROLL_HOOK), f'--url={TEST_URL}', '--snapshot-id=snap-infiniscroll'],
                cwd=str(infiniscroll_dir),
                capture_output=True,
                text=True,
                timeout=60,
                env=env
            )
            assert result.returncode == 0, f"Infiniscroll failed: {result.stderr}\nStdout: {result.stdout}"
            # Parse JSONL output: find the first ArchiveResult record on stdout
            result_json = None
            for line in result.stdout.strip().split('\n'):
                line = line.strip()
                if line.startswith('{'):
                    try:
                        record = json.loads(line)
                        if record.get('type') == 'ArchiveResult':
                            result_json = record
                            break
                    except json.JSONDecodeError:
                        pass
            assert result_json is not None, f"Should have ArchiveResult JSONL output. Stdout: {result.stdout}"
            assert result_json['status'] == 'succeeded', f"Should succeed: {result_json}"
            # Verify output_str format: "scrolled to X,XXXpx (+Y,YYYpx new content) over Z.Zs"
            output_str = result_json.get('output_str', '')
            assert output_str.startswith('scrolled to'), f"output_str should start with 'scrolled to': {output_str}"
            assert 'px' in output_str, f"output_str should contain pixel count: {output_str}"
            assert re.search(r'over \d+(\.\d+)?s', output_str), f"output_str should contain duration: {output_str}"
            # Verify no files created in output directory (the plugin is stats-only)
            output_files = list(infiniscroll_dir.iterdir())
            assert len(output_files) == 0, f"Should not create any files, but found: {output_files}"
        finally:
            if chrome_launch_process and chrome_pid:
                cleanup_chrome(chrome_launch_process, chrome_pid)
def test_config_scroll_limit_honored():
    """INFINISCROLL_SCROLL_LIMIT should cap scrolling without breaking output."""
    with tempfile.TemporaryDirectory() as tmpdir:
        launch_proc = None
        browser_pid = None
        try:
            launch_proc, browser_pid, snapshot_chrome_dir = setup_chrome_session(tmpdir)
            out_dir = snapshot_chrome_dir.parent / 'infiniscroll'
            out_dir.mkdir()
            # Set scroll limit to 2
            hook_env = get_test_env()
            hook_env['INFINISCROLL_SCROLL_LIMIT'] = '2'
            hook_env['INFINISCROLL_SCROLL_DELAY'] = '500'
            hook_env['INFINISCROLL_MIN_HEIGHT'] = '100000'  # High threshold so limit kicks in
            proc = subprocess.run(
                ['node', str(INFINISCROLL_HOOK), f'--url={TEST_URL}', '--snapshot-id=snap-limit'],
                cwd=str(out_dir),
                capture_output=True,
                text=True,
                timeout=60,
                env=hook_env,
            )
            assert proc.returncode == 0, f"Infiniscroll failed: {proc.stderr}"
            # Locate the ArchiveResult record among the stdout lines.
            archive_result = None
            for raw_line in proc.stdout.strip().split('\n'):
                candidate = raw_line.strip()
                if not candidate.startswith('{'):
                    continue
                try:
                    parsed = json.loads(candidate)
                except json.JSONDecodeError:
                    continue
                if parsed.get('type') == 'ArchiveResult':
                    archive_result = parsed
                    break
            assert archive_result is not None, "Should have JSONL output"
            output_str = archive_result.get('output_str', '')
            # Verify output format and that it completed (scroll limit enforced internally)
            assert output_str.startswith('scrolled to'), f"Should have valid output_str: {output_str}"
            assert archive_result['status'] == 'succeeded', f"Should succeed with scroll limit: {archive_result}"
        finally:
            if launch_proc and browser_pid:
                cleanup_chrome(launch_proc, browser_pid)
def test_config_timeout_honored():
    """INFINISCROLL_TIMEOUT should bound the hook's total wall-clock time."""
    with tempfile.TemporaryDirectory() as tmpdir:
        launch_proc = None
        browser_pid = None
        try:
            launch_proc, browser_pid, snapshot_chrome_dir = setup_chrome_session(tmpdir)
            out_dir = snapshot_chrome_dir.parent / 'infiniscroll'
            out_dir.mkdir()
            # Set very short timeout
            hook_env = get_test_env()
            hook_env['INFINISCROLL_TIMEOUT'] = '3'  # 3 seconds
            hook_env['INFINISCROLL_SCROLL_DELAY'] = '2000'  # 2s delay - timeout should trigger
            hook_env['INFINISCROLL_SCROLL_LIMIT'] = '100'  # High limit
            hook_env['INFINISCROLL_MIN_HEIGHT'] = '100000'
            started = time.time()
            proc = subprocess.run(
                ['node', str(INFINISCROLL_HOOK), f'--url={TEST_URL}', '--snapshot-id=snap-timeout'],
                cwd=str(out_dir),
                capture_output=True,
                text=True,
                timeout=30,
                env=hook_env,
            )
            wall_time = time.time() - started
            # Should complete within reasonable time (timeout + buffer)
            assert wall_time < 15, f"Should respect timeout, took {wall_time:.1f}s"
            assert proc.returncode == 0, f"Should complete even with timeout: {proc.stderr}"
        finally:
            if launch_proc and browser_pid:
                cleanup_chrome(launch_proc, browser_pid)
# Allow running this test module directly (outside the pytest CLI).
if __name__ == '__main__':
    pytest.main([__file__, '-v'])