mirror of
https://github.com/ArchiveBox/ArchiveBox.git
synced 2026-04-06 07:47:53 +10:00
add modalcloser plugin
This commit is contained in:
26
archivebox/plugins/modalcloser/config.json
Normal file
26
archivebox/plugins/modalcloser/config.json
Normal file
@@ -0,0 +1,26 @@
|
||||
{
|
||||
"$schema": "http://json-schema.org/draft-07/schema#",
|
||||
"type": "object",
|
||||
"additionalProperties": false,
|
||||
"required_plugins": ["chrome"],
|
||||
"properties": {
|
||||
"MODALCLOSER_ENABLED": {
|
||||
"type": "boolean",
|
||||
"default": true,
|
||||
"x-aliases": ["CLOSE_MODALS", "AUTO_CLOSE_MODALS"],
|
||||
"description": "Enable automatic modal and dialog closing"
|
||||
},
|
||||
"MODALCLOSER_TIMEOUT": {
|
||||
"type": "integer",
|
||||
"default": 1250,
|
||||
"minimum": 100,
|
||||
"description": "Delay before auto-closing dialogs (ms)"
|
||||
},
|
||||
"MODALCLOSER_POLL_INTERVAL": {
|
||||
"type": "integer",
|
||||
"default": 500,
|
||||
"minimum": 100,
|
||||
"description": "How often to check for CSS modals (ms)"
|
||||
}
|
||||
}
|
||||
}
|
||||
326
archivebox/plugins/modalcloser/on_Snapshot__15_modalcloser.bg.js
Normal file
326
archivebox/plugins/modalcloser/on_Snapshot__15_modalcloser.bg.js
Normal file
@@ -0,0 +1,326 @@
|
||||
#!/usr/bin/env node
|
||||
/**
|
||||
* Auto-close browser dialogs and CSS modals.
|
||||
*
|
||||
* Runs as a background script that sets up listeners BEFORE navigation,
|
||||
* so it catches modals that appear on page load.
|
||||
*
|
||||
* Handles:
|
||||
* - Browser dialogs (alert, confirm, prompt, beforeunload)
|
||||
* - Framework modals (Bootstrap, Tailwind, shadcn, Angular Material, jQuery UI, SweetAlert)
|
||||
* - Cookie consent banners, newsletter popups, age gates
|
||||
*
|
||||
* Usage: on_Snapshot__15_modalcloser.bg.js --url=<url> --snapshot-id=<uuid>
|
||||
* Output: JSONL with modal close stats (no files created)
|
||||
* Termination: Send SIGTERM to exit cleanly
|
||||
*
|
||||
* Environment variables:
|
||||
* MODALCLOSER_ENABLED: Enable/disable (default: true)
|
||||
* MODALCLOSER_TIMEOUT: Delay before auto-closing dialogs in ms (default: 1250)
|
||||
* MODALCLOSER_POLL_INTERVAL: How often to check for CSS modals in ms (default: 500)
|
||||
*/
|
||||
|
||||
/**
 * Read an environment variable as a trimmed string.
 *
 * @param {string} name - Environment variable name.
 * @param {string} [defaultValue=''] - Used when the variable is unset or empty.
 * @returns {string} The variable's value (or the default) with surrounding whitespace removed.
 */
function getEnv(name, defaultValue = '') {
    const raw = process.env[name];
    // An unset OR empty variable falls back to the default.
    const chosen = raw ? raw : defaultValue;
    return chosen.trim();
}
|
||||
|
||||
/**
 * Read an environment variable as a boolean.
 *
 * Recognises true/1/yes/on and false/0/no/off (case-insensitive); anything
 * else, including an unset variable, yields `defaultValue`.
 *
 * @param {string} name - Environment variable name.
 * @param {boolean} [defaultValue=false] - Returned for unrecognised or missing values.
 * @returns {boolean}
 */
function getEnvBool(name, defaultValue = false) {
    const normalized = getEnv(name, '').toLowerCase();
    switch (normalized) {
        case 'true':
        case '1':
        case 'yes':
        case 'on':
            return true;
        case 'false':
        case '0':
        case 'no':
        case 'off':
            return false;
        default:
            return defaultValue;
    }
}
|
||||
|
||||
/**
 * Read an environment variable as a base-10 integer.
 *
 * @param {string} name - Environment variable name.
 * @param {number} [defaultValue=0] - Returned when the variable is unset or not parseable.
 * @returns {number}
 */
function getEnvInt(name, defaultValue = 0) {
    const text = getEnv(name, String(defaultValue));
    const parsed = Number.parseInt(text, 10);
    if (Number.isNaN(parsed)) {
        return defaultValue;
    }
    return parsed;
}
|
||||
|
||||
// Bail out before puppeteer-core is required when the plugin is switched off,
// so a disabled plugin exits 0 even if the dependency is not installed.
const modalcloserEnabled = getEnvBool('MODALCLOSER_ENABLED', true);
if (modalcloserEnabled === false) {
    console.error('Skipping modalcloser (MODALCLOSER_ENABLED=False)');
    process.exit(0);
}
|
||||
|
||||
const fs = require('fs');
|
||||
const path = require('path');
|
||||
const puppeteer = require('puppeteer-core');
|
||||
|
||||
const PLUGIN_NAME = 'modalcloser';
|
||||
const CHROME_SESSION_DIR = '../chrome';
|
||||
|
||||
/**
 * Parse `--key=value` style command-line arguments from process.argv.
 *
 * Dashes in keys become underscores (`--snapshot-id` -> `snapshot_id`).
 * A flag with no value, or an empty value, is stored as `true`.
 * Arguments that do not start with `--` are ignored.
 *
 * @returns {Object<string, string|boolean>} Parsed arguments keyed by name.
 */
function parseArgs() {
    const parsed = {};
    for (const token of process.argv.slice(2)) {
        if (!token.startsWith('--')) {
            continue;
        }
        const body = token.slice(2);
        const eqIndex = body.indexOf('=');
        // Everything after the FIRST '=' is the value, so URLs with '=' survive intact.
        const rawKey = eqIndex === -1 ? body : body.slice(0, eqIndex);
        const rawValue = eqIndex === -1 ? '' : body.slice(eqIndex + 1);
        parsed[rawKey.replace(/-/g, '_')] = rawValue || true;
    }
    return parsed;
}
|
||||
|
||||
/**
 * Read the Chrome DevTools endpoint from `cdp_url.txt` inside CHROME_SESSION_DIR.
 *
 * @returns {string|null} Trimmed file contents, or null when the file does not exist.
 */
function getCdpUrl() {
    const cdpUrlPath = path.join(CHROME_SESSION_DIR, 'cdp_url.txt');
    if (!fs.existsSync(cdpUrlPath)) {
        return null;
    }
    const contents = fs.readFileSync(cdpUrlPath, 'utf8');
    return contents.trim();
}
|
||||
|
||||
/**
 * Read the tab's target id from `target_id.txt` inside CHROME_SESSION_DIR.
 *
 * @returns {string|null} Trimmed file contents, or null when the file does not exist.
 */
function getPageId() {
    const targetIdPath = path.join(CHROME_SESSION_DIR, 'target_id.txt');
    if (!fs.existsSync(targetIdPath)) {
        return null;
    }
    const contents = fs.readFileSync(targetIdPath, 'utf8');
    return contents.trim();
}
|
||||
|
||||
/**
 * Promise-based delay.
 *
 * @param {number} ms - Milliseconds to wait.
 * @returns {Promise<void>} Resolves (with no value) once the timer fires.
 */
function sleep(ms) {
    return new Promise((done) => {
        setTimeout(done, ms);
    });
}
|
||||
|
||||
/**
 * Close CSS modals using framework-specific dismiss methods, then hide any
 * remaining known overlay/banner elements with inline CSS and lift common
 * body scroll locks.
 *
 * Everything runs inside the page via `page.evaluate`. Each strategy is
 * wrapped in try/catch on purpose: this is best-effort cleanup and one
 * misbehaving framework must not stop the others.
 *
 * Counting rules:
 *  - an element is counted at most once per call, even if several
 *    strategies/selectors match it (e.g. Bootstrap 5 with jQuery loaded, or a
 *    dialog that is both `[data-state="open"]` and `[aria-modal="true"]`);
 *  - SweetAlert is only counted when an alert is actually showing, not merely
 *    because the library is loaded on the page.
 *
 * Known limitation: for Escape-key dismissal we cannot tell whether the page
 * reacted, so a dialog that ignores Escape is counted again on the next call.
 *
 * @param {object} page - Object exposing `evaluate(fn)` (a puppeteer Page).
 * @returns {Promise<number>} Number of modals closed/hidden during this call.
 */
async function closeModals(page) {
    return page.evaluate(() => {
        let closed = 0;

        // Elements already dismissed during this pass; prevents double counting
        // (and double dismissing) when selectors or strategies overlap.
        const handled = new Set();
        const markClosed = (el) => {
            if (handled.has(el)) return;
            handled.add(el);
            closed++;
        };

        // Dispatch an Escape keydown on every not-yet-handled match.
        const pressEscapeOn = (selector) => {
            document.querySelectorAll(selector).forEach(el => {
                if (handled.has(el)) return;
                try {
                    el.dispatchEvent(new KeyboardEvent('keydown', { key: 'Escape', bubbles: true, cancelable: true }));
                    markClosed(el);
                } catch (e) {}
            });
        };

        // Bootstrap 4/5 - use Bootstrap's modal API
        if (typeof bootstrap !== 'undefined' && bootstrap.Modal) {
            document.querySelectorAll('.modal.show').forEach(el => {
                try {
                    const modal = bootstrap.Modal.getInstance(el);
                    if (modal) { modal.hide(); markClosed(el); }
                } catch (e) {}
            });
        }

        // Bootstrap 3 / jQuery - use jQuery modal API
        if (typeof jQuery !== 'undefined' && jQuery.fn && jQuery.fn.modal) {
            try {
                // Skip modals the Bootstrap API branch above already hid.
                const $modals = jQuery('.modal.in, .modal.show').filter((i, el) => !handled.has(el));
                if ($modals.length > 0) {
                    $modals.modal('hide');
                    $modals.each((i, el) => { markClosed(el); });
                }
            } catch (e) {}
        }

        // shadcn/Radix UI - fire escape key to dismiss
        pressEscapeOn('[data-radix-dialog-overlay], [data-state="open"][role="dialog"]');

        // Angular Material - click backdrop to dismiss
        document.querySelectorAll('.cdk-overlay-backdrop').forEach(el => {
            if (handled.has(el)) return;
            try {
                el.click();
                markClosed(el);
            } catch (e) {}
        });

        // Tailwind / Headless UI - dispatch escape key
        pressEscapeOn('[role="dialog"][aria-modal="true"]');

        // jQuery UI Dialog
        if (typeof jQuery !== 'undefined' && jQuery.ui && jQuery.ui.dialog) {
            try {
                const $dialogs = jQuery('.ui-dialog-content');
                if ($dialogs.length > 0) {
                    $dialogs.dialog('close');
                    closed += $dialogs.length;
                }
            } catch (e) {}
        }

        // SweetAlert2 - close() is harmless when nothing is open, but only
        // count it when an alert is really visible; otherwise every poll on a
        // page that merely loads the library would report a closed modal.
        if (typeof Swal !== 'undefined' && Swal.close) {
            try {
                const wasVisible = typeof Swal.isVisible === 'function' ? Swal.isVisible() : true;
                Swal.close();
                if (wasVisible) closed++;
            } catch (e) {}
        }

        // SweetAlert 1 (skipped when `swal` is just an alias of the Swal handled above)
        if (typeof swal !== 'undefined' && swal.close && !(typeof Swal !== 'undefined' && swal === Swal)) {
            try {
                // NOTE(review): open-state class names are taken from the
                // SweetAlert 1.x / 2.x stylesheets - confirm against the
                // versions seen in the wild.
                const wasVisible = typeof swal.isVisible === 'function'
                    ? swal.isVisible()
                    : document.querySelector('.sweet-alert.showSweetAlert, .sweet-alert.visible, .swal-overlay--show-modal') !== null;
                swal.close();
                if (wasVisible) closed++;
            } catch (e) {}
        }

        // Generic fallback - hide unrecognized modals with CSS
        const genericSelectors = [
            // CookieYes (cky) - popular cookie consent library
            '.cky-consent-container',
            '.cky-popup-center',
            '.cky-overlay',
            '.cky-modal',
            '#ckyPreferenceCenter',
            // Modal overlays and backdrops
            '.modal-overlay:not([style*="display: none"])',
            '.modal-backdrop:not([style*="display: none"])',
            '.overlay-visible',
            // Cookie consent banners
            '#cookie-consent', '.cookie-banner', '.cookie-notice',
            '#cookieConsent', '.cookie-consent', '.cookies-banner',
            '[class*="cookie"][class*="banner"]',
            '[class*="cookie"][class*="notice"]',
            '[class*="gdpr"]',
            // Popup overlays
            '.popup-overlay', '.newsletter-popup', '.age-gate',
            '.subscribe-popup', '.subscription-modal',
            // Generic modal patterns
            '[class*="modal"][class*="open"]:not(.modal-open)',
            '[class*="modal"][class*="show"][class*="overlay"]',
            '[class*="modal"][class*="visible"]',
            '[class*="dialog"][class*="open"]',
            '[class*="overlay"][class*="visible"]',
            // Interstitials
            '.interstitial', '.interstitial-wrapper',
            '[class*="interstitial"]',
        ];

        genericSelectors.forEach(selector => {
            try {
                document.querySelectorAll(selector).forEach(el => {
                    // Skip if already hidden (also stops re-counting on later polls)
                    const style = window.getComputedStyle(el);
                    if (style.display === 'none' || style.visibility === 'hidden') return;

                    el.style.display = 'none';
                    el.style.visibility = 'hidden';
                    el.style.opacity = '0';
                    el.style.pointerEvents = 'none';
                    markClosed(el);
                });
            } catch (e) {}
        });

        // Remove body scroll lock (common pattern when modals are open)
        try {
            document.body.style.overflow = '';
            document.body.style.position = '';
            document.body.classList.remove('modal-open', 'overflow-hidden', 'no-scroll', 'scroll-locked');
            document.documentElement.style.overflow = '';
            document.documentElement.classList.remove('overflow-hidden', 'no-scroll');
        } catch (e) {}

        return closed;
    });
}
|
||||
|
||||
/**
 * Entry point: attach to the running Chrome session, auto-accept JS dialogs
 * on the snapshot's tab, and poll for CSS modals until SIGTERM arrives.
 *
 * Flow:
 *  1. Validate `--url` / `--snapshot-id` and read config from the environment.
 *  2. Read the CDP endpoint written by the chrome plugin; exit 1 if missing.
 *  3. Install a SIGTERM handler that prints one ArchiveResult JSONL record to
 *     stdout, disconnects from the browser and exits 0.
 *  4. Connect, pick the tab (by target id, else the last page), register the
 *     dialog handler and loop on `closeModals`.
 *
 * Any error while connecting or selecting the page is logged to stderr and
 * the process exits with status 1.
 *
 * @returns {Promise<void>} In practice never resolves normally; the process
 *     ends via process.exit().
 */
async function main() {
    const args = parseArgs();
    const url = args.url;
    const snapshotId = args.snapshot_id;

    if (!url || !snapshotId) {
        console.error('Usage: on_Snapshot__15_modalcloser.bg.js --url=<url> --snapshot-id=<uuid>');
        process.exit(1);
    }

    // config.json declares "minimum": 100 for both settings. Enforce it here
    // too, so that 0 or a negative value cannot turn the poll loop into a
    // busy loop against the browser or accept dialogs with no delay.
    const MIN_DELAY_MS = 100;
    const dialogTimeout = Math.max(MIN_DELAY_MS, getEnvInt('MODALCLOSER_TIMEOUT', 1250));
    const pollInterval = Math.max(MIN_DELAY_MS, getEnvInt('MODALCLOSER_POLL_INTERVAL', 500));

    const cdpUrl = getCdpUrl();
    if (!cdpUrl) {
        console.error('ERROR: Chrome CDP URL not found (chrome plugin must run first)');
        process.exit(1);
    }

    let browser = null;
    let dialogsClosed = 0;
    let cssModalsClosed = 0;
    let running = true;

    // Handle SIGTERM for clean exit: report stats as JSONL, then quit.
    process.on('SIGTERM', () => {
        running = false;
        const total = dialogsClosed + cssModalsClosed;
        console.error(`Modalcloser exiting: closed ${dialogsClosed} dialogs, ${cssModalsClosed} CSS modals`);

        const outputStr = total > 0
            ? `closed ${total} modals (${dialogsClosed} dialogs, ${cssModalsClosed} CSS)`
            : 'no modals detected';

        console.log(JSON.stringify({
            type: 'ArchiveResult',
            status: 'succeeded',
            output_str: outputStr,
        }));

        // disconnect() only detaches this client; it does not close Chrome.
        if (browser) browser.disconnect();
        process.exit(0);
    });

    try {
        browser = await puppeteer.connect({ browserWSEndpoint: cdpUrl });

        const pages = await browser.pages();
        if (pages.length === 0) {
            throw new Error('No pages found in browser');
        }

        // Find the right page by target ID
        const targetId = getPageId();
        let page = null;
        if (targetId) {
            page = pages.find(p => {
                const target = p.target();
                // NOTE(review): `_targetId` is a private puppeteer field - confirm
                // it exists in the puppeteer-core version that is installed.
                return target && target._targetId === targetId;
            });
        }
        if (!page) {
            // Fall back to the last page in the list when no id matches.
            page = pages[pages.length - 1];
        }

        console.error(`Modalcloser listening on ${url}`);

        // Set up dialog handler (for JS alert/confirm/prompt/beforeunload)
        page.on('dialog', async (dialog) => {
            const type = dialog.type();
            const message = dialog.message().substring(0, 100);
            console.error(`Auto-closing dialog: ${type} - "${message}"`);

            // Small delay before accepting (some pages expect a brief pause)
            await sleep(dialogTimeout);
            try {
                await dialog.accept();
                dialogsClosed++;
            } catch (e) {
                // Dialog may have been dismissed by page
            }
        });

        // Poll for CSS modals
        while (running) {
            try {
                const closed = await closeModals(page);
                if (closed > 0) {
                    console.error(`Closed ${closed} CSS modals`);
                    cssModalsClosed += closed;
                }
            } catch (e) {
                // Page may have navigated or been closed; keep polling until SIGTERM.
                if (!running) break;
            }
            await sleep(pollInterval);
        }

    } catch (e) {
        if (browser) browser.disconnect();
        console.error(`ERROR: ${e.name}: ${e.message}`);
        process.exit(1);
    }
}
|
||||
|
||||
// Last-resort handler: anything main() lets escape is reported and the
// process exits with status 1.
main().catch((err) => {
    const reason = err.message;
    console.error(`Fatal error: ${reason}`);
    process.exit(1);
});
|
||||
538
archivebox/plugins/modalcloser/tests/test_modalcloser.py
Normal file
538
archivebox/plugins/modalcloser/tests/test_modalcloser.py
Normal file
@@ -0,0 +1,538 @@
|
||||
"""
|
||||
Integration tests for modalcloser plugin
|
||||
|
||||
Tests verify:
|
||||
1. Hook script exists
|
||||
2. Dependencies installed via chrome validation hooks
|
||||
3. Verify deps with abx-pkg
|
||||
4. MODALCLOSER_ENABLED=False skips without JSONL
|
||||
5. Fails gracefully when no chrome session exists
|
||||
6. Background script runs and handles SIGTERM correctly
|
||||
7. Config options work (timeout, poll interval)
|
||||
8. Live test: hides cookie consent on filmin.es
|
||||
"""
|
||||
|
||||
import json
|
||||
import os
|
||||
import signal
|
||||
import subprocess
|
||||
import time
|
||||
import tempfile
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
|
||||
|
||||
PLUGIN_DIR = Path(__file__).parent.parent
|
||||
PLUGINS_ROOT = PLUGIN_DIR.parent
|
||||
MODALCLOSER_HOOK = next(PLUGIN_DIR.glob('on_Snapshot__*_modalcloser.*'), None)
|
||||
CHROME_LAUNCH_HOOK = PLUGINS_ROOT / 'chrome' / 'on_Crawl__20_chrome_launch.bg.js'
|
||||
CHROME_TAB_HOOK = PLUGINS_ROOT / 'chrome' / 'on_Snapshot__20_chrome_tab.bg.js'
|
||||
CHROME_NAVIGATE_HOOK = next((PLUGINS_ROOT / 'chrome').glob('on_Snapshot__*_chrome_navigate.*'), None)
|
||||
TEST_URL = 'https://www.singsing.movie/'
|
||||
COOKIE_CONSENT_TEST_URL = 'https://www.filmin.es/'
|
||||
|
||||
|
||||
def get_node_modules_dir():
    """Return the node_modules directory the tests should use.

    A non-empty NODE_PATH environment variable wins. Otherwise the path is
    ``<lib dir>/npm/node_modules``, where the lib dir comes from the LIB_DIR
    environment variable or, failing that, ``STORAGE_CONFIG.LIB_DIR``.
    """
    node_path = os.environ.get('NODE_PATH')
    if node_path:
        return Path(node_path)

    # Imported lazily so archivebox config is only loaded when NODE_PATH is absent.
    from archivebox.config.common import STORAGE_CONFIG

    lib_dir_value = os.environ.get('LIB_DIR')
    if not lib_dir_value:
        lib_dir_value = str(STORAGE_CONFIG.LIB_DIR)
    return Path(lib_dir_value) / 'npm' / 'node_modules'
|
||||
|
||||
|
||||
NODE_MODULES_DIR = get_node_modules_dir()
|
||||
|
||||
|
||||
def get_test_env():
    """Return a copy of the current environment with NODE_PATH pointing at NODE_MODULES_DIR."""
    return {**os.environ, 'NODE_PATH': str(NODE_MODULES_DIR)}
|
||||
|
||||
|
||||
def test_hook_script_exists():
    """The plugin directory must contain an on_Snapshot modalcloser hook file."""
    hook = MODALCLOSER_HOOK
    # MODALCLOSER_HOOK is None when the glob over the plugin directory matched nothing.
    assert hook is not None, "Modalcloser hook not found"
    assert hook.exists(), f"Hook not found: {MODALCLOSER_HOOK}"
|
||||
|
||||
|
||||
def test_verify_deps_with_abx_pkg():
    """Check via abx-pkg that a `node` binary can be resolved from the environment."""
    from abx_pkg import Binary, EnvProvider

    EnvProvider.model_rebuild()

    # Resolve node through the environment provider only.
    env_provider = EnvProvider()
    node_loaded = Binary(name='node', binproviders=[env_provider]).load()
    assert node_loaded and node_loaded.abspath, "Node.js required for modalcloser plugin"
|
||||
|
||||
|
||||
def test_config_modalcloser_disabled_skips():
    """With MODALCLOSER_ENABLED=False the hook exits 0, logs a skip notice and prints no JSONL."""
    with tempfile.TemporaryDirectory() as tmpdir:
        workdir = Path(tmpdir)
        disabled_env = get_test_env()
        disabled_env['MODALCLOSER_ENABLED'] = 'False'

        command = [
            'node',
            str(MODALCLOSER_HOOK),
            f'--url={TEST_URL}',
            '--snapshot-id=test-disabled',
        ]
        result = subprocess.run(
            command,
            cwd=workdir,
            capture_output=True,
            text=True,
            env=disabled_env,
            timeout=30,
        )

        assert result.returncode == 0, f"Should exit 0 when feature disabled: {result.stderr}"
        assert 'Skipping' in result.stderr or 'False' in result.stderr, "Should log skip reason to stderr"

        # Any stdout line that looks like a JSON object counts as JSONL output.
        jsonl_lines = []
        for line in result.stdout.strip().split('\n'):
            if line.strip().startswith('{'):
                jsonl_lines.append(line)
        assert len(jsonl_lines) == 0, f"Should not emit JSONL when feature disabled, got: {jsonl_lines}"
|
||||
|
||||
|
||||
def test_fails_gracefully_without_chrome_session():
    """Run in an empty directory (no ../chrome session): the hook must exit non-zero with a helpful error."""
    with tempfile.TemporaryDirectory() as tmpdir:
        workdir = Path(tmpdir)
        command = [
            'node',
            str(MODALCLOSER_HOOK),
            f'--url={TEST_URL}',
            '--snapshot-id=test-no-chrome',
        ]

        result = subprocess.run(
            command,
            cwd=workdir,
            capture_output=True,
            text=True,
            env=get_test_env(),
            timeout=30,
        )

        # Should fail (exit 1) when no chrome session
        assert result.returncode != 0, "Should fail when no chrome session exists"

        # The error may be about the missing chrome/CDP session, or about the
        # puppeteer module not being installed.
        err_lower = result.stderr.lower()
        expected_keywords = ['chrome', 'cdp', 'puppeteer', 'module']
        assert any(x in err_lower for x in expected_keywords), \
            f"Should mention chrome/CDP/puppeteer in error: {result.stderr}"
|
||||
|
||||
|
||||
def setup_chrome_session(tmpdir):
    """Launch Chrome and open a tab for a test, returning handles for cleanup.

    Creates ``<tmpdir>/crawl`` and runs the chrome launch hook there as a
    background process, waits up to 15 seconds for ``crawl/chrome/cdp_url.txt``
    to appear, then creates ``<tmpdir>/snapshot/chrome`` and runs the chrome
    tab hook inside it.

    Args:
        tmpdir: Scratch directory (str or Path) that will hold the crawl and
            snapshot directories.

    Returns:
        Tuple ``(chrome_launch_process, chrome_pid, snapshot_chrome_dir)``.

    Raises:
        RuntimeError: if the launch hook exits early, the CDP URL file never
            appears, or the tab hook fails. In every failure case the launch
            process (and Chrome, when its pid is already known) is stopped
            before the exception propagates, because callers only clean up
            using the values this function returns.
    """
    crawl_dir = Path(tmpdir) / 'crawl'
    crawl_dir.mkdir()
    chrome_dir = crawl_dir / 'chrome'

    env = get_test_env()
    env['CHROME_HEADLESS'] = 'true'

    # Launch Chrome at crawl level
    chrome_launch_process = subprocess.Popen(
        ['node', str(CHROME_LAUNCH_HOOK), '--crawl-id=test-modalcloser'],
        cwd=str(crawl_dir),
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        text=True,
        env=env
    )
    chrome_pid = None

    try:
        # Wait for Chrome to launch
        for _ in range(15):
            if chrome_launch_process.poll() is not None:
                stdout, stderr = chrome_launch_process.communicate()
                raise RuntimeError(f"Chrome launch failed:\nStdout: {stdout}\nStderr: {stderr}")
            if (chrome_dir / 'cdp_url.txt').exists():
                break
            time.sleep(1)

        if not (chrome_dir / 'cdp_url.txt').exists():
            raise RuntimeError("Chrome CDP URL not found after 15s")

        chrome_pid = int((chrome_dir / 'chrome.pid').read_text().strip())

        # Create snapshot directory structure
        snapshot_dir = Path(tmpdir) / 'snapshot'
        snapshot_dir.mkdir()
        snapshot_chrome_dir = snapshot_dir / 'chrome'
        snapshot_chrome_dir.mkdir()

        # Create tab
        tab_env = env.copy()
        tab_env['CRAWL_OUTPUT_DIR'] = str(crawl_dir)
        result = subprocess.run(
            ['node', str(CHROME_TAB_HOOK), f'--url={TEST_URL}', '--snapshot-id=snap-modalcloser', '--crawl-id=test-modalcloser'],
            cwd=str(snapshot_chrome_dir),
            capture_output=True,
            text=True,
            timeout=60,
            env=tab_env
        )
        if result.returncode != 0:
            raise RuntimeError(f"Tab creation failed: {result.stderr}")
    except BaseException:
        # The caller never receives the process handle when setup fails, so
        # stop whatever was started here instead of leaking a browser.
        if chrome_launch_process.poll() is None:
            chrome_launch_process.terminate()
            try:
                chrome_launch_process.wait(timeout=5)
            except subprocess.TimeoutExpired:
                chrome_launch_process.kill()
                chrome_launch_process.wait()
        if chrome_pid is not None:
            try:
                os.kill(chrome_pid, signal.SIGKILL)
            except OSError:
                pass  # Chrome already exited
        raise

    return chrome_launch_process, chrome_pid, snapshot_chrome_dir
|
||||
|
||||
|
||||
def cleanup_chrome(chrome_launch_process, chrome_pid):
    """Stop the Chrome launch hook and force-kill the Chrome process.

    The launch process gets SIGTERM and up to 5 seconds to exit; if it is
    still alive after that it is killed and reaped. Chrome itself is then
    sent SIGKILL. Processes that are already gone are ignored.

    Args:
        chrome_launch_process: ``subprocess.Popen`` handle of the launch hook.
        chrome_pid: PID of the Chrome process.
    """
    try:
        chrome_launch_process.send_signal(signal.SIGTERM)
        chrome_launch_process.wait(timeout=5)
    except subprocess.TimeoutExpired:
        # Launcher ignored SIGTERM: escalate rather than leave it running.
        chrome_launch_process.kill()
        chrome_launch_process.wait()
    except OSError:
        pass  # process already gone
    try:
        os.kill(chrome_pid, signal.SIGKILL)
    except OSError:
        pass  # Chrome already exited
|
||||
|
||||
|
||||
def test_background_script_handles_sigterm():
    """Test that background script runs and handles SIGTERM correctly.

    Starts the hook against a live Chrome session, checks it is still running
    after 2 seconds, sends SIGTERM and then verifies: exit code 0, a
    'succeeded' ArchiveResult JSONL record on stdout whose output_str mentions
    modals/dialogs, and no files written to the hook's working directory.
    """
    with tempfile.TemporaryDirectory() as tmpdir:
        chrome_launch_process = None
        chrome_pid = None
        modalcloser_process = None
        try:
            chrome_launch_process, chrome_pid, snapshot_chrome_dir = setup_chrome_session(tmpdir)

            # Create modalcloser output directory (sibling to chrome)
            modalcloser_dir = snapshot_chrome_dir.parent / 'modalcloser'
            modalcloser_dir.mkdir()

            # Run modalcloser as background process
            env = get_test_env()
            env['MODALCLOSER_POLL_INTERVAL'] = '200'  # Faster polling for test

            modalcloser_process = subprocess.Popen(
                ['node', str(MODALCLOSER_HOOK), f'--url={TEST_URL}', '--snapshot-id=snap-modalcloser'],
                cwd=str(modalcloser_dir),
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE,
                text=True,
                env=env
            )

            # Let it run for a bit
            time.sleep(2)

            # Verify it's still running (background script)
            assert modalcloser_process.poll() is None, "Modalcloser should still be running as background process"

            # Send SIGTERM
            modalcloser_process.send_signal(signal.SIGTERM)
            stdout, stderr = modalcloser_process.communicate(timeout=5)

            assert modalcloser_process.returncode == 0, f"Should exit 0 on SIGTERM: {stderr}"

            # Parse JSONL output: first line that decodes to an ArchiveResult record
            result_json = None
            for line in stdout.strip().split('\n'):
                line = line.strip()
                if line.startswith('{'):
                    try:
                        record = json.loads(line)
                        if record.get('type') == 'ArchiveResult':
                            result_json = record
                            break
                    except json.JSONDecodeError:
                        pass

            assert result_json is not None, f"Should have ArchiveResult JSONL output. Stdout: {stdout}"
            assert result_json['status'] == 'succeeded', f"Should succeed: {result_json}"

            # Verify output_str format
            output_str = result_json.get('output_str', '')
            assert 'modal' in output_str.lower() or 'dialog' in output_str.lower(), \
                f"output_str should mention modals/dialogs: {output_str}"

            # Verify no files created in output directory
            output_files = list(modalcloser_dir.iterdir())
            assert len(output_files) == 0, f"Should not create any files, but found: {output_files}"

        finally:
            if modalcloser_process and modalcloser_process.poll() is None:
                modalcloser_process.kill()
                # Reap the child and close its pipes so no zombie is left behind.
                modalcloser_process.communicate()
            if chrome_launch_process and chrome_pid:
                cleanup_chrome(chrome_launch_process, chrome_pid)
|
||||
|
||||
|
||||
def test_dialog_handler_logs_dialogs():
    """Test that the hook starts up, logs a startup message and exits cleanly.

    No browser dialog is actually triggered here: the test only checks that
    the process is alive after 1.5 seconds, that its stderr contains the
    startup/exit logging, and that SIGTERM produces exit code 0.
    """
    with tempfile.TemporaryDirectory() as tmpdir:
        chrome_launch_process = None
        chrome_pid = None
        modalcloser_process = None
        try:
            chrome_launch_process, chrome_pid, snapshot_chrome_dir = setup_chrome_session(tmpdir)

            modalcloser_dir = snapshot_chrome_dir.parent / 'modalcloser'
            modalcloser_dir.mkdir()

            env = get_test_env()
            env['MODALCLOSER_TIMEOUT'] = '100'  # Fast timeout for test
            env['MODALCLOSER_POLL_INTERVAL'] = '200'

            modalcloser_process = subprocess.Popen(
                ['node', str(MODALCLOSER_HOOK), f'--url={TEST_URL}', '--snapshot-id=snap-dialog'],
                cwd=str(modalcloser_dir),
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE,
                text=True,
                env=env
            )

            # Let it run briefly
            time.sleep(1.5)

            # Verify it's running
            assert modalcloser_process.poll() is None, "Should be running"

            # stderr cannot be read without blocking while the process is
            # alive, so stop it first and inspect the captured output after.
            modalcloser_process.send_signal(signal.SIGTERM)
            stdout, stderr = modalcloser_process.communicate(timeout=5)

            assert 'listening' in stderr.lower() or 'modalcloser' in stderr.lower(), \
                f"Should log startup message: {stderr}"
            assert modalcloser_process.returncode == 0, f"Should exit cleanly: {stderr}"

        finally:
            if modalcloser_process and modalcloser_process.poll() is None:
                modalcloser_process.kill()
                # Reap the child and close its pipes so no zombie is left behind.
                modalcloser_process.communicate()
            if chrome_launch_process and chrome_pid:
                cleanup_chrome(chrome_launch_process, chrome_pid)
|
||||
|
||||
|
||||
def test_config_poll_interval():
    """Test that the hook runs with a short MODALCLOSER_POLL_INTERVAL (100 ms).

    The hook must still be alive after 1 second, exit 0 on SIGTERM and emit a
    'succeeded' ArchiveResult JSONL record. The polling rate itself is not
    measured.
    """
    with tempfile.TemporaryDirectory() as tmpdir:
        chrome_launch_process = None
        chrome_pid = None
        modalcloser_process = None
        try:
            chrome_launch_process, chrome_pid, snapshot_chrome_dir = setup_chrome_session(tmpdir)

            modalcloser_dir = snapshot_chrome_dir.parent / 'modalcloser'
            modalcloser_dir.mkdir()

            # Set very short poll interval
            env = get_test_env()
            env['MODALCLOSER_POLL_INTERVAL'] = '100'  # 100ms

            modalcloser_process = subprocess.Popen(
                ['node', str(MODALCLOSER_HOOK), f'--url={TEST_URL}', '--snapshot-id=snap-poll'],
                cwd=str(modalcloser_dir),
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE,
                text=True,
                env=env
            )

            # Run for short time
            time.sleep(1)

            # Should still be running
            assert modalcloser_process.poll() is None, "Should still be running"

            # Clean exit
            modalcloser_process.send_signal(signal.SIGTERM)
            stdout, stderr = modalcloser_process.communicate(timeout=5)

            assert modalcloser_process.returncode == 0, f"Should exit 0: {stderr}"

            # Verify JSONL output exists
            result_json = None
            for line in stdout.strip().split('\n'):
                if line.strip().startswith('{'):
                    try:
                        record = json.loads(line)
                        if record.get('type') == 'ArchiveResult':
                            result_json = record
                            break
                    except json.JSONDecodeError:
                        pass

            assert result_json is not None, "Should have JSONL output"
            assert result_json['status'] == 'succeeded', f"Should succeed: {result_json}"

        finally:
            if modalcloser_process and modalcloser_process.poll() is None:
                modalcloser_process.kill()
                # Reap the child and close its pipes so no zombie is left behind.
                modalcloser_process.communicate()
            if chrome_launch_process and chrome_pid:
                cleanup_chrome(chrome_launch_process, chrome_pid)
|
||||
|
||||
|
||||
def test_hides_cookie_consent_on_filmin():
    """Live test: verify modal-closing logic hides the cookie consent popup on filmin.es.

    Writes a standalone Node script (with its own copy of a closeModals
    routine) to a temp directory, runs it with puppeteer-core against the
    live site, and checks the JSON summary it prints: when the CookieYes
    container was present before, it must be hidden afterwards and at least
    one modal must have been counted.

    Requires network access and a Chrome binary (CHROME_BINARY, otherwise the
    default macOS Chrome path embedded in the script). When the banner is not
    present at all, the test is reported as skipped rather than passed,
    because nothing was actually verified.
    """
    # Create a test script that uses puppeteer directly
    test_script = '''
const puppeteer = require('puppeteer-core');

async function closeModals(page) {
    return page.evaluate(() => {
        let closed = 0;

        // Bootstrap 4/5
        if (typeof bootstrap !== 'undefined' && bootstrap.Modal) {
            document.querySelectorAll('.modal.show').forEach(el => {
                try {
                    const modal = bootstrap.Modal.getInstance(el);
                    if (modal) { modal.hide(); closed++; }
                } catch (e) {}
            });
        }

        // Bootstrap 3 / jQuery
        if (typeof jQuery !== 'undefined' && jQuery.fn && jQuery.fn.modal) {
            try {
                const $modals = jQuery('.modal.in, .modal.show');
                if ($modals.length > 0) {
                    $modals.modal('hide');
                    closed += $modals.length;
                }
            } catch (e) {}
        }

        // Generic selectors including cookie consent
        const genericSelectors = [
            // CookieYes (cky) specific selectors
            '.cky-consent-container',
            '.cky-popup-center',
            '.cky-overlay',
            '.cky-modal',
            '#ckyPreferenceCenter',
            // Generic cookie consent
            '#cookie-consent', '.cookie-banner', '.cookie-notice',
            '#cookieConsent', '.cookie-consent', '.cookies-banner',
            '[class*="cookie"][class*="banner"]',
            '[class*="cookie"][class*="notice"]',
            '[class*="consent"]',
            '[class*="gdpr"]',
            '.modal-overlay', '.modal-backdrop',
            '.popup-overlay', '.newsletter-popup',
        ];

        genericSelectors.forEach(selector => {
            try {
                document.querySelectorAll(selector).forEach(el => {
                    const style = window.getComputedStyle(el);
                    if (style.display === 'none' || style.visibility === 'hidden') return;
                    el.style.display = 'none';
                    el.style.visibility = 'hidden';
                    el.style.opacity = '0';
                    el.style.pointerEvents = 'none';
                    closed++;
                });
            } catch (e) {}
        });

        document.body.style.overflow = '';
        document.body.classList.remove('modal-open', 'overflow-hidden', 'no-scroll');

        return closed;
    });
}

async function main() {
    const browser = await puppeteer.launch({
        headless: 'new',
        executablePath: process.env.CHROME_BINARY || '/Applications/Google Chrome.app/Contents/MacOS/Google Chrome',
        args: ['--no-sandbox', '--disable-setuid-sandbox', '--disable-blink-features=AutomationControlled']
    });

    const page = await browser.newPage();
    // Set real user agent to bypass headless detection
    await page.setUserAgent('Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36');
    await page.setViewport({ width: 1440, height: 900 });

    console.error('Navigating to filmin.es...');
    await page.goto('https://www.filmin.es/', { waitUntil: 'networkidle2', timeout: 30000 });

    // Wait for cookie consent to appear
    await new Promise(r => setTimeout(r, 3000));

    // Check BEFORE
    const before = await page.evaluate(() => {
        const el = document.querySelector('.cky-consent-container');
        if (!el) return { found: false };
        const style = window.getComputedStyle(el);
        return { found: true, display: style.display, visibility: style.visibility };
    });

    console.error('Before:', JSON.stringify(before));

    // Run modal closer
    const closed = await closeModals(page);
    console.error('Closed:', closed, 'modals');

    // Check AFTER
    const after = await page.evaluate(() => {
        const el = document.querySelector('.cky-consent-container');
        if (!el) return { found: false };
        const style = window.getComputedStyle(el);
        return { found: true, display: style.display, visibility: style.visibility };
    });

    console.error('After:', JSON.stringify(after));

    await browser.close();

    // Output result as JSON for Python to parse
    const result = {
        before_found: before.found,
        before_visible: before.found && before.display !== 'none' && before.visibility !== 'hidden',
        after_hidden: !after.found || after.display === 'none' || after.visibility === 'hidden',
        modals_closed: closed
    };
    console.log(JSON.stringify(result));
}

main().catch(e => {
    console.error('Error:', e.message);
    process.exit(1);
});
'''

    with tempfile.TemporaryDirectory() as tmpdir:
        tmpdir = Path(tmpdir)
        script_path = tmpdir / 'test_cookie_consent.js'
        script_path.write_text(test_script)

        env = get_test_env()

        result = subprocess.run(
            ['node', str(script_path)],
            cwd=tmpdir,
            capture_output=True,
            text=True,
            env=env,
            timeout=60
        )

        # Echo the script's output so it shows up in pytest's captured logs.
        print(f"stderr: {result.stderr}")
        print(f"stdout: {result.stdout}")

        assert result.returncode == 0, f"Test script failed: {result.stderr}"

        # Parse the JSON output (the summary is the last JSON-looking stdout line)
        output_lines = [l for l in result.stdout.strip().split('\n') if l.startswith('{')]
        assert len(output_lines) > 0, f"No JSON output from test script. stdout: {result.stdout}"

        test_result = json.loads(output_lines[-1])

        # The cookie consent should have been found initially (or page changed)
        # After running closeModals, it should be hidden
        if test_result['before_found']:
            assert test_result['after_hidden'], \
                f"Cookie consent should be hidden after modalcloser. Result: {test_result}"
            assert test_result['modals_closed'] > 0, \
                f"Should have closed at least one modal. Result: {test_result}"
        else:
            # Nothing to verify: report a skip instead of a silent pass.
            pytest.skip("Cookie consent element not found (page may have changed)")
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Propagate pytest's exit status so running this file directly reports
    # failures to the shell instead of always exiting 0.
    raise SystemExit(pytest.main([__file__, '-v']))
|
||||
Reference in New Issue
Block a user