rename archive_org to archivedotorg, add BinaryWorker, fix config pass-through

This commit is contained in:
Nick Sweeting
2026-01-04 22:38:15 -08:00
parent 456aaee287
commit 7ceaeae2d9
32 changed files with 1111 additions and 110 deletions

View File

@@ -2,7 +2,7 @@
"""
Submit a URL to archive.org for archiving.
Usage: on_Snapshot__archive_org.py --url=<url> --snapshot-id=<uuid>
Usage: on_Snapshot__archivedotorg.py --url=<url> --snapshot-id=<uuid>
Output: Writes archive.org.txt to $PWD with the archived URL
Environment variables:
@@ -25,7 +25,7 @@ import rich_click as click
# Extractor metadata
PLUGIN_NAME = 'archive_org'
PLUGIN_NAME = 'archivedotorg'
OUTPUT_DIR = '.'
OUTPUT_FILE = 'archive.org.txt'
@@ -41,7 +41,7 @@ def get_env_int(name: str, default: int = 0) -> int:
return default
def submit_to_archive_org(url: str) -> tuple[bool, str | None, str]:
def submit_to_archivedotorg(url: str) -> tuple[bool, str | None, str]:
"""
Submit URL to archive.org Wayback Machine.
@@ -113,7 +113,7 @@ def main(url: str, snapshot_id: str):
try:
# Run extraction
success, output, error = submit_to_archive_org(url)
success, output, error = submit_to_archivedotorg(url)
if success:
# Success - emit ArchiveResult with output file

View File

@@ -1,5 +1,5 @@
"""
Integration tests for archive_org plugin
Integration tests for archivedotorg plugin
Tests verify standalone archive.org extractor execution.
"""
@@ -12,13 +12,13 @@ from pathlib import Path
import pytest
PLUGIN_DIR = Path(__file__).parent.parent
ARCHIVEDOTORG_HOOK = next(PLUGIN_DIR.glob('on_Snapshot__*_archive_org.*'), None)
ARCHIVEDOTORG_HOOK = next(PLUGIN_DIR.glob('on_Snapshot__*_archivedotorg.*'), None)
TEST_URL = 'https://example.com'
def test_hook_script_exists():
    """Check that the archivedotorg hook script was located and exists on disk."""
    # The module-level glob lookup falls back to None when nothing matches, so
    # assert on that first: a missing hook then fails with a readable message
    # instead of an AttributeError on `None.exists()`.
    assert ARCHIVEDOTORG_HOOK is not None, f"No archivedotorg hook script found in {PLUGIN_DIR}"
    assert ARCHIVEDOTORG_HOOK.exists()
def test_submits_to_archive_org():
def test_submits_to_archivedotorg():
with tempfile.TemporaryDirectory() as tmpdir:
result = subprocess.run(
[sys.executable, str(ARCHIVEDOTORG_HOOK), '--url', TEST_URL, '--snapshot-id', 'test789'],
@@ -49,7 +49,7 @@ def test_submits_to_archive_org():
assert not result_json, "Should NOT emit JSONL on transient error"
assert result.stderr, "Should have error message in stderr"
def test_config_save_archive_org_false_skips():
def test_config_save_archivedotorg_false_skips():
with tempfile.TemporaryDirectory() as tmpdir:
import os
env = os.environ.copy()

View File

@@ -26,11 +26,20 @@ const {
readCdpUrl,
} = require('../chrome/chrome_utils.js');
/**
 * Exit the process with the given code, flushing V8 coverage data first
 * when coverage collection is enabled via NODE_V8_COVERAGE (so that early
 * exits are still captured).
 *
 * @param {number} exitCode - Exit status passed through to process.exit().
 */
function flushCoverageAndExit(exitCode) {
    const coverageEnabled = process.env.NODE_V8_COVERAGE;
    if (coverageEnabled) {
        // Load v8 lazily: it is only needed when coverage is being collected.
        require('v8').takeCoverage();
    }
    process.exit(exitCode);
}
// Check if screenshot is enabled BEFORE requiring puppeteer
if (!getEnvBool('SCREENSHOT_ENABLED', true)) {
console.error('Skipping screenshot (SCREENSHOT_ENABLED=False)');
// Temporary failure (config disabled) - NO JSONL emission
process.exit(0);
flushCoverageAndExit(0);
}
// Now safe to require puppeteer
@@ -135,7 +144,7 @@ async function main() {
if (!url || !snapshotId) {
console.error('Usage: on_Snapshot__51_screenshot.js --url=<url> --snapshot-id=<uuid>');
process.exit(1);
flushCoverageAndExit(1);
}
// Check if staticfile extractor already handled this (permanent skip)
@@ -147,7 +156,7 @@ async function main() {
status: 'skipped',
output_str: 'staticfile already handled',
}));
process.exit(0);
flushCoverageAndExit(0);
}
// Take screenshot (throws on error)
@@ -166,5 +175,5 @@ async function main() {
main().catch(e => {
// Transient error - emit NO JSONL
console.error(`ERROR: ${e.message}`);
process.exit(1);
flushCoverageAndExit(1);
});

View File

@@ -226,9 +226,6 @@ def test_config_save_screenshot_false_skips():
print(f"[DEBUG RESULT] Exit code: {result.returncode}")
print(f"[DEBUG RESULT] Stderr: {result.stderr[:200]}")
# FORCE FAILURE to verify test actually runs
assert False, f"FORCED FAILURE - NODE_V8_COVERAGE={'NODE_V8_COVERAGE' in env} value={env.get('NODE_V8_COVERAGE', 'NOTSET')}"
assert result.returncode == 0, f"Should exit 0 when feature disabled: {result.stderr}"
# Feature disabled - temporary failure, should NOT emit JSONL

View File

@@ -74,7 +74,7 @@
"--geo-bypass",
"--add-metadata",
"--no-progress",
"--remote-components ejs:github",
"--remote-components=ejs:github",
"-o",
"%(title)s.%(ext)s"
],