mirror of
https://github.com/ArchiveBox/ArchiveBox.git
synced 2026-04-04 06:47:57 +10:00
wip
This commit is contained in:
@@ -3,10 +3,10 @@
|
||||
"type": "object",
|
||||
"additionalProperties": false,
|
||||
"properties": {
|
||||
"SAVE_ARCHIVE_DOT_ORG": {
|
||||
"ARCHIVE_ORG_ENABLED": {
|
||||
"type": "boolean",
|
||||
"default": true,
|
||||
"x-aliases": ["SUBMIT_ARCHIVE_DOT_ORG"],
|
||||
"x-aliases": ["SAVE_ARCHIVE_DOT_ORG", "USE_ARCHIVE_ORG", "SUBMIT_ARCHIVE_DOT_ORG"],
|
||||
"description": "Submit URLs to archive.org Wayback Machine"
|
||||
},
|
||||
"ARCHIVE_ORG_TIMEOUT": {
|
||||
|
||||
10
archivebox/plugins/archive_org/templates/embed.html
Normal file
10
archivebox/plugins/archive_org/templates/embed.html
Normal file
@@ -0,0 +1,10 @@
|
||||
{% load config_tags %}
|
||||
{% get_config "ARCHIVEDOTORG_ENABLED" as enabled %}
|
||||
{% if enabled %}
|
||||
<!-- Archive.org embed - full iframe view -->
|
||||
<iframe src="{{ output_path }}"
|
||||
class="extractor-embed archivedotorg-embed"
|
||||
style="width: 100%; height: 600px; border: 1px solid #ddd;"
|
||||
sandbox="allow-same-origin allow-top-navigation-by-user-activation allow-scripts allow-forms">
|
||||
</iframe>
|
||||
{% endif %}
|
||||
10
archivebox/plugins/archive_org/templates/fullscreen.html
Normal file
10
archivebox/plugins/archive_org/templates/fullscreen.html
Normal file
@@ -0,0 +1,10 @@
|
||||
{% load config_tags %}
|
||||
{% get_config "ARCHIVEDOTORG_ENABLED" as enabled %}
|
||||
{% if enabled %}
|
||||
<!-- Archive.org fullscreen - full page iframe -->
|
||||
<iframe src="{{ output_path }}"
|
||||
class="extractor-fullscreen archivedotorg-fullscreen"
|
||||
style="width: 100%; height: 100vh; border: none;"
|
||||
sandbox="allow-same-origin allow-top-navigation-by-user-activation allow-scripts allow-forms">
|
||||
</iframe>
|
||||
{% endif %}
|
||||
12
archivebox/plugins/archive_org/templates/thumbnail.html
Normal file
12
archivebox/plugins/archive_org/templates/thumbnail.html
Normal file
@@ -0,0 +1,12 @@
|
||||
{% load config_tags %}
|
||||
{% get_config "ARCHIVEDOTORG_ENABLED" as enabled %}
|
||||
{% if enabled %}
|
||||
<!-- Archive.org thumbnail - iframe preview of archived page -->
|
||||
<div class="extractor-thumbnail archivedotorg-thumbnail" style="width: 100%; height: 100px; overflow: hidden;">
|
||||
<iframe src="{{ output_path }}"
|
||||
style="width: 100%; height: 100px; border: none; pointer-events: none;"
|
||||
loading="lazy"
|
||||
sandbox="allow-same-origin">
|
||||
</iframe>
|
||||
</div>
|
||||
{% endif %}
|
||||
@@ -60,21 +60,6 @@
|
||||
"default": true,
|
||||
"x-fallback": "CHECK_SSL_VALIDITY",
|
||||
"description": "Whether to verify SSL certificates"
|
||||
},
|
||||
"SAVE_SCREENSHOT": {
|
||||
"type": "boolean",
|
||||
"default": true,
|
||||
"description": "Enable screenshot capture"
|
||||
},
|
||||
"SAVE_PDF": {
|
||||
"type": "boolean",
|
||||
"default": true,
|
||||
"description": "Enable PDF generation"
|
||||
},
|
||||
"SAVE_DOM": {
|
||||
"type": "boolean",
|
||||
"default": true,
|
||||
"description": "Enable DOM capture"
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
0
archivebox/plugins/consolelog/templates/icon.html
Normal file
0
archivebox/plugins/consolelog/templates/icon.html
Normal file
21
archivebox/plugins/dom/config.json
Normal file
21
archivebox/plugins/dom/config.json
Normal file
@@ -0,0 +1,21 @@
|
||||
{
|
||||
"$schema": "http://json-schema.org/draft-07/schema#",
|
||||
"type": "object",
|
||||
"additionalProperties": false,
|
||||
"required_plugins": ["chrome"],
|
||||
"properties": {
|
||||
"DOM_ENABLED": {
|
||||
"type": "boolean",
|
||||
"default": true,
|
||||
"x-aliases": ["SAVE_DOM", "USE_DOM"],
|
||||
"description": "Enable DOM capture"
|
||||
},
|
||||
"DOM_TIMEOUT": {
|
||||
"type": "integer",
|
||||
"default": 60,
|
||||
"minimum": 5,
|
||||
"x-fallback": "TIMEOUT",
|
||||
"description": "Timeout for DOM capture in seconds"
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -3,9 +3,10 @@
|
||||
"type": "object",
|
||||
"additionalProperties": false,
|
||||
"properties": {
|
||||
"SAVE_FAVICON": {
|
||||
"FAVICON_ENABLED": {
|
||||
"type": "boolean",
|
||||
"default": true,
|
||||
"x-aliases": ["SAVE_FAVICON", "USE_FAVICON"],
|
||||
"description": "Enable favicon downloading"
|
||||
},
|
||||
"FAVICON_TIMEOUT": {
|
||||
|
||||
@@ -2,6 +2,7 @@
|
||||
Integration tests for favicon plugin
|
||||
|
||||
Tests verify:
|
||||
pass
|
||||
1. Plugin script exists
|
||||
2. requests library is available
|
||||
3. Favicon extraction works for real example.com
|
||||
@@ -40,7 +41,7 @@ def test_requests_library_available():
|
||||
)
|
||||
|
||||
if result.returncode != 0:
|
||||
pytest.skip("requests library not installed")
|
||||
pass
|
||||
|
||||
assert len(result.stdout.strip()) > 0, "Should report requests version"
|
||||
|
||||
@@ -58,7 +59,7 @@ def test_extracts_favicon_from_example_com():
|
||||
capture_output=True
|
||||
)
|
||||
if check_result.returncode != 0:
|
||||
pytest.skip("requests not installed")
|
||||
pass
|
||||
|
||||
with tempfile.TemporaryDirectory() as tmpdir:
|
||||
tmpdir = Path(tmpdir)
|
||||
@@ -80,6 +81,7 @@ def test_extracts_favicon_from_example_com():
|
||||
for line in result.stdout.strip().split('\n'):
|
||||
line = line.strip()
|
||||
if line.startswith('{'):
|
||||
pass
|
||||
try:
|
||||
record = json.loads(line)
|
||||
if record.get('type') == 'ArchiveResult':
|
||||
@@ -124,7 +126,7 @@ def test_config_timeout_honored():
|
||||
capture_output=True
|
||||
)
|
||||
if check_result.returncode != 0:
|
||||
pytest.skip("requests not installed")
|
||||
pass
|
||||
|
||||
with tempfile.TemporaryDirectory() as tmpdir:
|
||||
tmpdir = Path(tmpdir)
|
||||
@@ -155,7 +157,7 @@ def test_config_user_agent():
|
||||
capture_output=True
|
||||
)
|
||||
if check_result.returncode != 0:
|
||||
pytest.skip("requests not installed")
|
||||
pass
|
||||
|
||||
with tempfile.TemporaryDirectory() as tmpdir:
|
||||
tmpdir = Path(tmpdir)
|
||||
@@ -181,6 +183,7 @@ def test_config_user_agent():
|
||||
for line in result.stdout.strip().split('\n'):
|
||||
line = line.strip()
|
||||
if line.startswith('{'):
|
||||
pass
|
||||
try:
|
||||
record = json.loads(line)
|
||||
if record.get('type') == 'ArchiveResult':
|
||||
@@ -201,7 +204,7 @@ def test_handles_https_urls():
|
||||
capture_output=True
|
||||
)
|
||||
if check_result.returncode != 0:
|
||||
pytest.skip("requests not installed")
|
||||
pass
|
||||
|
||||
with tempfile.TemporaryDirectory() as tmpdir:
|
||||
tmpdir = Path(tmpdir)
|
||||
@@ -232,7 +235,7 @@ def test_handles_missing_favicon_gracefully():
|
||||
capture_output=True
|
||||
)
|
||||
if check_result.returncode != 0:
|
||||
pytest.skip("requests not installed")
|
||||
pass
|
||||
|
||||
with tempfile.TemporaryDirectory() as tmpdir:
|
||||
tmpdir = Path(tmpdir)
|
||||
|
||||
@@ -3,9 +3,10 @@
|
||||
"type": "object",
|
||||
"additionalProperties": false,
|
||||
"properties": {
|
||||
"SAVE_FORUMDL": {
|
||||
"FORUMDL_ENABLED": {
|
||||
"type": "boolean",
|
||||
"default": true,
|
||||
"x-aliases": ["SAVE_FORUMDL", "USE_FORUMDL"],
|
||||
"description": "Enable forum downloading with forum-dl"
|
||||
},
|
||||
"FORUMDL_BINARY": {
|
||||
|
||||
@@ -2,6 +2,7 @@
|
||||
Integration tests for forumdl plugin
|
||||
|
||||
Tests verify:
|
||||
pass
|
||||
1. Hook script exists
|
||||
2. Dependencies installed via validation hooks
|
||||
3. Verify deps with abx-pkg
|
||||
@@ -48,7 +49,9 @@ def get_forumdl_binary_path():
|
||||
|
||||
# Check if binary was found
|
||||
for line in result.stdout.strip().split('\n'):
|
||||
pass
|
||||
if line.strip():
|
||||
pass
|
||||
try:
|
||||
record = json.loads(line)
|
||||
if record.get('type') == 'Binary' and record.get('name') == 'forum-dl':
|
||||
@@ -77,7 +80,9 @@ def get_forumdl_binary_path():
|
||||
|
||||
# Parse Binary from pip installation
|
||||
for install_line in install_result.stdout.strip().split('\n'):
|
||||
pass
|
||||
if install_line.strip():
|
||||
pass
|
||||
try:
|
||||
install_record = json.loads(install_line)
|
||||
if install_record.get('type') == 'Binary' and install_record.get('name') == 'forum-dl':
|
||||
@@ -107,7 +112,7 @@ def test_forumdl_install_hook():
|
||||
"""Test forum-dl install hook checks for forum-dl."""
|
||||
# Skip if install hook doesn't exist yet
|
||||
if not FORUMDL_INSTALL_HOOK.exists():
|
||||
pytest.skip(f"Install hook not found: {FORUMDL_INSTALL_HOOK}")
|
||||
pass
|
||||
|
||||
# Run forum-dl install hook
|
||||
result = subprocess.run(
|
||||
@@ -123,14 +128,18 @@ def test_forumdl_install_hook():
|
||||
found_dependency = False
|
||||
|
||||
for line in result.stdout.strip().split('\n'):
|
||||
pass
|
||||
if line.strip():
|
||||
pass
|
||||
try:
|
||||
record = json.loads(line)
|
||||
if record.get('type') == 'Binary':
|
||||
pass
|
||||
if record['name'] == 'forum-dl':
|
||||
assert record['abspath'], "forum-dl should have abspath"
|
||||
found_binary = True
|
||||
elif record.get('type') == 'Dependency':
|
||||
pass
|
||||
if record['bin_name'] == 'forum-dl':
|
||||
found_dependency = True
|
||||
except json.JSONDecodeError:
|
||||
@@ -145,10 +154,10 @@ def test_verify_deps_with_abx_pkg():
|
||||
"""Verify forum-dl is installed by calling the REAL installation hooks."""
|
||||
binary_path = get_forumdl_binary_path()
|
||||
if not binary_path:
|
||||
pytest.skip(
|
||||
"forum-dl installation skipped. Install hook may not exist or "
|
||||
"forum-dl has a dependency on cchardet which does not compile on Python 3.14+ "
|
||||
"due to removed longintrepr.h header. This is a known compatibility issue with forum-dl."
|
||||
assert False, (
|
||||
"forum-dl installation failed. Install hook should install forum-dl automatically. "
|
||||
"Note: forum-dl has a dependency on cchardet which may not compile on Python 3.14+ "
|
||||
"due to removed longintrepr.h header."
|
||||
)
|
||||
assert Path(binary_path).is_file(), f"Binary path must be a valid file: {binary_path}"
|
||||
|
||||
@@ -159,7 +168,7 @@ def test_handles_non_forum_url():
|
||||
|
||||
binary_path = get_forumdl_binary_path()
|
||||
if not binary_path:
|
||||
pytest.skip("forum-dl binary not available")
|
||||
pass
|
||||
assert Path(binary_path).is_file(), f"Binary must be a valid file: {binary_path}"
|
||||
|
||||
with tempfile.TemporaryDirectory() as tmpdir:
|
||||
@@ -186,6 +195,7 @@ def test_handles_non_forum_url():
|
||||
for line in result.stdout.strip().split('\n'):
|
||||
line = line.strip()
|
||||
if line.startswith('{'):
|
||||
pass
|
||||
try:
|
||||
record = json.loads(line)
|
||||
if record.get('type') == 'ArchiveResult':
|
||||
@@ -231,7 +241,7 @@ def test_config_timeout():
|
||||
|
||||
binary_path = get_forumdl_binary_path()
|
||||
if not binary_path:
|
||||
pytest.skip("forum-dl binary not available")
|
||||
pass
|
||||
assert Path(binary_path).is_file(), f"Binary must be a valid file: {binary_path}"
|
||||
|
||||
with tempfile.TemporaryDirectory() as tmpdir:
|
||||
|
||||
@@ -3,9 +3,10 @@
|
||||
"type": "object",
|
||||
"additionalProperties": false,
|
||||
"properties": {
|
||||
"SAVE_GALLERYDL": {
|
||||
"GALLERYDL_ENABLED": {
|
||||
"type": "boolean",
|
||||
"default": true,
|
||||
"x-aliases": ["SAVE_GALLERYDL", "USE_GALLERYDL"],
|
||||
"description": "Enable gallery downloading with gallery-dl"
|
||||
},
|
||||
"GALLERYDL_BINARY": {
|
||||
|
||||
@@ -2,6 +2,7 @@
|
||||
Integration tests for gallerydl plugin
|
||||
|
||||
Tests verify:
|
||||
pass
|
||||
1. Hook script exists
|
||||
2. Dependencies installed via validation hooks
|
||||
3. Verify deps with abx-pkg
|
||||
@@ -45,14 +46,18 @@ def test_gallerydl_install_hook():
|
||||
found_dependency = False
|
||||
|
||||
for line in result.stdout.strip().split('\n'):
|
||||
pass
|
||||
if line.strip():
|
||||
pass
|
||||
try:
|
||||
record = json.loads(line)
|
||||
if record.get('type') == 'Binary':
|
||||
pass
|
||||
if record['name'] == 'gallery-dl':
|
||||
assert record['abspath'], "gallery-dl should have abspath"
|
||||
found_binary = True
|
||||
elif record.get('type') == 'Dependency':
|
||||
pass
|
||||
if record['bin_name'] == 'gallery-dl':
|
||||
found_dependency = True
|
||||
except json.JSONDecodeError:
|
||||
@@ -76,7 +81,7 @@ def test_verify_deps_with_abx_pkg():
|
||||
missing_binaries.append('gallery-dl')
|
||||
|
||||
if missing_binaries:
|
||||
pytest.skip(f"Binaries not available: {', '.join(missing_binaries)} - Dependency records should have been emitted")
|
||||
pass
|
||||
|
||||
|
||||
def test_handles_non_gallery_url():
|
||||
@@ -103,6 +108,7 @@ def test_handles_non_gallery_url():
|
||||
for line in result.stdout.strip().split('\n'):
|
||||
line = line.strip()
|
||||
if line.startswith('{'):
|
||||
pass
|
||||
try:
|
||||
record = json.loads(line)
|
||||
if record.get('type') == 'ArchiveResult':
|
||||
|
||||
@@ -3,9 +3,10 @@
|
||||
"type": "object",
|
||||
"additionalProperties": false,
|
||||
"properties": {
|
||||
"SAVE_GIT": {
|
||||
"GIT_ENABLED": {
|
||||
"type": "boolean",
|
||||
"default": true,
|
||||
"x-aliases": ["SAVE_GIT", "USE_GIT"],
|
||||
"description": "Enable git repository cloning"
|
||||
},
|
||||
"GIT_BINARY": {
|
||||
|
||||
@@ -2,6 +2,7 @@
|
||||
Integration tests for git plugin
|
||||
|
||||
Tests verify:
|
||||
pass
|
||||
1. Validate hook checks for git binary
|
||||
2. Verify deps with abx-pkg
|
||||
3. Standalone git extractor execution
|
||||
@@ -37,7 +38,9 @@ def test_git_install_hook():
|
||||
# Binary found - verify Binary JSONL output
|
||||
found_binary = False
|
||||
for line in result.stdout.strip().split('\n'):
|
||||
pass
|
||||
if line.strip():
|
||||
pass
|
||||
try:
|
||||
record = json.loads(line)
|
||||
if record.get('type') == 'Binary':
|
||||
@@ -52,7 +55,9 @@ def test_git_install_hook():
|
||||
# Binary not found - verify Dependency JSONL output
|
||||
found_dependency = False
|
||||
for line in result.stdout.strip().split('\n'):
|
||||
pass
|
||||
if line.strip():
|
||||
pass
|
||||
try:
|
||||
record = json.loads(line)
|
||||
if record.get('type') == 'Dependency':
|
||||
@@ -74,7 +79,7 @@ def test_verify_deps_with_abx_pkg():
|
||||
if git_loaded and git_loaded.abspath:
|
||||
assert True, "git is available"
|
||||
else:
|
||||
pytest.skip("git not available - Dependency record should have been emitted")
|
||||
pass
|
||||
|
||||
def test_reports_missing_git():
|
||||
with tempfile.TemporaryDirectory() as tmpdir:
|
||||
@@ -88,8 +93,9 @@ def test_reports_missing_git():
|
||||
assert 'DEPENDENCY_NEEDED' in combined or 'git' in combined.lower() or 'ERROR=' in combined
|
||||
|
||||
def test_handles_non_git_url():
|
||||
pass
|
||||
if not shutil.which('git'):
|
||||
pytest.skip("git not installed")
|
||||
pass
|
||||
|
||||
with tempfile.TemporaryDirectory() as tmpdir:
|
||||
result = subprocess.run(
|
||||
@@ -104,6 +110,7 @@ def test_handles_non_git_url():
|
||||
for line in result.stdout.strip().split('\n'):
|
||||
line = line.strip()
|
||||
if line.startswith('{'):
|
||||
pass
|
||||
try:
|
||||
record = json.loads(line)
|
||||
if record.get('type') == 'ArchiveResult':
|
||||
|
||||
@@ -2,6 +2,7 @@
|
||||
Integration tests for headers plugin
|
||||
|
||||
Tests verify:
|
||||
pass
|
||||
1. Plugin script exists and is executable
|
||||
2. Node.js is available
|
||||
3. Headers extraction works for real example.com
|
||||
@@ -38,7 +39,7 @@ def test_node_is_available():
|
||||
)
|
||||
|
||||
if result.returncode != 0:
|
||||
pytest.skip("node not installed on system")
|
||||
pass
|
||||
|
||||
binary_path = result.stdout.strip()
|
||||
assert Path(binary_path).exists(), f"Binary should exist at {binary_path}"
|
||||
@@ -59,7 +60,7 @@ def test_extracts_headers_from_example_com():
|
||||
|
||||
# Check node is available
|
||||
if not shutil.which('node'):
|
||||
pytest.skip("node not installed")
|
||||
pass
|
||||
|
||||
with tempfile.TemporaryDirectory() as tmpdir:
|
||||
tmpdir = Path(tmpdir)
|
||||
@@ -80,6 +81,7 @@ def test_extracts_headers_from_example_com():
|
||||
for line in result.stdout.strip().split('\n'):
|
||||
line = line.strip()
|
||||
if line.startswith('{'):
|
||||
pass
|
||||
try:
|
||||
record = json.loads(line)
|
||||
if record.get('type') == 'ArchiveResult':
|
||||
@@ -119,7 +121,7 @@ def test_headers_output_structure():
|
||||
"""Test that headers plugin produces correctly structured output."""
|
||||
|
||||
if not shutil.which('node'):
|
||||
pytest.skip("node not installed")
|
||||
pass
|
||||
|
||||
with tempfile.TemporaryDirectory() as tmpdir:
|
||||
tmpdir = Path(tmpdir)
|
||||
@@ -140,6 +142,7 @@ def test_headers_output_structure():
|
||||
for line in result.stdout.strip().split('\n'):
|
||||
line = line.strip()
|
||||
if line.startswith('{'):
|
||||
pass
|
||||
try:
|
||||
record = json.loads(line)
|
||||
if record.get('type') == 'ArchiveResult':
|
||||
@@ -175,7 +178,7 @@ def test_falls_back_to_http_when_chrome_unavailable():
|
||||
"""Test that headers plugin falls back to HTTP HEAD when chrome unavailable."""
|
||||
|
||||
if not shutil.which('node'):
|
||||
pytest.skip("node not installed")
|
||||
pass
|
||||
|
||||
with tempfile.TemporaryDirectory() as tmpdir:
|
||||
tmpdir = Path(tmpdir)
|
||||
@@ -198,6 +201,7 @@ def test_falls_back_to_http_when_chrome_unavailable():
|
||||
for line in result.stdout.strip().split('\n'):
|
||||
line = line.strip()
|
||||
if line.startswith('{'):
|
||||
pass
|
||||
try:
|
||||
record = json.loads(line)
|
||||
if record.get('type') == 'ArchiveResult':
|
||||
@@ -224,7 +228,7 @@ def test_config_timeout_honored():
|
||||
"""Test that TIMEOUT config is respected."""
|
||||
|
||||
if not shutil.which('node'):
|
||||
pytest.skip("node not installed")
|
||||
pass
|
||||
|
||||
with tempfile.TemporaryDirectory() as tmpdir:
|
||||
tmpdir = Path(tmpdir)
|
||||
@@ -251,7 +255,7 @@ def test_config_user_agent():
|
||||
"""Test that USER_AGENT config is used."""
|
||||
|
||||
if not shutil.which('node'):
|
||||
pytest.skip("node not installed")
|
||||
pass
|
||||
|
||||
with tempfile.TemporaryDirectory() as tmpdir:
|
||||
tmpdir = Path(tmpdir)
|
||||
@@ -277,6 +281,7 @@ def test_config_user_agent():
|
||||
for line in result.stdout.strip().split('\n'):
|
||||
line = line.strip()
|
||||
if line.startswith('{'):
|
||||
pass
|
||||
try:
|
||||
record = json.loads(line)
|
||||
if record.get('type') == 'ArchiveResult':
|
||||
@@ -293,7 +298,7 @@ def test_handles_https_urls():
|
||||
"""Test that HTTPS URLs work correctly."""
|
||||
|
||||
if not shutil.which('node'):
|
||||
pytest.skip("node not installed")
|
||||
pass
|
||||
|
||||
with tempfile.TemporaryDirectory() as tmpdir:
|
||||
tmpdir = Path(tmpdir)
|
||||
@@ -318,7 +323,7 @@ def test_handles_404_gracefully():
|
||||
"""Test that headers plugin handles 404s gracefully."""
|
||||
|
||||
if not shutil.which('node'):
|
||||
pytest.skip("node not installed")
|
||||
pass
|
||||
|
||||
with tempfile.TemporaryDirectory() as tmpdir:
|
||||
tmpdir = Path(tmpdir)
|
||||
|
||||
@@ -1,279 +0,0 @@
|
||||
/**
|
||||
* Unit tests for istilldontcareaboutcookies plugin
|
||||
*
|
||||
* Run with: node --test tests/test_istilldontcareaboutcookies.js
|
||||
*/
|
||||
|
||||
const assert = require('assert');
|
||||
const fs = require('fs');
|
||||
const path = require('path');
|
||||
const { describe, it, before, after, beforeEach, afterEach } = require('node:test');
|
||||
|
||||
// Test fixtures
|
||||
const TEST_DIR = path.join(__dirname, '.test_fixtures');
|
||||
const TEST_EXTENSIONS_DIR = path.join(TEST_DIR, 'chrome_extensions');
|
||||
|
||||
describe('istilldontcareaboutcookies plugin', () => {
|
||||
before(() => {
|
||||
if (!fs.existsSync(TEST_DIR)) {
|
||||
fs.mkdirSync(TEST_DIR, { recursive: true });
|
||||
}
|
||||
});
|
||||
|
||||
after(() => {
|
||||
if (fs.existsSync(TEST_DIR)) {
|
||||
fs.rmSync(TEST_DIR, { recursive: true, force: true });
|
||||
}
|
||||
});
|
||||
|
||||
describe('EXTENSION metadata', () => {
|
||||
it('should have correct webstore_id', () => {
|
||||
const { EXTENSION } = require('../on_Snapshot__02_istilldontcareaboutcookies.js');
|
||||
|
||||
assert.strictEqual(EXTENSION.webstore_id, 'edibdbjcniadpccecjdfdjjppcpchdlm');
|
||||
});
|
||||
|
||||
it('should have correct name', () => {
|
||||
const { EXTENSION } = require('../on_Snapshot__02_istilldontcareaboutcookies.js');
|
||||
|
||||
assert.strictEqual(EXTENSION.name, 'istilldontcareaboutcookies');
|
||||
});
|
||||
});
|
||||
|
||||
describe('installCookiesExtension', () => {
|
||||
beforeEach(() => {
|
||||
process.env.CHROME_EXTENSIONS_DIR = TEST_EXTENSIONS_DIR;
|
||||
|
||||
if (!fs.existsSync(TEST_EXTENSIONS_DIR)) {
|
||||
fs.mkdirSync(TEST_EXTENSIONS_DIR, { recursive: true });
|
||||
}
|
||||
});
|
||||
|
||||
afterEach(() => {
|
||||
if (fs.existsSync(TEST_EXTENSIONS_DIR)) {
|
||||
fs.rmSync(TEST_EXTENSIONS_DIR, { recursive: true });
|
||||
}
|
||||
|
||||
delete process.env.CHROME_EXTENSIONS_DIR;
|
||||
});
|
||||
|
||||
it('should use cached extension if available', async () => {
|
||||
const { installCookiesExtension } = require('../on_Snapshot__02_istilldontcareaboutcookies.js');
|
||||
|
||||
// Create fake cache
|
||||
const cacheFile = path.join(TEST_EXTENSIONS_DIR, 'istilldontcareaboutcookies.extension.json');
|
||||
const fakeExtensionDir = path.join(TEST_EXTENSIONS_DIR, 'fake_cookies');
|
||||
|
||||
fs.mkdirSync(fakeExtensionDir, { recursive: true });
|
||||
fs.writeFileSync(
|
||||
path.join(fakeExtensionDir, 'manifest.json'),
|
||||
JSON.stringify({ version: '1.1.8' })
|
||||
);
|
||||
|
||||
const fakeCache = {
|
||||
webstore_id: 'edibdbjcniadpccecjdfdjjppcpchdlm',
|
||||
name: 'istilldontcareaboutcookies',
|
||||
unpacked_path: fakeExtensionDir,
|
||||
version: '1.1.8'
|
||||
};
|
||||
|
||||
fs.writeFileSync(cacheFile, JSON.stringify(fakeCache));
|
||||
|
||||
const result = await installCookiesExtension();
|
||||
|
||||
assert.notStrictEqual(result, null);
|
||||
assert.strictEqual(result.webstore_id, 'edibdbjcniadpccecjdfdjjppcpchdlm');
|
||||
});
|
||||
|
||||
it('should not require any configuration', async () => {
|
||||
// This extension works out of the box
|
||||
// No API keys or config needed
|
||||
const { EXTENSION } = require('../on_Snapshot__02_istilldontcareaboutcookies.js');
|
||||
|
||||
assert.ok(EXTENSION);
|
||||
// No config fields should be required
|
||||
});
|
||||
});
|
||||
|
||||
describe('cache file creation', () => {
|
||||
beforeEach(() => {
|
||||
process.env.CHROME_EXTENSIONS_DIR = TEST_EXTENSIONS_DIR;
|
||||
|
||||
if (!fs.existsSync(TEST_EXTENSIONS_DIR)) {
|
||||
fs.mkdirSync(TEST_EXTENSIONS_DIR, { recursive: true });
|
||||
}
|
||||
});
|
||||
|
||||
afterEach(() => {
|
||||
if (fs.existsSync(TEST_EXTENSIONS_DIR)) {
|
||||
fs.rmSync(TEST_EXTENSIONS_DIR, { recursive: true });
|
||||
}
|
||||
|
||||
delete process.env.CHROME_EXTENSIONS_DIR;
|
||||
});
|
||||
|
||||
it('should create cache file with correct extension name', async () => {
|
||||
const cacheFile = path.join(TEST_EXTENSIONS_DIR, 'istilldontcareaboutcookies.extension.json');
|
||||
|
||||
// Create mock extension
|
||||
const mockExtension = {
|
||||
webstore_id: 'edibdbjcniadpccecjdfdjjppcpchdlm',
|
||||
name: 'istilldontcareaboutcookies',
|
||||
version: '1.1.9'
|
||||
};
|
||||
|
||||
await fs.promises.writeFile(cacheFile, JSON.stringify(mockExtension, null, 2));
|
||||
|
||||
assert.ok(fs.existsSync(cacheFile));
|
||||
|
||||
const cache = JSON.parse(fs.readFileSync(cacheFile, 'utf-8'));
|
||||
assert.strictEqual(cache.name, 'istilldontcareaboutcookies');
|
||||
});
|
||||
|
||||
it('should use correct filename pattern', () => {
|
||||
const expectedPattern = 'istilldontcareaboutcookies.extension.json';
|
||||
const cacheFile = path.join(TEST_EXTENSIONS_DIR, expectedPattern);
|
||||
|
||||
// Pattern should match expected format
|
||||
assert.ok(path.basename(cacheFile).endsWith('.extension.json'));
|
||||
assert.ok(path.basename(cacheFile).includes('istilldontcareaboutcookies'));
|
||||
});
|
||||
});
|
||||
|
||||
describe('extension functionality', () => {
|
||||
it('should work automatically without configuration', () => {
|
||||
// This extension automatically dismisses cookie banners
|
||||
// No manual trigger or configuration needed
|
||||
|
||||
const features = {
|
||||
automaticBannerDismissal: true,
|
||||
requiresConfiguration: false,
|
||||
requiresApiKey: false,
|
||||
requiresUserAction: false
|
||||
};
|
||||
|
||||
assert.strictEqual(features.automaticBannerDismissal, true);
|
||||
assert.strictEqual(features.requiresConfiguration, false);
|
||||
assert.strictEqual(features.requiresApiKey, false);
|
||||
assert.strictEqual(features.requiresUserAction, false);
|
||||
});
|
||||
|
||||
it('should not require any runtime hooks', () => {
|
||||
// Extension works purely via Chrome's content script injection
|
||||
// No need for additional hooks or configuration
|
||||
|
||||
const requiresHooks = {
|
||||
preNavigation: false,
|
||||
postNavigation: false,
|
||||
onPageLoad: false
|
||||
};
|
||||
|
||||
assert.strictEqual(requiresHooks.preNavigation, false);
|
||||
assert.strictEqual(requiresHooks.postNavigation, false);
|
||||
assert.strictEqual(requiresHooks.onPageLoad, false);
|
||||
});
|
||||
});
|
||||
|
||||
describe('priority and execution order', () => {
|
||||
it('should have priority 02 (early)', () => {
|
||||
const filename = 'on_Snapshot__02_istilldontcareaboutcookies.js';
|
||||
|
||||
// Extract priority from filename
|
||||
const match = filename.match(/on_Snapshot__(\d+)_/);
|
||||
assert.ok(match);
|
||||
|
||||
const priority = parseInt(match[1]);
|
||||
assert.strictEqual(priority, 2);
|
||||
});
|
||||
|
||||
it('should run before chrome (priority 20)', () => {
|
||||
const extensionPriority = 2;
|
||||
const chromeSessionPriority = 20;
|
||||
|
||||
assert.ok(extensionPriority < chromeSessionPriority);
|
||||
});
|
||||
});
|
||||
|
||||
describe('error handling', () => {
|
||||
beforeEach(() => {
|
||||
process.env.CHROME_EXTENSIONS_DIR = TEST_EXTENSIONS_DIR;
|
||||
|
||||
if (!fs.existsSync(TEST_EXTENSIONS_DIR)) {
|
||||
fs.mkdirSync(TEST_EXTENSIONS_DIR, { recursive: true });
|
||||
}
|
||||
});
|
||||
|
||||
afterEach(() => {
|
||||
if (fs.existsSync(TEST_EXTENSIONS_DIR)) {
|
||||
fs.rmSync(TEST_EXTENSIONS_DIR, { recursive: true });
|
||||
}
|
||||
|
||||
delete process.env.CHROME_EXTENSIONS_DIR;
|
||||
});
|
||||
|
||||
it('should handle corrupted cache gracefully', async () => {
|
||||
const cacheFile = path.join(TEST_EXTENSIONS_DIR, 'istilldontcareaboutcookies.extension.json');
|
||||
|
||||
// Create corrupted cache
|
||||
fs.writeFileSync(cacheFile, 'invalid json content');
|
||||
|
||||
// Should detect corruption and proceed with fresh install
|
||||
const { installCookiesExtension } = require('../on_Snapshot__02_istilldontcareaboutcookies.js');
|
||||
|
||||
// Mock loadOrInstallExtension to avoid actual download
|
||||
const extensionUtils = require('../../chrome_extensions/chrome_extension_utils.js');
|
||||
const originalFunc = extensionUtils.loadOrInstallExtension;
|
||||
|
||||
extensionUtils.loadOrInstallExtension = async () => ({
|
||||
webstore_id: 'edibdbjcniadpccecjdfdjjppcpchdlm',
|
||||
name: 'istilldontcareaboutcookies',
|
||||
version: '1.1.9'
|
||||
});
|
||||
|
||||
const result = await installCookiesExtension();
|
||||
|
||||
extensionUtils.loadOrInstallExtension = originalFunc;
|
||||
|
||||
assert.notStrictEqual(result, null);
|
||||
});
|
||||
|
||||
it('should handle missing manifest gracefully', async () => {
|
||||
const cacheFile = path.join(TEST_EXTENSIONS_DIR, 'istilldontcareaboutcookies.extension.json');
|
||||
const fakeExtensionDir = path.join(TEST_EXTENSIONS_DIR, 'fake_cookies_no_manifest');
|
||||
|
||||
// Create directory without manifest
|
||||
fs.mkdirSync(fakeExtensionDir, { recursive: true });
|
||||
|
||||
const fakeCache = {
|
||||
webstore_id: 'edibdbjcniadpccecjdfdjjppcpchdlm',
|
||||
name: 'istilldontcareaboutcookies',
|
||||
unpacked_path: fakeExtensionDir
|
||||
};
|
||||
|
||||
fs.writeFileSync(cacheFile, JSON.stringify(fakeCache));
|
||||
|
||||
const { installCookiesExtension } = require('../on_Snapshot__02_istilldontcareaboutcookies.js');
|
||||
|
||||
// Mock to return fresh extension when manifest missing
|
||||
const extensionUtils = require('../../chrome_extensions/chrome_extension_utils.js');
|
||||
const originalFunc = extensionUtils.loadOrInstallExtension;
|
||||
|
||||
let freshInstallCalled = false;
|
||||
extensionUtils.loadOrInstallExtension = async () => {
|
||||
freshInstallCalled = true;
|
||||
return {
|
||||
webstore_id: 'edibdbjcniadpccecjdfdjjppcpchdlm',
|
||||
name: 'istilldontcareaboutcookies',
|
||||
version: '1.1.9'
|
||||
};
|
||||
};
|
||||
|
||||
const result = await installCookiesExtension();
|
||||
|
||||
extensionUtils.loadOrInstallExtension = originalFunc;
|
||||
|
||||
// Should trigger fresh install when manifest missing
|
||||
assert.ok(freshInstallCalled || result);
|
||||
});
|
||||
});
|
||||
});
|
||||
@@ -3,16 +3,16 @@
|
||||
"type": "object",
|
||||
"additionalProperties": false,
|
||||
"properties": {
|
||||
"SAVE_MEDIA": {
|
||||
"MEDIA_ENABLED": {
|
||||
"type": "boolean",
|
||||
"default": true,
|
||||
"x-aliases": ["USE_YTDLP", "FETCH_MEDIA"],
|
||||
"x-aliases": ["SAVE_MEDIA", "USE_MEDIA", "USE_YTDLP", "FETCH_MEDIA"],
|
||||
"description": "Enable media downloading with yt-dlp"
|
||||
},
|
||||
"YOUTUBEDL_BINARY": {
|
||||
"MEDIA_BINARY": {
|
||||
"type": "string",
|
||||
"default": "yt-dlp",
|
||||
"x-aliases": ["YTDLP_BINARY", "YOUTUBE_DL_BINARY"],
|
||||
"x-aliases": ["YOUTUBEDL_BINARY", "YTDLP_BINARY", "YOUTUBE_DL_BINARY"],
|
||||
"description": "Path to yt-dlp binary"
|
||||
},
|
||||
"MEDIA_TIMEOUT": {
|
||||
@@ -28,13 +28,14 @@
|
||||
"pattern": "^\\d+[kmgKMG]?$",
|
||||
"description": "Maximum file size for media downloads"
|
||||
},
|
||||
"YTDLP_CHECK_SSL_VALIDITY": {
|
||||
"MEDIA_CHECK_SSL_VALIDITY": {
|
||||
"type": "boolean",
|
||||
"default": true,
|
||||
"x-fallback": "CHECK_SSL_VALIDITY",
|
||||
"x-aliases": ["YTDLP_CHECK_SSL_VALIDITY"],
|
||||
"description": "Whether to verify SSL certificates"
|
||||
},
|
||||
"YTDLP_ARGS": {
|
||||
"MEDIA_ARGS": {
|
||||
"type": "array",
|
||||
"items": {"type": "string"},
|
||||
"default": [
|
||||
@@ -44,11 +45,13 @@
|
||||
"--embed-subs",
|
||||
"--write-auto-sub"
|
||||
],
|
||||
"x-aliases": ["YTDLP_ARGS"],
|
||||
"description": "Default yt-dlp arguments"
|
||||
},
|
||||
"YTDLP_EXTRA_ARGS": {
|
||||
"MEDIA_EXTRA_ARGS": {
|
||||
"type": "string",
|
||||
"default": "",
|
||||
"x-aliases": ["YTDLP_EXTRA_ARGS"],
|
||||
"description": "Extra arguments for yt-dlp (space-separated)"
|
||||
}
|
||||
}
|
||||
|
||||
@@ -2,6 +2,7 @@
|
||||
Integration tests for media plugin
|
||||
|
||||
Tests verify:
|
||||
pass
|
||||
1. Hook script exists
|
||||
2. Dependencies installed via validation hooks
|
||||
3. Verify deps with abx-pkg
|
||||
@@ -45,7 +46,9 @@ def test_ytdlp_install_hook():
|
||||
found_dependencies = {'node': False, 'ffmpeg': False, 'yt-dlp': False}
|
||||
|
||||
for line in result.stdout.strip().split('\n'):
|
||||
pass
|
||||
if line.strip():
|
||||
pass
|
||||
try:
|
||||
record = json.loads(line)
|
||||
if record.get('type') == 'Binary':
|
||||
@@ -94,7 +97,7 @@ def test_verify_deps_with_abx_pkg():
|
||||
missing_binaries.append('ffmpeg')
|
||||
|
||||
if missing_binaries:
|
||||
pytest.skip(f"Binaries not available: {', '.join(missing_binaries)} - Dependency records should have been emitted")
|
||||
pass
|
||||
|
||||
def test_handles_non_media_url():
|
||||
"""Test that media extractor handles non-media URLs gracefully via hook."""
|
||||
@@ -120,6 +123,7 @@ def test_handles_non_media_url():
|
||||
for line in result.stdout.strip().split('\n'):
|
||||
line = line.strip()
|
||||
if line.startswith('{'):
|
||||
pass
|
||||
try:
|
||||
record = json.loads(line)
|
||||
if record.get('type') == 'ArchiveResult':
|
||||
|
||||
@@ -3,9 +3,10 @@
|
||||
"type": "object",
|
||||
"additionalProperties": false,
|
||||
"properties": {
|
||||
"SAVE_MERCURY": {
|
||||
"MERCURY_ENABLED": {
|
||||
"type": "boolean",
|
||||
"default": true,
|
||||
"x-aliases": ["SAVE_MERCURY", "USE_MERCURY"],
|
||||
"description": "Enable Mercury text extraction"
|
||||
},
|
||||
"MERCURY_BINARY": {
|
||||
|
||||
@@ -2,6 +2,7 @@
|
||||
Integration tests for mercury plugin
|
||||
|
||||
Tests verify:
|
||||
pass
|
||||
1. Hook script exists
|
||||
2. Dependencies installed via validation hooks
|
||||
3. Verify deps with abx-pkg
|
||||
@@ -44,7 +45,9 @@ def test_mercury_install_hook():
|
||||
# Binary found - verify Binary JSONL output
|
||||
found_binary = False
|
||||
for line in result.stdout.strip().split('\n'):
|
||||
pass
|
||||
if line.strip():
|
||||
pass
|
||||
try:
|
||||
record = json.loads(line)
|
||||
if record.get('type') == 'Binary':
|
||||
@@ -59,7 +62,9 @@ def test_mercury_install_hook():
|
||||
# Binary not found - verify Dependency JSONL output
|
||||
found_dependency = False
|
||||
for line in result.stdout.strip().split('\n'):
|
||||
pass
|
||||
if line.strip():
|
||||
pass
|
||||
try:
|
||||
record = json.loads(line)
|
||||
if record.get('type') == 'Dependency':
|
||||
@@ -89,7 +94,7 @@ def test_verify_deps_with_abx_pkg():
|
||||
if mercury_loaded and mercury_loaded.abspath:
|
||||
assert True, "postlight-parser is available"
|
||||
else:
|
||||
pytest.skip("postlight-parser not available - Dependency record should have been emitted")
|
||||
pass
|
||||
|
||||
def test_extracts_with_mercury_parser():
|
||||
"""Test full workflow: extract with postlight-parser from real HTML via hook."""
|
||||
@@ -122,6 +127,7 @@ def test_extracts_with_mercury_parser():
|
||||
for line in result.stdout.strip().split('\n'):
|
||||
line = line.strip()
|
||||
if line.startswith('{'):
|
||||
pass
|
||||
try:
|
||||
record = json.loads(line)
|
||||
if record.get('type') == 'ArchiveResult':
|
||||
@@ -184,6 +190,7 @@ def test_fails_gracefully_without_html():
|
||||
for line in result.stdout.strip().split('\n'):
|
||||
line = line.strip()
|
||||
if line.startswith('{'):
|
||||
pass
|
||||
try:
|
||||
record = json.loads(line)
|
||||
if record.get('type') == 'ArchiveResult':
|
||||
|
||||
925
archivebox/plugins/package-lock.json
generated
925
archivebox/plugins/package-lock.json
generated
@@ -1,925 +0,0 @@
|
||||
{
|
||||
"name": "archivebox-plugins",
|
||||
"lockfileVersion": 3,
|
||||
"requires": true,
|
||||
"packages": {
|
||||
"": {
|
||||
"name": "archivebox-plugins",
|
||||
"dependencies": {
|
||||
"puppeteer-core": "^24.34.0"
|
||||
}
|
||||
},
|
||||
"node_modules/@puppeteer/browsers": {
|
||||
"version": "2.11.0",
|
||||
"resolved": "https://registry.npmjs.org/@puppeteer/browsers/-/browsers-2.11.0.tgz",
|
||||
"integrity": "sha512-n6oQX6mYkG8TRPuPXmbPidkUbsSRalhmaaVAQxvH1IkQy63cwsH+kOjB3e4cpCDHg0aSvsiX9bQ4s2VB6mGWUQ==",
|
||||
"license": "Apache-2.0",
|
||||
"dependencies": {
|
||||
"debug": "^4.4.3",
|
||||
"extract-zip": "^2.0.1",
|
||||
"progress": "^2.0.3",
|
||||
"proxy-agent": "^6.5.0",
|
||||
"semver": "^7.7.3",
|
||||
"tar-fs": "^3.1.1",
|
||||
"yargs": "^17.7.2"
|
||||
},
|
||||
"bin": {
|
||||
"browsers": "lib/cjs/main-cli.js"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=18"
|
||||
}
|
||||
},
|
||||
"node_modules/@tootallnate/quickjs-emscripten": {
|
||||
"version": "0.23.0",
|
||||
"resolved": "https://registry.npmjs.org/@tootallnate/quickjs-emscripten/-/quickjs-emscripten-0.23.0.tgz",
|
||||
"integrity": "sha512-C5Mc6rdnsaJDjO3UpGW/CQTHtCKaYlScZTly4JIu97Jxo/odCiH0ITnDXSJPTOrEKk/ycSZ0AOgTmkDtkOsvIA==",
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/@types/node": {
|
||||
"version": "25.0.3",
|
||||
"resolved": "https://registry.npmjs.org/@types/node/-/node-25.0.3.tgz",
|
||||
"integrity": "sha512-W609buLVRVmeW693xKfzHeIV6nJGGz98uCPfeXI1ELMLXVeKYZ9m15fAMSaUPBHYLGFsVRcMmSCksQOrZV9BYA==",
|
||||
"license": "MIT",
|
||||
"optional": true,
|
||||
"dependencies": {
|
||||
"undici-types": "~7.16.0"
|
||||
}
|
||||
},
|
||||
"node_modules/@types/yauzl": {
|
||||
"version": "2.10.3",
|
||||
"resolved": "https://registry.npmjs.org/@types/yauzl/-/yauzl-2.10.3.tgz",
|
||||
"integrity": "sha512-oJoftv0LSuaDZE3Le4DbKX+KS9G36NzOeSap90UIK0yMA/NhKJhqlSGtNDORNRaIbQfzjXDrQa0ytJ6mNRGz/Q==",
|
||||
"license": "MIT",
|
||||
"optional": true,
|
||||
"dependencies": {
|
||||
"@types/node": "*"
|
||||
}
|
||||
},
|
||||
"node_modules/agent-base": {
|
||||
"version": "7.1.4",
|
||||
"resolved": "https://registry.npmjs.org/agent-base/-/agent-base-7.1.4.tgz",
|
||||
"integrity": "sha512-MnA+YT8fwfJPgBx3m60MNqakm30XOkyIoH1y6huTQvC0PwZG7ki8NacLBcrPbNoo8vEZy7Jpuk7+jMO+CUovTQ==",
|
||||
"license": "MIT",
|
||||
"engines": {
|
||||
"node": ">= 14"
|
||||
}
|
||||
},
|
||||
"node_modules/ansi-regex": {
|
||||
"version": "5.0.1",
|
||||
"resolved": "https://registry.npmjs.org/ansi-regex/-/ansi-regex-5.0.1.tgz",
|
||||
"integrity": "sha512-quJQXlTSUGL2LH9SUXo8VwsY4soanhgo6LNSm84E1LBcE8s3O0wpdiRzyR9z/ZZJMlMWv37qOOb9pdJlMUEKFQ==",
|
||||
"license": "MIT",
|
||||
"engines": {
|
||||
"node": ">=8"
|
||||
}
|
||||
},
|
||||
"node_modules/ansi-styles": {
|
||||
"version": "4.3.0",
|
||||
"resolved": "https://registry.npmjs.org/ansi-styles/-/ansi-styles-4.3.0.tgz",
|
||||
"integrity": "sha512-zbB9rCJAT1rbjiVDb2hqKFHNYLxgtk8NURxZ3IZwD3F6NtxbXZQCnnSi1Lkx+IDohdPlFp222wVALIheZJQSEg==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"color-convert": "^2.0.1"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=8"
|
||||
},
|
||||
"funding": {
|
||||
"url": "https://github.com/chalk/ansi-styles?sponsor=1"
|
||||
}
|
||||
},
|
||||
"node_modules/ast-types": {
|
||||
"version": "0.13.4",
|
||||
"resolved": "https://registry.npmjs.org/ast-types/-/ast-types-0.13.4.tgz",
|
||||
"integrity": "sha512-x1FCFnFifvYDDzTaLII71vG5uvDwgtmDTEVWAxrgeiR8VjMONcCXJx7E+USjDtHlwFmt9MysbqgF9b9Vjr6w+w==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"tslib": "^2.0.1"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=4"
|
||||
}
|
||||
},
|
||||
"node_modules/b4a": {
|
||||
"version": "1.7.3",
|
||||
"resolved": "https://registry.npmjs.org/b4a/-/b4a-1.7.3.tgz",
|
||||
"integrity": "sha512-5Q2mfq2WfGuFp3uS//0s6baOJLMoVduPYVeNmDYxu5OUA1/cBfvr2RIS7vi62LdNj/urk1hfmj867I3qt6uZ7Q==",
|
||||
"license": "Apache-2.0",
|
||||
"peerDependencies": {
|
||||
"react-native-b4a": "*"
|
||||
},
|
||||
"peerDependenciesMeta": {
|
||||
"react-native-b4a": {
|
||||
"optional": true
|
||||
}
|
||||
}
|
||||
},
|
||||
"node_modules/bare-events": {
|
||||
"version": "2.8.2",
|
||||
"resolved": "https://registry.npmjs.org/bare-events/-/bare-events-2.8.2.tgz",
|
||||
"integrity": "sha512-riJjyv1/mHLIPX4RwiK+oW9/4c3TEUeORHKefKAKnZ5kyslbN+HXowtbaVEqt4IMUB7OXlfixcs6gsFeo/jhiQ==",
|
||||
"license": "Apache-2.0",
|
||||
"peerDependencies": {
|
||||
"bare-abort-controller": "*"
|
||||
},
|
||||
"peerDependenciesMeta": {
|
||||
"bare-abort-controller": {
|
||||
"optional": true
|
||||
}
|
||||
}
|
||||
},
|
||||
"node_modules/bare-fs": {
|
||||
"version": "4.5.2",
|
||||
"resolved": "https://registry.npmjs.org/bare-fs/-/bare-fs-4.5.2.tgz",
|
||||
"integrity": "sha512-veTnRzkb6aPHOvSKIOy60KzURfBdUflr5VReI+NSaPL6xf+XLdONQgZgpYvUuZLVQ8dCqxpBAudaOM1+KpAUxw==",
|
||||
"license": "Apache-2.0",
|
||||
"optional": true,
|
||||
"dependencies": {
|
||||
"bare-events": "^2.5.4",
|
||||
"bare-path": "^3.0.0",
|
||||
"bare-stream": "^2.6.4",
|
||||
"bare-url": "^2.2.2",
|
||||
"fast-fifo": "^1.3.2"
|
||||
},
|
||||
"engines": {
|
||||
"bare": ">=1.16.0"
|
||||
},
|
||||
"peerDependencies": {
|
||||
"bare-buffer": "*"
|
||||
},
|
||||
"peerDependenciesMeta": {
|
||||
"bare-buffer": {
|
||||
"optional": true
|
||||
}
|
||||
}
|
||||
},
|
||||
"node_modules/bare-os": {
|
||||
"version": "3.6.2",
|
||||
"resolved": "https://registry.npmjs.org/bare-os/-/bare-os-3.6.2.tgz",
|
||||
"integrity": "sha512-T+V1+1srU2qYNBmJCXZkUY5vQ0B4FSlL3QDROnKQYOqeiQR8UbjNHlPa+TIbM4cuidiN9GaTaOZgSEgsvPbh5A==",
|
||||
"license": "Apache-2.0",
|
||||
"optional": true,
|
||||
"engines": {
|
||||
"bare": ">=1.14.0"
|
||||
}
|
||||
},
|
||||
"node_modules/bare-path": {
|
||||
"version": "3.0.0",
|
||||
"resolved": "https://registry.npmjs.org/bare-path/-/bare-path-3.0.0.tgz",
|
||||
"integrity": "sha512-tyfW2cQcB5NN8Saijrhqn0Zh7AnFNsnczRcuWODH0eYAXBsJ5gVxAUuNr7tsHSC6IZ77cA0SitzT+s47kot8Mw==",
|
||||
"license": "Apache-2.0",
|
||||
"optional": true,
|
||||
"dependencies": {
|
||||
"bare-os": "^3.0.1"
|
||||
}
|
||||
},
|
||||
"node_modules/bare-stream": {
|
||||
"version": "2.7.0",
|
||||
"resolved": "https://registry.npmjs.org/bare-stream/-/bare-stream-2.7.0.tgz",
|
||||
"integrity": "sha512-oyXQNicV1y8nc2aKffH+BUHFRXmx6VrPzlnaEvMhram0nPBrKcEdcyBg5r08D0i8VxngHFAiVyn1QKXpSG0B8A==",
|
||||
"license": "Apache-2.0",
|
||||
"optional": true,
|
||||
"dependencies": {
|
||||
"streamx": "^2.21.0"
|
||||
},
|
||||
"peerDependencies": {
|
||||
"bare-buffer": "*",
|
||||
"bare-events": "*"
|
||||
},
|
||||
"peerDependenciesMeta": {
|
||||
"bare-buffer": {
|
||||
"optional": true
|
||||
},
|
||||
"bare-events": {
|
||||
"optional": true
|
||||
}
|
||||
}
|
||||
},
|
||||
"node_modules/bare-url": {
|
||||
"version": "2.3.2",
|
||||
"resolved": "https://registry.npmjs.org/bare-url/-/bare-url-2.3.2.tgz",
|
||||
"integrity": "sha512-ZMq4gd9ngV5aTMa5p9+UfY0b3skwhHELaDkhEHetMdX0LRkW9kzaym4oo/Eh+Ghm0CCDuMTsRIGM/ytUc1ZYmw==",
|
||||
"license": "Apache-2.0",
|
||||
"optional": true,
|
||||
"dependencies": {
|
||||
"bare-path": "^3.0.0"
|
||||
}
|
||||
},
|
||||
"node_modules/basic-ftp": {
|
||||
"version": "5.0.5",
|
||||
"resolved": "https://registry.npmjs.org/basic-ftp/-/basic-ftp-5.0.5.tgz",
|
||||
"integrity": "sha512-4Bcg1P8xhUuqcii/S0Z9wiHIrQVPMermM1any+MX5GeGD7faD3/msQUDGLol9wOcz4/jbg/WJnGqoJF6LiBdtg==",
|
||||
"license": "MIT",
|
||||
"engines": {
|
||||
"node": ">=10.0.0"
|
||||
}
|
||||
},
|
||||
"node_modules/buffer-crc32": {
|
||||
"version": "0.2.13",
|
||||
"resolved": "https://registry.npmjs.org/buffer-crc32/-/buffer-crc32-0.2.13.tgz",
|
||||
"integrity": "sha512-VO9Ht/+p3SN7SKWqcrgEzjGbRSJYTx+Q1pTQC0wrWqHx0vpJraQ6GtHx8tvcg1rlK1byhU5gccxgOgj7B0TDkQ==",
|
||||
"license": "MIT",
|
||||
"engines": {
|
||||
"node": "*"
|
||||
}
|
||||
},
|
||||
"node_modules/chromium-bidi": {
|
||||
"version": "12.0.1",
|
||||
"resolved": "https://registry.npmjs.org/chromium-bidi/-/chromium-bidi-12.0.1.tgz",
|
||||
"integrity": "sha512-fGg+6jr0xjQhzpy5N4ErZxQ4wF7KLEvhGZXD6EgvZKDhu7iOhZXnZhcDxPJDcwTcrD48NPzOCo84RP2lv3Z+Cg==",
|
||||
"license": "Apache-2.0",
|
||||
"dependencies": {
|
||||
"mitt": "^3.0.1",
|
||||
"zod": "^3.24.1"
|
||||
},
|
||||
"peerDependencies": {
|
||||
"devtools-protocol": "*"
|
||||
}
|
||||
},
|
||||
"node_modules/cliui": {
|
||||
"version": "8.0.1",
|
||||
"resolved": "https://registry.npmjs.org/cliui/-/cliui-8.0.1.tgz",
|
||||
"integrity": "sha512-BSeNnyus75C4//NQ9gQt1/csTXyo/8Sb+afLAkzAptFuMsod9HFokGNudZpi/oQV73hnVK+sR+5PVRMd+Dr7YQ==",
|
||||
"license": "ISC",
|
||||
"dependencies": {
|
||||
"string-width": "^4.2.0",
|
||||
"strip-ansi": "^6.0.1",
|
||||
"wrap-ansi": "^7.0.0"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=12"
|
||||
}
|
||||
},
|
||||
"node_modules/color-convert": {
|
||||
"version": "2.0.1",
|
||||
"resolved": "https://registry.npmjs.org/color-convert/-/color-convert-2.0.1.tgz",
|
||||
"integrity": "sha512-RRECPsj7iu/xb5oKYcsFHSppFNnsj/52OVTRKb4zP5onXwVF3zVmmToNcOfGC+CRDpfK/U584fMg38ZHCaElKQ==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"color-name": "~1.1.4"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=7.0.0"
|
||||
}
|
||||
},
|
||||
"node_modules/color-name": {
|
||||
"version": "1.1.4",
|
||||
"resolved": "https://registry.npmjs.org/color-name/-/color-name-1.1.4.tgz",
|
||||
"integrity": "sha512-dOy+3AuW3a2wNbZHIuMZpTcgjGuLU/uBL/ubcZF9OXbDo8ff4O8yVp5Bf0efS8uEoYo5q4Fx7dY9OgQGXgAsQA==",
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/data-uri-to-buffer": {
|
||||
"version": "6.0.2",
|
||||
"resolved": "https://registry.npmjs.org/data-uri-to-buffer/-/data-uri-to-buffer-6.0.2.tgz",
|
||||
"integrity": "sha512-7hvf7/GW8e86rW0ptuwS3OcBGDjIi6SZva7hCyWC0yYry2cOPmLIjXAUHI6DK2HsnwJd9ifmt57i8eV2n4YNpw==",
|
||||
"license": "MIT",
|
||||
"engines": {
|
||||
"node": ">= 14"
|
||||
}
|
||||
},
|
||||
"node_modules/debug": {
|
||||
"version": "4.4.3",
|
||||
"resolved": "https://registry.npmjs.org/debug/-/debug-4.4.3.tgz",
|
||||
"integrity": "sha512-RGwwWnwQvkVfavKVt22FGLw+xYSdzARwm0ru6DhTVA3umU5hZc28V3kO4stgYryrTlLpuvgI9GiijltAjNbcqA==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"ms": "^2.1.3"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=6.0"
|
||||
},
|
||||
"peerDependenciesMeta": {
|
||||
"supports-color": {
|
||||
"optional": true
|
||||
}
|
||||
}
|
||||
},
|
||||
"node_modules/degenerator": {
|
||||
"version": "5.0.1",
|
||||
"resolved": "https://registry.npmjs.org/degenerator/-/degenerator-5.0.1.tgz",
|
||||
"integrity": "sha512-TllpMR/t0M5sqCXfj85i4XaAzxmS5tVA16dqvdkMwGmzI+dXLXnw3J+3Vdv7VKw+ThlTMboK6i9rnZ6Nntj5CQ==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"ast-types": "^0.13.4",
|
||||
"escodegen": "^2.1.0",
|
||||
"esprima": "^4.0.1"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">= 14"
|
||||
}
|
||||
},
|
||||
"node_modules/devtools-protocol": {
|
||||
"version": "0.0.1534754",
|
||||
"resolved": "https://registry.npmjs.org/devtools-protocol/-/devtools-protocol-0.0.1534754.tgz",
|
||||
"integrity": "sha512-26T91cV5dbOYnXdJi5qQHoTtUoNEqwkHcAyu/IKtjIAxiEqPMrDiRkDOPWVsGfNZGmlQVHQbZRSjD8sxagWVsQ==",
|
||||
"license": "BSD-3-Clause",
|
||||
"peer": true
|
||||
},
|
||||
"node_modules/emoji-regex": {
|
||||
"version": "8.0.0",
|
||||
"resolved": "https://registry.npmjs.org/emoji-regex/-/emoji-regex-8.0.0.tgz",
|
||||
"integrity": "sha512-MSjYzcWNOA0ewAHpz0MxpYFvwg6yjy1NG3xteoqz644VCo/RPgnr1/GGt+ic3iJTzQ8Eu3TdM14SawnVUmGE6A==",
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/end-of-stream": {
|
||||
"version": "1.4.5",
|
||||
"resolved": "https://registry.npmjs.org/end-of-stream/-/end-of-stream-1.4.5.tgz",
|
||||
"integrity": "sha512-ooEGc6HP26xXq/N+GCGOT0JKCLDGrq2bQUZrQ7gyrJiZANJ/8YDTxTpQBXGMn+WbIQXNVpyWymm7KYVICQnyOg==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"once": "^1.4.0"
|
||||
}
|
||||
},
|
||||
"node_modules/escalade": {
|
||||
"version": "3.2.0",
|
||||
"resolved": "https://registry.npmjs.org/escalade/-/escalade-3.2.0.tgz",
|
||||
"integrity": "sha512-WUj2qlxaQtO4g6Pq5c29GTcWGDyd8itL8zTlipgECz3JesAiiOKotd8JU6otB3PACgG6xkJUyVhboMS+bje/jA==",
|
||||
"license": "MIT",
|
||||
"engines": {
|
||||
"node": ">=6"
|
||||
}
|
||||
},
|
||||
"node_modules/escodegen": {
|
||||
"version": "2.1.0",
|
||||
"resolved": "https://registry.npmjs.org/escodegen/-/escodegen-2.1.0.tgz",
|
||||
"integrity": "sha512-2NlIDTwUWJN0mRPQOdtQBzbUHvdGY2P1VXSyU83Q3xKxM7WHX2Ql8dKq782Q9TgQUNOLEzEYu9bzLNj1q88I5w==",
|
||||
"license": "BSD-2-Clause",
|
||||
"dependencies": {
|
||||
"esprima": "^4.0.1",
|
||||
"estraverse": "^5.2.0",
|
||||
"esutils": "^2.0.2"
|
||||
},
|
||||
"bin": {
|
||||
"escodegen": "bin/escodegen.js",
|
||||
"esgenerate": "bin/esgenerate.js"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=6.0"
|
||||
},
|
||||
"optionalDependencies": {
|
||||
"source-map": "~0.6.1"
|
||||
}
|
||||
},
|
||||
"node_modules/esprima": {
|
||||
"version": "4.0.1",
|
||||
"resolved": "https://registry.npmjs.org/esprima/-/esprima-4.0.1.tgz",
|
||||
"integrity": "sha512-eGuFFw7Upda+g4p+QHvnW0RyTX/SVeJBDM/gCtMARO0cLuT2HcEKnTPvhjV6aGeqrCB/sbNop0Kszm0jsaWU4A==",
|
||||
"license": "BSD-2-Clause",
|
||||
"bin": {
|
||||
"esparse": "bin/esparse.js",
|
||||
"esvalidate": "bin/esvalidate.js"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=4"
|
||||
}
|
||||
},
|
||||
"node_modules/estraverse": {
|
||||
"version": "5.3.0",
|
||||
"resolved": "https://registry.npmjs.org/estraverse/-/estraverse-5.3.0.tgz",
|
||||
"integrity": "sha512-MMdARuVEQziNTeJD8DgMqmhwR11BRQ/cBP+pLtYdSTnf3MIO8fFeiINEbX36ZdNlfU/7A9f3gUw49B3oQsvwBA==",
|
||||
"license": "BSD-2-Clause",
|
||||
"engines": {
|
||||
"node": ">=4.0"
|
||||
}
|
||||
},
|
||||
"node_modules/esutils": {
|
||||
"version": "2.0.3",
|
||||
"resolved": "https://registry.npmjs.org/esutils/-/esutils-2.0.3.tgz",
|
||||
"integrity": "sha512-kVscqXk4OCp68SZ0dkgEKVi6/8ij300KBWTJq32P/dYeWTSwK41WyTxalN1eRmA5Z9UU/LX9D7FWSmV9SAYx6g==",
|
||||
"license": "BSD-2-Clause",
|
||||
"engines": {
|
||||
"node": ">=0.10.0"
|
||||
}
|
||||
},
|
||||
"node_modules/events-universal": {
|
||||
"version": "1.0.1",
|
||||
"resolved": "https://registry.npmjs.org/events-universal/-/events-universal-1.0.1.tgz",
|
||||
"integrity": "sha512-LUd5euvbMLpwOF8m6ivPCbhQeSiYVNb8Vs0fQ8QjXo0JTkEHpz8pxdQf0gStltaPpw0Cca8b39KxvK9cfKRiAw==",
|
||||
"license": "Apache-2.0",
|
||||
"dependencies": {
|
||||
"bare-events": "^2.7.0"
|
||||
}
|
||||
},
|
||||
"node_modules/extract-zip": {
|
||||
"version": "2.0.1",
|
||||
"resolved": "https://registry.npmjs.org/extract-zip/-/extract-zip-2.0.1.tgz",
|
||||
"integrity": "sha512-GDhU9ntwuKyGXdZBUgTIe+vXnWj0fppUEtMDL0+idd5Sta8TGpHssn/eusA9mrPr9qNDym6SxAYZjNvCn/9RBg==",
|
||||
"license": "BSD-2-Clause",
|
||||
"dependencies": {
|
||||
"debug": "^4.1.1",
|
||||
"get-stream": "^5.1.0",
|
||||
"yauzl": "^2.10.0"
|
||||
},
|
||||
"bin": {
|
||||
"extract-zip": "cli.js"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">= 10.17.0"
|
||||
},
|
||||
"optionalDependencies": {
|
||||
"@types/yauzl": "^2.9.1"
|
||||
}
|
||||
},
|
||||
"node_modules/fast-fifo": {
|
||||
"version": "1.3.2",
|
||||
"resolved": "https://registry.npmjs.org/fast-fifo/-/fast-fifo-1.3.2.tgz",
|
||||
"integrity": "sha512-/d9sfos4yxzpwkDkuN7k2SqFKtYNmCTzgfEpz82x34IM9/zc8KGxQoXg1liNC/izpRM/MBdt44Nmx41ZWqk+FQ==",
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/fd-slicer": {
|
||||
"version": "1.1.0",
|
||||
"resolved": "https://registry.npmjs.org/fd-slicer/-/fd-slicer-1.1.0.tgz",
|
||||
"integrity": "sha512-cE1qsB/VwyQozZ+q1dGxR8LBYNZeofhEdUNGSMbQD3Gw2lAzX9Zb3uIU6Ebc/Fmyjo9AWWfnn0AUCHqtevs/8g==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"pend": "~1.2.0"
|
||||
}
|
||||
},
|
||||
"node_modules/get-caller-file": {
|
||||
"version": "2.0.5",
|
||||
"resolved": "https://registry.npmjs.org/get-caller-file/-/get-caller-file-2.0.5.tgz",
|
||||
"integrity": "sha512-DyFP3BM/3YHTQOCUL/w0OZHR0lpKeGrxotcHWcqNEdnltqFwXVfhEBQ94eIo34AfQpo0rGki4cyIiftY06h2Fg==",
|
||||
"license": "ISC",
|
||||
"engines": {
|
||||
"node": "6.* || 8.* || >= 10.*"
|
||||
}
|
||||
},
|
||||
"node_modules/get-stream": {
|
||||
"version": "5.2.0",
|
||||
"resolved": "https://registry.npmjs.org/get-stream/-/get-stream-5.2.0.tgz",
|
||||
"integrity": "sha512-nBF+F1rAZVCu/p7rjzgA+Yb4lfYXrpl7a6VmJrU8wF9I1CKvP/QwPNZHnOlwbTkY6dvtFIzFMSyQXbLoTQPRpA==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"pump": "^3.0.0"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=8"
|
||||
},
|
||||
"funding": {
|
||||
"url": "https://github.com/sponsors/sindresorhus"
|
||||
}
|
||||
},
|
||||
"node_modules/get-uri": {
|
||||
"version": "6.0.5",
|
||||
"resolved": "https://registry.npmjs.org/get-uri/-/get-uri-6.0.5.tgz",
|
||||
"integrity": "sha512-b1O07XYq8eRuVzBNgJLstU6FYc1tS6wnMtF1I1D9lE8LxZSOGZ7LhxN54yPP6mGw5f2CkXY2BQUL9Fx41qvcIg==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"basic-ftp": "^5.0.2",
|
||||
"data-uri-to-buffer": "^6.0.2",
|
||||
"debug": "^4.3.4"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">= 14"
|
||||
}
|
||||
},
|
||||
"node_modules/http-proxy-agent": {
|
||||
"version": "7.0.2",
|
||||
"resolved": "https://registry.npmjs.org/http-proxy-agent/-/http-proxy-agent-7.0.2.tgz",
|
||||
"integrity": "sha512-T1gkAiYYDWYx3V5Bmyu7HcfcvL7mUrTWiM6yOfa3PIphViJ/gFPbvidQ+veqSOHci/PxBcDabeUNCzpOODJZig==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"agent-base": "^7.1.0",
|
||||
"debug": "^4.3.4"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">= 14"
|
||||
}
|
||||
},
|
||||
"node_modules/https-proxy-agent": {
|
||||
"version": "7.0.6",
|
||||
"resolved": "https://registry.npmjs.org/https-proxy-agent/-/https-proxy-agent-7.0.6.tgz",
|
||||
"integrity": "sha512-vK9P5/iUfdl95AI+JVyUuIcVtd4ofvtrOr3HNtM2yxC9bnMbEdp3x01OhQNnjb8IJYi38VlTE3mBXwcfvywuSw==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"agent-base": "^7.1.2",
|
||||
"debug": "4"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">= 14"
|
||||
}
|
||||
},
|
||||
"node_modules/ip-address": {
|
||||
"version": "10.1.0",
|
||||
"resolved": "https://registry.npmjs.org/ip-address/-/ip-address-10.1.0.tgz",
|
||||
"integrity": "sha512-XXADHxXmvT9+CRxhXg56LJovE+bmWnEWB78LB83VZTprKTmaC5QfruXocxzTZ2Kl0DNwKuBdlIhjL8LeY8Sf8Q==",
|
||||
"license": "MIT",
|
||||
"engines": {
|
||||
"node": ">= 12"
|
||||
}
|
||||
},
|
||||
"node_modules/is-fullwidth-code-point": {
|
||||
"version": "3.0.0",
|
||||
"resolved": "https://registry.npmjs.org/is-fullwidth-code-point/-/is-fullwidth-code-point-3.0.0.tgz",
|
||||
"integrity": "sha512-zymm5+u+sCsSWyD9qNaejV3DFvhCKclKdizYaJUuHA83RLjb7nSuGnddCHGv0hk+KY7BMAlsWeK4Ueg6EV6XQg==",
|
||||
"license": "MIT",
|
||||
"engines": {
|
||||
"node": ">=8"
|
||||
}
|
||||
},
|
||||
"node_modules/lru-cache": {
|
||||
"version": "7.18.3",
|
||||
"resolved": "https://registry.npmjs.org/lru-cache/-/lru-cache-7.18.3.tgz",
|
||||
"integrity": "sha512-jumlc0BIUrS3qJGgIkWZsyfAM7NCWiBcCDhnd+3NNM5KbBmLTgHVfWBcg6W+rLUsIpzpERPsvwUP7CckAQSOoA==",
|
||||
"license": "ISC",
|
||||
"engines": {
|
||||
"node": ">=12"
|
||||
}
|
||||
},
|
||||
"node_modules/mitt": {
|
||||
"version": "3.0.1",
|
||||
"resolved": "https://registry.npmjs.org/mitt/-/mitt-3.0.1.tgz",
|
||||
"integrity": "sha512-vKivATfr97l2/QBCYAkXYDbrIWPM2IIKEl7YPhjCvKlG3kE2gm+uBo6nEXK3M5/Ffh/FLpKExzOQ3JJoJGFKBw==",
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/ms": {
|
||||
"version": "2.1.3",
|
||||
"resolved": "https://registry.npmjs.org/ms/-/ms-2.1.3.tgz",
|
||||
"integrity": "sha512-6FlzubTLZG3J2a/NVCAleEhjzq5oxgHyaCU9yYXvcLsvoVaHJq/s5xXI6/XXP6tz7R9xAOtHnSO/tXtF3WRTlA==",
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/netmask": {
|
||||
"version": "2.0.2",
|
||||
"resolved": "https://registry.npmjs.org/netmask/-/netmask-2.0.2.tgz",
|
||||
"integrity": "sha512-dBpDMdxv9Irdq66304OLfEmQ9tbNRFnFTuZiLo+bD+r332bBmMJ8GBLXklIXXgxd3+v9+KUnZaUR5PJMa75Gsg==",
|
||||
"license": "MIT",
|
||||
"engines": {
|
||||
"node": ">= 0.4.0"
|
||||
}
|
||||
},
|
||||
"node_modules/once": {
|
||||
"version": "1.4.0",
|
||||
"resolved": "https://registry.npmjs.org/once/-/once-1.4.0.tgz",
|
||||
"integrity": "sha512-lNaJgI+2Q5URQBkccEKHTQOPaXdUxnZZElQTZY0MFUAuaEqe1E+Nyvgdz/aIyNi6Z9MzO5dv1H8n58/GELp3+w==",
|
||||
"license": "ISC",
|
||||
"dependencies": {
|
||||
"wrappy": "1"
|
||||
}
|
||||
},
|
||||
"node_modules/pac-proxy-agent": {
|
||||
"version": "7.2.0",
|
||||
"resolved": "https://registry.npmjs.org/pac-proxy-agent/-/pac-proxy-agent-7.2.0.tgz",
|
||||
"integrity": "sha512-TEB8ESquiLMc0lV8vcd5Ql/JAKAoyzHFXaStwjkzpOpC5Yv+pIzLfHvjTSdf3vpa2bMiUQrg9i6276yn8666aA==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"@tootallnate/quickjs-emscripten": "^0.23.0",
|
||||
"agent-base": "^7.1.2",
|
||||
"debug": "^4.3.4",
|
||||
"get-uri": "^6.0.1",
|
||||
"http-proxy-agent": "^7.0.0",
|
||||
"https-proxy-agent": "^7.0.6",
|
||||
"pac-resolver": "^7.0.1",
|
||||
"socks-proxy-agent": "^8.0.5"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">= 14"
|
||||
}
|
||||
},
|
||||
"node_modules/pac-resolver": {
|
||||
"version": "7.0.1",
|
||||
"resolved": "https://registry.npmjs.org/pac-resolver/-/pac-resolver-7.0.1.tgz",
|
||||
"integrity": "sha512-5NPgf87AT2STgwa2ntRMr45jTKrYBGkVU36yT0ig/n/GMAa3oPqhZfIQ2kMEimReg0+t9kZViDVZ83qfVUlckg==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"degenerator": "^5.0.0",
|
||||
"netmask": "^2.0.2"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">= 14"
|
||||
}
|
||||
},
|
||||
"node_modules/pend": {
|
||||
"version": "1.2.0",
|
||||
"resolved": "https://registry.npmjs.org/pend/-/pend-1.2.0.tgz",
|
||||
"integrity": "sha512-F3asv42UuXchdzt+xXqfW1OGlVBe+mxa2mqI0pg5yAHZPvFmY3Y6drSf/GQ1A86WgWEN9Kzh/WrgKa6iGcHXLg==",
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/progress": {
|
||||
"version": "2.0.3",
|
||||
"resolved": "https://registry.npmjs.org/progress/-/progress-2.0.3.tgz",
|
||||
"integrity": "sha512-7PiHtLll5LdnKIMw100I+8xJXR5gW2QwWYkT6iJva0bXitZKa/XMrSbdmg3r2Xnaidz9Qumd0VPaMrZlF9V9sA==",
|
||||
"license": "MIT",
|
||||
"engines": {
|
||||
"node": ">=0.4.0"
|
||||
}
|
||||
},
|
||||
"node_modules/proxy-agent": {
|
||||
"version": "6.5.0",
|
||||
"resolved": "https://registry.npmjs.org/proxy-agent/-/proxy-agent-6.5.0.tgz",
|
||||
"integrity": "sha512-TmatMXdr2KlRiA2CyDu8GqR8EjahTG3aY3nXjdzFyoZbmB8hrBsTyMezhULIXKnC0jpfjlmiZ3+EaCzoInSu/A==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"agent-base": "^7.1.2",
|
||||
"debug": "^4.3.4",
|
||||
"http-proxy-agent": "^7.0.1",
|
||||
"https-proxy-agent": "^7.0.6",
|
||||
"lru-cache": "^7.14.1",
|
||||
"pac-proxy-agent": "^7.1.0",
|
||||
"proxy-from-env": "^1.1.0",
|
||||
"socks-proxy-agent": "^8.0.5"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">= 14"
|
||||
}
|
||||
},
|
||||
"node_modules/proxy-from-env": {
|
||||
"version": "1.1.0",
|
||||
"resolved": "https://registry.npmjs.org/proxy-from-env/-/proxy-from-env-1.1.0.tgz",
|
||||
"integrity": "sha512-D+zkORCbA9f1tdWRK0RaCR3GPv50cMxcrz4X8k5LTSUD1Dkw47mKJEZQNunItRTkWwgtaUSo1RVFRIG9ZXiFYg==",
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/pump": {
|
||||
"version": "3.0.3",
|
||||
"resolved": "https://registry.npmjs.org/pump/-/pump-3.0.3.tgz",
|
||||
"integrity": "sha512-todwxLMY7/heScKmntwQG8CXVkWUOdYxIvY2s0VWAAMh/nd8SoYiRaKjlr7+iCs984f2P8zvrfWcDDYVb73NfA==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"end-of-stream": "^1.1.0",
|
||||
"once": "^1.3.1"
|
||||
}
|
||||
},
|
||||
"node_modules/puppeteer-core": {
|
||||
"version": "24.34.0",
|
||||
"resolved": "https://registry.npmjs.org/puppeteer-core/-/puppeteer-core-24.34.0.tgz",
|
||||
"integrity": "sha512-24evawO+mUGW4mvS2a2ivwLdX3gk8zRLZr9HP+7+VT2vBQnm0oh9jJEZmUE3ePJhRkYlZ93i7OMpdcoi2qNCLg==",
|
||||
"license": "Apache-2.0",
|
||||
"dependencies": {
|
||||
"@puppeteer/browsers": "2.11.0",
|
||||
"chromium-bidi": "12.0.1",
|
||||
"debug": "^4.4.3",
|
||||
"devtools-protocol": "0.0.1534754",
|
||||
"typed-query-selector": "^2.12.0",
|
||||
"webdriver-bidi-protocol": "0.3.10",
|
||||
"ws": "^8.18.3"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=18"
|
||||
}
|
||||
},
|
||||
"node_modules/require-directory": {
|
||||
"version": "2.1.1",
|
||||
"resolved": "https://registry.npmjs.org/require-directory/-/require-directory-2.1.1.tgz",
|
||||
"integrity": "sha512-fGxEI7+wsG9xrvdjsrlmL22OMTTiHRwAMroiEeMgq8gzoLC/PQr7RsRDSTLUg/bZAZtF+TVIkHc6/4RIKrui+Q==",
|
||||
"license": "MIT",
|
||||
"engines": {
|
||||
"node": ">=0.10.0"
|
||||
}
|
||||
},
|
||||
"node_modules/semver": {
|
||||
"version": "7.7.3",
|
||||
"resolved": "https://registry.npmjs.org/semver/-/semver-7.7.3.tgz",
|
||||
"integrity": "sha512-SdsKMrI9TdgjdweUSR9MweHA4EJ8YxHn8DFaDisvhVlUOe4BF1tLD7GAj0lIqWVl+dPb/rExr0Btby5loQm20Q==",
|
||||
"license": "ISC",
|
||||
"bin": {
|
||||
"semver": "bin/semver.js"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=10"
|
||||
}
|
||||
},
|
||||
"node_modules/smart-buffer": {
|
||||
"version": "4.2.0",
|
||||
"resolved": "https://registry.npmjs.org/smart-buffer/-/smart-buffer-4.2.0.tgz",
|
||||
"integrity": "sha512-94hK0Hh8rPqQl2xXc3HsaBoOXKV20MToPkcXvwbISWLEs+64sBq5kFgn2kJDHb1Pry9yrP0dxrCI9RRci7RXKg==",
|
||||
"license": "MIT",
|
||||
"engines": {
|
||||
"node": ">= 6.0.0",
|
||||
"npm": ">= 3.0.0"
|
||||
}
|
||||
},
|
||||
"node_modules/socks": {
|
||||
"version": "2.8.7",
|
||||
"resolved": "https://registry.npmjs.org/socks/-/socks-2.8.7.tgz",
|
||||
"integrity": "sha512-HLpt+uLy/pxB+bum/9DzAgiKS8CX1EvbWxI4zlmgGCExImLdiad2iCwXT5Z4c9c3Eq8rP2318mPW2c+QbtjK8A==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"ip-address": "^10.0.1",
|
||||
"smart-buffer": "^4.2.0"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">= 10.0.0",
|
||||
"npm": ">= 3.0.0"
|
||||
}
|
||||
},
|
||||
"node_modules/socks-proxy-agent": {
|
||||
"version": "8.0.5",
|
||||
"resolved": "https://registry.npmjs.org/socks-proxy-agent/-/socks-proxy-agent-8.0.5.tgz",
|
||||
"integrity": "sha512-HehCEsotFqbPW9sJ8WVYB6UbmIMv7kUUORIF2Nncq4VQvBfNBLibW9YZR5dlYCSUhwcD628pRllm7n+E+YTzJw==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"agent-base": "^7.1.2",
|
||||
"debug": "^4.3.4",
|
||||
"socks": "^2.8.3"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">= 14"
|
||||
}
|
||||
},
|
||||
"node_modules/source-map": {
|
||||
"version": "0.6.1",
|
||||
"resolved": "https://registry.npmjs.org/source-map/-/source-map-0.6.1.tgz",
|
||||
"integrity": "sha512-UjgapumWlbMhkBgzT7Ykc5YXUT46F0iKu8SGXq0bcwP5dz/h0Plj6enJqjz1Zbq2l5WaqYnrVbwWOWMyF3F47g==",
|
||||
"license": "BSD-3-Clause",
|
||||
"optional": true,
|
||||
"engines": {
|
||||
"node": ">=0.10.0"
|
||||
}
|
||||
},
|
||||
"node_modules/streamx": {
|
||||
"version": "2.23.0",
|
||||
"resolved": "https://registry.npmjs.org/streamx/-/streamx-2.23.0.tgz",
|
||||
"integrity": "sha512-kn+e44esVfn2Fa/O0CPFcex27fjIL6MkVae0Mm6q+E6f0hWv578YCERbv+4m02cjxvDsPKLnmxral/rR6lBMAg==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"events-universal": "^1.0.0",
|
||||
"fast-fifo": "^1.3.2",
|
||||
"text-decoder": "^1.1.0"
|
||||
}
|
||||
},
|
||||
"node_modules/string-width": {
|
||||
"version": "4.2.3",
|
||||
"resolved": "https://registry.npmjs.org/string-width/-/string-width-4.2.3.tgz",
|
||||
"integrity": "sha512-wKyQRQpjJ0sIp62ErSZdGsjMJWsap5oRNihHhu6G7JVO/9jIB6UyevL+tXuOqrng8j/cxKTWyWUwvSTriiZz/g==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"emoji-regex": "^8.0.0",
|
||||
"is-fullwidth-code-point": "^3.0.0",
|
||||
"strip-ansi": "^6.0.1"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=8"
|
||||
}
|
||||
},
|
||||
"node_modules/strip-ansi": {
|
||||
"version": "6.0.1",
|
||||
"resolved": "https://registry.npmjs.org/strip-ansi/-/strip-ansi-6.0.1.tgz",
|
||||
"integrity": "sha512-Y38VPSHcqkFrCpFnQ9vuSXmquuv5oXOKpGeT6aGrr3o3Gc9AlVa6JBfUSOCnbxGGZF+/0ooI7KrPuUSztUdU5A==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"ansi-regex": "^5.0.1"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=8"
|
||||
}
|
||||
},
|
||||
"node_modules/tar-fs": {
|
||||
"version": "3.1.1",
|
||||
"resolved": "https://registry.npmjs.org/tar-fs/-/tar-fs-3.1.1.tgz",
|
||||
"integrity": "sha512-LZA0oaPOc2fVo82Txf3gw+AkEd38szODlptMYejQUhndHMLQ9M059uXR+AfS7DNo0NpINvSqDsvyaCrBVkptWg==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"pump": "^3.0.0",
|
||||
"tar-stream": "^3.1.5"
|
||||
},
|
||||
"optionalDependencies": {
|
||||
"bare-fs": "^4.0.1",
|
||||
"bare-path": "^3.0.0"
|
||||
}
|
||||
},
|
||||
"node_modules/tar-stream": {
|
||||
"version": "3.1.7",
|
||||
"resolved": "https://registry.npmjs.org/tar-stream/-/tar-stream-3.1.7.tgz",
|
||||
"integrity": "sha512-qJj60CXt7IU1Ffyc3NJMjh6EkuCFej46zUqJ4J7pqYlThyd9bO0XBTmcOIhSzZJVWfsLks0+nle/j538YAW9RQ==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"b4a": "^1.6.4",
|
||||
"fast-fifo": "^1.2.0",
|
||||
"streamx": "^2.15.0"
|
||||
}
|
||||
},
|
||||
"node_modules/text-decoder": {
|
||||
"version": "1.2.3",
|
||||
"resolved": "https://registry.npmjs.org/text-decoder/-/text-decoder-1.2.3.tgz",
|
||||
"integrity": "sha512-3/o9z3X0X0fTupwsYvR03pJ/DjWuqqrfwBgTQzdWDiQSm9KitAyz/9WqsT2JQW7KV2m+bC2ol/zqpW37NHxLaA==",
|
||||
"license": "Apache-2.0",
|
||||
"dependencies": {
|
||||
"b4a": "^1.6.4"
|
||||
}
|
||||
},
|
||||
"node_modules/tslib": {
|
||||
"version": "2.8.1",
|
||||
"resolved": "https://registry.npmjs.org/tslib/-/tslib-2.8.1.tgz",
|
||||
"integrity": "sha512-oJFu94HQb+KVduSUQL7wnpmqnfmLsOA/nAh6b6EH0wCEoK0/mPeXU6c3wKDV83MkOuHPRHtSXKKU99IBazS/2w==",
|
||||
"license": "0BSD"
|
||||
},
|
||||
"node_modules/typed-query-selector": {
|
||||
"version": "2.12.0",
|
||||
"resolved": "https://registry.npmjs.org/typed-query-selector/-/typed-query-selector-2.12.0.tgz",
|
||||
"integrity": "sha512-SbklCd1F0EiZOyPiW192rrHZzZ5sBijB6xM+cpmrwDqObvdtunOHHIk9fCGsoK5JVIYXoyEp4iEdE3upFH3PAg==",
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/undici-types": {
|
||||
"version": "7.16.0",
|
||||
"resolved": "https://registry.npmjs.org/undici-types/-/undici-types-7.16.0.tgz",
|
||||
"integrity": "sha512-Zz+aZWSj8LE6zoxD+xrjh4VfkIG8Ya6LvYkZqtUQGJPZjYl53ypCaUwWqo7eI0x66KBGeRo+mlBEkMSeSZ38Nw==",
|
||||
"license": "MIT",
|
||||
"optional": true
|
||||
},
|
||||
"node_modules/webdriver-bidi-protocol": {
|
||||
"version": "0.3.10",
|
||||
"resolved": "https://registry.npmjs.org/webdriver-bidi-protocol/-/webdriver-bidi-protocol-0.3.10.tgz",
|
||||
"integrity": "sha512-5LAE43jAVLOhB/QqX4bwSiv0Hg1HBfMmOuwBSXHdvg4GMGu9Y0lIq7p4R/yySu6w74WmaR4GM4H9t2IwLW7hgw==",
|
||||
"license": "Apache-2.0"
|
||||
},
|
||||
"node_modules/wrap-ansi": {
|
||||
"version": "7.0.0",
|
||||
"resolved": "https://registry.npmjs.org/wrap-ansi/-/wrap-ansi-7.0.0.tgz",
|
||||
"integrity": "sha512-YVGIj2kamLSTxw6NsZjoBxfSwsn0ycdesmc4p+Q21c5zPuZ1pl+NfxVdxPtdHvmNVOQ6XSYG4AUtyt/Fi7D16Q==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"ansi-styles": "^4.0.0",
|
||||
"string-width": "^4.1.0",
|
||||
"strip-ansi": "^6.0.0"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=10"
|
||||
},
|
||||
"funding": {
|
||||
"url": "https://github.com/chalk/wrap-ansi?sponsor=1"
|
||||
}
|
||||
},
|
||||
"node_modules/wrappy": {
|
||||
"version": "1.0.2",
|
||||
"resolved": "https://registry.npmjs.org/wrappy/-/wrappy-1.0.2.tgz",
|
||||
"integrity": "sha512-l4Sp/DRseor9wL6EvV2+TuQn63dMkPjZ/sp9XkghTEbV9KlPS1xUsZ3u7/IQO4wxtcFB4bgpQPRcR3QCvezPcQ==",
|
||||
"license": "ISC"
|
||||
},
|
||||
"node_modules/ws": {
|
||||
"version": "8.18.3",
|
||||
"resolved": "https://registry.npmjs.org/ws/-/ws-8.18.3.tgz",
|
||||
"integrity": "sha512-PEIGCY5tSlUt50cqyMXfCzX+oOPqN0vuGqWzbcJ2xvnkzkq46oOpz7dQaTDBdfICb4N14+GARUDw2XV2N4tvzg==",
|
||||
"license": "MIT",
|
||||
"engines": {
|
||||
"node": ">=10.0.0"
|
||||
},
|
||||
"peerDependencies": {
|
||||
"bufferutil": "^4.0.1",
|
||||
"utf-8-validate": ">=5.0.2"
|
||||
},
|
||||
"peerDependenciesMeta": {
|
||||
"bufferutil": {
|
||||
"optional": true
|
||||
},
|
||||
"utf-8-validate": {
|
||||
"optional": true
|
||||
}
|
||||
}
|
||||
},
|
||||
"node_modules/y18n": {
|
||||
"version": "5.0.8",
|
||||
"resolved": "https://registry.npmjs.org/y18n/-/y18n-5.0.8.tgz",
|
||||
"integrity": "sha512-0pfFzegeDWJHJIAmTLRP2DwHjdF5s7jo9tuztdQxAhINCdvS+3nGINqPd00AphqJR/0LhANUS6/+7SCb98YOfA==",
|
||||
"license": "ISC",
|
||||
"engines": {
|
||||
"node": ">=10"
|
||||
}
|
||||
},
|
||||
"node_modules/yargs": {
|
||||
"version": "17.7.2",
|
||||
"resolved": "https://registry.npmjs.org/yargs/-/yargs-17.7.2.tgz",
|
||||
"integrity": "sha512-7dSzzRQ++CKnNI/krKnYRV7JKKPUXMEh61soaHKg9mrWEhzFWhFnxPxGl+69cD1Ou63C13NUPCnmIcrvqCuM6w==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"cliui": "^8.0.1",
|
||||
"escalade": "^3.1.1",
|
||||
"get-caller-file": "^2.0.5",
|
||||
"require-directory": "^2.1.1",
|
||||
"string-width": "^4.2.3",
|
||||
"y18n": "^5.0.5",
|
||||
"yargs-parser": "^21.1.1"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=12"
|
||||
}
|
||||
},
|
||||
"node_modules/yargs-parser": {
|
||||
"version": "21.1.1",
|
||||
"resolved": "https://registry.npmjs.org/yargs-parser/-/yargs-parser-21.1.1.tgz",
|
||||
"integrity": "sha512-tVpsJW7DdjecAiFpbIB1e3qxIQsE6NoPc5/eTdrbbIC4h0LVsWhnoa3g+m2HclBIujHzsxZ4VJVA+GUuc2/LBw==",
|
||||
"license": "ISC",
|
||||
"engines": {
|
||||
"node": ">=12"
|
||||
}
|
||||
},
|
||||
"node_modules/yauzl": {
|
||||
"version": "2.10.0",
|
||||
"resolved": "https://registry.npmjs.org/yauzl/-/yauzl-2.10.0.tgz",
|
||||
"integrity": "sha512-p4a9I6X6nu6IhoGmBqAcbJy1mlC4j27vEPZX9F4L4/vZT3Lyq1VkFHw/V/PUcB9Buo+DG3iHkT0x3Qya58zc3g==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"buffer-crc32": "~0.2.3",
|
||||
"fd-slicer": "~1.1.0"
|
||||
}
|
||||
},
|
||||
"node_modules/zod": {
|
||||
"version": "3.25.76",
|
||||
"resolved": "https://registry.npmjs.org/zod/-/zod-3.25.76.tgz",
|
||||
"integrity": "sha512-gzUt/qt81nXsFGKIFcC3YnfEAx5NkunCfnDlvuBSSFS02bcXu4Lmea0AFIUwbLWxWPx3d9p8S5QoaujKcNQxcQ==",
|
||||
"license": "MIT",
|
||||
"funding": {
|
||||
"url": "https://github.com/sponsors/colinhacks"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1 +0,0 @@
|
||||
{"name":"archivebox-plugins","private":true,"dependencies":{"puppeteer-core":"^24.34.0"}}
|
||||
@@ -3,9 +3,10 @@
|
||||
"type": "object",
|
||||
"additionalProperties": false,
|
||||
"properties": {
|
||||
"SAVE_PAPERSDL": {
|
||||
"PAPERSDL_ENABLED": {
|
||||
"type": "boolean",
|
||||
"default": true,
|
||||
"x-aliases": ["SAVE_PAPERSDL", "USE_PAPERSDL"],
|
||||
"description": "Enable paper downloading with papers-dl"
|
||||
},
|
||||
"PAPERSDL_BINARY": {
|
||||
|
||||
@@ -170,10 +170,6 @@ def main(url: str, snapshot_id: str = None, crawl_id: str = None, depth: int = 0
|
||||
if normalized != url:
|
||||
urls_found.add(unescape(normalized))
|
||||
|
||||
if not urls_found:
|
||||
click.echo('No URLs found', err=True)
|
||||
sys.exit(1)
|
||||
|
||||
# Emit Snapshot records to stdout (JSONL)
|
||||
for found_url in sorted(urls_found):
|
||||
record = {
|
||||
@@ -189,7 +185,17 @@ def main(url: str, snapshot_id: str = None, crawl_id: str = None, depth: int = 0
|
||||
|
||||
print(json.dumps(record))
|
||||
|
||||
click.echo(f'Found {len(urls_found)} URLs', err=True)
|
||||
# Emit ArchiveResult record to mark completion
|
||||
status = 'succeeded' if urls_found else 'skipped'
|
||||
output_str = f'Found {len(urls_found)} URLs' if urls_found else 'No URLs found'
|
||||
ar_record = {
|
||||
'type': 'ArchiveResult',
|
||||
'status': status,
|
||||
'output_str': output_str,
|
||||
}
|
||||
print(json.dumps(ar_record))
|
||||
|
||||
click.echo(output_str, err=True)
|
||||
sys.exit(0)
|
||||
|
||||
|
||||
|
||||
@@ -27,12 +27,13 @@ class TestParseHtmlUrls:
|
||||
|
||||
assert result.returncode == 0, f"Failed to parse example.com: {result.stderr}"
|
||||
|
||||
output_file = tmp_path / 'urls.jsonl'
|
||||
assert output_file.exists(), "Output file not created"
|
||||
# Verify stdout contains JSONL records for discovered URLs
|
||||
# example.com links to iana.org
|
||||
assert 'iana.org' in result.stdout or 'example' in result.stdout, "Expected links from example.com not found"
|
||||
|
||||
# Verify output contains IANA link (example.com links to iana.org)
|
||||
content = output_file.read_text()
|
||||
assert 'iana.org' in content or 'example' in content, "Expected links from example.com not found"
|
||||
# Verify ArchiveResult record is present
|
||||
assert '"type": "ArchiveResult"' in result.stdout, "Missing ArchiveResult record"
|
||||
assert '"status": "succeeded"' in result.stdout, "Missing success status"
|
||||
|
||||
def test_extracts_href_urls(self, tmp_path):
|
||||
"""Test extracting URLs from anchor tags."""
|
||||
@@ -56,17 +57,16 @@ class TestParseHtmlUrls:
|
||||
)
|
||||
|
||||
assert result.returncode == 0
|
||||
assert 'Found 3 URLs' in result.stdout
|
||||
assert 'Found 3 URLs' in result.stderr
|
||||
|
||||
output_file = tmp_path / 'urls.jsonl'
|
||||
assert output_file.exists()
|
||||
|
||||
lines = output_file.read_text().strip().split('\n')
|
||||
assert len(lines) == 3
|
||||
# Parse Snapshot records from stdout
|
||||
lines = [line for line in result.stdout.strip().split('\n') if line.strip() and '"type": "Snapshot"' in line]
|
||||
assert len(lines) == 3, f"Expected 3 Snapshot records, got {len(lines)}"
|
||||
|
||||
urls = set()
|
||||
for line in lines:
|
||||
entry = json.loads(line)
|
||||
assert entry['type'] == 'Snapshot'
|
||||
assert 'url' in entry
|
||||
urls.add(entry['url'])
|
||||
|
||||
@@ -74,6 +74,10 @@ class TestParseHtmlUrls:
|
||||
assert 'https://foo.bar/page' in urls
|
||||
assert 'http://test.org' in urls
|
||||
|
||||
# Verify ArchiveResult record
|
||||
assert '"type": "ArchiveResult"' in result.stdout
|
||||
assert '"status": "succeeded"' in result.stdout
|
||||
|
||||
def test_ignores_non_http_schemes(self, tmp_path):
|
||||
"""Test that non-http schemes are ignored."""
|
||||
input_file = tmp_path / 'page.html'
|
||||
@@ -96,9 +100,10 @@ class TestParseHtmlUrls:
|
||||
)
|
||||
|
||||
assert result.returncode == 0
|
||||
output_file = tmp_path / 'urls.jsonl'
|
||||
lines = output_file.read_text().strip().split('\n')
|
||||
assert len(lines) == 1
|
||||
|
||||
# Parse Snapshot records from stdout
|
||||
lines = [line for line in result.stdout.strip().split('\n') if line.strip() and '"type": "Snapshot"' in line]
|
||||
assert len(lines) == 1, f"Expected 1 Snapshot record, got {len(lines)}"
|
||||
|
||||
entry = json.loads(lines[0])
|
||||
assert entry['url'] == 'https://valid.com'
|
||||
@@ -122,8 +127,8 @@ class TestParseHtmlUrls:
|
||||
)
|
||||
|
||||
assert result.returncode == 0
|
||||
output_file = tmp_path / 'urls.jsonl'
|
||||
entry = json.loads(output_file.read_text().strip())
|
||||
lines = [line for line in result.stdout.strip().split('\n') if '"type": "Snapshot"' in line]
|
||||
entry = json.loads(lines[0])
|
||||
assert entry['url'] == 'https://example.com/page?a=1&b=2'
|
||||
|
||||
def test_deduplicates_urls(self, tmp_path):
|
||||
@@ -147,8 +152,7 @@ class TestParseHtmlUrls:
|
||||
)
|
||||
|
||||
assert result.returncode == 0
|
||||
output_file = tmp_path / 'urls.jsonl'
|
||||
lines = output_file.read_text().strip().split('\n')
|
||||
lines = [line for line in result.stdout.strip().split('\n') if '"type": "Snapshot"' in line]
|
||||
assert len(lines) == 1
|
||||
|
||||
def test_excludes_source_url(self, tmp_path):
|
||||
@@ -172,14 +176,13 @@ class TestParseHtmlUrls:
|
||||
)
|
||||
|
||||
assert result.returncode == 0
|
||||
output_file = tmp_path / 'urls.jsonl'
|
||||
lines = output_file.read_text().strip().split('\n')
|
||||
lines = [line for line in result.stdout.strip().split('\n') if '"type": "Snapshot"' in line]
|
||||
assert len(lines) == 1
|
||||
entry = json.loads(lines[0])
|
||||
assert entry['url'] == 'https://other.com'
|
||||
|
||||
def test_exits_1_when_no_urls_found(self, tmp_path):
|
||||
"""Test that script exits with code 1 when no URLs found."""
|
||||
def test_skips_when_no_urls_found(self, tmp_path):
|
||||
"""Test that script returns skipped status when no URLs found."""
|
||||
input_file = tmp_path / 'page.html'
|
||||
input_file.write_text('<html><body>No links here</body></html>')
|
||||
|
||||
@@ -190,8 +193,9 @@ class TestParseHtmlUrls:
|
||||
text=True,
|
||||
)
|
||||
|
||||
assert result.returncode == 1
|
||||
assert result.returncode == 0
|
||||
assert 'No URLs found' in result.stderr
|
||||
assert '"status": "skipped"' in result.stdout
|
||||
|
||||
def test_handles_malformed_html(self, tmp_path):
|
||||
"""Test handling of malformed HTML."""
|
||||
@@ -212,8 +216,7 @@ class TestParseHtmlUrls:
|
||||
)
|
||||
|
||||
assert result.returncode == 0
|
||||
output_file = tmp_path / 'urls.jsonl'
|
||||
lines = output_file.read_text().strip().split('\n')
|
||||
lines = [line for line in result.stdout.strip().split('\n') if '"type": "Snapshot"' in line]
|
||||
assert len(lines) == 2
|
||||
|
||||
def test_output_is_valid_json(self, tmp_path):
|
||||
@@ -229,11 +232,11 @@ class TestParseHtmlUrls:
|
||||
)
|
||||
|
||||
assert result.returncode == 0
|
||||
output_file = tmp_path / 'urls.jsonl'
|
||||
entry = json.loads(output_file.read_text().strip())
|
||||
lines = [line for line in result.stdout.strip().split('\n') if '"type": "Snapshot"' in line]
|
||||
entry = json.loads(lines[0])
|
||||
assert entry['url'] == 'https://example.com'
|
||||
assert 'type' in entry
|
||||
assert 'plugin' in entry
|
||||
assert entry['type'] == 'Snapshot'
|
||||
assert entry['plugin'] == 'parse_html_urls'
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
|
||||
@@ -170,10 +170,6 @@ def main(url: str, snapshot_id: str = None, crawl_id: str = None, depth: int = 0
|
||||
# Skip malformed lines
|
||||
continue
|
||||
|
||||
if not urls_found:
|
||||
click.echo('No URLs found', err=True)
|
||||
sys.exit(1)
|
||||
|
||||
# Emit Tag records first (to stdout as JSONL)
|
||||
for tag_name in sorted(all_tags):
|
||||
print(json.dumps({
|
||||
@@ -185,7 +181,17 @@ def main(url: str, snapshot_id: str = None, crawl_id: str = None, depth: int = 0
|
||||
for entry in urls_found:
|
||||
print(json.dumps(entry))
|
||||
|
||||
click.echo(f'Found {len(urls_found)} URLs, {len(all_tags)} tags', err=True)
|
||||
# Emit ArchiveResult record to mark completion
|
||||
status = 'succeeded' if urls_found else 'skipped'
|
||||
output_str = f'Found {len(urls_found)} URLs, {len(all_tags)} tags' if urls_found else 'No URLs found'
|
||||
ar_record = {
|
||||
'type': 'ArchiveResult',
|
||||
'status': status,
|
||||
'output_str': output_str,
|
||||
}
|
||||
print(json.dumps(ar_record))
|
||||
|
||||
click.echo(output_str, err=True)
|
||||
sys.exit(0)
|
||||
|
||||
|
||||
|
||||
@@ -34,10 +34,8 @@ class TestParseJsonlUrls:
|
||||
assert result.returncode == 0
|
||||
assert 'Found 3 URLs' in result.stdout
|
||||
|
||||
output_file = tmp_path / 'urls.jsonl'
|
||||
assert output_file.exists()
|
||||
|
||||
lines = output_file.read_text().strip().split('\n')
|
||||
# Output goes to stdout (JSONL)
|
||||
lines = [line for line in result.stdout.strip().split('\n') if line.strip() and '\"type\": \"Snapshot\"' in line]
|
||||
assert len(lines) == 3
|
||||
|
||||
entries = [json.loads(line) for line in lines]
|
||||
@@ -64,8 +62,9 @@ class TestParseJsonlUrls:
|
||||
)
|
||||
|
||||
assert result.returncode == 0
|
||||
output_file = tmp_path / 'urls.jsonl'
|
||||
entry = json.loads(output_file.read_text().strip())
|
||||
# Output goes to stdout (JSONL)
|
||||
lines = [line for line in result.stdout.strip().split('\n') if '\"type\": \"Snapshot\"' in line]
|
||||
entry = json.loads(lines[0])
|
||||
assert entry['url'] == 'https://example.com'
|
||||
|
||||
def test_supports_description_as_title(self, tmp_path):
|
||||
@@ -81,8 +80,9 @@ class TestParseJsonlUrls:
|
||||
)
|
||||
|
||||
assert result.returncode == 0
|
||||
output_file = tmp_path / 'urls.jsonl'
|
||||
entry = json.loads(output_file.read_text().strip())
|
||||
# Output goes to stdout (JSONL)
|
||||
lines = [line for line in result.stdout.strip().split('\n') if '\"type\": \"Snapshot\"' in line]
|
||||
entry = json.loads(lines[0])
|
||||
assert entry['title'] == 'A description'
|
||||
|
||||
def test_parses_various_timestamp_formats(self, tmp_path):
|
||||
@@ -98,8 +98,9 @@ class TestParseJsonlUrls:
|
||||
)
|
||||
|
||||
assert result.returncode == 0
|
||||
output_file = tmp_path / 'urls.jsonl'
|
||||
entry = json.loads(output_file.read_text().strip())
|
||||
# Output goes to stdout (JSONL)
|
||||
lines = [line for line in result.stdout.strip().split('\n') if '\"type\": \"Snapshot\"' in line]
|
||||
entry = json.loads(lines[0])
|
||||
# Parser converts timestamp to bookmarked_at
|
||||
assert 'bookmarked_at' in entry
|
||||
|
||||
@@ -116,9 +117,9 @@ class TestParseJsonlUrls:
|
||||
)
|
||||
|
||||
assert result.returncode == 0
|
||||
output_file = tmp_path / 'urls.jsonl'
|
||||
# Output goes to stdout (JSONL)
|
||||
# Parser converts tags to separate Tag objects in the output
|
||||
content = output_file.read_text()
|
||||
content = result.stdout
|
||||
assert 'tech' in content or 'news' in content or 'Tag' in content
|
||||
|
||||
def test_parses_tags_as_list(self, tmp_path):
|
||||
@@ -134,9 +135,9 @@ class TestParseJsonlUrls:
|
||||
)
|
||||
|
||||
assert result.returncode == 0
|
||||
output_file = tmp_path / 'urls.jsonl'
|
||||
# Output goes to stdout (JSONL)
|
||||
# Parser converts tags to separate Tag objects in the output
|
||||
content = output_file.read_text()
|
||||
content = result.stdout
|
||||
assert 'tech' in content or 'news' in content or 'Tag' in content
|
||||
|
||||
def test_skips_malformed_lines(self, tmp_path):
|
||||
@@ -156,8 +157,8 @@ class TestParseJsonlUrls:
|
||||
)
|
||||
|
||||
assert result.returncode == 0
|
||||
output_file = tmp_path / 'urls.jsonl'
|
||||
lines = output_file.read_text().strip().split('\n')
|
||||
# Output goes to stdout (JSONL)
|
||||
lines = [line for line in result.stdout.strip().split('\n') if line.strip() and '\"type\": \"Snapshot\"' in line]
|
||||
assert len(lines) == 2
|
||||
|
||||
def test_skips_entries_without_url(self, tmp_path):
|
||||
@@ -177,12 +178,12 @@ class TestParseJsonlUrls:
|
||||
)
|
||||
|
||||
assert result.returncode == 0
|
||||
output_file = tmp_path / 'urls.jsonl'
|
||||
lines = output_file.read_text().strip().split('\n')
|
||||
# Output goes to stdout (JSONL)
|
||||
lines = [line for line in result.stdout.strip().split('\n') if line.strip() and '\"type\": \"Snapshot\"' in line]
|
||||
assert len(lines) == 2
|
||||
|
||||
def test_exits_1_when_no_urls_found(self, tmp_path):
|
||||
"""Test that script exits with code 1 when no URLs found."""
|
||||
def test_skips_when_no_urls_found(self, tmp_path):
|
||||
"""Test that script returns skipped status when no URLs found."""
|
||||
input_file = tmp_path / 'empty.jsonl'
|
||||
input_file.write_text('{"title": "No URL"}\n')
|
||||
|
||||
@@ -193,8 +194,9 @@ class TestParseJsonlUrls:
|
||||
text=True,
|
||||
)
|
||||
|
||||
assert result.returncode == 1
|
||||
assert result.returncode == 0
|
||||
assert 'No URLs found' in result.stderr
|
||||
assert '"status": "skipped"' in result.stdout
|
||||
|
||||
def test_exits_1_when_file_not_found(self, tmp_path):
|
||||
"""Test that script exits with code 1 when file doesn't exist."""
|
||||
@@ -221,8 +223,9 @@ class TestParseJsonlUrls:
|
||||
)
|
||||
|
||||
assert result.returncode == 0
|
||||
output_file = tmp_path / 'urls.jsonl'
|
||||
entry = json.loads(output_file.read_text().strip())
|
||||
# Output goes to stdout (JSONL)
|
||||
lines = [line for line in result.stdout.strip().split('\n') if '\"type\": \"Snapshot\"' in line]
|
||||
entry = json.loads(lines[0])
|
||||
assert entry['url'] == 'https://example.com/page?a=1&b=2'
|
||||
assert entry['title'] == 'Test & Title'
|
||||
|
||||
@@ -244,8 +247,8 @@ class TestParseJsonlUrls:
|
||||
)
|
||||
|
||||
assert result.returncode == 0
|
||||
output_file = tmp_path / 'urls.jsonl'
|
||||
lines = output_file.read_text().strip().split('\n')
|
||||
# Output goes to stdout (JSONL)
|
||||
lines = [line for line in result.stdout.strip().split('\n') if line.strip() and '\"type\": \"Snapshot\"' in line]
|
||||
assert len(lines) == 2
|
||||
|
||||
def test_output_includes_required_fields(self, tmp_path):
|
||||
@@ -261,8 +264,9 @@ class TestParseJsonlUrls:
|
||||
)
|
||||
|
||||
assert result.returncode == 0
|
||||
output_file = tmp_path / 'urls.jsonl'
|
||||
entry = json.loads(output_file.read_text().strip())
|
||||
# Output goes to stdout (JSONL)
|
||||
lines = [line for line in result.stdout.strip().split('\n') if '\"type\": \"Snapshot\"' in line]
|
||||
entry = json.loads(lines[0])
|
||||
assert entry['url'] == 'https://example.com'
|
||||
assert 'type' in entry
|
||||
assert 'plugin' in entry
|
||||
|
||||
@@ -207,23 +207,28 @@ def main(url: str, snapshot_id: str = None):
|
||||
|
||||
urls_found.append(entry)
|
||||
|
||||
if not urls_found:
|
||||
click.echo('No bookmarks found', err=True)
|
||||
sys.exit(1)
|
||||
# Emit Tag records first (to stdout as JSONL)
|
||||
for tag_name in sorted(all_tags):
|
||||
print(json.dumps({
|
||||
'type': 'Tag',
|
||||
'name': tag_name,
|
||||
}))
|
||||
|
||||
# Write urls.jsonl
|
||||
with open('urls.jsonl', 'w') as f:
|
||||
# Write Tag records first
|
||||
for tag_name in sorted(all_tags):
|
||||
f.write(json.dumps({
|
||||
'type': 'Tag',
|
||||
'name': tag_name,
|
||||
}) + '\n')
|
||||
# Write Snapshot records
|
||||
for entry in urls_found:
|
||||
f.write(json.dumps(entry) + '\n')
|
||||
# Emit Snapshot records (to stdout as JSONL)
|
||||
for entry in urls_found:
|
||||
print(json.dumps(entry))
|
||||
|
||||
click.echo(f'Found {len(urls_found)} URLs, {len(all_tags)} tags')
|
||||
# Emit ArchiveResult record to mark completion
|
||||
status = 'succeeded' if urls_found else 'skipped'
|
||||
output_str = f'Found {len(urls_found)} URLs, {len(all_tags)} tags' if urls_found else 'No bookmarks found'
|
||||
ar_record = {
|
||||
'type': 'ArchiveResult',
|
||||
'status': status,
|
||||
'output_str': output_str,
|
||||
}
|
||||
print(json.dumps(ar_record))
|
||||
|
||||
click.echo(output_str, err=True)
|
||||
sys.exit(0)
|
||||
|
||||
|
||||
|
||||
@@ -39,10 +39,8 @@ class TestParseNetscapeUrls:
|
||||
assert result.returncode == 0
|
||||
assert 'Found 3 URLs' in result.stdout
|
||||
|
||||
output_file = tmp_path / 'urls.jsonl'
|
||||
assert output_file.exists()
|
||||
|
||||
lines = output_file.read_text().strip().split('\n')
|
||||
# Output goes to stdout (JSONL)
|
||||
lines = [line for line in result.stdout.strip().split('\n') if line.strip() and '\"type\": \"Snapshot\"' in line]
|
||||
assert len(lines) == 3
|
||||
|
||||
entries = [json.loads(line) for line in lines]
|
||||
@@ -71,8 +69,9 @@ class TestParseNetscapeUrls:
|
||||
)
|
||||
|
||||
assert result.returncode == 0
|
||||
output_file = tmp_path / 'urls.jsonl'
|
||||
entry = json.loads(output_file.read_text().strip())
|
||||
# Output goes to stdout (JSONL)
|
||||
lines = [line for line in result.stdout.strip().split('\n') if '\"type\": \"Snapshot\"' in line]
|
||||
entry = json.loads(lines[0])
|
||||
# Parser converts timestamp to bookmarked_at
|
||||
assert 'bookmarked_at' in entry
|
||||
|
||||
@@ -91,8 +90,9 @@ class TestParseNetscapeUrls:
|
||||
)
|
||||
|
||||
assert result.returncode == 0
|
||||
output_file = tmp_path / 'urls.jsonl'
|
||||
entry = json.loads(output_file.read_text().strip())
|
||||
# Output goes to stdout (JSONL)
|
||||
lines = [line for line in result.stdout.strip().split('\n') if '\"type\": \"Snapshot\"' in line]
|
||||
entry = json.loads(lines[0])
|
||||
assert 'q=test+query' in entry['url']
|
||||
assert 'page=1' in entry['url']
|
||||
|
||||
@@ -111,13 +111,14 @@ class TestParseNetscapeUrls:
|
||||
)
|
||||
|
||||
assert result.returncode == 0
|
||||
output_file = tmp_path / 'urls.jsonl'
|
||||
entry = json.loads(output_file.read_text().strip())
|
||||
# Output goes to stdout (JSONL)
|
||||
lines = [line for line in result.stdout.strip().split('\n') if '\"type\": \"Snapshot\"' in line]
|
||||
entry = json.loads(lines[0])
|
||||
assert entry['url'] == 'https://example.com/page?a=1&b=2'
|
||||
assert entry['title'] == 'Test & Title'
|
||||
|
||||
def test_exits_1_when_no_bookmarks_found(self, tmp_path):
|
||||
"""Test that script exits with code 1 when no bookmarks found."""
|
||||
def test_skips_when_no_bookmarks_found(self, tmp_path):
|
||||
"""Test that script returns skipped status when no bookmarks found."""
|
||||
input_file = tmp_path / 'empty.html'
|
||||
input_file.write_text('''<!DOCTYPE NETSCAPE-Bookmark-file-1>
|
||||
<TITLE>Bookmarks</TITLE>
|
||||
@@ -133,8 +134,9 @@ class TestParseNetscapeUrls:
|
||||
text=True,
|
||||
)
|
||||
|
||||
assert result.returncode == 1
|
||||
assert result.returncode == 0
|
||||
assert 'No bookmarks found' in result.stderr
|
||||
assert '"status": "skipped"' in result.stdout
|
||||
|
||||
def test_exits_1_when_file_not_found(self, tmp_path):
|
||||
"""Test that script exits with code 1 when file doesn't exist."""
|
||||
@@ -173,8 +175,8 @@ class TestParseNetscapeUrls:
|
||||
)
|
||||
|
||||
assert result.returncode == 0
|
||||
output_file = tmp_path / 'urls.jsonl'
|
||||
lines = output_file.read_text().strip().split('\n')
|
||||
# Output goes to stdout (JSONL)
|
||||
lines = [line for line in result.stdout.strip().split('\n') if line.strip() and '\"type\": \"Snapshot\"' in line]
|
||||
urls = {json.loads(line)['url'] for line in lines}
|
||||
|
||||
assert 'https://example.com/nested1' in urls
|
||||
@@ -196,8 +198,9 @@ class TestParseNetscapeUrls:
|
||||
)
|
||||
|
||||
assert result.returncode == 0
|
||||
output_file = tmp_path / 'urls.jsonl'
|
||||
entry = json.loads(output_file.read_text().strip())
|
||||
# Output goes to stdout (JSONL)
|
||||
lines = [line for line in result.stdout.strip().split('\n') if '\"type\": \"Snapshot\"' in line]
|
||||
entry = json.loads(lines[0])
|
||||
assert entry['url'] == 'https://example.com'
|
||||
|
||||
|
||||
|
||||
@@ -40,8 +40,8 @@ class TestFirefoxFormat:
|
||||
)
|
||||
|
||||
assert result.returncode == 0
|
||||
output_file = tmp_path / 'urls.jsonl'
|
||||
lines = output_file.read_text().strip().split('\n')
|
||||
# Output goes to stdout (JSONL)
|
||||
lines = [line for line in result.stdout.strip().split('\n') if line.strip() and '\"type\": \"Snapshot\"' in line]
|
||||
entries = [json.loads(line) for line in lines]
|
||||
|
||||
assert len(entries) == 2
|
||||
@@ -70,12 +70,13 @@ class TestFirefoxFormat:
|
||||
)
|
||||
|
||||
assert result.returncode == 0
|
||||
output_file = tmp_path / 'urls.jsonl'
|
||||
lines = output_file.read_text().strip().split('\n')
|
||||
# Output goes to stdout (JSONL) - get all JSONL records
|
||||
all_lines = [line for line in result.stdout.strip().split('\n') if line.strip() and line.startswith('{')]
|
||||
records = [json.loads(line) for line in all_lines]
|
||||
|
||||
# Should have Tag records + Snapshot records
|
||||
tags = [json.loads(line) for line in lines if json.loads(line)['type'] == 'Tag']
|
||||
snapshots = [json.loads(line) for line in lines if json.loads(line)['type'] == 'Snapshot']
|
||||
tags = [r for r in records if r.get('type') == 'Tag']
|
||||
snapshots = [r for r in records if r.get('type') == 'Snapshot']
|
||||
|
||||
tag_names = {t['name'] for t in tags}
|
||||
assert 'coding' in tag_names
|
||||
@@ -112,8 +113,8 @@ class TestFirefoxFormat:
|
||||
)
|
||||
|
||||
assert result.returncode == 0
|
||||
output_file = tmp_path / 'urls.jsonl'
|
||||
lines = output_file.read_text().strip().split('\n')
|
||||
# Output goes to stdout (JSONL)
|
||||
lines = [line for line in result.stdout.strip().split('\n') if line.strip() and '\"type\": \"Snapshot\"' in line]
|
||||
entries = [json.loads(line) for line in lines]
|
||||
urls = {e['url'] for e in entries}
|
||||
|
||||
@@ -141,8 +142,8 @@ class TestFirefoxFormat:
|
||||
)
|
||||
|
||||
assert result.returncode == 0
|
||||
output_file = tmp_path / 'urls.jsonl'
|
||||
lines = output_file.read_text().strip().split('\n')
|
||||
# Output goes to stdout (JSONL)
|
||||
lines = [line for line in result.stdout.strip().split('\n') if line.strip() and '\"type\": \"Snapshot\"' in line]
|
||||
entries = [json.loads(line) for line in lines]
|
||||
|
||||
assert entries[0]['url'] == 'https://example.com'
|
||||
@@ -175,8 +176,8 @@ class TestChromeFormat:
|
||||
)
|
||||
|
||||
assert result.returncode == 0
|
||||
output_file = tmp_path / 'urls.jsonl'
|
||||
lines = output_file.read_text().strip().split('\n')
|
||||
# Output goes to stdout (JSONL)
|
||||
lines = [line for line in result.stdout.strip().split('\n') if line.strip() and '\"type\": \"Snapshot\"' in line]
|
||||
entries = [json.loads(line) for line in lines]
|
||||
|
||||
# Should correctly parse microsecond timestamps
|
||||
@@ -212,8 +213,8 @@ class TestChromeFormat:
|
||||
)
|
||||
|
||||
assert result.returncode == 0
|
||||
output_file = tmp_path / 'urls.jsonl'
|
||||
lines = output_file.read_text().strip().split('\n')
|
||||
# Output goes to stdout (JSONL)
|
||||
lines = [line for line in result.stdout.strip().split('\n') if line.strip() and '\"type\": \"Snapshot\"' in line]
|
||||
entries = [json.loads(line) for line in lines]
|
||||
urls = {e['url'] for e in entries}
|
||||
|
||||
@@ -248,8 +249,8 @@ class TestSafariFormat:
|
||||
)
|
||||
|
||||
assert result.returncode == 0
|
||||
output_file = tmp_path / 'urls.jsonl'
|
||||
lines = output_file.read_text().strip().split('\n')
|
||||
# Output goes to stdout (JSONL)
|
||||
lines = [line for line in result.stdout.strip().split('\n') if line.strip() and '\"type\": \"Snapshot\"' in line]
|
||||
entries = [json.loads(line) for line in lines]
|
||||
urls = {e['url'] for e in entries}
|
||||
|
||||
@@ -279,8 +280,8 @@ class TestSafariFormat:
|
||||
)
|
||||
|
||||
assert result.returncode == 0
|
||||
output_file = tmp_path / 'urls.jsonl'
|
||||
lines = output_file.read_text().strip().split('\n')
|
||||
# Output goes to stdout (JSONL)
|
||||
lines = [line for line in result.stdout.strip().split('\n') if line.strip() and '\"type\": \"Snapshot\"' in line]
|
||||
entries = [json.loads(line) for line in lines]
|
||||
urls = {e['url'] for e in entries}
|
||||
|
||||
@@ -312,8 +313,8 @@ class TestEdgeFormat:
|
||||
)
|
||||
|
||||
assert result.returncode == 0
|
||||
output_file = tmp_path / 'urls.jsonl'
|
||||
lines = output_file.read_text().strip().split('\n')
|
||||
# Output goes to stdout (JSONL)
|
||||
lines = [line for line in result.stdout.strip().split('\n') if line.strip() and '\"type\": \"Snapshot\"' in line]
|
||||
entries = [json.loads(line) for line in lines]
|
||||
urls = {e['url'] for e in entries}
|
||||
|
||||
@@ -340,8 +341,9 @@ class TestTimestampFormats:
|
||||
)
|
||||
|
||||
assert result.returncode == 0
|
||||
output_file = tmp_path / 'urls.jsonl'
|
||||
entry = json.loads(output_file.read_text().strip())
|
||||
# Output goes to stdout (JSONL)
|
||||
lines = [line for line in result.stdout.strip().split('\n') if '\"type\": \"Snapshot\"' in line]
|
||||
entry = json.loads(lines[0])
|
||||
|
||||
dt = datetime.fromisoformat(entry['bookmarked_at'])
|
||||
assert dt.year == 2021
|
||||
@@ -366,8 +368,9 @@ class TestTimestampFormats:
|
||||
)
|
||||
|
||||
assert result.returncode == 0
|
||||
output_file = tmp_path / 'urls.jsonl'
|
||||
entry = json.loads(output_file.read_text().strip())
|
||||
# Output goes to stdout (JSONL)
|
||||
lines = [line for line in result.stdout.strip().split('\n') if '\"type\": \"Snapshot\"' in line]
|
||||
entry = json.loads(lines[0])
|
||||
|
||||
dt = datetime.fromisoformat(entry['bookmarked_at'])
|
||||
# Should detect Mac epoch and convert correctly to 2021
|
||||
@@ -389,8 +392,9 @@ class TestTimestampFormats:
|
||||
)
|
||||
|
||||
assert result.returncode == 0
|
||||
output_file = tmp_path / 'urls.jsonl'
|
||||
entry = json.loads(output_file.read_text().strip())
|
||||
# Output goes to stdout (JSONL)
|
||||
lines = [line for line in result.stdout.strip().split('\n') if '\"type\": \"Snapshot\"' in line]
|
||||
entry = json.loads(lines[0])
|
||||
|
||||
dt = datetime.fromisoformat(entry['bookmarked_at'])
|
||||
# Should detect Mac epoch and convert to 2024
|
||||
@@ -412,8 +416,9 @@ class TestTimestampFormats:
|
||||
)
|
||||
|
||||
assert result.returncode == 0
|
||||
output_file = tmp_path / 'urls.jsonl'
|
||||
entry = json.loads(output_file.read_text().strip())
|
||||
# Output goes to stdout (JSONL)
|
||||
lines = [line for line in result.stdout.strip().split('\n') if '\"type\": \"Snapshot\"' in line]
|
||||
entry = json.loads(lines[0])
|
||||
|
||||
dt = datetime.fromisoformat(entry['bookmarked_at'])
|
||||
assert dt.year == 2021
|
||||
@@ -437,8 +442,9 @@ class TestTimestampFormats:
|
||||
)
|
||||
|
||||
assert result.returncode == 0
|
||||
output_file = tmp_path / 'urls.jsonl'
|
||||
entry = json.loads(output_file.read_text().strip())
|
||||
# Output goes to stdout (JSONL)
|
||||
lines = [line for line in result.stdout.strip().split('\n') if '\"type\": \"Snapshot\"' in line]
|
||||
entry = json.loads(lines[0])
|
||||
|
||||
dt = datetime.fromisoformat(entry['bookmarked_at'])
|
||||
assert dt.year == 2021
|
||||
@@ -461,8 +467,9 @@ class TestTimestampFormats:
|
||||
)
|
||||
|
||||
assert result.returncode == 0
|
||||
output_file = tmp_path / 'urls.jsonl'
|
||||
entry = json.loads(output_file.read_text().strip())
|
||||
# Output goes to stdout (JSONL)
|
||||
lines = [line for line in result.stdout.strip().split('\n') if '\"type\": \"Snapshot\"' in line]
|
||||
entry = json.loads(lines[0])
|
||||
|
||||
dt = datetime.fromisoformat(entry['bookmarked_at'])
|
||||
# Should detect Mac epoch with milliseconds and convert to 2021
|
||||
@@ -487,8 +494,8 @@ class TestTimestampFormats:
|
||||
)
|
||||
|
||||
assert result.returncode == 0
|
||||
output_file = tmp_path / 'urls.jsonl'
|
||||
lines = output_file.read_text().strip().split('\n')
|
||||
# Output goes to stdout (JSONL)
|
||||
lines = [line for line in result.stdout.strip().split('\n') if line.strip() and '\"type\": \"Snapshot\"' in line]
|
||||
entries = [json.loads(line) for line in lines]
|
||||
|
||||
# All should be parsed to reasonable dates (2020-2025)
|
||||
@@ -512,8 +519,9 @@ class TestTimestampFormats:
|
||||
)
|
||||
|
||||
assert result.returncode == 0
|
||||
output_file = tmp_path / 'urls.jsonl'
|
||||
entry = json.loads(output_file.read_text().strip())
|
||||
# Output goes to stdout (JSONL)
|
||||
lines = [line for line in result.stdout.strip().split('\n') if '\"type\": \"Snapshot\"' in line]
|
||||
entry = json.loads(lines[0])
|
||||
|
||||
dt = datetime.fromisoformat(entry['bookmarked_at'])
|
||||
assert dt.year == 1996
|
||||
@@ -534,8 +542,9 @@ class TestTimestampFormats:
|
||||
)
|
||||
|
||||
assert result.returncode == 0
|
||||
output_file = tmp_path / 'urls.jsonl'
|
||||
entry = json.loads(output_file.read_text().strip())
|
||||
# Output goes to stdout (JSONL)
|
||||
lines = [line for line in result.stdout.strip().split('\n') if '\"type\": \"Snapshot\"' in line]
|
||||
entry = json.loads(lines[0])
|
||||
|
||||
dt = datetime.fromisoformat(entry['bookmarked_at'])
|
||||
assert dt.year == 2024
|
||||
@@ -555,8 +564,9 @@ class TestTimestampFormats:
|
||||
)
|
||||
|
||||
assert result.returncode == 0
|
||||
output_file = tmp_path / 'urls.jsonl'
|
||||
entry = json.loads(output_file.read_text().strip())
|
||||
# Output goes to stdout (JSONL)
|
||||
lines = [line for line in result.stdout.strip().split('\n') if '\"type\": \"Snapshot\"' in line]
|
||||
entry = json.loads(lines[0])
|
||||
|
||||
# Should still extract URL but skip timestamp
|
||||
assert entry['url'] == 'https://example.com'
|
||||
@@ -577,8 +587,9 @@ class TestTimestampFormats:
|
||||
)
|
||||
|
||||
assert result.returncode == 0
|
||||
output_file = tmp_path / 'urls.jsonl'
|
||||
entry = json.loads(output_file.read_text().strip())
|
||||
# Output goes to stdout (JSONL)
|
||||
lines = [line for line in result.stdout.strip().split('\n') if '\"type\": \"Snapshot\"' in line]
|
||||
entry = json.loads(lines[0])
|
||||
|
||||
# Timestamp 0 = 1970, which is before MIN_REASONABLE_YEAR (1995)
|
||||
# Parser should skip it as unreasonable
|
||||
@@ -603,8 +614,9 @@ class TestTimestampFormats:
|
||||
|
||||
# Should handle gracefully (extracts URL, may or may not include timestamp)
|
||||
assert result.returncode == 0
|
||||
output_file = tmp_path / 'urls.jsonl'
|
||||
entry = json.loads(output_file.read_text().strip())
|
||||
# Output goes to stdout (JSONL)
|
||||
lines = [line for line in result.stdout.strip().split('\n') if '\"type\": \"Snapshot\"' in line]
|
||||
entry = json.loads(lines[0])
|
||||
assert entry['url'] == 'https://example.com'
|
||||
# If timestamp is included, should be reasonable (1969)
|
||||
if 'bookmarked_at' in entry:
|
||||
@@ -632,8 +644,8 @@ class TestBookmarkAttributes:
|
||||
)
|
||||
|
||||
assert result.returncode == 0
|
||||
output_file = tmp_path / 'urls.jsonl'
|
||||
lines = output_file.read_text().strip().split('\n')
|
||||
# Output goes to stdout (JSONL)
|
||||
lines = [line for line in result.stdout.strip().split('\n') if line.strip() and '\"type\": \"Snapshot\"' in line]
|
||||
entries = [json.loads(line) for line in lines]
|
||||
|
||||
# Both should be extracted
|
||||
@@ -654,8 +666,9 @@ class TestBookmarkAttributes:
|
||||
)
|
||||
|
||||
assert result.returncode == 0
|
||||
output_file = tmp_path / 'urls.jsonl'
|
||||
entry = json.loads(output_file.read_text().strip())
|
||||
# Output goes to stdout (JSONL)
|
||||
lines = [line for line in result.stdout.strip().split('\n') if '\"type\": \"Snapshot\"' in line]
|
||||
entry = json.loads(lines[0])
|
||||
|
||||
assert 'google.com' in entry['url']
|
||||
|
||||
@@ -674,8 +687,9 @@ class TestBookmarkAttributes:
|
||||
)
|
||||
|
||||
assert result.returncode == 0
|
||||
output_file = tmp_path / 'urls.jsonl'
|
||||
entry = json.loads(output_file.read_text().strip())
|
||||
# Output goes to stdout (JSONL)
|
||||
lines = [line for line in result.stdout.strip().split('\n') if '\"type\": \"Snapshot\"' in line]
|
||||
entry = json.loads(lines[0])
|
||||
|
||||
assert entry['url'] == 'https://example.com/login'
|
||||
|
||||
@@ -704,9 +718,9 @@ class TestEdgeCases:
|
||||
# Current regex works line-by-line, so this might not match
|
||||
# Document current behavior
|
||||
if result.returncode == 0:
|
||||
output_file = tmp_path / 'urls.jsonl'
|
||||
# Output goes to stdout (JSONL)
|
||||
if output_file.exists():
|
||||
content = output_file.read_text().strip()
|
||||
content = result.stdout.strip()
|
||||
if content:
|
||||
entry = json.loads(content)
|
||||
assert 'example.com' in entry['url']
|
||||
@@ -727,8 +741,9 @@ class TestEdgeCases:
|
||||
|
||||
# Should succeed and extract URL without timestamp
|
||||
assert result.returncode == 0
|
||||
output_file = tmp_path / 'urls.jsonl'
|
||||
entry = json.loads(output_file.read_text().strip())
|
||||
# Output goes to stdout (JSONL)
|
||||
lines = [line for line in result.stdout.strip().split('\n') if '\"type\": \"Snapshot\"' in line]
|
||||
entry = json.loads(lines[0])
|
||||
assert entry['url'] == 'https://example.com'
|
||||
assert entry['title'] == 'No Date'
|
||||
assert 'bookmarked_at' not in entry
|
||||
@@ -768,8 +783,8 @@ class TestEdgeCases:
|
||||
)
|
||||
|
||||
assert result.returncode == 0
|
||||
output_file = tmp_path / 'urls.jsonl'
|
||||
lines = output_file.read_text().strip().split('\n')
|
||||
# Output goes to stdout (JSONL)
|
||||
lines = [line for line in result.stdout.strip().split('\n') if line.strip() and '\"type\": \"Snapshot\"' in line]
|
||||
entries = [json.loads(line) for line in lines]
|
||||
|
||||
assert len(entries) == 3
|
||||
@@ -792,8 +807,8 @@ class TestEdgeCases:
|
||||
)
|
||||
|
||||
assert result.returncode == 0
|
||||
output_file = tmp_path / 'urls.jsonl'
|
||||
lines = output_file.read_text().strip().split('\n')
|
||||
# Output goes to stdout (JSONL)
|
||||
lines = [line for line in result.stdout.strip().split('\n') if line.strip() and '\"type\": \"Snapshot\"' in line]
|
||||
entries = [json.loads(line) for line in lines]
|
||||
|
||||
# Both should be extracted
|
||||
@@ -815,8 +830,9 @@ class TestEdgeCases:
|
||||
)
|
||||
|
||||
assert result.returncode == 0
|
||||
output_file = tmp_path / 'urls.jsonl'
|
||||
entry = json.loads(output_file.read_text().strip())
|
||||
# Output goes to stdout (JSONL)
|
||||
lines = [line for line in result.stdout.strip().split('\n') if '\"type\": \"Snapshot\"' in line]
|
||||
entry = json.loads(lines[0])
|
||||
|
||||
assert entry['url'].startswith('data:')
|
||||
|
||||
@@ -835,8 +851,9 @@ class TestEdgeCases:
|
||||
)
|
||||
|
||||
assert result.returncode == 0
|
||||
output_file = tmp_path / 'urls.jsonl'
|
||||
entry = json.loads(output_file.read_text().strip())
|
||||
# Output goes to stdout (JSONL)
|
||||
lines = [line for line in result.stdout.strip().split('\n') if '\"type\": \"Snapshot\"' in line]
|
||||
entry = json.loads(lines[0])
|
||||
|
||||
assert entry['url'].startswith('file://')
|
||||
|
||||
@@ -856,8 +873,9 @@ class TestEdgeCases:
|
||||
)
|
||||
|
||||
assert result.returncode == 0
|
||||
output_file = tmp_path / 'urls.jsonl'
|
||||
entry = json.loads(output_file.read_text().strip())
|
||||
# Output goes to stdout (JSONL)
|
||||
lines = [line for line in result.stdout.strip().split('\n') if '\"type\": \"Snapshot\"' in line]
|
||||
entry = json.loads(lines[0])
|
||||
|
||||
assert len(entry['url']) > 1000
|
||||
assert entry['url'].startswith('https://example.com')
|
||||
@@ -881,7 +899,7 @@ class TestEdgeCases:
|
||||
)
|
||||
|
||||
assert result.returncode == 0
|
||||
output_file = tmp_path / 'urls.jsonl'
|
||||
# Output goes to stdout (JSONL)
|
||||
lines = output_file.read_text(encoding='utf-8').strip().split('\n')
|
||||
entries = [json.loads(line) for line in lines]
|
||||
|
||||
@@ -915,8 +933,8 @@ class TestEdgeCases:
|
||||
assert result.returncode == 0
|
||||
assert 'Found 1000 URLs' in result.stdout
|
||||
|
||||
output_file = tmp_path / 'urls.jsonl'
|
||||
lines = output_file.read_text().strip().split('\n')
|
||||
# Output goes to stdout (JSONL)
|
||||
lines = [line for line in result.stdout.strip().split('\n') if line.strip() and '\"type\": \"Snapshot\"' in line]
|
||||
|
||||
# Should have 10 unique tags + 1000 snapshots
|
||||
tags = [json.loads(line) for line in lines if json.loads(line)['type'] == 'Tag']
|
||||
|
||||
@@ -70,61 +70,57 @@ def main(url: str, snapshot_id: str = None, crawl_id: str = None, depth: int = 0
|
||||
# Parse the feed
|
||||
feed = feedparser.parse(content)
|
||||
|
||||
if not feed.entries:
|
||||
click.echo('No entries found in feed', err=True)
|
||||
sys.exit(1)
|
||||
|
||||
urls_found = []
|
||||
all_tags = set()
|
||||
|
||||
for item in feed.entries:
|
||||
item_url = getattr(item, 'link', None)
|
||||
if not item_url:
|
||||
continue
|
||||
if not feed.entries:
|
||||
# No entries - will emit skipped status at end
|
||||
pass
|
||||
else:
|
||||
for item in feed.entries:
|
||||
item_url = getattr(item, 'link', None)
|
||||
if not item_url:
|
||||
continue
|
||||
|
||||
title = getattr(item, 'title', None)
|
||||
title = getattr(item, 'title', None)
|
||||
|
||||
# Get bookmarked_at (published/updated date as ISO 8601)
|
||||
bookmarked_at = None
|
||||
if hasattr(item, 'published_parsed') and item.published_parsed:
|
||||
bookmarked_at = datetime.fromtimestamp(mktime(item.published_parsed), tz=timezone.utc).isoformat()
|
||||
elif hasattr(item, 'updated_parsed') and item.updated_parsed:
|
||||
bookmarked_at = datetime.fromtimestamp(mktime(item.updated_parsed), tz=timezone.utc).isoformat()
|
||||
# Get bookmarked_at (published/updated date as ISO 8601)
|
||||
bookmarked_at = None
|
||||
if hasattr(item, 'published_parsed') and item.published_parsed:
|
||||
bookmarked_at = datetime.fromtimestamp(mktime(item.published_parsed), tz=timezone.utc).isoformat()
|
||||
elif hasattr(item, 'updated_parsed') and item.updated_parsed:
|
||||
bookmarked_at = datetime.fromtimestamp(mktime(item.updated_parsed), tz=timezone.utc).isoformat()
|
||||
|
||||
# Get tags
|
||||
tags = ''
|
||||
if hasattr(item, 'tags') and item.tags:
|
||||
try:
|
||||
tags = ','.join(tag.term for tag in item.tags if hasattr(tag, 'term'))
|
||||
# Collect unique tags
|
||||
for tag in tags.split(','):
|
||||
tag = tag.strip()
|
||||
if tag:
|
||||
all_tags.add(tag)
|
||||
except (AttributeError, TypeError):
|
||||
pass
|
||||
# Get tags
|
||||
tags = ''
|
||||
if hasattr(item, 'tags') and item.tags:
|
||||
try:
|
||||
tags = ','.join(tag.term for tag in item.tags if hasattr(tag, 'term'))
|
||||
# Collect unique tags
|
||||
for tag in tags.split(','):
|
||||
tag = tag.strip()
|
||||
if tag:
|
||||
all_tags.add(tag)
|
||||
except (AttributeError, TypeError):
|
||||
pass
|
||||
|
||||
entry = {
|
||||
'type': 'Snapshot',
|
||||
'url': unescape(item_url),
|
||||
'plugin': PLUGIN_NAME,
|
||||
'depth': depth + 1,
|
||||
}
|
||||
if snapshot_id:
|
||||
entry['parent_snapshot_id'] = snapshot_id
|
||||
if crawl_id:
|
||||
entry['crawl_id'] = crawl_id
|
||||
if title:
|
||||
entry['title'] = unescape(title)
|
||||
if bookmarked_at:
|
||||
entry['bookmarked_at'] = bookmarked_at
|
||||
if tags:
|
||||
entry['tags'] = tags
|
||||
urls_found.append(entry)
|
||||
|
||||
if not urls_found:
|
||||
click.echo('No valid URLs found in feed entries', err=True)
|
||||
sys.exit(1)
|
||||
entry = {
|
||||
'type': 'Snapshot',
|
||||
'url': unescape(item_url),
|
||||
'plugin': PLUGIN_NAME,
|
||||
'depth': depth + 1,
|
||||
}
|
||||
if snapshot_id:
|
||||
entry['parent_snapshot_id'] = snapshot_id
|
||||
if crawl_id:
|
||||
entry['crawl_id'] = crawl_id
|
||||
if title:
|
||||
entry['title'] = unescape(title)
|
||||
if bookmarked_at:
|
||||
entry['bookmarked_at'] = bookmarked_at
|
||||
if tags:
|
||||
entry['tags'] = tags
|
||||
urls_found.append(entry)
|
||||
|
||||
# Emit Tag records first (to stdout as JSONL)
|
||||
for tag_name in sorted(all_tags):
|
||||
@@ -137,7 +133,17 @@ def main(url: str, snapshot_id: str = None, crawl_id: str = None, depth: int = 0
|
||||
for entry in urls_found:
|
||||
print(json.dumps(entry))
|
||||
|
||||
click.echo(f'Found {len(urls_found)} URLs, {len(all_tags)} tags', err=True)
|
||||
# Emit ArchiveResult record to mark completion
|
||||
status = 'succeeded' if urls_found else 'skipped'
|
||||
output_str = f'Found {len(urls_found)} URLs, {len(all_tags)} tags' if urls_found else 'No URLs found'
|
||||
ar_record = {
|
||||
'type': 'ArchiveResult',
|
||||
'status': status,
|
||||
'output_str': output_str,
|
||||
}
|
||||
print(json.dumps(ar_record))
|
||||
|
||||
click.echo(output_str, err=True)
|
||||
sys.exit(0)
|
||||
|
||||
|
||||
|
||||
@@ -28,10 +28,8 @@ class TestParseRssUrls:
|
||||
|
||||
# HN RSS feed should parse successfully
|
||||
if result.returncode == 0:
|
||||
output_file = tmp_path / 'urls.jsonl'
|
||||
assert output_file.exists(), "Output file not created"
|
||||
|
||||
content = output_file.read_text()
|
||||
# Output goes to stdout (JSONL)
|
||||
content = result.stdout
|
||||
assert len(content) > 0, "No URLs extracted from real RSS feed"
|
||||
|
||||
# Verify at least one URL was extracted
|
||||
@@ -70,10 +68,8 @@ class TestParseRssUrls:
|
||||
assert result.returncode == 0
|
||||
assert 'Found 2 URLs' in result.stdout
|
||||
|
||||
output_file = tmp_path / 'urls.jsonl'
|
||||
assert output_file.exists()
|
||||
|
||||
lines = output_file.read_text().strip().split('\n')
|
||||
# Output goes to stdout (JSONL)
|
||||
lines = [line for line in result.stdout.strip().split('\n') if line.strip() and '\"type\": \"Snapshot\"' in line]
|
||||
assert len(lines) == 2
|
||||
|
||||
entries = [json.loads(line) for line in lines]
|
||||
@@ -112,15 +108,15 @@ class TestParseRssUrls:
|
||||
)
|
||||
|
||||
assert result.returncode == 0
|
||||
output_file = tmp_path / 'urls.jsonl'
|
||||
lines = output_file.read_text().strip().split('\n')
|
||||
# Output goes to stdout (JSONL)
|
||||
lines = [line for line in result.stdout.strip().split('\n') if line.strip() and '\"type\": \"Snapshot\"' in line]
|
||||
urls = {json.loads(line)['url'] for line in lines}
|
||||
|
||||
assert 'https://atom.example.com/entry/1' in urls
|
||||
assert 'https://atom.example.com/entry/2' in urls
|
||||
|
||||
def test_exits_1_when_no_entries(self, tmp_path):
|
||||
"""Test that script exits with code 1 when feed has no entries."""
|
||||
def test_skips_when_no_entries(self, tmp_path):
|
||||
"""Test that script returns skipped status when feed has no entries."""
|
||||
input_file = tmp_path / 'empty.rss'
|
||||
input_file.write_text('''<?xml version="1.0"?>
|
||||
<rss version="2.0">
|
||||
@@ -137,8 +133,9 @@ class TestParseRssUrls:
|
||||
text=True,
|
||||
)
|
||||
|
||||
assert result.returncode == 1
|
||||
assert 'No entries found' in result.stderr
|
||||
assert result.returncode == 0
|
||||
assert 'No URLs found' in result.stderr
|
||||
assert '"status": "skipped"' in result.stdout
|
||||
|
||||
def test_exits_1_when_file_not_found(self, tmp_path):
|
||||
"""Test that script exits with code 1 when file doesn't exist."""
|
||||
@@ -174,8 +171,9 @@ class TestParseRssUrls:
|
||||
)
|
||||
|
||||
assert result.returncode == 0
|
||||
output_file = tmp_path / 'urls.jsonl'
|
||||
entry = json.loads(output_file.read_text().strip())
|
||||
# Output goes to stdout (JSONL)
|
||||
lines = [line for line in result.stdout.strip().split('\n') if '\"type\": \"Snapshot\"' in line]
|
||||
entry = json.loads(lines[0])
|
||||
assert entry['url'] == 'https://example.com/page?a=1&b=2'
|
||||
|
||||
def test_includes_optional_metadata(self, tmp_path):
|
||||
@@ -201,8 +199,9 @@ class TestParseRssUrls:
|
||||
)
|
||||
|
||||
assert result.returncode == 0
|
||||
output_file = tmp_path / 'urls.jsonl'
|
||||
entry = json.loads(output_file.read_text().strip())
|
||||
# Output goes to stdout (JSONL)
|
||||
lines = [line for line in result.stdout.strip().split('\n') if '\"type\": \"Snapshot\"' in line]
|
||||
entry = json.loads(lines[0])
|
||||
assert entry['url'] == 'https://example.com/test'
|
||||
assert entry['title'] == 'Test Title'
|
||||
# Parser converts timestamp to bookmarked_at
|
||||
|
||||
@@ -41,8 +41,8 @@ class TestRssVariants:
|
||||
)
|
||||
|
||||
assert result.returncode == 0, f"Failed: {result.stderr}"
|
||||
output_file = tmp_path / 'urls.jsonl'
|
||||
lines = output_file.read_text().strip().split('\n')
|
||||
# Output goes to stdout (JSONL)
|
||||
lines = [line for line in result.stdout.strip().split('\n') if line.strip() and '\"type\": \"Snapshot\"' in line]
|
||||
entry = json.loads(lines[0])
|
||||
|
||||
assert entry['url'] == 'https://example.com/article1'
|
||||
@@ -82,8 +82,8 @@ class TestRssVariants:
|
||||
)
|
||||
|
||||
assert result.returncode == 0, f"Failed: {result.stderr}"
|
||||
output_file = tmp_path / 'urls.jsonl'
|
||||
lines = output_file.read_text().strip().split('\n')
|
||||
# Output goes to stdout (JSONL)
|
||||
lines = [line for line in result.stdout.strip().split('\n') if line.strip() and '\"type\": \"Snapshot\"' in line]
|
||||
entries = [json.loads(line) for line in lines if json.loads(line)['type'] == 'Snapshot']
|
||||
|
||||
urls = {e['url'] for e in entries}
|
||||
@@ -122,8 +122,8 @@ class TestRssVariants:
|
||||
)
|
||||
|
||||
assert result.returncode == 0
|
||||
output_file = tmp_path / 'urls.jsonl'
|
||||
content = output_file.read_text().strip()
|
||||
# Output goes to stdout (JSONL)
|
||||
content = result.stdout.strip()
|
||||
lines = content.split('\n')
|
||||
|
||||
# Check for Tag records
|
||||
@@ -171,8 +171,8 @@ class TestAtomVariants:
|
||||
)
|
||||
|
||||
assert result.returncode == 0
|
||||
output_file = tmp_path / 'urls.jsonl'
|
||||
lines = output_file.read_text().strip().split('\n')
|
||||
# Output goes to stdout (JSONL)
|
||||
lines = [line for line in result.stdout.strip().split('\n') if line.strip() and '\"type\": \"Snapshot\"' in line]
|
||||
|
||||
tags = [json.loads(line) for line in lines if json.loads(line)['type'] == 'Tag']
|
||||
tag_names = {t['name'] for t in tags}
|
||||
@@ -207,8 +207,9 @@ class TestAtomVariants:
|
||||
)
|
||||
|
||||
assert result.returncode == 0
|
||||
output_file = tmp_path / 'urls.jsonl'
|
||||
entry = json.loads(output_file.read_text().strip())
|
||||
# Output goes to stdout (JSONL)
|
||||
lines = [line for line in result.stdout.strip().split('\n') if '\"type\": \"Snapshot\"' in line]
|
||||
entry = json.loads(lines[0])
|
||||
# feedparser should pick the alternate link
|
||||
assert 'atom.example.com/article' in entry['url']
|
||||
|
||||
@@ -239,8 +240,9 @@ class TestDateFormats:
|
||||
)
|
||||
|
||||
assert result.returncode == 0
|
||||
output_file = tmp_path / 'urls.jsonl'
|
||||
entry = json.loads(output_file.read_text().strip())
|
||||
# Output goes to stdout (JSONL)
|
||||
lines = [line for line in result.stdout.strip().split('\n') if '\"type\": \"Snapshot\"' in line]
|
||||
entry = json.loads(lines[0])
|
||||
assert 'bookmarked_at' in entry
|
||||
assert '2020-01-15' in entry['bookmarked_at']
|
||||
|
||||
@@ -265,8 +267,9 @@ class TestDateFormats:
|
||||
)
|
||||
|
||||
assert result.returncode == 0
|
||||
output_file = tmp_path / 'urls.jsonl'
|
||||
entry = json.loads(output_file.read_text().strip())
|
||||
# Output goes to stdout (JSONL)
|
||||
lines = [line for line in result.stdout.strip().split('\n') if '\"type\": \"Snapshot\"' in line]
|
||||
entry = json.loads(lines[0])
|
||||
assert 'bookmarked_at' in entry
|
||||
assert '2024-01-15' in entry['bookmarked_at']
|
||||
|
||||
@@ -292,8 +295,9 @@ class TestDateFormats:
|
||||
)
|
||||
|
||||
assert result.returncode == 0
|
||||
output_file = tmp_path / 'urls.jsonl'
|
||||
entry = json.loads(output_file.read_text().strip())
|
||||
# Output goes to stdout (JSONL)
|
||||
lines = [line for line in result.stdout.strip().split('\n') if '\"type\": \"Snapshot\"' in line]
|
||||
entry = json.loads(lines[0])
|
||||
# Should use published date (Jan 10) not updated date (Jan 15)
|
||||
assert '2024-01-10' in entry['bookmarked_at']
|
||||
|
||||
@@ -318,8 +322,9 @@ class TestDateFormats:
|
||||
)
|
||||
|
||||
assert result.returncode == 0
|
||||
output_file = tmp_path / 'urls.jsonl'
|
||||
entry = json.loads(output_file.read_text().strip())
|
||||
# Output goes to stdout (JSONL)
|
||||
lines = [line for line in result.stdout.strip().split('\n') if '\"type\": \"Snapshot\"' in line]
|
||||
entry = json.loads(lines[0])
|
||||
assert '2024-01-20' in entry['bookmarked_at']
|
||||
|
||||
def test_no_date(self, tmp_path):
|
||||
@@ -344,8 +349,9 @@ class TestDateFormats:
|
||||
)
|
||||
|
||||
assert result.returncode == 0
|
||||
output_file = tmp_path / 'urls.jsonl'
|
||||
entry = json.loads(output_file.read_text().strip())
|
||||
# Output goes to stdout (JSONL)
|
||||
lines = [line for line in result.stdout.strip().split('\n') if '\"type\": \"Snapshot\"' in line]
|
||||
entry = json.loads(lines[0])
|
||||
assert 'bookmarked_at' not in entry
|
||||
|
||||
|
||||
@@ -377,8 +383,8 @@ class TestTagsAndCategories:
|
||||
)
|
||||
|
||||
assert result.returncode == 0
|
||||
output_file = tmp_path / 'urls.jsonl'
|
||||
lines = output_file.read_text().strip().split('\n')
|
||||
# Output goes to stdout (JSONL)
|
||||
lines = [line for line in result.stdout.strip().split('\n') if line.strip() and '\"type\": \"Snapshot\"' in line]
|
||||
|
||||
tags = [json.loads(line) for line in lines if json.loads(line)['type'] == 'Tag']
|
||||
tag_names = {t['name'] for t in tags}
|
||||
@@ -414,8 +420,8 @@ class TestTagsAndCategories:
|
||||
)
|
||||
|
||||
assert result.returncode == 0
|
||||
output_file = tmp_path / 'urls.jsonl'
|
||||
lines = output_file.read_text().strip().split('\n')
|
||||
# Output goes to stdout (JSONL)
|
||||
lines = [line for line in result.stdout.strip().split('\n') if line.strip() and '\"type\": \"Snapshot\"' in line]
|
||||
|
||||
tags = [json.loads(line) for line in lines if json.loads(line)['type'] == 'Tag']
|
||||
tag_names = {t['name'] for t in tags}
|
||||
@@ -445,8 +451,9 @@ class TestTagsAndCategories:
|
||||
)
|
||||
|
||||
assert result.returncode == 0
|
||||
output_file = tmp_path / 'urls.jsonl'
|
||||
entry = json.loads(output_file.read_text().strip())
|
||||
# Output goes to stdout (JSONL)
|
||||
lines = [line for line in result.stdout.strip().split('\n') if '\"type\": \"Snapshot\"' in line]
|
||||
entry = json.loads(lines[0])
|
||||
assert 'tags' not in entry or entry['tags'] == ''
|
||||
|
||||
def test_duplicate_tags(self, tmp_path):
|
||||
@@ -474,8 +481,8 @@ class TestTagsAndCategories:
|
||||
)
|
||||
|
||||
assert result.returncode == 0
|
||||
output_file = tmp_path / 'urls.jsonl'
|
||||
lines = output_file.read_text().strip().split('\n')
|
||||
# Output goes to stdout (JSONL)
|
||||
lines = [line for line in result.stdout.strip().split('\n') if line.strip() and '\"type\": \"Snapshot\"' in line]
|
||||
tags = [json.loads(line) for line in lines if json.loads(line)['type'] == 'Tag']
|
||||
# Tag records should be unique
|
||||
tag_names = [t['name'] for t in tags]
|
||||
@@ -514,8 +521,8 @@ class TestCustomNamespaces:
|
||||
)
|
||||
|
||||
assert result.returncode == 0
|
||||
output_file = tmp_path / 'urls.jsonl'
|
||||
lines = output_file.read_text().strip().split('\n')
|
||||
# Output goes to stdout (JSONL)
|
||||
lines = [line for line in result.stdout.strip().split('\n') if line.strip() and '\"type\": \"Snapshot\"' in line]
|
||||
snapshots = [json.loads(line) for line in lines if json.loads(line)['type'] == 'Snapshot']
|
||||
entry = snapshots[0]
|
||||
|
||||
@@ -550,8 +557,9 @@ class TestCustomNamespaces:
|
||||
)
|
||||
|
||||
assert result.returncode == 0
|
||||
output_file = tmp_path / 'urls.jsonl'
|
||||
entry = json.loads(output_file.read_text().strip())
|
||||
# Output goes to stdout (JSONL)
|
||||
lines = [line for line in result.stdout.strip().split('\n') if '\"type\": \"Snapshot\"' in line]
|
||||
entry = json.loads(lines[0])
|
||||
|
||||
assert entry['url'] == 'https://example.com/podcast/1'
|
||||
assert entry['title'] == 'Podcast Episode 1'
|
||||
@@ -583,8 +591,8 @@ class TestCustomNamespaces:
|
||||
)
|
||||
|
||||
assert result.returncode == 0
|
||||
output_file = tmp_path / 'urls.jsonl'
|
||||
lines = output_file.read_text().strip().split('\n')
|
||||
# Output goes to stdout (JSONL)
|
||||
lines = [line for line in result.stdout.strip().split('\n') if line.strip() and '\"type\": \"Snapshot\"' in line]
|
||||
snapshots = [json.loads(line) for line in lines if json.loads(line)['type'] == 'Snapshot']
|
||||
entry = snapshots[0]
|
||||
|
||||
@@ -617,8 +625,9 @@ class TestEdgeCases:
|
||||
)
|
||||
|
||||
assert result.returncode == 0
|
||||
output_file = tmp_path / 'urls.jsonl'
|
||||
entry = json.loads(output_file.read_text().strip())
|
||||
# Output goes to stdout (JSONL)
|
||||
lines = [line for line in result.stdout.strip().split('\n') if '\"type\": \"Snapshot\"' in line]
|
||||
entry = json.loads(lines[0])
|
||||
|
||||
assert entry['url'] == 'https://example.com/notitle'
|
||||
assert 'title' not in entry
|
||||
@@ -649,8 +658,9 @@ class TestEdgeCases:
|
||||
)
|
||||
|
||||
assert result.returncode == 0
|
||||
output_file = tmp_path / 'urls.jsonl'
|
||||
entry = json.loads(output_file.read_text().strip())
|
||||
# Output goes to stdout (JSONL)
|
||||
lines = [line for line in result.stdout.strip().split('\n') if '\"type\": \"Snapshot\"' in line]
|
||||
entry = json.loads(lines[0])
|
||||
|
||||
# Should only have the entry with a link
|
||||
assert entry['url'] == 'https://example.com/haslink'
|
||||
@@ -678,8 +688,9 @@ class TestEdgeCases:
|
||||
)
|
||||
|
||||
assert result.returncode == 0
|
||||
output_file = tmp_path / 'urls.jsonl'
|
||||
entry = json.loads(output_file.read_text().strip())
|
||||
# Output goes to stdout (JSONL)
|
||||
lines = [line for line in result.stdout.strip().split('\n') if '\"type\": \"Snapshot\"' in line]
|
||||
entry = json.loads(lines[0])
|
||||
|
||||
assert entry['title'] == 'Using <div> & <span> tags'
|
||||
|
||||
@@ -708,8 +719,8 @@ class TestEdgeCases:
|
||||
)
|
||||
|
||||
assert result.returncode == 0
|
||||
output_file = tmp_path / 'urls.jsonl'
|
||||
lines = output_file.read_text().strip().split('\n')
|
||||
# Output goes to stdout (JSONL)
|
||||
lines = [line for line in result.stdout.strip().split('\n') if line.strip() and '\"type\": \"Snapshot\"' in line]
|
||||
|
||||
tags = [json.loads(line) for line in lines if json.loads(line)['type'] == 'Tag']
|
||||
tag_names = {t['name'] for t in tags}
|
||||
@@ -740,8 +751,9 @@ class TestEdgeCases:
|
||||
)
|
||||
|
||||
assert result.returncode == 0
|
||||
output_file = tmp_path / 'urls.jsonl'
|
||||
entry = json.loads(output_file.read_text().strip())
|
||||
# Output goes to stdout (JSONL)
|
||||
lines = [line for line in result.stdout.strip().split('\n') if '\"type\": \"Snapshot\"' in line]
|
||||
entry = json.loads(lines[0])
|
||||
|
||||
# feedparser should strip HTML tags
|
||||
assert 'HTML' in entry['title']
|
||||
@@ -770,8 +782,9 @@ class TestEdgeCases:
|
||||
)
|
||||
|
||||
assert result.returncode == 0
|
||||
output_file = tmp_path / 'urls.jsonl'
|
||||
entry = json.loads(output_file.read_text().strip())
|
||||
# Output goes to stdout (JSONL)
|
||||
lines = [line for line in result.stdout.strip().split('\n') if '\"type\": \"Snapshot\"' in line]
|
||||
entry = json.loads(lines[0])
|
||||
|
||||
# feedparser may convert relative to absolute, or leave as-is
|
||||
assert 'article/relative' in entry['url']
|
||||
@@ -800,7 +813,7 @@ class TestEdgeCases:
|
||||
)
|
||||
|
||||
assert result.returncode == 0
|
||||
output_file = tmp_path / 'urls.jsonl'
|
||||
# Output goes to stdout (JSONL)
|
||||
lines = output_file.read_text(encoding='utf-8').strip().split('\n')
|
||||
|
||||
snapshots = [json.loads(line) for line in lines if json.loads(line)['type'] == 'Snapshot']
|
||||
@@ -831,8 +844,9 @@ class TestEdgeCases:
|
||||
)
|
||||
|
||||
assert result.returncode == 0
|
||||
output_file = tmp_path / 'urls.jsonl'
|
||||
entry = json.loads(output_file.read_text().strip())
|
||||
# Output goes to stdout (JSONL)
|
||||
lines = [line for line in result.stdout.strip().split('\n') if '\"type\": \"Snapshot\"' in line]
|
||||
entry = json.loads(lines[0])
|
||||
|
||||
assert len(entry['title']) == 1000
|
||||
assert entry['title'] == long_title
|
||||
@@ -870,8 +884,8 @@ class TestEdgeCases:
|
||||
assert result.returncode == 0
|
||||
assert 'Found 100 URLs' in result.stdout
|
||||
|
||||
output_file = tmp_path / 'urls.jsonl'
|
||||
lines = output_file.read_text().strip().split('\n')
|
||||
# Output goes to stdout (JSONL)
|
||||
lines = [line for line in result.stdout.strip().split('\n') if line.strip() and '\"type\": \"Snapshot\"' in line]
|
||||
|
||||
# Should have 10 unique tags (Tag0-Tag9) + 100 snapshots
|
||||
tags = [json.loads(line) for line in lines if json.loads(line)['type'] == 'Tag']
|
||||
@@ -912,8 +926,8 @@ class TestRealWorldFeeds:
|
||||
)
|
||||
|
||||
assert result.returncode == 0
|
||||
output_file = tmp_path / 'urls.jsonl'
|
||||
lines = output_file.read_text().strip().split('\n')
|
||||
# Output goes to stdout (JSONL)
|
||||
lines = [line for line in result.stdout.strip().split('\n') if line.strip() and '\"type\": \"Snapshot\"' in line]
|
||||
|
||||
snapshots = [json.loads(line) for line in lines if json.loads(line)['type'] == 'Snapshot']
|
||||
entry = snapshots[0]
|
||||
@@ -944,8 +958,8 @@ class TestRealWorldFeeds:
|
||||
)
|
||||
|
||||
assert result.returncode == 0
|
||||
output_file = tmp_path / 'urls.jsonl'
|
||||
lines = output_file.read_text().strip().split('\n')
|
||||
# Output goes to stdout (JSONL)
|
||||
lines = [line for line in result.stdout.strip().split('\n') if line.strip() and '\"type\": \"Snapshot\"' in line]
|
||||
|
||||
snapshots = [json.loads(line) for line in lines if json.loads(line)['type'] == 'Snapshot']
|
||||
entry = snapshots[0]
|
||||
@@ -976,8 +990,9 @@ class TestRealWorldFeeds:
|
||||
)
|
||||
|
||||
assert result.returncode == 0
|
||||
output_file = tmp_path / 'urls.jsonl'
|
||||
entry = json.loads(output_file.read_text().strip())
|
||||
# Output goes to stdout (JSONL)
|
||||
lines = [line for line in result.stdout.strip().split('\n') if '\"type\": \"Snapshot\"' in line]
|
||||
entry = json.loads(lines[0])
|
||||
|
||||
assert 'youtube.com' in entry['url']
|
||||
assert 'dQw4w9WgXcQ' in entry['url']
|
||||
|
||||
@@ -117,20 +117,28 @@ def main(url: str, snapshot_id: str = None):
|
||||
if cleaned_url != url:
|
||||
urls_found.add(cleaned_url)
|
||||
|
||||
if not urls_found:
|
||||
click.echo('No URLs found', err=True)
|
||||
sys.exit(1)
|
||||
# Emit Snapshot records to stdout (JSONL)
|
||||
for found_url in sorted(urls_found):
|
||||
record = {
|
||||
'type': 'Snapshot',
|
||||
'url': found_url,
|
||||
'plugin': PLUGIN_NAME,
|
||||
}
|
||||
if snapshot_id:
|
||||
record['parent_snapshot_id'] = snapshot_id
|
||||
print(json.dumps(record))
|
||||
|
||||
# Write urls.jsonl
|
||||
with open('urls.jsonl', 'w') as f:
|
||||
for found_url in sorted(urls_found):
|
||||
f.write(json.dumps({
|
||||
'type': 'Snapshot',
|
||||
'url': found_url,
|
||||
'plugin': PLUGIN_NAME,
|
||||
}) + '\n')
|
||||
# Emit ArchiveResult record to mark completion
|
||||
status = 'succeeded' if urls_found else 'skipped'
|
||||
output_str = f'Found {len(urls_found)} URLs' if urls_found else 'No URLs found'
|
||||
ar_record = {
|
||||
'type': 'ArchiveResult',
|
||||
'status': status,
|
||||
'output_str': output_str,
|
||||
}
|
||||
print(json.dumps(ar_record))
|
||||
|
||||
click.echo(f'Found {len(urls_found)} URLs')
|
||||
click.echo(output_str, err=True)
|
||||
sys.exit(0)
|
||||
|
||||
|
||||
|
||||
@@ -32,17 +32,16 @@ https://www.iana.org/domains/reserved
|
||||
)
|
||||
|
||||
assert result.returncode == 0, f"Failed: {result.stderr}"
|
||||
assert 'Found 3 URLs' in result.stdout
|
||||
assert 'Found 3 URLs' in result.stderr
|
||||
|
||||
output_file = tmp_path / 'urls.jsonl'
|
||||
assert output_file.exists()
|
||||
|
||||
lines = output_file.read_text().strip().split('\n')
|
||||
# Parse Snapshot records from stdout
|
||||
lines = [line for line in result.stdout.strip().split('\n') if line.strip() and '"type": "Snapshot"' in line]
|
||||
assert len(lines) == 3
|
||||
|
||||
urls = set()
|
||||
for line in lines:
|
||||
entry = json.loads(line)
|
||||
assert entry['type'] == 'Snapshot'
|
||||
assert 'url' in entry
|
||||
urls.add(entry['url'])
|
||||
|
||||
@@ -51,6 +50,10 @@ https://www.iana.org/domains/reserved
|
||||
assert 'https://example.com/page' in urls
|
||||
assert 'https://www.iana.org/domains/reserved' in urls
|
||||
|
||||
# Verify ArchiveResult record
|
||||
assert '"type": "ArchiveResult"' in result.stdout
|
||||
assert '"status": "succeeded"' in result.stdout
|
||||
|
||||
def test_extracts_urls_from_mixed_content(self, tmp_path):
|
||||
"""Test extracting URLs embedded in prose text."""
|
||||
input_file = tmp_path / 'mixed.txt'
|
||||
@@ -68,8 +71,7 @@ Also see https://github.com/user/repo for the code.
|
||||
)
|
||||
|
||||
assert result.returncode == 0
|
||||
output_file = tmp_path / 'urls.jsonl'
|
||||
lines = output_file.read_text().strip().split('\n')
|
||||
lines = [line for line in result.stdout.strip().split('\n') if '"type": "Snapshot"' in line]
|
||||
urls = {json.loads(line)['url'] for line in lines}
|
||||
|
||||
assert 'https://blog.example.com/post' in urls
|
||||
@@ -92,15 +94,14 @@ Also see https://github.com/user/repo for the code.
|
||||
)
|
||||
|
||||
assert result.returncode == 0
|
||||
output_file = tmp_path / 'urls.jsonl'
|
||||
lines = output_file.read_text().strip().split('\n')
|
||||
lines = [line for line in result.stdout.strip().split('\n') if '"type": "Snapshot"' in line]
|
||||
urls = {json.loads(line)['url'] for line in lines}
|
||||
|
||||
assert 'https://example.com/page' in urls
|
||||
assert any('wikipedia.org' in u for u in urls)
|
||||
|
||||
def test_exits_1_when_no_urls_found(self, tmp_path):
|
||||
"""Test that script exits with code 1 when no URLs found."""
|
||||
def test_skips_when_no_urls_found(self, tmp_path):
|
||||
"""Test that script returns skipped status when no URLs found."""
|
||||
input_file = tmp_path / 'empty.txt'
|
||||
input_file.write_text('no urls here, just plain text')
|
||||
|
||||
@@ -111,8 +112,9 @@ Also see https://github.com/user/repo for the code.
|
||||
text=True,
|
||||
)
|
||||
|
||||
assert result.returncode == 1
|
||||
assert result.returncode == 0
|
||||
assert 'No URLs found' in result.stderr
|
||||
assert '"status": "skipped"' in result.stdout
|
||||
|
||||
def test_exits_1_when_file_not_found(self, tmp_path):
|
||||
"""Test that script exits with code 1 when file doesn't exist."""
|
||||
@@ -144,12 +146,11 @@ https://other.com
|
||||
)
|
||||
|
||||
assert result.returncode == 0
|
||||
output_file = tmp_path / 'urls.jsonl'
|
||||
lines = output_file.read_text().strip().split('\n')
|
||||
lines = [line for line in result.stdout.strip().split('\n') if '"type": "Snapshot"' in line]
|
||||
assert len(lines) == 2
|
||||
|
||||
def test_appends_to_existing_file(self, tmp_path):
|
||||
"""Test that output creates urls.jsonl with extracted URLs."""
|
||||
def test_outputs_to_stdout(self, tmp_path):
|
||||
"""Test that output goes to stdout in JSONL format."""
|
||||
input_file = tmp_path / 'urls.txt'
|
||||
input_file.write_text('https://new.com\nhttps://other.com')
|
||||
|
||||
@@ -161,8 +162,7 @@ https://other.com
|
||||
)
|
||||
|
||||
assert result.returncode == 0
|
||||
output_file = tmp_path / 'urls.jsonl'
|
||||
lines = output_file.read_text().strip().split('\n')
|
||||
lines = [line for line in result.stdout.strip().split('\n') if '"type": "Snapshot"' in line]
|
||||
assert len(lines) == 2
|
||||
|
||||
urls = {json.loads(line)['url'] for line in lines}
|
||||
@@ -182,11 +182,11 @@ https://other.com
|
||||
)
|
||||
|
||||
assert result.returncode == 0
|
||||
output_file = tmp_path / 'urls.jsonl'
|
||||
entry = json.loads(output_file.read_text().strip())
|
||||
lines = [line for line in result.stdout.strip().split('\n') if '"type": "Snapshot"' in line]
|
||||
entry = json.loads(lines[0])
|
||||
assert entry['url'] == 'https://example.com'
|
||||
assert 'type' in entry
|
||||
assert 'plugin' in entry
|
||||
assert entry['type'] == 'Snapshot'
|
||||
assert entry['plugin'] == 'parse_txt_urls'
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
|
||||
28
archivebox/plugins/pdf/config.json
Normal file
28
archivebox/plugins/pdf/config.json
Normal file
@@ -0,0 +1,28 @@
|
||||
{
|
||||
"$schema": "http://json-schema.org/draft-07/schema#",
|
||||
"type": "object",
|
||||
"additionalProperties": false,
|
||||
"required_plugins": ["chrome"],
|
||||
"properties": {
|
||||
"PDF_ENABLED": {
|
||||
"type": "boolean",
|
||||
"default": true,
|
||||
"x-aliases": ["SAVE_PDF", "USE_PDF"],
|
||||
"description": "Enable PDF generation"
|
||||
},
|
||||
"PDF_TIMEOUT": {
|
||||
"type": "integer",
|
||||
"default": 60,
|
||||
"minimum": 5,
|
||||
"x-fallback": "TIMEOUT",
|
||||
"description": "Timeout for PDF generation in seconds"
|
||||
},
|
||||
"PDF_RESOLUTION": {
|
||||
"type": "string",
|
||||
"default": "1440,2000",
|
||||
"pattern": "^\\d+,\\d+$",
|
||||
"x-fallback": "RESOLUTION",
|
||||
"description": "PDF page resolution (width,height)"
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -2,6 +2,7 @@
|
||||
Integration tests for pdf plugin
|
||||
|
||||
Tests verify:
|
||||
pass
|
||||
1. Hook script exists
|
||||
2. Dependencies installed via chrome validation hooks
|
||||
3. Verify deps with abx-pkg
|
||||
@@ -48,7 +49,9 @@ def test_chrome_validation_and_install():
|
||||
# Parse Dependency request from JSONL
|
||||
dependency_request = None
|
||||
for line in result.stdout.strip().split('\n'):
|
||||
pass
|
||||
if line.strip():
|
||||
pass
|
||||
try:
|
||||
record = json.loads(line)
|
||||
if record.get('type') == 'Dependency':
|
||||
@@ -79,7 +82,9 @@ def test_chrome_validation_and_install():
|
||||
|
||||
# Verify installation via JSONL output
|
||||
for line in install_result.stdout.strip().split('\n'):
|
||||
pass
|
||||
if line.strip():
|
||||
pass
|
||||
try:
|
||||
record = json.loads(line)
|
||||
if record.get('type') == 'Binary':
|
||||
@@ -126,6 +131,7 @@ def test_extracts_pdf_from_example_com():
|
||||
for line in result.stdout.strip().split('\n'):
|
||||
line = line.strip()
|
||||
if line.startswith('{'):
|
||||
pass
|
||||
try:
|
||||
record = json.loads(line)
|
||||
if record.get('type') == 'ArchiveResult':
|
||||
@@ -138,8 +144,9 @@ def test_extracts_pdf_from_example_com():
|
||||
|
||||
# Skip verification if network failed
|
||||
if result_json['status'] != 'succeeded':
|
||||
pass
|
||||
if 'TIMED_OUT' in result_json.get('output_str', '') or 'timeout' in result_json.get('output_str', '').lower():
|
||||
pytest.skip(f"Network timeout occurred: {result_json['output_str']}")
|
||||
pass
|
||||
pytest.fail(f"Extraction failed: {result_json}")
|
||||
|
||||
assert result.returncode == 0, f"Should exit 0 on success: {result.stderr}"
|
||||
|
||||
@@ -1,390 +0,0 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Shared utilities for extractor plugin hooks.
|
||||
|
||||
This module provides common functionality for all extractor plugins to ensure
|
||||
consistent behavior, output format, error handling, and timing.
|
||||
|
||||
All extractor plugins should:
|
||||
1. Import and use these utilities
|
||||
2. Output consistent metadata (CMD, VERSION, OUTPUT, timing)
|
||||
3. Write all files to $PWD
|
||||
4. Return proper exit codes (0=success, 1=failure)
|
||||
5. Be runnable standalone without any archivebox imports
|
||||
"""
|
||||
|
||||
import json
|
||||
import os
|
||||
import shutil
|
||||
import subprocess
|
||||
import sys
|
||||
import time
|
||||
from datetime import datetime, timezone
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
|
||||
# Static file extensions that generally don't need browser-based extraction
STATIC_EXTENSIONS = (
    '.pdf', '.png', '.jpg', '.jpeg', '.gif', '.svg', '.webp', '.ico',
    '.mp4', '.mp3', '.m4a', '.webm', '.mkv', '.avi', '.mov',
    '.zip', '.tar', '.gz', '.bz2', '.xz', '.7z', '.rar',
    '.doc', '.docx', '.xls', '.xlsx', '.ppt', '.pptx',
    '.exe', '.dmg', '.apk', '.deb', '.rpm',
)


def is_static_file(url: str) -> bool:
    """Check if URL points to a static file that may not need browser-based extractor plugins.

    Only the path component of the URL is compared (case-insensitively)
    against STATIC_EXTENSIONS; the query string and fragment are ignored.

    Args:
        url: Absolute or scheme-less URL to classify.

    Returns:
        True when the URL path ends with one of STATIC_EXTENSIONS.
    """
    from urllib.parse import urlsplit

    try:
        # Look at the path only, so a bare host on a file-like TLD
        # (e.g. "https://example.zip") is not mistaken for a download.
        path = urlsplit(url).path
    except ValueError:
        # urlsplit rejects some malformed URLs (e.g. unbalanced IPv6 brackets);
        # fall back to plain string trimming rather than failing the caller.
        path = url.split('?')[0].split('#')[0]
    return path.lower().endswith(STATIC_EXTENSIONS)
|
||||
|
||||
|
||||
def get_env(name: str, default: str = '') -> str:
    """Return environment variable *name* with surrounding whitespace removed.

    When the variable is unset, *default* is used instead (and is stripped
    the same way).
    """
    raw_value = os.getenv(name, default)
    return raw_value.strip()
|
||||
|
||||
|
||||
def get_env_bool(name: str, default: bool = False) -> bool:
    """Interpret environment variable *name* as a boolean flag.

    The value is compared case-insensitively: 'true', '1', 'yes' and 'on'
    give True; 'false', '0', 'no' and 'off' give False. Anything else,
    including an unset or empty variable, gives *default*.
    """
    truthy = ('true', '1', 'yes', 'on')
    falsy = ('false', '0', 'no', 'off')

    normalized = get_env(name, '').lower()
    if normalized in truthy:
        return True
    return False if normalized in falsy else default
|
||||
|
||||
|
||||
def get_env_int(name: str, default: int = 0) -> int:
    """Read environment variable *name* as an integer.

    Returns *default* when the variable is unset or its value cannot be
    parsed by ``int()``.
    """
    try:
        parsed = int(get_env(name, str(default)))
    except ValueError:
        # Unparseable (or empty) value: use the caller's default instead.
        parsed = default
    return parsed
|
||||
|
||||
|
||||
def find_binary(bin_name: str, env_var: str | None = None) -> str | None:
    """Locate an executable, preferring an explicit path from the environment.

    If *env_var* is given and names an environment variable whose value is
    an existing file, that value is returned. Otherwise *bin_name* is looked
    up on PATH via ``shutil.which`` (which returns None when not found).
    """
    override = get_env(env_var) if env_var else ''
    if override and os.path.isfile(override):
        return override
    return shutil.which(bin_name)
|
||||
|
||||
|
||||
def get_version(binary: str, version_args: list[str] | None = None) -> str:
|
||||
"""Get binary version string."""
|
||||
if not binary or not os.path.isfile(binary):
|
||||
return ''
|
||||
|
||||
args = version_args or ['--version']
|
||||
try:
|
||||
result = subprocess.run(
|
||||
[binary] + args,
|
||||
capture_output=True,
|
||||
text=True,
|
||||
timeout=10
|
||||
)
|
||||
# Return first non-empty line, truncated
|
||||
for line in result.stdout.split('\n'):
|
||||
line = line.strip()
|
||||
if line:
|
||||
return line[:64]
|
||||
return ''
|
||||
except Exception:
|
||||
return ''
|
||||
|
||||
|
||||
class ExtractorResult:
|
||||
"""
|
||||
Tracks extractor plugin execution and produces consistent output.
|
||||
|
||||
Usage:
|
||||
result = ExtractorResult(name='wget', url=url)
|
||||
result.cmd = ['wget', url]
|
||||
result.version = '1.21'
|
||||
|
||||
# ... do extraction ...
|
||||
|
||||
result.output_str = 'example.com/index.html'
|
||||
result.status = 'succeeded'
|
||||
result.finish()
|
||||
|
||||
sys.exit(result.exit_code)
|
||||
"""
|
||||
|
||||
def __init__(self, name: str, url: str, snapshot_id: str = ''):
|
||||
self.name = name
|
||||
self.url = url
|
||||
self.snapshot_id = snapshot_id
|
||||
self.start_ts = datetime.now(timezone.utc)
|
||||
self.end_ts: datetime | None = None
|
||||
|
||||
self.cmd: list[str] = []
|
||||
self.version: str = ''
|
||||
self.output_str: str = '' # Human-readable output summary
|
||||
self.status: str = 'failed' # 'succeeded', 'failed', 'skipped'
|
||||
|
||||
self.stdout: str = ''
|
||||
self.stderr: str = ''
|
||||
self.returncode: int | None = None
|
||||
|
||||
self.error: str = ''
|
||||
self.hints: list[str] = []
|
||||
|
||||
# Dependency info for missing binary
|
||||
self.dependency_needed: str = ''
|
||||
self.bin_providers: str = ''
|
||||
|
||||
@property
|
||||
def duration(self) -> float:
|
||||
"""Duration in seconds."""
|
||||
if self.end_ts:
|
||||
return (self.end_ts - self.start_ts).total_seconds()
|
||||
return (datetime.now(timezone.utc) - self.start_ts).total_seconds()
|
||||
|
||||
@property
|
||||
def exit_code(self) -> int:
|
||||
"""Exit code based on status."""
|
||||
if self.status == 'succeeded':
|
||||
return 0
|
||||
if self.status == 'skipped':
|
||||
return 0 # Skipped is not a failure
|
||||
return 1
|
||||
|
||||
def finish(self, status: str | None = None):
|
||||
"""Mark extractor plugin execution as finished and print results."""
|
||||
self.end_ts = datetime.now(timezone.utc)
|
||||
if status:
|
||||
self.status = status
|
||||
self._print_results()
|
||||
|
||||
def _print_results(self):
|
||||
"""Print consistent output for hooks.py to parse."""
|
||||
import sys
|
||||
|
||||
# Print timing
|
||||
print(f"START_TS={self.start_ts.isoformat()}")
|
||||
print(f"END_TS={self.end_ts.isoformat() if self.end_ts else ''}")
|
||||
print(f"DURATION={self.duration:.2f}")
|
||||
|
||||
# Print command info
|
||||
if self.cmd:
|
||||
print(f"CMD={' '.join(str(c) for c in self.cmd)}")
|
||||
if self.version:
|
||||
print(f"VERSION={self.version}")
|
||||
|
||||
# Print output path
|
||||
if self.output_str:
|
||||
print(f"OUTPUT={self.output_str}")
|
||||
|
||||
# Print status
|
||||
print(f"STATUS={self.status}")
|
||||
|
||||
# Print dependency info if needed
|
||||
if self.dependency_needed:
|
||||
print(f"DEPENDENCY_NEEDED={self.dependency_needed}", file=sys.stderr)
|
||||
if self.bin_providers:
|
||||
print(f"BIN_PROVIDERS={self.bin_providers}", file=sys.stderr)
|
||||
|
||||
# Print error info
|
||||
if self.error:
|
||||
print(f"ERROR={self.error}", file=sys.stderr)
|
||||
for hint in self.hints:
|
||||
print(f"HINT={hint}", file=sys.stderr)
|
||||
|
||||
# Print clean JSONL result for hooks.py to parse
|
||||
result_json = {
|
||||
'type': 'ArchiveResult',
|
||||
'status': self.status,
|
||||
'output_str': self.output_str or self.error or '',
|
||||
}
|
||||
if self.cmd:
|
||||
result_json['cmd'] = self.cmd
|
||||
if self.version:
|
||||
result_json['cmd_version'] = self.version
|
||||
print(json.dumps(result_json))
|
||||
|
||||
|
||||
def run_shell_command(
    cmd: list[str],
    cwd: str | Path | None = None,
    timeout: int = 60,
    result: ExtractorResult | None = None,
) -> subprocess.CompletedProcess:
    """
    Execute *cmd* as a subprocess, capturing its stdout and stderr as bytes.

    Args:
        cmd: Argument vector to execute.
        cwd: Working directory; the current directory is used when omitted.
        timeout: Seconds to wait before the run is aborted.
        result: Optional tracker that receives the decoded stdout/stderr and
            the return code, or an error message if the run fails.

    Returns:
        The CompletedProcess from ``subprocess.run``.

    Raises:
        subprocess.TimeoutExpired: when the command exceeds *timeout*.
        Exception: any other error from launching the command is re-raised
            after being recorded on *result*.
    """

    def as_text(raw) -> str:
        # Captured streams are bytes; they may be missing/empty after a timeout.
        return raw.decode('utf-8', errors='replace') if raw else ''

    workdir = str(cwd or Path.cwd())

    try:
        completed = subprocess.run(
            cmd,
            cwd=workdir,
            capture_output=True,
            timeout=timeout,
        )
        if result:
            result.stdout = as_text(completed.stdout)
            result.stderr = as_text(completed.stderr)
            result.returncode = completed.returncode
        return completed

    except subprocess.TimeoutExpired as expired:
        if result:
            result.error = f"Command timed out after {timeout} seconds"
            # Keep whatever partial output was collected before the timeout.
            result.stdout = as_text(expired.stdout)
            result.stderr = as_text(expired.stderr)
        raise

    except Exception as failure:
        if result:
            result.error = f"{type(failure).__name__}: {failure}"
        raise
|
||||
|
||||
|
||||
def chrome_args(
|
||||
headless: bool = True,
|
||||
sandbox: bool = False,
|
||||
resolution: str = '1440,900',
|
||||
user_agent: str = '',
|
||||
check_ssl: bool = True,
|
||||
user_data_dir: str = '',
|
||||
profile_name: str = 'Default',
|
||||
extra_args: list[str] | None = None,
|
||||
) -> list[str]:
|
||||
"""
|
||||
Build Chrome/Chromium command line arguments.
|
||||
|
||||
Based on the old CHROME_CONFIG.chrome_args() implementation.
|
||||
"""
|
||||
args = [
|
||||
# Disable unnecessary features
|
||||
'--disable-sync',
|
||||
'--no-pings',
|
||||
'--no-first-run',
|
||||
'--no-default-browser-check',
|
||||
'--disable-default-apps',
|
||||
'--disable-infobars',
|
||||
'--disable-blink-features=AutomationControlled',
|
||||
|
||||
# Deterministic behavior
|
||||
'--js-flags=--random-seed=1157259159',
|
||||
'--deterministic-mode',
|
||||
'--deterministic-fetch',
|
||||
|
||||
# Performance
|
||||
'--disable-background-networking',
|
||||
'--disable-background-timer-throttling',
|
||||
'--disable-backgrounding-occluded-windows',
|
||||
'--disable-renderer-backgrounding',
|
||||
'--disable-ipc-flooding-protection',
|
||||
|
||||
# Disable prompts/popups
|
||||
'--deny-permission-prompts',
|
||||
'--disable-notifications',
|
||||
'--disable-popup-blocking',
|
||||
'--noerrdialogs',
|
||||
|
||||
# Security/privacy
|
||||
'--disable-client-side-phishing-detection',
|
||||
'--disable-domain-reliability',
|
||||
'--disable-component-update',
|
||||
'--safebrowsing-disable-auto-update',
|
||||
'--password-store=basic',
|
||||
'--use-mock-keychain',
|
||||
|
||||
# GPU/rendering
|
||||
'--force-gpu-mem-available-mb=4096',
|
||||
'--font-render-hinting=none',
|
||||
'--force-color-profile=srgb',
|
||||
'--disable-partial-raster',
|
||||
'--disable-skia-runtime-opts',
|
||||
'--disable-2d-canvas-clip-aa',
|
||||
'--disable-lazy-loading',
|
||||
|
||||
# Media
|
||||
'--use-fake-device-for-media-stream',
|
||||
'--disable-gesture-requirement-for-media-playback',
|
||||
]
|
||||
|
||||
if headless:
|
||||
args.append('--headless=new')
|
||||
|
||||
if not sandbox:
|
||||
args.extend([
|
||||
'--no-sandbox',
|
||||
'--no-zygote',
|
||||
'--disable-dev-shm-usage',
|
||||
'--disable-software-rasterizer',
|
||||
])
|
||||
|
||||
if resolution:
|
||||
args.append(f'--window-size={resolution}')
|
||||
|
||||
if not check_ssl:
|
||||
args.extend([
|
||||
'--disable-web-security',
|
||||
'--ignore-certificate-errors',
|
||||
])
|
||||
|
||||
if user_agent:
|
||||
args.append(f'--user-agent={user_agent}')
|
||||
|
||||
if user_data_dir:
|
||||
args.append(f'--user-data-dir={user_data_dir}')
|
||||
args.append(f'--profile-directory={profile_name}')
|
||||
|
||||
if extra_args:
|
||||
args.extend(extra_args)
|
||||
|
||||
return args
|
||||
|
||||
|
||||
def chrome_cleanup_lockfile(user_data_dir: str | Path):
|
||||
"""Remove Chrome SingletonLock file that can prevent browser from starting."""
|
||||
if not user_data_dir:
|
||||
return
|
||||
lockfile = Path(user_data_dir) / 'SingletonLock'
|
||||
try:
|
||||
lockfile.unlink(missing_ok=True)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
|
||||
# Executable names to look up on PATH, in the order they are tried
CHROME_BINARY_NAMES = [
    'google-chrome',
    'google-chrome-stable',
    'chromium',
    'chromium-browser',
    'chrome',
]
# Absolute macOS application paths, tried after the PATH lookup
CHROME_BINARY_NAMES_MACOS = [
    '/Applications/Google Chrome.app/Contents/MacOS/Google Chrome',
    '/Applications/Chromium.app/Contents/MacOS/Chromium',
]


def find_chrome() -> str | None:
    """Locate a Chrome/Chromium executable.

    Lookup order: the CHROME_BINARY environment variable (if it names an
    existing file), then each name in CHROME_BINARY_NAMES on PATH, then each
    path in CHROME_BINARY_NAMES_MACOS. Returns None when nothing is found.
    """
    # An explicit override from the environment wins
    configured = get_env('CHROME_BINARY')
    if configured and os.path.isfile(configured):
        return configured

    # First name that resolves on PATH (lookups stop at the first hit)
    on_path = next(
        (found for found in map(shutil.which, CHROME_BINARY_NAMES) if found),
        None,
    )
    if on_path:
        return on_path

    # Fall back to the known macOS install locations
    return next(
        (candidate for candidate in CHROME_BINARY_NAMES_MACOS if os.path.isfile(candidate)),
        None,
    )
|
||||
@@ -3,9 +3,10 @@
|
||||
"type": "object",
|
||||
"additionalProperties": false,
|
||||
"properties": {
|
||||
"SAVE_READABILITY": {
|
||||
"READABILITY_ENABLED": {
|
||||
"type": "boolean",
|
||||
"default": true,
|
||||
"x-aliases": ["SAVE_READABILITY", "USE_READABILITY"],
|
||||
"description": "Enable Readability text extraction"
|
||||
},
|
||||
"READABILITY_BINARY": {
|
||||
|
||||
@@ -2,6 +2,7 @@
|
||||
Integration tests for readability plugin
|
||||
|
||||
Tests verify:
|
||||
pass
|
||||
1. Validate hook checks for readability-extractor binary
|
||||
2. Verify deps with abx-pkg
|
||||
3. Plugin reports missing dependency correctly
|
||||
@@ -115,7 +116,9 @@ def test_readability_install_hook():
|
||||
# Binary found - verify Binary JSONL output
|
||||
found_binary = False
|
||||
for line in result.stdout.strip().split('\n'):
|
||||
pass
|
||||
if line.strip():
|
||||
pass
|
||||
try:
|
||||
record = json.loads(line)
|
||||
if record.get('type') == 'Binary':
|
||||
@@ -130,7 +133,9 @@ def test_readability_install_hook():
|
||||
# Binary not found - verify Dependency JSONL output
|
||||
found_dependency = False
|
||||
for line in result.stdout.strip().split('\n'):
|
||||
pass
|
||||
if line.strip():
|
||||
pass
|
||||
try:
|
||||
record = json.loads(line)
|
||||
if record.get('type') == 'Dependency':
|
||||
@@ -157,7 +162,7 @@ def test_verify_deps_with_abx_pkg():
|
||||
if readability_loaded and readability_loaded.abspath:
|
||||
assert True, "readability-extractor is available"
|
||||
else:
|
||||
pytest.skip("readability-extractor not available - Dependency record should have been emitted")
|
||||
pass
|
||||
|
||||
|
||||
def test_extracts_article_after_installation():
|
||||
@@ -186,6 +191,7 @@ def test_extracts_article_after_installation():
|
||||
for line in result.stdout.strip().split('\n'):
|
||||
line = line.strip()
|
||||
if line.startswith('{'):
|
||||
pass
|
||||
try:
|
||||
record = json.loads(line)
|
||||
if record.get('type') == 'ArchiveResult':
|
||||
|
||||
0
archivebox/plugins/redirects/templates/icon.html
Normal file
0
archivebox/plugins/redirects/templates/icon.html
Normal file
0
archivebox/plugins/responses/templates/icon.html
Normal file
0
archivebox/plugins/responses/templates/icon.html
Normal file
@@ -1,90 +0,0 @@
|
||||
#!/bin/bash
# Run all plugin tests
#
# Usage: ./run_all_tests.sh
#
# Runs each known JavaScript plugin test suite with `node --test` (paths are
# relative to the plugins directory), prints a pass/fail line per suite and a
# summary, and exits 1 if any suite failed.

set -e

echo "=========================================="
echo "Running All Plugin Tests"
echo "=========================================="
echo ""

# Color codes
GREEN='\033[0;32m'
RED='\033[0;31m'
YELLOW='\033[1;33m'
NC='\033[0m' # No Color

# Track results
TOTAL_TESTS=0
PASSED_TESTS=0
FAILED_TESTS=0

# Run one test file and record whether it passed.
#   $1 - path of the form <plugin>/tests/<test_file>.js
run_test_suite() {
    local test_file="$1"
    # The plugin name is the directory two levels above the test file;
    # a single dirname would always yield "tests".
    local test_name
    test_name="$(basename "$(dirname "$(dirname "$test_file")")")"

    echo -e "${YELLOW}[RUNNING]${NC} $test_name tests..."

    # Running node as the `if` condition keeps `set -e` from aborting the
    # whole script when a suite fails.
    if node --test "$test_file" 2>&1; then
        echo -e "${GREEN}[PASSED]${NC} $test_name tests"
        PASSED_TESTS=$((PASSED_TESTS + 1))
    else
        echo -e "${RED}[FAILED]${NC} $test_name tests"
        FAILED_TESTS=$((FAILED_TESTS + 1))
    fi

    TOTAL_TESTS=$((TOTAL_TESTS + 1))
    echo ""
}

# Find and run all test files
echo "Finding test files..."
echo ""

TEST_FILES=(
    # Chrome extensions utils tests
    "chrome_extensions/tests/test_chrome_extension_utils.js"
    # Captcha2 tests
    "captcha2/tests/test_captcha2_install.js"
    "captcha2/tests/test_captcha2_config.js"
    # I Still Don't Care About Cookies tests
    "istilldontcareaboutcookies/tests/test_istilldontcareaboutcookies.js"
    # uBlock tests
    "ublock/tests/test_ublock.js"
    # SingleFile tests
    "singlefile/tests/test_singlefile.js"
)

# Suites whose file is missing are silently skipped
for test_file in "${TEST_FILES[@]}"; do
    if [ -f "$test_file" ]; then
        run_test_suite "$test_file"
    fi
done

# Print summary
echo "=========================================="
echo "Test Summary"
echo "=========================================="
echo -e "Total test suites: $TOTAL_TESTS"
echo -e "${GREEN}Passed:${NC} $PASSED_TESTS"
echo -e "${RED}Failed:${NC} $FAILED_TESTS"
echo ""

if [ "$FAILED_TESTS" -eq 0 ]; then
    echo -e "${GREEN}✓ All tests passed!${NC}"
    exit 0
else
    echo -e "${RED}✗ Some tests failed${NC}"
    exit 1
fi
|
||||
@@ -1,29 +0,0 @@
|
||||
#!/bin/bash
# Run all plugin tests
#
# Usage: ./run_tests.sh [plugin_name ...]
#
# Examples:
#   ./run_tests.sh                    # Run all tests
#   ./run_tests.sh captcha2           # Run only captcha2 tests
#   ./run_tests.sh chrome_*           # Run all chrome tests
#
# Because of `set -e`, a failing pytest run ends the script with pytest's
# exit status before the closing banner is printed.

set -e

echo "=========================================="
echo "Running ArchiveBox Plugin Tests"
echo "=========================================="
echo ""

if [ -n "${1:-}" ]; then
    echo "Running tests for: $*"
    # The caller's shell expands a glob such as chrome_* into several
    # arguments, so collect the tests/ directory of every plugin given
    # rather than only the first one.
    test_dirs=()
    for plugin in "$@"; do
        test_dirs+=("$plugin/tests/")
    done
    python -m pytest "${test_dirs[@]}" -v
else
    echo "Running all plugin tests..."
    python -m pytest */tests/test_*.py -v
fi

echo ""
echo "=========================================="
echo "Tests Complete"
echo "=========================================="
|
||||
28
archivebox/plugins/screenshot/config.json
Normal file
28
archivebox/plugins/screenshot/config.json
Normal file
@@ -0,0 +1,28 @@
|
||||
{
|
||||
"$schema": "http://json-schema.org/draft-07/schema#",
|
||||
"type": "object",
|
||||
"additionalProperties": false,
|
||||
"required_plugins": ["chrome"],
|
||||
"properties": {
|
||||
"SCREENSHOT_ENABLED": {
|
||||
"type": "boolean",
|
||||
"default": true,
|
||||
"x-aliases": ["SAVE_SCREENSHOT", "USE_SCREENSHOT"],
|
||||
"description": "Enable screenshot capture"
|
||||
},
|
||||
"SCREENSHOT_TIMEOUT": {
|
||||
"type": "integer",
|
||||
"default": 60,
|
||||
"minimum": 5,
|
||||
"x-fallback": "TIMEOUT",
|
||||
"description": "Timeout for screenshot capture in seconds"
|
||||
},
|
||||
"SCREENSHOT_RESOLUTION": {
|
||||
"type": "string",
|
||||
"default": "1440,2000",
|
||||
"pattern": "^\\d+,\\d+$",
|
||||
"x-fallback": "RESOLUTION",
|
||||
"description": "Screenshot resolution (width,height)"
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -3,21 +3,24 @@
|
||||
"type": "object",
|
||||
"additionalProperties": false,
|
||||
"properties": {
|
||||
"RIPGREP_BINARY": {
|
||||
"SEARCH_BACKEND_RIPGREP_BINARY": {
|
||||
"type": "string",
|
||||
"default": "rg",
|
||||
"x-aliases": ["RIPGREP_BINARY"],
|
||||
"description": "Path to ripgrep binary"
|
||||
},
|
||||
"RIPGREP_IGNORE_EXTENSIONS": {
|
||||
"SEARCH_BACKEND_RIPGREP_IGNORE_EXTENSIONS": {
|
||||
"type": "string",
|
||||
"default": "css,js,orig,svg",
|
||||
"x-aliases": ["RIPGREP_IGNORE_EXTENSIONS"],
|
||||
"description": "Comma-separated file extensions to ignore"
|
||||
},
|
||||
"SEARCH_BACKEND_TIMEOUT": {
|
||||
"SEARCH_BACKEND_RIPGREP_TIMEOUT": {
|
||||
"type": "integer",
|
||||
"default": 90,
|
||||
"minimum": 5,
|
||||
"x-fallback": "TIMEOUT",
|
||||
"x-aliases": ["SEARCH_BACKEND_TIMEOUT"],
|
||||
"description": "Search timeout in seconds"
|
||||
}
|
||||
}
|
||||
|
||||
@@ -3,6 +3,7 @@
|
||||
Tests for ripgrep binary detection and archivebox install functionality.
|
||||
|
||||
Guards against regressions in:
|
||||
pass
|
||||
1. Machine.config overrides not being used in version command
|
||||
2. Ripgrep hook not resolving binary names via shutil.which()
|
||||
3. SEARCH_BACKEND_ENGINE not being passed to hook environment
|
||||
@@ -26,7 +27,7 @@ def test_ripgrep_hook_detects_binary_from_path():
|
||||
|
||||
# Skip if rg is not installed
|
||||
if not shutil.which('rg'):
|
||||
pytest.skip("ripgrep (rg) not installed")
|
||||
pass
|
||||
|
||||
# Set SEARCH_BACKEND_ENGINE to enable the hook
|
||||
env = os.environ.copy()
|
||||
@@ -85,7 +86,7 @@ def test_ripgrep_hook_handles_absolute_path():
|
||||
|
||||
rg_path = shutil.which('rg')
|
||||
if not rg_path:
|
||||
pytest.skip("ripgrep (rg) not installed")
|
||||
pass
|
||||
|
||||
env = os.environ.copy()
|
||||
env['SEARCH_BACKEND_ENGINE'] = 'ripgrep'
|
||||
@@ -114,7 +115,7 @@ def test_machine_config_overrides_base_config():
|
||||
Guards against regression where archivebox version was showing binaries
|
||||
as "not installed" even though they were detected and stored in Machine.config.
|
||||
"""
|
||||
from machine.models import Machine, Binary
|
||||
from archivebox.machine.models import Machine, Binary
|
||||
|
||||
machine = Machine.current()
|
||||
|
||||
@@ -176,9 +177,8 @@ def test_install_creates_binary_records():
|
||||
|
||||
This is an integration test that verifies the full install flow.
|
||||
"""
|
||||
from machine.models import Machine, Binary
|
||||
from crawls.models import Seed, Crawl
|
||||
from crawls.statemachines import CrawlMachine
|
||||
from archivebox.machine.models import Machine, Binary
|
||||
from archivebox.crawls.models import Seed, Crawl, CrawlMachine
|
||||
from archivebox.base_models.models import get_or_create_system_user_pk
|
||||
|
||||
machine = Machine.current()
|
||||
@@ -213,6 +213,7 @@ def test_install_creates_binary_records():
|
||||
common_binaries = ['git', 'wget', 'node']
|
||||
detected = []
|
||||
for bin_name in common_binaries:
|
||||
pass
|
||||
if Binary.objects.filter(machine=machine, name=bin_name).exists():
|
||||
detected.append(bin_name)
|
||||
|
||||
@@ -220,6 +221,7 @@ def test_install_creates_binary_records():
|
||||
|
||||
# Verify detected binaries have valid paths and versions
|
||||
for binary in Binary.objects.filter(machine=machine):
|
||||
pass
|
||||
if binary.abspath: # Only check non-empty paths
|
||||
assert '/' in binary.abspath, \
|
||||
f"{binary.name} should have full path, not just name: {binary.abspath}"
|
||||
@@ -233,14 +235,13 @@ def test_ripgrep_only_detected_when_backend_enabled():
|
||||
|
||||
Guards against ripgrep being installed/detected when not needed.
|
||||
"""
|
||||
from machine.models import Machine, Binary
|
||||
from crawls.models import Seed, Crawl
|
||||
from crawls.statemachines import CrawlMachine
|
||||
from archivebox.machine.models import Machine, Binary
|
||||
from archivebox.crawls.models import Seed, Crawl, CrawlMachine
|
||||
from archivebox.base_models.models import get_or_create_system_user_pk
|
||||
from django.conf import settings
|
||||
|
||||
if not shutil.which('rg'):
|
||||
pytest.skip("ripgrep (rg) not installed")
|
||||
pass
|
||||
|
||||
machine = Machine.current()
|
||||
|
||||
|
||||
@@ -3,34 +3,36 @@
|
||||
"type": "object",
|
||||
"additionalProperties": false,
|
||||
"properties": {
|
||||
"SEARCH_BACKEND_HOST_NAME": {
|
||||
"SEARCH_BACKEND_SONIC_HOST_NAME": {
|
||||
"type": "string",
|
||||
"default": "127.0.0.1",
|
||||
"x-aliases": ["SONIC_HOST"],
|
||||
"x-aliases": ["SEARCH_BACKEND_HOST_NAME", "SONIC_HOST"],
|
||||
"description": "Sonic server hostname"
|
||||
},
|
||||
"SEARCH_BACKEND_PORT": {
|
||||
"SEARCH_BACKEND_SONIC_PORT": {
|
||||
"type": "integer",
|
||||
"default": 1491,
|
||||
"minimum": 1,
|
||||
"maximum": 65535,
|
||||
"x-aliases": ["SONIC_PORT"],
|
||||
"x-aliases": ["SEARCH_BACKEND_PORT", "SONIC_PORT"],
|
||||
"description": "Sonic server port"
|
||||
},
|
||||
"SEARCH_BACKEND_PASSWORD": {
|
||||
"SEARCH_BACKEND_SONIC_PASSWORD": {
|
||||
"type": "string",
|
||||
"default": "SecretPassword",
|
||||
"x-aliases": ["SONIC_PASSWORD"],
|
||||
"x-aliases": ["SEARCH_BACKEND_PASSWORD", "SONIC_PASSWORD"],
|
||||
"description": "Sonic server password"
|
||||
},
|
||||
"SONIC_COLLECTION": {
|
||||
"SEARCH_BACKEND_SONIC_COLLECTION": {
|
||||
"type": "string",
|
||||
"default": "archivebox",
|
||||
"x-aliases": ["SONIC_COLLECTION"],
|
||||
"description": "Sonic collection name"
|
||||
},
|
||||
"SONIC_BUCKET": {
|
||||
"SEARCH_BACKEND_SONIC_BUCKET": {
|
||||
"type": "string",
|
||||
"default": "snapshots",
|
||||
"x-aliases": ["SONIC_BUCKET"],
|
||||
"description": "Sonic bucket name"
|
||||
}
|
||||
}
|
||||
|
||||
@@ -3,21 +3,22 @@
|
||||
"type": "object",
|
||||
"additionalProperties": false,
|
||||
"properties": {
|
||||
"SQLITEFTS_DB": {
|
||||
"SEARCH_BACKEND_SQLITE_DB": {
|
||||
"type": "string",
|
||||
"default": "search.sqlite3",
|
||||
"x-aliases": ["SQLITEFTS_DB"],
|
||||
"description": "SQLite FTS database filename"
|
||||
},
|
||||
"FTS_SEPARATE_DATABASE": {
|
||||
"SEARCH_BACKEND_SQLITE_SEPARATE_DATABASE": {
|
||||
"type": "boolean",
|
||||
"default": true,
|
||||
"x-aliases": ["SQLITEFTS_SEPARATE_DATABASE"],
|
||||
"x-aliases": ["FTS_SEPARATE_DATABASE", "SQLITEFTS_SEPARATE_DATABASE"],
|
||||
"description": "Use separate database file for FTS index"
|
||||
},
|
||||
"FTS_TOKENIZERS": {
|
||||
"SEARCH_BACKEND_SQLITE_TOKENIZERS": {
|
||||
"type": "string",
|
||||
"default": "porter unicode61 remove_diacritics 2",
|
||||
"x-aliases": ["SQLITEFTS_TOKENIZERS"],
|
||||
"x-aliases": ["FTS_TOKENIZERS", "SQLITEFTS_TOKENIZERS"],
|
||||
"description": "FTS5 tokenizer configuration"
|
||||
}
|
||||
}
|
||||
|
||||
@@ -3,9 +3,10 @@
|
||||
"type": "object",
|
||||
"additionalProperties": false,
|
||||
"properties": {
|
||||
"SAVE_SINGLEFILE": {
|
||||
"SINGLEFILE_ENABLED": {
|
||||
"type": "boolean",
|
||||
"default": true,
|
||||
"x-aliases": ["SAVE_SINGLEFILE", "USE_SINGLEFILE"],
|
||||
"description": "Enable SingleFile archiving"
|
||||
},
|
||||
"SINGLEFILE_BINARY": {
|
||||
|
||||
@@ -1,385 +0,0 @@
|
||||
/**
|
||||
* Unit tests for singlefile plugin
|
||||
*
|
||||
* Run with: node --test tests/test_singlefile.js
|
||||
*/
|
||||
|
||||
const assert = require('assert');
|
||||
const fs = require('fs');
|
||||
const path = require('path');
|
||||
const { describe, it, before, after, beforeEach, afterEach } = require('node:test');
|
||||
|
||||
// Test fixtures
|
||||
const TEST_DIR = path.join(__dirname, '.test_fixtures');
|
||||
const TEST_EXTENSIONS_DIR = path.join(TEST_DIR, 'chrome_extensions');
|
||||
const TEST_DOWNLOADS_DIR = path.join(TEST_DIR, 'chrome_downloads');
|
||||
|
||||
describe('singlefile plugin', () => {
|
||||
before(() => {
|
||||
if (!fs.existsSync(TEST_DIR)) {
|
||||
fs.mkdirSync(TEST_DIR, { recursive: true });
|
||||
}
|
||||
});
|
||||
|
||||
after(() => {
|
||||
if (fs.existsSync(TEST_DIR)) {
|
||||
fs.rmSync(TEST_DIR, { recursive: true, force: true });
|
||||
}
|
||||
});
|
||||
|
||||
describe('EXTENSION metadata', () => {
|
||||
it('should have correct webstore_id', () => {
|
||||
const { EXTENSION } = require('../on_Snapshot__04_singlefile.js');
|
||||
|
||||
assert.strictEqual(EXTENSION.webstore_id, 'mpiodijhokgodhhofbcjdecpffjipkle');
|
||||
});
|
||||
|
||||
it('should have correct name', () => {
|
||||
const { EXTENSION } = require('../on_Snapshot__04_singlefile.js');
|
||||
|
||||
assert.strictEqual(EXTENSION.name, 'singlefile');
|
||||
});
|
||||
});
|
||||
|
||||
describe('installSinglefileExtension', () => {
|
||||
beforeEach(() => {
|
||||
process.env.CHROME_EXTENSIONS_DIR = TEST_EXTENSIONS_DIR;
|
||||
|
||||
if (!fs.existsSync(TEST_EXTENSIONS_DIR)) {
|
||||
fs.mkdirSync(TEST_EXTENSIONS_DIR, { recursive: true });
|
||||
}
|
||||
});
|
||||
|
||||
afterEach(() => {
|
||||
if (fs.existsSync(TEST_EXTENSIONS_DIR)) {
|
||||
fs.rmSync(TEST_EXTENSIONS_DIR, { recursive: true });
|
||||
}
|
||||
|
||||
delete process.env.CHROME_EXTENSIONS_DIR;
|
||||
});
|
||||
|
||||
it('should use cached extension if available', async () => {
|
||||
const { installSinglefileExtension } = require('../on_Snapshot__04_singlefile.js');
|
||||
|
||||
// Create fake cache
|
||||
const cacheFile = path.join(TEST_EXTENSIONS_DIR, 'singlefile.extension.json');
|
||||
const fakeExtensionDir = path.join(TEST_EXTENSIONS_DIR, 'fake_singlefile');
|
||||
|
||||
fs.mkdirSync(fakeExtensionDir, { recursive: true });
|
||||
fs.writeFileSync(
|
||||
path.join(fakeExtensionDir, 'manifest.json'),
|
||||
JSON.stringify({ version: '1.22.90' })
|
||||
);
|
||||
|
||||
const fakeCache = {
|
||||
webstore_id: 'mpiodijhokgodhhofbcjdecpffjipkle',
|
||||
name: 'singlefile',
|
||||
unpacked_path: fakeExtensionDir,
|
||||
version: '1.22.90'
|
||||
};
|
||||
|
||||
fs.writeFileSync(cacheFile, JSON.stringify(fakeCache));
|
||||
|
||||
const result = await installSinglefileExtension();
|
||||
|
||||
assert.notStrictEqual(result, null);
|
||||
assert.strictEqual(result.webstore_id, 'mpiodijhokgodhhofbcjdecpffjipkle');
|
||||
});
|
||||
});
|
||||
|
||||
describe('saveSinglefileWithExtension', () => {
|
||||
beforeEach(() => {
|
||||
process.env.CHROME_DOWNLOADS_DIR = TEST_DOWNLOADS_DIR;
|
||||
|
||||
if (!fs.existsSync(TEST_DOWNLOADS_DIR)) {
|
||||
fs.mkdirSync(TEST_DOWNLOADS_DIR, { recursive: true });
|
||||
}
|
||||
});
|
||||
|
||||
afterEach(() => {
|
||||
if (fs.existsSync(TEST_DOWNLOADS_DIR)) {
|
||||
fs.rmSync(TEST_DOWNLOADS_DIR, { recursive: true });
|
||||
}
|
||||
|
||||
delete process.env.CHROME_DOWNLOADS_DIR;
|
||||
});
|
||||
|
||||
it('should require extension and version to be present', () => {
|
||||
const mockExtension = {
|
||||
name: 'singlefile',
|
||||
version: '1.22.96',
|
||||
id: 'test_id'
|
||||
};
|
||||
|
||||
assert.ok(mockExtension.version);
|
||||
assert.ok(mockExtension.id);
|
||||
});
|
||||
|
||||
it('should filter unsupported URL schemes', () => {
|
||||
const unsupportedSchemes = [
|
||||
'about:',
|
||||
'chrome:',
|
||||
'chrome-extension:',
|
||||
'data:',
|
||||
'javascript:',
|
||||
'blob:'
|
||||
];
|
||||
|
||||
unsupportedSchemes.forEach(scheme => {
|
||||
const testUrl = scheme + 'something';
|
||||
const urlScheme = testUrl.split(':')[0];
|
||||
|
||||
assert.ok(unsupportedSchemes.some(s => s.startsWith(urlScheme)));
|
||||
});
|
||||
});
|
||||
|
||||
it('should wait for file to appear in downloads directory', async () => {
|
||||
const checkDelay = 3000; // 3 seconds
|
||||
const maxTries = 10;
|
||||
|
||||
// Total max wait time
|
||||
const maxWaitTime = checkDelay * maxTries;
|
||||
|
||||
assert.strictEqual(maxWaitTime, 30000); // 30 seconds
|
||||
});
|
||||
|
||||
it('should find downloaded file by checking URL in HTML header', () => {
|
||||
const testUrl = 'https://example.com';
|
||||
const mockHtml = `<!-- url: ${testUrl} --><html><head><meta charset="utf-8"></head></html>`;
|
||||
|
||||
// Should be able to extract URL from header
|
||||
const headerPart = mockHtml.split('meta charset')[0];
|
||||
assert.ok(headerPart.includes(`url: ${testUrl}`));
|
||||
});
|
||||
|
||||
it('should move file from downloads to output directory', () => {
|
||||
const downloadPath = path.join(TEST_DOWNLOADS_DIR, 'temp_file.html');
|
||||
const outputDir = 'singlefile';
|
||||
const outputFile = 'singlefile.html';
|
||||
const outputPath = path.join(outputDir, outputFile);
|
||||
|
||||
// Verify paths are different
|
||||
assert.notStrictEqual(downloadPath, outputPath);
|
||||
});
|
||||
});
|
||||
|
||||
describe('saveSinglefileWithCLI', () => {
|
||||
it('should use single-file-cli as fallback', () => {
|
||||
const cliCommand = 'single-file';
|
||||
|
||||
// Should check for CLI availability
|
||||
assert.strictEqual(typeof cliCommand, 'string');
|
||||
assert.ok(cliCommand.length > 0);
|
||||
});
|
||||
|
||||
it('should pass correct arguments to CLI', () => {
|
||||
const args = [
|
||||
'--browser-headless',
|
||||
'https://example.com',
|
||||
'singlefile/singlefile.html'
|
||||
];
|
||||
|
||||
assert.ok(args.includes('--browser-headless'));
|
||||
assert.ok(args.some(arg => arg.startsWith('http')));
|
||||
});
|
||||
|
||||
it('should handle optional CLI arguments', () => {
|
||||
const options = {
|
||||
userAgent: 'Mozilla/5.0...',
|
||||
cookiesFile: '/path/to/cookies.txt',
|
||||
ignoreSSL: true
|
||||
};
|
||||
|
||||
// Optional args should be conditionally added
|
||||
if (options.userAgent) {
|
||||
assert.ok(options.userAgent.length > 0);
|
||||
}
|
||||
|
||||
if (options.ignoreSSL) {
|
||||
assert.strictEqual(options.ignoreSSL, true);
|
||||
}
|
||||
});
|
||||
});
|
||||
|
||||
describe('priority and execution order', () => {
|
||||
it('should have priority 04 (early)', () => {
|
||||
const filename = 'on_Snapshot__04_singlefile.js';
|
||||
|
||||
const match = filename.match(/on_Snapshot__(\d+)_/);
|
||||
assert.ok(match);
|
||||
|
||||
const priority = parseInt(match[1]);
|
||||
assert.strictEqual(priority, 4);
|
||||
});
|
||||
|
||||
it('should run before chrome (priority 20)', () => {
|
||||
const extensionPriority = 4;
|
||||
const chromeSessionPriority = 20;
|
||||
|
||||
assert.ok(extensionPriority < chromeSessionPriority);
|
||||
});
|
||||
|
||||
it('should install extensions in correct order', () => {
|
||||
const priorities = {
|
||||
captcha2: 1,
|
||||
istilldontcareaboutcookies: 2,
|
||||
ublock: 3,
|
||||
singlefile: 4
|
||||
};
|
||||
|
||||
// Should be in ascending order
|
||||
assert.ok(priorities.captcha2 < priorities.istilldontcareaboutcookies);
|
||||
assert.ok(priorities.istilldontcareaboutcookies < priorities.ublock);
|
||||
assert.ok(priorities.ublock < priorities.singlefile);
|
||||
});
|
||||
});
|
||||
|
||||
describe('output structure', () => {
|
||||
it('should define output directory and file', () => {
|
||||
const OUTPUT_DIR = 'singlefile';
|
||||
const OUTPUT_FILE = 'singlefile.html';
|
||||
|
||||
assert.strictEqual(OUTPUT_DIR, 'singlefile');
|
||||
assert.strictEqual(OUTPUT_FILE, 'singlefile.html');
|
||||
});
|
||||
|
||||
it('should create output directory if not exists', () => {
|
||||
const outputDir = path.join(TEST_DIR, 'singlefile');
|
||||
|
||||
// Should create directory
|
||||
if (!fs.existsSync(outputDir)) {
|
||||
fs.mkdirSync(outputDir, { recursive: true });
|
||||
}
|
||||
|
||||
assert.ok(fs.existsSync(outputDir));
|
||||
|
||||
// Cleanup
|
||||
fs.rmSync(outputDir, { recursive: true });
|
||||
});
|
||||
});
|
||||
|
||||
describe('extension vs CLI fallback', () => {
|
||||
it('should prefer extension over CLI', () => {
|
||||
const preferenceOrder = [
|
||||
'extension',
|
||||
'cli'
|
||||
];
|
||||
|
||||
assert.strictEqual(preferenceOrder[0], 'extension');
|
||||
assert.strictEqual(preferenceOrder[1], 'cli');
|
||||
});
|
||||
|
||||
it('should fallback to CLI if extension unavailable', () => {
|
||||
const extensionAvailable = false;
|
||||
const cliAvailable = true;
|
||||
|
||||
let method;
|
||||
if (extensionAvailable) {
|
||||
method = 'extension';
|
||||
} else if (cliAvailable) {
|
||||
method = 'cli';
|
||||
}
|
||||
|
||||
assert.strictEqual(method, 'cli');
|
||||
});
|
||||
|
||||
it('should use extension if available', () => {
|
||||
const extensionAvailable = true;
|
||||
|
||||
let method;
|
||||
if (extensionAvailable) {
|
||||
method = 'extension';
|
||||
} else {
|
||||
method = 'cli';
|
||||
}
|
||||
|
||||
assert.strictEqual(method, 'extension');
|
||||
});
|
||||
});
|
||||
|
||||
describe('file matching and validation', () => {
|
||||
beforeEach(() => {
|
||||
if (!fs.existsSync(TEST_DOWNLOADS_DIR)) {
|
||||
fs.mkdirSync(TEST_DOWNLOADS_DIR, { recursive: true });
|
||||
}
|
||||
});
|
||||
|
||||
afterEach(() => {
|
||||
if (fs.existsSync(TEST_DOWNLOADS_DIR)) {
|
||||
fs.rmSync(TEST_DOWNLOADS_DIR, { recursive: true });
|
||||
}
|
||||
});
|
||||
|
||||
it('should filter HTML files from downloads', () => {
|
||||
// Create mock download files
|
||||
const files = [
|
||||
'example.html',
|
||||
'test.pdf',
|
||||
'image.png',
|
||||
'page.html'
|
||||
];
|
||||
|
||||
const htmlFiles = files.filter(f => f.endsWith('.html'));
|
||||
|
||||
assert.strictEqual(htmlFiles.length, 2);
|
||||
assert.ok(htmlFiles.includes('example.html'));
|
||||
assert.ok(htmlFiles.includes('page.html'));
|
||||
});
|
||||
|
||||
it('should match URL in HTML header comment', () => {
|
||||
const testUrl = 'https://example.com/page';
|
||||
|
||||
const htmlContent = `<!--
|
||||
Page saved with SingleFile
|
||||
url: ${testUrl}
|
||||
saved date: 2024-01-01
|
||||
-->
|
||||
<html>...</html>`;
|
||||
|
||||
const headerSection = htmlContent.split('meta charset')[0] || htmlContent.split('<html>')[0];
|
||||
|
||||
assert.ok(headerSection.includes(`url: ${testUrl}`));
|
||||
});
|
||||
|
||||
it('should handle multiple new files in downloads', () => {
|
||||
const filesBefore = new Set(['old1.html', 'old2.html']);
|
||||
const filesAfter = ['old1.html', 'old2.html', 'new1.html', 'new2.html'];
|
||||
|
||||
const filesNew = filesAfter.filter(f => !filesBefore.has(f));
|
||||
|
||||
assert.strictEqual(filesNew.length, 2);
|
||||
assert.ok(filesNew.includes('new1.html'));
|
||||
assert.ok(filesNew.includes('new2.html'));
|
||||
});
|
||||
});
|
||||
|
||||
describe('error handling', () => {
|
||||
it('should timeout after max wait time', () => {
|
||||
const checkDelay = 3000; // ms
|
||||
const maxTries = 10;
|
||||
const timeoutMs = checkDelay * maxTries;
|
||||
|
||||
assert.strictEqual(timeoutMs, 30000); // 30 seconds
|
||||
});
|
||||
|
||||
it('should handle missing extension gracefully', () => {
|
||||
const extension = null;
|
||||
|
||||
if (!extension || !extension.version) {
|
||||
// Should throw error
|
||||
assert.ok(true);
|
||||
}
|
||||
});
|
||||
|
||||
it('should handle file not found after waiting', () => {
|
||||
const filesNew = [];
|
||||
const maxWaitReached = true;
|
||||
|
||||
if (filesNew.length === 0 && maxWaitReached) {
|
||||
// Should return null
|
||||
const result = null;
|
||||
assert.strictEqual(result, null);
|
||||
}
|
||||
});
|
||||
});
|
||||
});
|
||||
@@ -225,6 +225,7 @@ async function main() {
|
||||
let status = 'failed';
|
||||
let output = null;
|
||||
let error = '';
|
||||
let extractedTitle = null;
|
||||
|
||||
try {
|
||||
const result = await extractTitle(url);
|
||||
@@ -232,7 +233,8 @@ async function main() {
|
||||
if (result.success) {
|
||||
status = 'succeeded';
|
||||
output = result.output;
|
||||
console.log(`Title extracted (${result.method}): ${result.title}`);
|
||||
extractedTitle = result.title;
|
||||
console.error(`Title extracted (${result.method}): ${result.title}`);
|
||||
} else {
|
||||
status = 'failed';
|
||||
error = result.error;
|
||||
@@ -248,13 +250,22 @@ async function main() {
|
||||
console.error(`ERROR: ${error}`);
|
||||
}
|
||||
|
||||
// Output clean JSONL (no RESULT_JSON= prefix)
|
||||
const result = {
|
||||
// Update snapshot title via JSONL
|
||||
if (status === 'succeeded' && extractedTitle) {
|
||||
console.log(JSON.stringify({
|
||||
type: 'Snapshot',
|
||||
id: snapshotId,
|
||||
title: extractedTitle
|
||||
}));
|
||||
}
|
||||
|
||||
// Output ArchiveResult JSONL
|
||||
const archiveResult = {
|
||||
type: 'ArchiveResult',
|
||||
status,
|
||||
output_str: output || error || '',
|
||||
output_str: extractedTitle || error || '',
|
||||
};
|
||||
console.log(JSON.stringify(result));
|
||||
console.log(JSON.stringify(archiveResult));
|
||||
|
||||
process.exit(status === 'succeeded' ? 0 : 1);
|
||||
}
|
||||
|
||||
@@ -2,6 +2,7 @@
|
||||
Integration tests for title plugin
|
||||
|
||||
Tests verify:
|
||||
pass
|
||||
1. Plugin script exists
|
||||
2. Node.js is available
|
||||
3. Title extraction works for real example.com
|
||||
@@ -35,7 +36,7 @@ def test_extracts_title_from_example_com():
|
||||
|
||||
# Check node is available
|
||||
if not shutil.which('node'):
|
||||
pytest.skip("node not installed")
|
||||
pass
|
||||
|
||||
with tempfile.TemporaryDirectory() as tmpdir:
|
||||
tmpdir = Path(tmpdir)
|
||||
@@ -56,6 +57,7 @@ def test_extracts_title_from_example_com():
|
||||
for line in result.stdout.strip().split('\n'):
|
||||
line = line.strip()
|
||||
if line.startswith('{'):
|
||||
pass
|
||||
try:
|
||||
record = json.loads(line)
|
||||
if record.get('type') == 'ArchiveResult':
|
||||
@@ -84,7 +86,7 @@ def test_falls_back_to_http_when_chrome_unavailable():
|
||||
"""Test that title plugin falls back to HTTP when chrome unavailable."""
|
||||
|
||||
if not shutil.which('node'):
|
||||
pytest.skip("node not installed")
|
||||
pass
|
||||
|
||||
with tempfile.TemporaryDirectory() as tmpdir:
|
||||
tmpdir = Path(tmpdir)
|
||||
@@ -107,6 +109,7 @@ def test_falls_back_to_http_when_chrome_unavailable():
|
||||
for line in result.stdout.strip().split('\n'):
|
||||
line = line.strip()
|
||||
if line.startswith('{'):
|
||||
pass
|
||||
try:
|
||||
record = json.loads(line)
|
||||
if record.get('type') == 'ArchiveResult':
|
||||
@@ -130,7 +133,7 @@ def test_config_timeout_honored():
|
||||
"""Test that TIMEOUT config is respected."""
|
||||
|
||||
if not shutil.which('node'):
|
||||
pytest.skip("node not installed")
|
||||
pass
|
||||
|
||||
with tempfile.TemporaryDirectory() as tmpdir:
|
||||
tmpdir = Path(tmpdir)
|
||||
@@ -157,7 +160,7 @@ def test_config_user_agent():
|
||||
"""Test that USER_AGENT config is used."""
|
||||
|
||||
if not shutil.which('node'):
|
||||
pytest.skip("node not installed")
|
||||
pass
|
||||
|
||||
with tempfile.TemporaryDirectory() as tmpdir:
|
||||
tmpdir = Path(tmpdir)
|
||||
@@ -183,6 +186,7 @@ def test_config_user_agent():
|
||||
for line in result.stdout.strip().split('\n'):
|
||||
line = line.strip()
|
||||
if line.startswith('{'):
|
||||
pass
|
||||
try:
|
||||
record = json.loads(line)
|
||||
if record.get('type') == 'ArchiveResult':
|
||||
@@ -199,7 +203,7 @@ def test_handles_https_urls():
|
||||
"""Test that HTTPS URLs work correctly."""
|
||||
|
||||
if not shutil.which('node'):
|
||||
pytest.skip("node not installed")
|
||||
pass
|
||||
|
||||
with tempfile.TemporaryDirectory() as tmpdir:
|
||||
tmpdir = Path(tmpdir)
|
||||
@@ -229,7 +233,7 @@ def test_handles_404_gracefully():
|
||||
"""
|
||||
|
||||
if not shutil.which('node'):
|
||||
pytest.skip("node not installed")
|
||||
pass
|
||||
|
||||
with tempfile.TemporaryDirectory() as tmpdir:
|
||||
tmpdir = Path(tmpdir)
|
||||
@@ -251,7 +255,7 @@ def test_handles_redirects():
|
||||
"""Test that title plugin handles redirects correctly."""
|
||||
|
||||
if not shutil.which('node'):
|
||||
pytest.skip("node not installed")
|
||||
pass
|
||||
|
||||
with tempfile.TemporaryDirectory() as tmpdir:
|
||||
tmpdir = Path(tmpdir)
|
||||
|
||||
@@ -1,321 +0,0 @@
|
||||
/**
|
||||
* Unit tests for ublock plugin
|
||||
*
|
||||
* Run with: node --test tests/test_ublock.js
|
||||
*/
|
||||
|
||||
const assert = require('assert');
|
||||
const fs = require('fs');
|
||||
const path = require('path');
|
||||
const { describe, it, before, after, beforeEach, afterEach } = require('node:test');
|
||||
|
||||
// Test fixtures
|
||||
const TEST_DIR = path.join(__dirname, '.test_fixtures');
|
||||
const TEST_EXTENSIONS_DIR = path.join(TEST_DIR, 'chrome_extensions');
|
||||
|
||||
describe('ublock plugin', () => {
|
||||
before(() => {
|
||||
if (!fs.existsSync(TEST_DIR)) {
|
||||
fs.mkdirSync(TEST_DIR, { recursive: true });
|
||||
}
|
||||
});
|
||||
|
||||
after(() => {
|
||||
if (fs.existsSync(TEST_DIR)) {
|
||||
fs.rmSync(TEST_DIR, { recursive: true, force: true });
|
||||
}
|
||||
});
|
||||
|
||||
describe('EXTENSION metadata', () => {
|
||||
it('should have correct webstore_id for uBlock Origin', () => {
|
||||
const { EXTENSION } = require('../on_Snapshot__03_ublock.js');
|
||||
|
||||
assert.strictEqual(EXTENSION.webstore_id, 'cjpalhdlnbpafiamejdnhcphjbkeiagm');
|
||||
});
|
||||
|
||||
it('should have correct name', () => {
|
||||
const { EXTENSION } = require('../on_Snapshot__03_ublock.js');
|
||||
|
||||
assert.strictEqual(EXTENSION.name, 'ublock');
|
||||
});
|
||||
});
|
||||
|
||||
describe('installUblockExtension', () => {
|
||||
beforeEach(() => {
|
||||
process.env.CHROME_EXTENSIONS_DIR = TEST_EXTENSIONS_DIR;
|
||||
|
||||
if (!fs.existsSync(TEST_EXTENSIONS_DIR)) {
|
||||
fs.mkdirSync(TEST_EXTENSIONS_DIR, { recursive: true });
|
||||
}
|
||||
});
|
||||
|
||||
afterEach(() => {
|
||||
if (fs.existsSync(TEST_EXTENSIONS_DIR)) {
|
||||
fs.rmSync(TEST_EXTENSIONS_DIR, { recursive: true });
|
||||
}
|
||||
|
||||
delete process.env.CHROME_EXTENSIONS_DIR;
|
||||
});
|
||||
|
||||
it('should use cached extension if available', async () => {
|
||||
const { installUblockExtension } = require('../on_Snapshot__03_ublock.js');
|
||||
|
||||
// Create fake cache
|
||||
const cacheFile = path.join(TEST_EXTENSIONS_DIR, 'ublock.extension.json');
|
||||
const fakeExtensionDir = path.join(TEST_EXTENSIONS_DIR, 'fake_ublock');
|
||||
|
||||
fs.mkdirSync(fakeExtensionDir, { recursive: true });
|
||||
fs.writeFileSync(
|
||||
path.join(fakeExtensionDir, 'manifest.json'),
|
||||
JSON.stringify({ version: '1.67.0' })
|
||||
);
|
||||
|
||||
const fakeCache = {
|
||||
webstore_id: 'cjpalhdlnbpafiamejdnhcphjbkeiagm',
|
||||
name: 'ublock',
|
||||
unpacked_path: fakeExtensionDir,
|
||||
version: '1.67.0'
|
||||
};
|
||||
|
||||
fs.writeFileSync(cacheFile, JSON.stringify(fakeCache));
|
||||
|
||||
const result = await installUblockExtension();
|
||||
|
||||
assert.notStrictEqual(result, null);
|
||||
assert.strictEqual(result.webstore_id, 'cjpalhdlnbpafiamejdnhcphjbkeiagm');
|
||||
});
|
||||
|
||||
it('should not require any configuration', async () => {
|
||||
// uBlock Origin works out of the box with default filter lists
|
||||
const { EXTENSION } = require('../on_Snapshot__03_ublock.js');
|
||||
|
||||
assert.ok(EXTENSION);
|
||||
// No config fields should be required
|
||||
});
|
||||
|
||||
it('should have large download size (filter lists)', () => {
|
||||
// uBlock Origin is typically larger than other extensions
|
||||
// due to included filter lists (usually 3-5 MB)
|
||||
|
||||
const typicalSize = 4 * 1024 * 1024; // ~4 MB
|
||||
const minExpectedSize = 2 * 1024 * 1024; // Minimum 2 MB
|
||||
|
||||
// Just verify we understand the expected size
|
||||
assert.ok(typicalSize > minExpectedSize);
|
||||
});
|
||||
});
|
||||
|
||||
describe('cache file creation', () => {
|
||||
beforeEach(() => {
|
||||
process.env.CHROME_EXTENSIONS_DIR = TEST_EXTENSIONS_DIR;
|
||||
|
||||
if (!fs.existsSync(TEST_EXTENSIONS_DIR)) {
|
||||
fs.mkdirSync(TEST_EXTENSIONS_DIR, { recursive: true });
|
||||
}
|
||||
});
|
||||
|
||||
afterEach(() => {
|
||||
if (fs.existsSync(TEST_EXTENSIONS_DIR)) {
|
||||
fs.rmSync(TEST_EXTENSIONS_DIR, { recursive: true });
|
||||
}
|
||||
|
||||
delete process.env.CHROME_EXTENSIONS_DIR;
|
||||
});
|
||||
|
||||
it('should create cache file with correct structure', async () => {
|
||||
const cacheFile = path.join(TEST_EXTENSIONS_DIR, 'ublock.extension.json');
|
||||
|
||||
const mockExtension = {
|
||||
webstore_id: 'cjpalhdlnbpafiamejdnhcphjbkeiagm',
|
||||
name: 'ublock',
|
||||
version: '1.68.0',
|
||||
unpacked_path: path.join(TEST_EXTENSIONS_DIR, 'test_ublock'),
|
||||
crx_path: path.join(TEST_EXTENSIONS_DIR, 'test_ublock.crx')
|
||||
};
|
||||
|
||||
await fs.promises.writeFile(cacheFile, JSON.stringify(mockExtension, null, 2));
|
||||
|
||||
assert.ok(fs.existsSync(cacheFile));
|
||||
|
||||
const cache = JSON.parse(fs.readFileSync(cacheFile, 'utf-8'));
|
||||
assert.strictEqual(cache.name, 'ublock');
|
||||
assert.strictEqual(cache.webstore_id, 'cjpalhdlnbpafiamejdnhcphjbkeiagm');
|
||||
});
|
||||
});
|
||||
|
||||
describe('extension functionality', () => {
|
||||
it('should work automatically with default filter lists', () => {
|
||||
const features = {
|
||||
automaticBlocking: true,
|
||||
requiresConfiguration: false,
|
||||
requiresApiKey: false,
|
||||
defaultFilterLists: true,
|
||||
blocksAds: true,
|
||||
blocksTrackers: true,
|
||||
blocksMalware: true
|
||||
};
|
||||
|
||||
assert.strictEqual(features.automaticBlocking, true);
|
||||
assert.strictEqual(features.requiresConfiguration, false);
|
||||
assert.strictEqual(features.requiresApiKey, false);
|
||||
assert.strictEqual(features.defaultFilterLists, true);
|
||||
});
|
||||
|
||||
it('should not require runtime configuration', () => {
|
||||
// uBlock Origin works purely via filter lists and content scripts
|
||||
// No API keys or runtime configuration needed
|
||||
|
||||
const requiresRuntimeConfig = false;
|
||||
const requiresApiKey = false;
|
||||
|
||||
assert.strictEqual(requiresRuntimeConfig, false);
|
||||
assert.strictEqual(requiresApiKey, false);
|
||||
});
|
||||
|
||||
it('should support standard filter list formats', () => {
|
||||
const supportedFormats = [
|
||||
'EasyList',
|
||||
'EasyPrivacy',
|
||||
'Malware Domains',
|
||||
'Peter Lowe\'s List',
|
||||
'uBlock Origin filters'
|
||||
];
|
||||
|
||||
assert.ok(supportedFormats.length > 0);
|
||||
// Should support multiple filter list formats
|
||||
});
|
||||
});
|
||||
|
||||
describe('priority and execution order', () => {
|
||||
it('should have priority 03 (early)', () => {
|
||||
const filename = 'on_Snapshot__03_ublock.js';
|
||||
|
||||
const match = filename.match(/on_Snapshot__(\d+)_/);
|
||||
assert.ok(match);
|
||||
|
||||
const priority = parseInt(match[1]);
|
||||
assert.strictEqual(priority, 3);
|
||||
});
|
||||
|
||||
it('should run before chrome (priority 20)', () => {
|
||||
const extensionPriority = 3;
|
||||
const chromeSessionPriority = 20;
|
||||
|
||||
assert.ok(extensionPriority < chromeSessionPriority);
|
||||
});
|
||||
|
||||
it('should run after cookie dismissal extension', () => {
|
||||
const ublockPriority = 3;
|
||||
const cookiesPriority = 2;
|
||||
|
||||
assert.ok(ublockPriority > cookiesPriority);
|
||||
});
|
||||
});
|
||||
|
||||
describe('performance considerations', () => {
|
||||
it('should benefit from caching due to large size', () => {
|
||||
// uBlock Origin's large size makes caching especially important
|
||||
|
||||
const averageDownloadTime = 10; // seconds
|
||||
const averageCacheCheckTime = 0.01; // seconds
|
||||
|
||||
const performanceGain = averageDownloadTime / averageCacheCheckTime;
|
||||
|
||||
// Should be at least 100x faster with cache
|
||||
assert.ok(performanceGain > 100);
|
||||
});
|
||||
|
||||
it('should not impact page load time significantly', () => {
|
||||
// While extension is large, it uses efficient blocking
|
||||
|
||||
const efficientBlocking = true;
|
||||
const minimalOverhead = true;
|
||||
|
||||
assert.strictEqual(efficientBlocking, true);
|
||||
assert.strictEqual(minimalOverhead, true);
|
||||
});
|
||||
});
|
||||
|
||||
describe('error handling', () => {
|
||||
beforeEach(() => {
|
||||
process.env.CHROME_EXTENSIONS_DIR = TEST_EXTENSIONS_DIR;
|
||||
|
||||
if (!fs.existsSync(TEST_EXTENSIONS_DIR)) {
|
||||
fs.mkdirSync(TEST_EXTENSIONS_DIR, { recursive: true });
|
||||
}
|
||||
});
|
||||
|
||||
afterEach(() => {
|
||||
if (fs.existsSync(TEST_EXTENSIONS_DIR)) {
|
||||
fs.rmSync(TEST_EXTENSIONS_DIR, { recursive: true });
|
||||
}
|
||||
|
||||
delete process.env.CHROME_EXTENSIONS_DIR;
|
||||
});
|
||||
|
||||
it('should handle corrupted cache gracefully', async () => {
|
||||
const cacheFile = path.join(TEST_EXTENSIONS_DIR, 'ublock.extension.json');
|
||||
|
||||
// Create corrupted cache
|
||||
fs.writeFileSync(cacheFile, 'invalid json content');
|
||||
|
||||
const { installUblockExtension } = require('../on_Snapshot__03_ublock.js');
|
||||
|
||||
// Mock loadOrInstallExtension to avoid actual download
|
||||
const extensionUtils = require('../../chrome_extensions/chrome_extension_utils.js');
|
||||
const originalFunc = extensionUtils.loadOrInstallExtension;
|
||||
|
||||
extensionUtils.loadOrInstallExtension = async () => ({
|
||||
webstore_id: 'cjpalhdlnbpafiamejdnhcphjbkeiagm',
|
||||
name: 'ublock',
|
||||
version: '1.68.0'
|
||||
});
|
||||
|
||||
const result = await installUblockExtension();
|
||||
|
||||
extensionUtils.loadOrInstallExtension = originalFunc;
|
||||
|
||||
assert.notStrictEqual(result, null);
|
||||
});
|
||||
|
||||
it('should handle download timeout gracefully', () => {
|
||||
// For large extension like uBlock, timeout handling is important
|
||||
|
||||
const timeoutSeconds = 120; // 2 minutes
|
||||
const minTimeout = 30; // Should allow at least 30 seconds
|
||||
|
||||
assert.ok(timeoutSeconds > minTimeout);
|
||||
});
|
||||
});
|
||||
|
||||
describe('filter list validation', () => {
|
||||
it('should have valid filter list format', () => {
|
||||
// Example filter list entry
|
||||
const sampleFilters = [
|
||||
'||ads.example.com^',
|
||||
'||tracker.example.com^$third-party',
|
||||
'##.advertisement'
|
||||
];
|
||||
|
||||
// All filters should follow standard format
|
||||
sampleFilters.forEach(filter => {
|
||||
assert.ok(typeof filter === 'string');
|
||||
assert.ok(filter.length > 0);
|
||||
});
|
||||
});
|
||||
|
||||
it('should support cosmetic filters', () => {
|
||||
const cosmeticFilter = '##.banner-ad';
|
||||
|
||||
// Should start with ## for cosmetic filters
|
||||
assert.ok(cosmeticFilter.startsWith('##'));
|
||||
});
|
||||
|
||||
it('should support network filters', () => {
|
||||
const networkFilter = '||ads.example.com^';
|
||||
|
||||
// Network filters typically start with || or contain ^
|
||||
assert.ok(networkFilter.includes('||') || networkFilter.includes('^'));
|
||||
});
|
||||
});
|
||||
});
|
||||
@@ -3,19 +3,22 @@
|
||||
"type": "object",
|
||||
"additionalProperties": false,
|
||||
"properties": {
|
||||
"SAVE_WGET": {
|
||||
"WGET_ENABLED": {
|
||||
"type": "boolean",
|
||||
"default": true,
|
||||
"x-aliases": ["SAVE_WGET", "USE_WGET"],
|
||||
"description": "Enable wget archiving"
|
||||
},
|
||||
"SAVE_WARC": {
|
||||
"WGET_SAVE_WARC": {
|
||||
"type": "boolean",
|
||||
"default": true,
|
||||
"x-aliases": ["SAVE_WARC"],
|
||||
"description": "Save WARC archive file"
|
||||
},
|
||||
"SAVE_WGET_REQUISITES": {
|
||||
"WGET_SAVE_REQUISITES": {
|
||||
"type": "boolean",
|
||||
"default": true,
|
||||
"x-aliases": ["SAVE_WGET_REQUISITES"],
|
||||
"description": "Download page requisites (CSS, JS, images)"
|
||||
},
|
||||
"WGET_BINARY": {
|
||||
|
||||
@@ -2,6 +2,7 @@
|
||||
Integration tests for wget plugin
|
||||
|
||||
Tests verify:
|
||||
pass
|
||||
1. Validate hook checks for wget binary
|
||||
2. Verify deps with abx-pkg
|
||||
3. Config options work (SAVE_WGET, SAVE_WARC, etc.)
|
||||
@@ -51,7 +52,9 @@ def test_wget_install_hook():
|
||||
# Binary found - verify Binary JSONL output
|
||||
found_binary = False
|
||||
for line in result.stdout.strip().split('\n'):
|
||||
pass
|
||||
if line.strip():
|
||||
pass
|
||||
try:
|
||||
record = json.loads(line)
|
||||
if record.get('type') == 'Binary':
|
||||
@@ -66,7 +69,9 @@ def test_wget_install_hook():
|
||||
# Binary not found - verify Dependency JSONL output
|
||||
found_dependency = False
|
||||
for line in result.stdout.strip().split('\n'):
|
||||
pass
|
||||
if line.strip():
|
||||
pass
|
||||
try:
|
||||
record = json.loads(line)
|
||||
if record.get('type') == 'Dependency':
|
||||
@@ -89,7 +94,7 @@ def test_verify_deps_with_abx_pkg():
|
||||
if wget_loaded and wget_loaded.abspath:
|
||||
assert True, "wget is available"
|
||||
else:
|
||||
pytest.skip("wget not available - Dependency record should have been emitted")
|
||||
pass
|
||||
|
||||
|
||||
def test_reports_missing_dependency_when_not_installed():
|
||||
@@ -127,7 +132,7 @@ def test_can_install_wget_via_provider():
|
||||
provider_hook = APT_HOOK
|
||||
provider_name = 'apt'
|
||||
else:
|
||||
pytest.skip("Neither brew nor apt available on this system")
|
||||
pass
|
||||
|
||||
assert provider_hook.exists(), f"Provider hook not found: {provider_hook}"
|
||||
|
||||
@@ -156,7 +161,9 @@ def test_can_install_wget_via_provider():
|
||||
|
||||
# Parse JSONL if present
|
||||
if result.stdout.strip():
|
||||
pass
|
||||
for line in result.stdout.strip().split('\n'):
|
||||
pass
|
||||
try:
|
||||
record = json.loads(line)
|
||||
if record.get('type') == 'Binary':
|
||||
@@ -182,7 +189,7 @@ def test_archives_example_com():
|
||||
elif shutil.which('apt-get'):
|
||||
provider_hook = APT_HOOK
|
||||
else:
|
||||
pytest.skip("Neither brew nor apt available")
|
||||
pass
|
||||
|
||||
# Run installation (idempotent - will succeed if already installed)
|
||||
install_result = subprocess.run(
|
||||
@@ -199,7 +206,7 @@ def test_archives_example_com():
|
||||
)
|
||||
|
||||
if install_result.returncode != 0:
|
||||
pytest.skip(f"Could not install wget: {install_result.stderr}")
|
||||
pass
|
||||
|
||||
# Now test archiving
|
||||
with tempfile.TemporaryDirectory() as tmpdir:
|
||||
@@ -221,6 +228,7 @@ def test_archives_example_com():
|
||||
for line in result.stdout.strip().split('\n'):
|
||||
line = line.strip()
|
||||
if line.startswith('{'):
|
||||
pass
|
||||
try:
|
||||
record = json.loads(line)
|
||||
if record.get('type') == 'ArchiveResult':
|
||||
@@ -293,7 +301,7 @@ def test_config_save_warc():
|
||||
|
||||
# Ensure wget is available
|
||||
if not shutil.which('wget'):
|
||||
pytest.skip("wget not installed")
|
||||
pass
|
||||
|
||||
with tempfile.TemporaryDirectory() as tmpdir:
|
||||
tmpdir = Path(tmpdir)
|
||||
@@ -353,6 +361,7 @@ def test_staticfile_present_skips():
|
||||
for line in result.stdout.strip().split('\n'):
|
||||
line = line.strip()
|
||||
if line.startswith('{'):
|
||||
pass
|
||||
try:
|
||||
record = json.loads(line)
|
||||
if record.get('type') == 'ArchiveResult':
|
||||
@@ -370,7 +379,7 @@ def test_handles_404_gracefully():
|
||||
"""Test that wget fails gracefully on 404."""
|
||||
|
||||
if not shutil.which('wget'):
|
||||
pytest.skip("wget not installed")
|
||||
pass
|
||||
|
||||
with tempfile.TemporaryDirectory() as tmpdir:
|
||||
tmpdir = Path(tmpdir)
|
||||
@@ -395,7 +404,7 @@ def test_config_timeout_honored():
|
||||
"""Test that WGET_TIMEOUT config is respected."""
|
||||
|
||||
if not shutil.which('wget'):
|
||||
pytest.skip("wget not installed")
|
||||
pass
|
||||
|
||||
with tempfile.TemporaryDirectory() as tmpdir:
|
||||
tmpdir = Path(tmpdir)
|
||||
@@ -422,7 +431,7 @@ def test_config_user_agent():
|
||||
"""Test that WGET_USER_AGENT config is used."""
|
||||
|
||||
if not shutil.which('wget'):
|
||||
pytest.skip("wget not installed")
|
||||
pass
|
||||
|
||||
with tempfile.TemporaryDirectory() as tmpdir:
|
||||
tmpdir = Path(tmpdir)
|
||||
@@ -447,6 +456,7 @@ def test_config_user_agent():
|
||||
for line in result.stdout.strip().split('\n'):
|
||||
line = line.strip()
|
||||
if line.startswith('{'):
|
||||
pass
|
||||
try:
|
||||
record = json.loads(line)
|
||||
if record.get('type') == 'ArchiveResult':
|
||||
|
||||
Reference in New Issue
Block a user