diff --git a/archivebox/plugins/caddl/on_Snapshot__65_caddl.bg.js b/archivebox/plugins/caddl/on_Snapshot__65_caddl.bg.js index 2a014494..2513cbeb 100755 --- a/archivebox/plugins/caddl/on_Snapshot__65_caddl.bg.js +++ b/archivebox/plugins/caddl/on_Snapshot__65_caddl.bg.js @@ -207,13 +207,19 @@ async function downloadFile(page, url, outputDir, maxSize) { try { // Set a response handler to check file size let responseReceived = false; + let sizeExceeded = false; + let sizeExceededError = null; + downloadPage.on('response', response => { if (response.url() === url) { responseReceived = true; const headers = response.headers(); const contentLength = headers['content-length']; if (contentLength && parseInt(contentLength, 10) > maxSize) { - throw new Error(`File exceeds max size limit (${contentLength} > ${maxSize})`); + sizeExceeded = true; + sizeExceededError = `File exceeds max size limit (${contentLength} > ${maxSize})`; + // Close the page to abort the download + downloadPage.close().catch(() => {}); } } }); @@ -224,6 +230,11 @@ async function downloadFile(page, url, outputDir, maxSize) { timeout: 60000 }); + // Check if size was exceeded + if (sizeExceeded) { + return { success: false, outputPath: null, error: sizeExceededError }; + } + // Wait a bit for download to start await sleep(2000); diff --git a/archivebox/plugins/caddl/tests/test_caddl.py b/archivebox/plugins/caddl/tests/test_caddl.py index 1dfdddb9..2f11d4de 100644 --- a/archivebox/plugins/caddl/tests/test_caddl.py +++ b/archivebox/plugins/caddl/tests/test_caddl.py @@ -65,70 +65,97 @@ class TestCaddlPlugin(unittest.TestCase): self.assertIn('Chrome CDP URL not found', result.stderr, "Should log CDP error") def test_parse_size_limit(self): - """Test size limit parsing logic.""" - # Test the parseSizeLimit function by running JS code - test_js = """ - function parseSizeLimit(sizeStr) { - if (!sizeStr) return 750 * 1024 * 1024; - sizeStr = sizeStr.toLowerCase().trim(); - const multipliers = { k: 1024, m: 1024**2, g: 1024**3 }; - const lastChar = sizeStr[sizeStr.length - 1]; - if (multipliers[lastChar]) { - const num = parseFloat(sizeStr.slice(0, -1)); - return isNaN(num) ? 750 * 1024 * 1024 : Math.floor(num * multipliers[lastChar]); - } - const num = parseInt(sizeStr, 10); - return isNaN(num) ? 750 * 1024 * 1024 : num; - } + """Test size limit parsing logic from the actual implementation.""" + # Test the actual parseSizeLimit function from the script + test_js = f""" + const script = require('{self.script_path}'); + // Extract and test the parseSizeLimit function by executing the script's code + const {{parseSizeLimit}} = require('module')._load('{self.script_path}', null, true); + """ + + # Since the functions aren't exported, we need to extract and test them + # by executing a wrapper that sources the implementation + test_code = f""" + const fs = require('fs'); + const scriptContent = fs.readFileSync('{self.script_path}', 'utf8'); + + // Extract the parseSizeLimit function + const parseSizeLimitMatch = scriptContent.match(/function parseSizeLimit\\([^)]*\\)\\s*\\{{[\\s\\S]*?^\\}}/m); + if (!parseSizeLimitMatch) {{ + console.error('Could not find parseSizeLimit function'); + process.exit(1); + }} + + // Execute the function definition + eval(parseSizeLimitMatch[0]); + + // Test it console.log(parseSizeLimit('100m')); console.log(parseSizeLimit('1g')); console.log(parseSizeLimit('500k')); + console.log(parseSizeLimit('')); + console.log(parseSizeLimit('invalid')); """ result = subprocess.run( - ['node', '-e', test_js], + ['node', '-e', test_code], capture_output=True, text=True, timeout=5 ) - self.assertEqual(result.returncode, 0) + self.assertEqual(result.returncode, 0, f"Failed to test parseSizeLimit: {result.stderr}") lines = result.stdout.strip().split('\n') self.assertEqual(lines[0], str(100 * 1024 * 1024)) # 100m self.assertEqual(lines[1], str(1024 * 1024 * 1024)) # 1g self.assertEqual(lines[2], str(500 * 1024)) # 500k + self.assertEqual(lines[3], str(750 * 1024 * 1024)) # default + self.assertEqual(lines[4], str(750 * 1024 * 1024)) # invalid -> default def test_sanitize_filename(self): - """Test filename sanitization.""" - test_js = """ + """Test filename sanitization from the actual implementation.""" + # Test the actual sanitizeFilename function from the script + test_code = f""" + const fs = require('fs'); const path = require('path'); - function sanitizeFilename(filename) { - filename = path.basename(filename); - filename = filename.replace(/[^\\w\\-_.]/g, '_'); - if (!filename || filename === '.' || filename === '..') { - return 'asset.bin'; - } - return filename; - } + const scriptContent = fs.readFileSync('{self.script_path}', 'utf8'); + + // Extract the sanitizeFilename function + const sanitizeFilenameMatch = scriptContent.match(/function sanitizeFilename\\([^)]*\\)\\s*\\{{[\\s\\S]*?^\\}}/m); + if (!sanitizeFilenameMatch) {{ + console.error('Could not find sanitizeFilename function'); + process.exit(1); + }} + + // Execute the function definition + eval(sanitizeFilenameMatch[0]); + + // Test it console.log(sanitizeFilename('model.stl')); console.log(sanitizeFilename('/path/to/file.obj')); console.log(sanitizeFilename('..')); + console.log(sanitizeFilename('.')); + console.log(sanitizeFilename('')); console.log(sanitizeFilename('model with spaces.gltf')); + console.log(sanitizeFilename('../../../etc/passwd')); """ result = subprocess.run( - ['node', '-e', test_js], + ['node', '-e', test_code], capture_output=True, text=True, timeout=5 ) - self.assertEqual(result.returncode, 0) + self.assertEqual(result.returncode, 0, f"Failed to test sanitizeFilename: {result.stderr}") lines = result.stdout.strip().split('\n') self.assertEqual(lines[0], 'model.stl') self.assertEqual(lines[1], 'file.obj') self.assertEqual(lines[2], 'asset.bin') # Dangerous filename replaced - self.assertEqual(lines[3], 'model_with_spaces.gltf') + self.assertEqual(lines[3], 'asset.bin') # Dangerous filename replaced + self.assertEqual(lines[4], 'asset.bin') # Empty filename replaced + self.assertEqual(lines[5], 'model_with_spaces.gltf') + self.assertEqual(lines[6], 'passwd') # Path traversal prevented if __name__ == '__main__':