Fix caddl tests to test actual implementation and fix error handling

- Tests now extract and execute the actual parseSizeLimit and sanitizeFilename functions from the JS file
- Added more edge-case tests (empty strings, invalid input, path traversal)
- Fixed P1 bug where throwing inside an event listener wouldn't propagate to the surrounding try/catch
- Use flag variables to track the size-exceeded state and check them after navigation

Co-authored-by: Nick Sweeting <pirate@users.noreply.github.com>
2026-01-03 01:15:57 +10:00 · 2025-12-29 23:46:39 +00:00
parent cd7528e073
commit e937688be0
2 changed files with 69 additions and 31 deletions
--- a/archivebox/plugins/caddl/on_Snapshot__65_caddl.bg.js
+++ b/archivebox/plugins/caddl/on_Snapshot__65_caddl.bg.js
@@ -207,13 +207,19 @@ async function downloadFile(page, url, outputDir, maxSize) {
        try {
            // Set a response handler to check file size
            let responseReceived = false;
+            let sizeExceeded = false;
+            let sizeExceededError = null;
+
            downloadPage.on('response', response => {
                if (response.url() === url) {
                    responseReceived = true;
                    const headers = response.headers();
                    const contentLength = headers['content-length'];
                    if (contentLength && parseInt(contentLength, 10) > maxSize) {
-                        throw new Error(`File exceeds max size limit (${contentLength} > ${maxSize})`);
+                        sizeExceeded = true;
+                        sizeExceededError = `File exceeds max size limit (${contentLength} > ${maxSize})`;
+                        // Close the page to abort the download
+                        downloadPage.close().catch(() => {});
                    }
                }
            });
@@ -224,6 +230,11 @@ async function downloadFile(page, url, outputDir, maxSize) {
                timeout: 60000
            });

+            // Check if size was exceeded
+            if (sizeExceeded) {
+                return { success: false, outputPath: null, error: sizeExceededError };
+            }
+
            // Wait a bit for download to start
            await sleep(2000);

--- a/archivebox/plugins/caddl/tests/test_caddl.py
+++ b/archivebox/plugins/caddl/tests/test_caddl.py
@@ -65,70 +65,97 @@ class TestCaddlPlugin(unittest.TestCase):
        self.assertIn('Chrome CDP URL not found', result.stderr, "Should log CDP error")

    def test_parse_size_limit(self):
-        """Test size limit parsing logic."""
-        # Test the parseSizeLimit function by running JS code
-        test_js = """
-        function parseSizeLimit(sizeStr) {
-            if (!sizeStr) return 750 * 1024 * 1024;
-            sizeStr = sizeStr.toLowerCase().trim();
-            const multipliers = { k: 1024, m: 1024**2, g: 1024**3 };
-            const lastChar = sizeStr[sizeStr.length - 1];
-            if (multipliers[lastChar]) {
-                const num = parseFloat(sizeStr.slice(0, -1));
-                return isNaN(num) ? 750 * 1024 * 1024 : Math.floor(num * multipliers[lastChar]);
-            }
-            const num = parseInt(sizeStr, 10);
-            return isNaN(num) ? 750 * 1024 * 1024 : num;
-        }
+        """Test size limit parsing logic from the actual implementation."""
+        # Test the actual parseSizeLimit function from the script
+        test_js = f"""
+        const script = require('{self.script_path}');
+        // Extract and test the parseSizeLimit function by executing the script's code
+        const {{parseSizeLimit}} = require('module')._load('{self.script_path}', null, true);
+        """
+
+        # Since the functions aren't exported, we need to extract and test them
+        # by executing a wrapper that sources the implementation
+        test_code = f"""
+        const fs = require('fs');
+        const scriptContent = fs.readFileSync('{self.script_path}', 'utf8');
+
+        // Extract the parseSizeLimit function
+        const parseSizeLimitMatch = scriptContent.match(/function parseSizeLimit\\([^)]*\\)\\s*\\{{[\\s\\S]*?^\\}}/m);
+        if (!parseSizeLimitMatch) {{
+            console.error('Could not find parseSizeLimit function');
+            process.exit(1);
+        }}
+
+        // Execute the function definition
+        eval(parseSizeLimitMatch[0]);
+
+        // Test it
        console.log(parseSizeLimit('100m'));
        console.log(parseSizeLimit('1g'));
        console.log(parseSizeLimit('500k'));
+        console.log(parseSizeLimit(''));
+        console.log(parseSizeLimit('invalid'));
        """

        result = subprocess.run(
-            ['node', '-e', test_js],
+            ['node', '-e', test_code],
            capture_output=True,
            text=True,
            timeout=5
        )

-        self.assertEqual(result.returncode, 0)
+        self.assertEqual(result.returncode, 0, f"Failed to test parseSizeLimit: {result.stderr}")
        lines = result.stdout.strip().split('\n')
        self.assertEqual(lines[0], str(100 * 1024 * 1024))  # 100m
        self.assertEqual(lines[1], str(1024 * 1024 * 1024))  # 1g
        self.assertEqual(lines[2], str(500 * 1024))  # 500k
+        self.assertEqual(lines[3], str(750 * 1024 * 1024))  # default
+        self.assertEqual(lines[4], str(750 * 1024 * 1024))  # invalid -> default

    def test_sanitize_filename(self):
-        """Test filename sanitization."""
-        test_js = """
+        """Test filename sanitization from the actual implementation."""
+        # Test the actual sanitizeFilename function from the script
+        test_code = f"""
+        const fs = require('fs');
        const path = require('path');
-        function sanitizeFilename(filename) {
-            filename = path.basename(filename);
-            filename = filename.replace(/[^\\w\\-_.]/g, '_');
-            if (!filename || filename === '.' || filename === '..') {
-                return 'asset.bin';
-            }
-            return filename;
-        }
+        const scriptContent = fs.readFileSync('{self.script_path}', 'utf8');
+
+        // Extract the sanitizeFilename function
+        const sanitizeFilenameMatch = scriptContent.match(/function sanitizeFilename\\([^)]*\\)\\s*\\{{[\\s\\S]*?^\\}}/m);
+        if (!sanitizeFilenameMatch) {{
+            console.error('Could not find sanitizeFilename function');
+            process.exit(1);
+        }}
+
+        // Execute the function definition
+        eval(sanitizeFilenameMatch[0]);
+
+        // Test it
        console.log(sanitizeFilename('model.stl'));
        console.log(sanitizeFilename('/path/to/file.obj'));
        console.log(sanitizeFilename('..'));
+        console.log(sanitizeFilename('.'));
+        console.log(sanitizeFilename(''));
        console.log(sanitizeFilename('model with spaces.gltf'));
+        console.log(sanitizeFilename('../../../etc/passwd'));
        """

        result = subprocess.run(
-            ['node', '-e', test_js],
+            ['node', '-e', test_code],
            capture_output=True,
            text=True,
            timeout=5
        )

-        self.assertEqual(result.returncode, 0)
+        self.assertEqual(result.returncode, 0, f"Failed to test sanitizeFilename: {result.stderr}")
        lines = result.stdout.strip().split('\n')
        self.assertEqual(lines[0], 'model.stl')
        self.assertEqual(lines[1], 'file.obj')
        self.assertEqual(lines[2], 'asset.bin')  # Dangerous filename replaced
-        self.assertEqual(lines[3], 'model_with_spaces.gltf')
+        self.assertEqual(lines[3], 'asset.bin')  # Dangerous filename replaced
+        self.assertEqual(lines[4], 'asset.bin')  # Empty filename replaced
+        self.assertEqual(lines[5], 'model_with_spaces.gltf')
+        self.assertEqual(lines[6], 'passwd')  # Path traversal prevented


 if __name__ == '__main__':