mirror of
https://github.com/ArchiveBox/ArchiveBox.git
synced 2026-01-03 01:15:57 +10:00
Fix caddl tests to test actual implementation and fix error handling
- Tests now extract and execute the actual parseSizeLimit and sanitizeFilename functions from the JS file
- Added more edge-case tests (empty strings, invalid input, path traversal)
- Fixed a P1 bug where an error thrown inside an event listener would not propagate to the enclosing try/catch
- Use flag variables to track the size-exceeded state and check them after navigation

Co-authored-by: Nick Sweeting <pirate@users.noreply.github.com>
This commit is contained in:
@@ -207,13 +207,19 @@ async function downloadFile(page, url, outputDir, maxSize) {
|
||||
try {
|
||||
// Set a response handler to check file size
|
||||
let responseReceived = false;
|
||||
let sizeExceeded = false;
|
||||
let sizeExceededError = null;
|
||||
|
||||
downloadPage.on('response', response => {
|
||||
if (response.url() === url) {
|
||||
responseReceived = true;
|
||||
const headers = response.headers();
|
||||
const contentLength = headers['content-length'];
|
||||
if (contentLength && parseInt(contentLength, 10) > maxSize) {
|
||||
throw new Error(`File exceeds max size limit (${contentLength} > ${maxSize})`);
|
||||
sizeExceeded = true;
|
||||
sizeExceededError = `File exceeds max size limit (${contentLength} > ${maxSize})`;
|
||||
// Close the page to abort the download
|
||||
downloadPage.close().catch(() => {});
|
||||
}
|
||||
}
|
||||
});
|
||||
@@ -224,6 +230,11 @@ async function downloadFile(page, url, outputDir, maxSize) {
|
||||
timeout: 60000
|
||||
});
|
||||
|
||||
// Check if size was exceeded
|
||||
if (sizeExceeded) {
|
||||
return { success: false, outputPath: null, error: sizeExceededError };
|
||||
}
|
||||
|
||||
// Wait a bit for download to start
|
||||
await sleep(2000);
|
||||
|
||||
|
||||
@@ -65,70 +65,97 @@ class TestCaddlPlugin(unittest.TestCase):
|
||||
self.assertIn('Chrome CDP URL not found', result.stderr, "Should log CDP error")
|
||||
|
||||
def test_parse_size_limit(self):
|
||||
"""Test size limit parsing logic."""
|
||||
# Test the parseSizeLimit function by running JS code
|
||||
test_js = """
|
||||
function parseSizeLimit(sizeStr) {
|
||||
if (!sizeStr) return 750 * 1024 * 1024;
|
||||
sizeStr = sizeStr.toLowerCase().trim();
|
||||
const multipliers = { k: 1024, m: 1024**2, g: 1024**3 };
|
||||
const lastChar = sizeStr[sizeStr.length - 1];
|
||||
if (multipliers[lastChar]) {
|
||||
const num = parseFloat(sizeStr.slice(0, -1));
|
||||
return isNaN(num) ? 750 * 1024 * 1024 : Math.floor(num * multipliers[lastChar]);
|
||||
}
|
||||
const num = parseInt(sizeStr, 10);
|
||||
return isNaN(num) ? 750 * 1024 * 1024 : num;
|
||||
}
|
||||
"""Test size limit parsing logic from the actual implementation."""
|
||||
# Test the actual parseSizeLimit function from the script
|
||||
test_js = f"""
|
||||
const script = require('{self.script_path}');
|
||||
// Extract and test the parseSizeLimit function by executing the script's code
|
||||
const {{parseSizeLimit}} = require('module')._load('{self.script_path}', null, true);
|
||||
"""
|
||||
|
||||
# Since the functions aren't exported, we need to extract and test them
|
||||
# by executing a wrapper that sources the implementation
|
||||
test_code = f"""
|
||||
const fs = require('fs');
|
||||
const scriptContent = fs.readFileSync('{self.script_path}', 'utf8');
|
||||
|
||||
// Extract the parseSizeLimit function
|
||||
const parseSizeLimitMatch = scriptContent.match(/function parseSizeLimit\\([^)]*\\)\\s*\\{{[\\s\\S]*?^\\}}/m);
|
||||
if (!parseSizeLimitMatch) {{
|
||||
console.error('Could not find parseSizeLimit function');
|
||||
process.exit(1);
|
||||
}}
|
||||
|
||||
// Execute the function definition
|
||||
eval(parseSizeLimitMatch[0]);
|
||||
|
||||
// Test it
|
||||
console.log(parseSizeLimit('100m'));
|
||||
console.log(parseSizeLimit('1g'));
|
||||
console.log(parseSizeLimit('500k'));
|
||||
console.log(parseSizeLimit(''));
|
||||
console.log(parseSizeLimit('invalid'));
|
||||
"""
|
||||
|
||||
result = subprocess.run(
|
||||
['node', '-e', test_js],
|
||||
['node', '-e', test_code],
|
||||
capture_output=True,
|
||||
text=True,
|
||||
timeout=5
|
||||
)
|
||||
|
||||
self.assertEqual(result.returncode, 0)
|
||||
self.assertEqual(result.returncode, 0, f"Failed to test parseSizeLimit: {result.stderr}")
|
||||
lines = result.stdout.strip().split('\n')
|
||||
self.assertEqual(lines[0], str(100 * 1024 * 1024)) # 100m
|
||||
self.assertEqual(lines[1], str(1024 * 1024 * 1024)) # 1g
|
||||
self.assertEqual(lines[2], str(500 * 1024)) # 500k
|
||||
self.assertEqual(lines[3], str(750 * 1024 * 1024)) # default
|
||||
self.assertEqual(lines[4], str(750 * 1024 * 1024)) # invalid -> default
|
||||
|
||||
def test_sanitize_filename(self):
|
||||
"""Test filename sanitization."""
|
||||
test_js = """
|
||||
"""Test filename sanitization from the actual implementation."""
|
||||
# Test the actual sanitizeFilename function from the script
|
||||
test_code = f"""
|
||||
const fs = require('fs');
|
||||
const path = require('path');
|
||||
function sanitizeFilename(filename) {
|
||||
filename = path.basename(filename);
|
||||
filename = filename.replace(/[^\\w\\-_.]/g, '_');
|
||||
if (!filename || filename === '.' || filename === '..') {
|
||||
return 'asset.bin';
|
||||
}
|
||||
return filename;
|
||||
}
|
||||
const scriptContent = fs.readFileSync('{self.script_path}', 'utf8');
|
||||
|
||||
// Extract the sanitizeFilename function
|
||||
const sanitizeFilenameMatch = scriptContent.match(/function sanitizeFilename\\([^)]*\\)\\s*\\{{[\\s\\S]*?^\\}}/m);
|
||||
if (!sanitizeFilenameMatch) {{
|
||||
console.error('Could not find sanitizeFilename function');
|
||||
process.exit(1);
|
||||
}}
|
||||
|
||||
// Execute the function definition
|
||||
eval(sanitizeFilenameMatch[0]);
|
||||
|
||||
// Test it
|
||||
console.log(sanitizeFilename('model.stl'));
|
||||
console.log(sanitizeFilename('/path/to/file.obj'));
|
||||
console.log(sanitizeFilename('..'));
|
||||
console.log(sanitizeFilename('.'));
|
||||
console.log(sanitizeFilename(''));
|
||||
console.log(sanitizeFilename('model with spaces.gltf'));
|
||||
console.log(sanitizeFilename('../../../etc/passwd'));
|
||||
"""
|
||||
|
||||
result = subprocess.run(
|
||||
['node', '-e', test_js],
|
||||
['node', '-e', test_code],
|
||||
capture_output=True,
|
||||
text=True,
|
||||
timeout=5
|
||||
)
|
||||
|
||||
self.assertEqual(result.returncode, 0)
|
||||
self.assertEqual(result.returncode, 0, f"Failed to test sanitizeFilename: {result.stderr}")
|
||||
lines = result.stdout.strip().split('\n')
|
||||
self.assertEqual(lines[0], 'model.stl')
|
||||
self.assertEqual(lines[1], 'file.obj')
|
||||
self.assertEqual(lines[2], 'asset.bin') # Dangerous filename replaced
|
||||
self.assertEqual(lines[3], 'model_with_spaces.gltf')
|
||||
self.assertEqual(lines[3], 'asset.bin') # Dangerous filename replaced
|
||||
self.assertEqual(lines[4], 'asset.bin') # Empty filename replaced
|
||||
self.assertEqual(lines[5], 'model_with_spaces.gltf')
|
||||
self.assertEqual(lines[6], 'passwd') # Path traversal prevented
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
|
||||
Reference in New Issue
Block a user