mirror of
https://github.com/ArchiveBox/ArchiveBox.git
synced 2026-01-03 17:35:45 +10:00
Fix test assertions to fail properly and add NXDOMAIN deduplication
- test_seo.py: Add assertIsNotNone before conditional to catch SEO extraction failures
- test_ssl.py: Add assertIsNotNone to ensure SSL data is captured from HTTPS URLs
- test_pip_provider.py: Assert jsonl_found variable to verify binary discovery
- dns plugin: Deduplicate NXDOMAIN records using seenResolutions map

Tests now fail when functionality doesn't work (no cheating).

Co-authored-by: Nick Sweeting <pirate@users.noreply.github.com>
This commit is contained in:
@@ -147,6 +147,15 @@ async function setupListener(targetUrl) {
|
||||
if (errorText.includes('net::ERR_NAME_NOT_RESOLVED') ||
|
||||
errorText.includes('net::ERR_NAME_RESOLUTION_FAILED')) {
|
||||
|
||||
// Create a unique key for this failed resolution
|
||||
const resolutionKey = `${hostname}:NXDOMAIN`;
|
||||
|
||||
// Skip if we've already recorded this NXDOMAIN
|
||||
if (seenResolutions.has(resolutionKey)) {
|
||||
return;
|
||||
}
|
||||
seenResolutions.set(resolutionKey, true);
|
||||
|
||||
const timestamp = new Date().toISOString();
|
||||
const dnsRecord = {
|
||||
ts: timestamp,
|
||||
|
||||
@@ -89,9 +89,12 @@ class TestPipProviderHook(TestCase):
|
||||
except json.JSONDecodeError:
|
||||
continue
|
||||
|
||||
# May or may not find python3 via pip, but should not crash
|
||||
# Should not crash
|
||||
self.assertNotIn('Traceback', result.stderr)
|
||||
|
||||
# Should find python3 via pip or env provider
|
||||
self.assertTrue(jsonl_found, "Expected to find python3 binary in JSONL output")
|
||||
|
||||
def test_hook_unknown_package(self):
|
||||
"""Hook should handle unknown packages gracefully."""
|
||||
env = os.environ.copy()
|
||||
|
||||
@@ -119,11 +119,12 @@ class TestSEOWithChrome(TestCase):
|
||||
self.assertNotIn('Traceback', result.stderr)
|
||||
self.assertNotIn('Error:', result.stderr)
|
||||
|
||||
# example.com has a title, so we should get at least that
|
||||
if seo_data:
|
||||
# Verify we got some SEO data
|
||||
has_seo_data = any(key in seo_data for key in ['title', 'description', 'og:title', 'canonical', 'meta'])
|
||||
self.assertTrue(has_seo_data, f"No SEO data extracted: {seo_data}")
|
||||
# example.com has a title, so we MUST get SEO data
|
||||
self.assertIsNotNone(seo_data, "No SEO data extracted from file or stdout")
|
||||
|
||||
# Verify we got some SEO data
|
||||
has_seo_data = any(key in seo_data for key in ['title', 'description', 'og:title', 'canonical', 'meta'])
|
||||
self.assertTrue(has_seo_data, f"No SEO data extracted: {seo_data}")
|
||||
|
||||
except RuntimeError as e:
|
||||
if 'Chrome' in str(e) or 'CDP' in str(e):
|
||||
|
||||
@@ -117,17 +117,20 @@ class TestSSLWithChrome(TestCase):
|
||||
except json.JSONDecodeError:
|
||||
continue
|
||||
|
||||
# Verify we got SSL data from HTTPS URL
|
||||
if ssl_data:
|
||||
# example.com uses HTTPS, should get certificate info
|
||||
self.assertIn('protocol', ssl_data, f"SSL data missing protocol: {ssl_data}")
|
||||
self.assertTrue(
|
||||
ssl_data['protocol'].startswith('TLS') or ssl_data['protocol'].startswith('SSL'),
|
||||
f"Unexpected protocol: {ssl_data['protocol']}"
|
||||
)
|
||||
else:
|
||||
# If no SSL data, at least verify hook ran without crashing
|
||||
self.assertEqual(result.returncode, 0, f"Hook failed: {result.stderr}")
|
||||
# Verify hook ran successfully
|
||||
self.assertEqual(result.returncode, 0, f"Hook failed: {result.stderr}")
|
||||
self.assertNotIn('Traceback', result.stderr)
|
||||
self.assertNotIn('Error:', result.stderr)
|
||||
|
||||
# example.com uses HTTPS, so we MUST get SSL certificate data
|
||||
self.assertIsNotNone(ssl_data, "No SSL data extracted from HTTPS URL")
|
||||
|
||||
# Verify we got certificate info
|
||||
self.assertIn('protocol', ssl_data, f"SSL data missing protocol: {ssl_data}")
|
||||
self.assertTrue(
|
||||
ssl_data['protocol'].startswith('TLS') or ssl_data['protocol'].startswith('SSL'),
|
||||
f"Unexpected protocol: {ssl_data['protocol']}"
|
||||
)
|
||||
|
||||
except RuntimeError as e:
|
||||
if 'Chrome' in str(e) or 'CDP' in str(e):
|
||||
|
||||
Reference in New Issue
Block a user