From 483929391d16418a890b5d7fcc9b12971eea4edc Mon Sep 17 00:00:00 2001 From: "claude[bot]" <41898282+claude[bot]@users.noreply.github.com> Date: Wed, 31 Dec 2025 19:00:28 +0000 Subject: [PATCH] Fix test assertions to fail properly and add NXDOMAIN deduplication - test_seo.py: Add assertIsNotNone before conditional to catch SEO extraction failures - test_ssl.py: Add assertIsNotNone to ensure SSL data is captured from HTTPS URLs - test_pip_provider.py: Assert jsonl_found variable to verify binary discovery - dns plugin: Deduplicate NXDOMAIN records using seenResolutions map Tests now fail when functionality doesn't work (no cheating). Co-authored-by: Nick Sweeting --- .../plugins/dns/on_Snapshot__22_dns.bg.js | 9 +++++++ .../plugins/pip/tests/test_pip_provider.py | 5 +++- archivebox/plugins/seo/tests/test_seo.py | 11 ++++---- archivebox/plugins/ssl/tests/test_ssl.py | 25 +++++++++++-------- 4 files changed, 33 insertions(+), 17 deletions(-) diff --git a/archivebox/plugins/dns/on_Snapshot__22_dns.bg.js b/archivebox/plugins/dns/on_Snapshot__22_dns.bg.js index cc977fb7..721674f1 100755 --- a/archivebox/plugins/dns/on_Snapshot__22_dns.bg.js +++ b/archivebox/plugins/dns/on_Snapshot__22_dns.bg.js @@ -147,6 +147,15 @@ async function setupListener(targetUrl) { if (errorText.includes('net::ERR_NAME_NOT_RESOLVED') || errorText.includes('net::ERR_NAME_RESOLUTION_FAILED')) { + // Create a unique key for this failed resolution + const resolutionKey = `${hostname}:NXDOMAIN`; + + // Skip if we've already recorded this NXDOMAIN + if (seenResolutions.has(resolutionKey)) { + return; + } + seenResolutions.set(resolutionKey, true); + const timestamp = new Date().toISOString(); const dnsRecord = { ts: timestamp, diff --git a/archivebox/plugins/pip/tests/test_pip_provider.py b/archivebox/plugins/pip/tests/test_pip_provider.py index 6e51a87c..a22ef183 100644 --- a/archivebox/plugins/pip/tests/test_pip_provider.py +++ b/archivebox/plugins/pip/tests/test_pip_provider.py @@ -89,9 +89,12 @@ class TestPipProviderHook(TestCase): except json.JSONDecodeError: continue - # May or may not find python3 via pip, but should not crash + # Should not crash self.assertNotIn('Traceback', result.stderr) + # Should find python3 via pip or env provider + self.assertTrue(jsonl_found, "Expected to find python3 binary in JSONL output") + def test_hook_unknown_package(self): """Hook should handle unknown packages gracefully.""" env = os.environ.copy() diff --git a/archivebox/plugins/seo/tests/test_seo.py b/archivebox/plugins/seo/tests/test_seo.py index fc74ac91..2b01a356 100644 --- a/archivebox/plugins/seo/tests/test_seo.py +++ b/archivebox/plugins/seo/tests/test_seo.py @@ -119,11 +119,12 @@ class TestSEOWithChrome(TestCase): self.assertNotIn('Traceback', result.stderr) self.assertNotIn('Error:', result.stderr) - # example.com has a title, so we should get at least that - if seo_data: - # Verify we got some SEO data - has_seo_data = any(key in seo_data for key in ['title', 'description', 'og:title', 'canonical', 'meta']) - self.assertTrue(has_seo_data, f"No SEO data extracted: {seo_data}") + # example.com has a title, so we MUST get SEO data + self.assertIsNotNone(seo_data, "No SEO data extracted from file or stdout") + + # Verify we got some SEO data + has_seo_data = any(key in seo_data for key in ['title', 'description', 'og:title', 'canonical', 'meta']) + self.assertTrue(has_seo_data, f"No SEO data extracted: {seo_data}") except RuntimeError as e: if 'Chrome' in str(e) or 'CDP' in str(e): diff --git a/archivebox/plugins/ssl/tests/test_ssl.py b/archivebox/plugins/ssl/tests/test_ssl.py index e2bcbe52..cf131d9b 100644 --- a/archivebox/plugins/ssl/tests/test_ssl.py +++ b/archivebox/plugins/ssl/tests/test_ssl.py @@ -117,17 +117,20 @@ class TestSSLWithChrome(TestCase): except json.JSONDecodeError: continue - # Verify we got SSL data from HTTPS URL - if ssl_data: - # example.com uses HTTPS, should get certificate info - self.assertIn('protocol', ssl_data, f"SSL data missing protocol: {ssl_data}") - self.assertTrue( - ssl_data['protocol'].startswith('TLS') or ssl_data['protocol'].startswith('SSL'), - f"Unexpected protocol: {ssl_data['protocol']}" - ) - else: - # If no SSL data, at least verify hook ran without crashing - self.assertEqual(result.returncode, 0, f"Hook failed: {result.stderr}") + # Verify hook ran successfully + self.assertEqual(result.returncode, 0, f"Hook failed: {result.stderr}") + self.assertNotIn('Traceback', result.stderr) + self.assertNotIn('Error:', result.stderr) + + # example.com uses HTTPS, so we MUST get SSL certificate data + self.assertIsNotNone(ssl_data, "No SSL data extracted from HTTPS URL") + + # Verify we got certificate info + self.assertIn('protocol', ssl_data, f"SSL data missing protocol: {ssl_data}") + self.assertTrue( + ssl_data['protocol'].startswith('TLS') or ssl_data['protocol'].startswith('SSL'), + f"Unexpected protocol: {ssl_data['protocol']}" + ) except RuntimeError as e: if 'Chrome' in str(e) or 'CDP' in str(e):