From 24c51452ef65423efce97fa928ae0d5ea56b311d Mon Sep 17 00:00:00 2001
From: Claude
Date: Sat, 27 Dec 2025 00:08:47 +0000
Subject: [PATCH] Add comprehensive 0.7.x and 0.8.x migration tests

Added additional tests for migrating from 0.7.x to 0.9.x:
- test_list_works_after_migration
- test_new_schema_elements_created_after_migration
- test_snapshots_have_new_fields_after_migration
- test_add_works_after_migration
- test_archiveresult_status_preserved_after_migration
- test_version_works_after_migration
- test_help_works_after_migration

Added missing tests for 0.8.x migration:
- test_search_works_after_migration
- test_migration_preserves_snapshot_titles
- test_migration_preserves_foreign_keys
- test_add_works_after_migration
- test_version_works_after_migration

These tests exercise real migration paths by running an actual
archivebox init against simulated old databases to trigger the
Django migrations.
---
 archivebox/tests/tests_migrations.py | 184 +++++++++++++++++++++++++++
 1 file changed, 184 insertions(+)

diff --git a/archivebox/tests/tests_migrations.py b/archivebox/tests/tests_migrations.py
index b1991c60..26c26ad8 100644
--- a/archivebox/tests/tests_migrations.py
+++ b/archivebox/tests/tests_migrations.py
@@ -1294,6 +1294,118 @@ class TestMigrationFrom07x(unittest.TestCase):
                         for s in self.original_data['snapshots'])
         self.assertTrue(found_any, f"No migrated snapshots found in search: {output[:500]}")
 
+    def test_list_works_after_migration(self):
+        """List command should work and show migrated data."""
+        result = run_archivebox(self.work_dir, ['init'], timeout=120)
+        self.assertIn(result.returncode, [0, 1])
+
+        result = run_archivebox(self.work_dir, ['list'])
+        self.assertEqual(result.returncode, 0, f"List failed after migration: {result.stderr}")
+
+        # Should find at least some of the migrated URLs
+        output = result.stdout + result.stderr
+        found_any = any(s['url'][:30] in output or (s['title'] and s['title'] in output)
+                        for s in self.original_data['snapshots'])
+        self.assertTrue(found_any, f"No migrated snapshots found in list: {output[:500]}")
+
+    def test_new_schema_elements_created_after_migration(self):
+        """Migration should create new 0.9.x schema elements (crawls_crawl, etc.)."""
+        result = run_archivebox(self.work_dir, ['init'], timeout=120)
+        self.assertIn(result.returncode, [0, 1], f"Init crashed: {result.stderr}")
+
+        conn = sqlite3.connect(str(self.db_path))
+        cursor = conn.cursor()
+
+        # Check that new tables exist
+        cursor.execute("SELECT name FROM sqlite_master WHERE type='table'")
+        tables = {row[0] for row in cursor.fetchall()}
+        conn.close()
+
+        # 0.9.x should have crawls_crawl and crawls_seed tables
+        self.assertIn('crawls_crawl', tables, "crawls_crawl table not created during migration")
+        self.assertIn('crawls_seed', tables, "crawls_seed table not created during migration")
+
+    def test_snapshots_have_new_fields_after_migration(self):
+        """Migrated snapshots should have new 0.9.x fields (status, depth, etc.)."""
+        result = run_archivebox(self.work_dir, ['init'], timeout=120)
+        self.assertIn(result.returncode, [0, 1], f"Init crashed: {result.stderr}")
+
+        conn = sqlite3.connect(str(self.db_path))
+        cursor = conn.cursor()
+
+        # Check snapshot table has new columns
+        cursor.execute('PRAGMA table_info(core_snapshot)')
+        columns = {row[1] for row in cursor.fetchall()}
+        conn.close()
+
+        # 0.9.x snapshots should have status, depth, created_at, modified_at
+        required_new_columns = {'status', 'depth', 'created_at', 'modified_at'}
+        for col in required_new_columns:
+            self.assertIn(col, columns, f"Snapshot missing new column: {col}")
+
+    def test_add_works_after_migration(self):
+        """Adding new URLs should work after migration from 0.7.x."""
+        result = run_archivebox(self.work_dir, ['init'], timeout=120)
+        self.assertIn(result.returncode, [0, 1], f"Init crashed: {result.stderr}")
+
+        # Try to add a new URL after migration
+        result = run_archivebox(self.work_dir, ['add', 'https://example.com/new-page'], timeout=60)
+        self.assertIn(result.returncode, [0, 1], f"Add crashed after migration: {result.stderr}")
+
+        # Verify a Crawl was created for the new URL
+        conn = sqlite3.connect(str(self.db_path))
+        cursor = conn.cursor()
+        cursor.execute("SELECT COUNT(*) FROM crawls_crawl")
+        crawl_count = cursor.fetchone()[0]
+        conn.close()
+
+        self.assertGreaterEqual(crawl_count, 1, "No Crawl created when adding URL after migration")
+
+    def test_archiveresult_status_preserved_after_migration(self):
+        """Migration should preserve archive result status values."""
+        result = run_archivebox(self.work_dir, ['init'], timeout=120)
+        self.assertIn(result.returncode, [0, 1], f"Init crashed: {result.stderr}")
+
+        conn = sqlite3.connect(str(self.db_path))
+        cursor = conn.cursor()
+
+        # Get status counts
+        cursor.execute("SELECT status, COUNT(*) FROM core_archiveresult GROUP BY status")
+        status_counts = dict(cursor.fetchall())
+        conn.close()
+
+        # Original data has known status distribution: succeeded, failed, skipped
+        self.assertIn('succeeded', status_counts, "Should have succeeded results")
+        self.assertIn('failed', status_counts, "Should have failed results")
+        self.assertIn('skipped', status_counts, "Should have skipped results")
+
+    def test_version_works_after_migration(self):
+        """Version command should work after migration."""
+        result = run_archivebox(self.work_dir, ['init'], timeout=120)
+        self.assertIn(result.returncode, [0, 1])
+
+        result = run_archivebox(self.work_dir, ['version'])
+        # Exit code might be 1 if some binaries are missing, but should not crash
+        self.assertIn(result.returncode, [0, 1], f"Version crashed after migration: {result.stderr}")
+
+        # Should show version info
+        output = result.stdout + result.stderr
+        self.assertTrue('ArchiveBox' in output or 'version' in output.lower(),
+                        f"Version output missing expected content: {output[:500]}")
+
+    def test_help_works_after_migration(self):
+        """Help command should work after migration."""
+        result = run_archivebox(self.work_dir, ['init'], timeout=120)
+        self.assertIn(result.returncode, [0, 1])
+
+        result = run_archivebox(self.work_dir, ['help'])
+        self.assertEqual(result.returncode, 0, f"Help crashed after migration: {result.stderr}")
+
+        # Should show available commands
+        output = result.stdout + result.stderr
+        self.assertTrue('add' in output.lower() or 'status' in output.lower(),
+                        f"Help output missing expected commands: {output[:500]}")
+
 
 class TestMigrationFrom04x(unittest.TestCase):
     """Test migration from 0.4.x schema to latest.
@@ -1501,6 +1613,78 @@ class TestMigrationFrom08x(unittest.TestCase):
                         for s in self.original_data['snapshots'])
         self.assertTrue(found_any, f"No migrated snapshots found in list: {output[:500]}")
 
+    def test_search_works_after_migration(self):
+        """Search command should find migrated snapshots."""
+        result = run_archivebox(self.work_dir, ['init'], timeout=120)
+        self.assertIn(result.returncode, [0, 1])
+
+        result = run_archivebox(self.work_dir, ['search'])
+        self.assertEqual(result.returncode, 0, f"Search failed after migration: {result.stderr}")
+
+        # Should find at least some of the migrated URLs
+        output = result.stdout + result.stderr
+        found_any = any(s['url'][:30] in output or (s['title'] and s['title'] in output)
+                        for s in self.original_data['snapshots'])
+        self.assertTrue(found_any, f"No migrated snapshots found in search: {output[:500]}")
+
+    def test_migration_preserves_snapshot_titles(self):
+        """Migration should preserve all snapshot titles."""
+        expected_titles = {s['url']: s['title'] for s in self.original_data['snapshots']}
+
+        result = run_archivebox(self.work_dir, ['init'], timeout=120)
+        self.assertIn(result.returncode, [0, 1], f"Init crashed: {result.stderr}")
+
+        ok, msg = verify_snapshot_titles(self.db_path, expected_titles)
+        self.assertTrue(ok, msg)
+
+    def test_migration_preserves_foreign_keys(self):
+        """Migration should maintain foreign key relationships."""
+        result = run_archivebox(self.work_dir, ['init'], timeout=120)
+        self.assertIn(result.returncode, [0, 1], f"Init crashed: {result.stderr}")
+
+        ok, msg = verify_foreign_keys(self.db_path)
+        self.assertTrue(ok, msg)
+
+    def test_add_works_after_migration(self):
+        """Adding new URLs should work after migration from 0.8.x."""
+        result = run_archivebox(self.work_dir, ['init'], timeout=120)
+        self.assertIn(result.returncode, [0, 1], f"Init crashed: {result.stderr}")
+
+        # Count existing crawls
+        conn = sqlite3.connect(str(self.db_path))
+        cursor = conn.cursor()
+        cursor.execute("SELECT COUNT(*) FROM crawls_crawl")
+        initial_crawl_count = cursor.fetchone()[0]
+        conn.close()
+
+        # Try to add a new URL after migration
+        result = run_archivebox(self.work_dir, ['add', 'https://example.com/new-page'], timeout=60)
+        self.assertIn(result.returncode, [0, 1], f"Add crashed after migration: {result.stderr}")
+
+        # Verify a new Crawl was created
+        conn = sqlite3.connect(str(self.db_path))
+        cursor = conn.cursor()
+        cursor.execute("SELECT COUNT(*) FROM crawls_crawl")
+        new_crawl_count = cursor.fetchone()[0]
+        conn.close()
+
+        self.assertGreater(new_crawl_count, initial_crawl_count,
+                           "No new Crawl created when adding URL after migration")
+
+    def test_version_works_after_migration(self):
+        """Version command should work after migration."""
+        result = run_archivebox(self.work_dir, ['init'], timeout=120)
+        self.assertIn(result.returncode, [0, 1])
+
+        result = run_archivebox(self.work_dir, ['version'])
+        # Exit code might be 1 if some binaries are missing, but should not crash
+        self.assertIn(result.returncode, [0, 1], f"Version crashed after migration: {result.stderr}")
+
+        # Should show version info
+        output = result.stdout + result.stderr
+        self.assertTrue('ArchiveBox' in output or 'version' in output.lower(),
+                        f"Version output missing expected content: {output[:500]}")
+
 
 class TestMigrationDataIntegrity(unittest.TestCase):
     """Comprehensive data integrity tests for migrations."""