mirror of
https://github.com/ArchiveBox/ArchiveBox.git
synced 2026-04-05 23:37:58 +10:00
wip
This commit is contained in:
@@ -1,3 +1,319 @@
|
||||
#from django.test import TestCase
|
||||
"""Tests for the core views, especially AddView."""
|
||||
|
||||
# Create your tests here.
|
||||
import os
|
||||
import django
|
||||
|
||||
# Set up Django before importing any Django-dependent modules
|
||||
os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'archivebox.settings')
|
||||
django.setup()
|
||||
|
||||
from django.test import TestCase, Client
|
||||
from django.contrib.auth.models import User
|
||||
from django.urls import reverse
|
||||
|
||||
from archivebox.crawls.models import Crawl, CrawlSchedule
|
||||
from archivebox.core.models import Tag
|
||||
|
||||
|
||||
class AddViewTests(TestCase):
|
||||
"""Tests for the AddView (crawl creation form)."""
|
||||
|
||||
def setUp(self):
|
||||
"""Set up test user and client."""
|
||||
self.client = Client()
|
||||
self.user = User.objects.create_user(
|
||||
username='testuser',
|
||||
password='testpass123',
|
||||
email='test@example.com'
|
||||
)
|
||||
self.client.login(username='testuser', password='testpass123')
|
||||
self.add_url = reverse('add')
|
||||
|
||||
def test_add_view_get_requires_auth(self):
|
||||
"""Test that GET /add requires authentication."""
|
||||
self.client.logout()
|
||||
response = self.client.get(self.add_url)
|
||||
# Should redirect to login or show 403/404
|
||||
self.assertIn(response.status_code, [302, 403, 404])
|
||||
|
||||
def test_add_view_get_shows_form(self):
|
||||
"""Test that GET /add shows the form with all fields."""
|
||||
response = self.client.get(self.add_url)
|
||||
self.assertEqual(response.status_code, 200)
|
||||
|
||||
# Check that form fields are present
|
||||
self.assertContains(response, 'name="url"')
|
||||
self.assertContains(response, 'name="tag"')
|
||||
self.assertContains(response, 'name="depth"')
|
||||
self.assertContains(response, 'name="notes"')
|
||||
self.assertContains(response, 'name="schedule"')
|
||||
self.assertContains(response, 'name="persona"')
|
||||
self.assertContains(response, 'name="overwrite"')
|
||||
self.assertContains(response, 'name="update"')
|
||||
self.assertContains(response, 'name="index_only"')
|
||||
|
||||
# Check for plugin groups
|
||||
self.assertContains(response, 'name="chrome_plugins"')
|
||||
self.assertContains(response, 'name="archiving_plugins"')
|
||||
self.assertContains(response, 'name="parsing_plugins"')
|
||||
|
||||
def test_add_view_shows_tag_autocomplete(self):
|
||||
"""Test that tag autocomplete datalist is rendered."""
|
||||
# Create some tags
|
||||
Tag.objects.create(name='test-tag-1')
|
||||
Tag.objects.create(name='test-tag-2')
|
||||
|
||||
response = self.client.get(self.add_url)
|
||||
self.assertEqual(response.status_code, 200)
|
||||
|
||||
# Check for datalist with tags
|
||||
self.assertContains(response, 'id="tag-datalist"')
|
||||
self.assertContains(response, 'test-tag-1')
|
||||
self.assertContains(response, 'test-tag-2')
|
||||
|
||||
def test_add_view_shows_plugin_presets(self):
|
||||
"""Test that plugin preset buttons are rendered."""
|
||||
response = self.client.get(self.add_url)
|
||||
self.assertEqual(response.status_code, 200)
|
||||
|
||||
self.assertContains(response, 'Quick Archive')
|
||||
self.assertContains(response, 'Full Chrome')
|
||||
self.assertContains(response, 'Text Only')
|
||||
self.assertContains(response, 'Select All')
|
||||
self.assertContains(response, 'Clear All')
|
||||
|
||||
def test_add_view_shows_links_to_resources(self):
|
||||
"""Test that helpful links are present."""
|
||||
response = self.client.get(self.add_url)
|
||||
self.assertEqual(response.status_code, 200)
|
||||
|
||||
# Link to plugin documentation
|
||||
self.assertContains(response, '/admin/environment/plugins/')
|
||||
|
||||
# Link to create new persona
|
||||
self.assertContains(response, '/admin/personas/persona/add/')
|
||||
|
||||
def test_add_basic_crawl_without_schedule(self):
|
||||
"""Test creating a basic crawl without a schedule."""
|
||||
response = self.client.post(self.add_url, {
|
||||
'url': 'https://example.com\nhttps://example.org',
|
||||
'tag': 'test-tag',
|
||||
'depth': '0',
|
||||
'notes': 'Test crawl notes',
|
||||
})
|
||||
|
||||
# Should redirect to crawl admin page
|
||||
self.assertEqual(response.status_code, 302)
|
||||
|
||||
# Check that crawl was created
|
||||
self.assertEqual(Crawl.objects.count(), 1)
|
||||
crawl = Crawl.objects.first()
|
||||
|
||||
self.assertIn('https://example.com', crawl.urls)
|
||||
self.assertIn('https://example.org', crawl.urls)
|
||||
self.assertEqual(crawl.tags_str, 'test-tag')
|
||||
self.assertEqual(crawl.max_depth, 0)
|
||||
self.assertEqual(crawl.notes, 'Test crawl notes')
|
||||
self.assertEqual(crawl.created_by, self.user)
|
||||
|
||||
# No schedule should be created
|
||||
self.assertIsNone(crawl.schedule)
|
||||
self.assertEqual(CrawlSchedule.objects.count(), 0)
|
||||
|
||||
def test_add_crawl_with_schedule(self):
|
||||
"""Test creating a crawl with a repeat schedule."""
|
||||
response = self.client.post(self.add_url, {
|
||||
'url': 'https://example.com',
|
||||
'tag': 'scheduled',
|
||||
'depth': '1',
|
||||
'notes': 'Daily crawl',
|
||||
'schedule': 'daily',
|
||||
})
|
||||
|
||||
self.assertEqual(response.status_code, 302)
|
||||
|
||||
# Check that crawl and schedule were created
|
||||
self.assertEqual(Crawl.objects.count(), 1)
|
||||
self.assertEqual(CrawlSchedule.objects.count(), 1)
|
||||
|
||||
crawl = Crawl.objects.first()
|
||||
schedule = CrawlSchedule.objects.first()
|
||||
|
||||
self.assertEqual(crawl.schedule, schedule)
|
||||
self.assertEqual(schedule.template, crawl)
|
||||
self.assertEqual(schedule.schedule, 'daily')
|
||||
self.assertTrue(schedule.is_enabled)
|
||||
self.assertEqual(schedule.created_by, self.user)
|
||||
|
||||
def test_add_crawl_with_cron_schedule(self):
|
||||
"""Test creating a crawl with a cron format schedule."""
|
||||
response = self.client.post(self.add_url, {
|
||||
'url': 'https://example.com',
|
||||
'depth': '0',
|
||||
'schedule': '0 */6 * * *', # Every 6 hours
|
||||
})
|
||||
|
||||
self.assertEqual(response.status_code, 302)
|
||||
|
||||
schedule = CrawlSchedule.objects.first()
|
||||
self.assertEqual(schedule.schedule, '0 */6 * * *')
|
||||
|
||||
def test_add_crawl_with_plugins(self):
|
||||
"""Test creating a crawl with specific plugins selected."""
|
||||
response = self.client.post(self.add_url, {
|
||||
'url': 'https://example.com',
|
||||
'depth': '0',
|
||||
'chrome_plugins': ['screenshot', 'dom'],
|
||||
'archiving_plugins': ['wget'],
|
||||
})
|
||||
|
||||
self.assertEqual(response.status_code, 302)
|
||||
|
||||
crawl = Crawl.objects.first()
|
||||
plugins = crawl.config.get('PLUGINS', '')
|
||||
|
||||
# Should contain the selected plugins
|
||||
self.assertIn('screenshot', plugins)
|
||||
self.assertIn('dom', plugins)
|
||||
self.assertIn('wget', plugins)
|
||||
|
||||
def test_add_crawl_with_depth_range(self):
|
||||
"""Test creating crawls with different depth values (0-4)."""
|
||||
for depth in range(5):
|
||||
response = self.client.post(self.add_url, {
|
||||
'url': f'https://example{depth}.com',
|
||||
'depth': str(depth),
|
||||
})
|
||||
|
||||
self.assertEqual(response.status_code, 302)
|
||||
|
||||
self.assertEqual(Crawl.objects.count(), 5)
|
||||
|
||||
for i, crawl in enumerate(Crawl.objects.order_by('created_at')):
|
||||
self.assertEqual(crawl.max_depth, i)
|
||||
|
||||
def test_add_crawl_with_advanced_options(self):
|
||||
"""Test creating a crawl with advanced options."""
|
||||
response = self.client.post(self.add_url, {
|
||||
'url': 'https://example.com',
|
||||
'depth': '0',
|
||||
'persona': 'CustomPersona',
|
||||
'overwrite': True,
|
||||
'update': True,
|
||||
'index_only': True,
|
||||
})
|
||||
|
||||
self.assertEqual(response.status_code, 302)
|
||||
|
||||
crawl = Crawl.objects.first()
|
||||
config = crawl.config
|
||||
|
||||
self.assertEqual(config.get('DEFAULT_PERSONA'), 'CustomPersona')
|
||||
self.assertEqual(config.get('OVERWRITE'), True)
|
||||
self.assertEqual(config.get('ONLY_NEW'), False) # opposite of update
|
||||
self.assertEqual(config.get('INDEX_ONLY'), True)
|
||||
|
||||
def test_add_crawl_with_custom_config(self):
|
||||
"""Test creating a crawl with custom config overrides."""
|
||||
# Note: Django test client can't easily POST the KeyValueWidget format,
|
||||
# so this test would need to use the form directly or mock the cleaned_data
|
||||
# For now, we'll skip this test or mark it as TODO
|
||||
pass
|
||||
|
||||
def test_add_empty_urls_fails(self):
|
||||
"""Test that submitting without URLs fails validation."""
|
||||
response = self.client.post(self.add_url, {
|
||||
'url': '',
|
||||
'depth': '0',
|
||||
})
|
||||
|
||||
# Should show form again with errors, not redirect
|
||||
self.assertEqual(response.status_code, 200)
|
||||
self.assertFormError(response, 'form', 'url', 'This field is required.')
|
||||
|
||||
def test_add_invalid_urls_fails(self):
|
||||
"""Test that invalid URLs fail validation."""
|
||||
response = self.client.post(self.add_url, {
|
||||
'url': 'not-a-url',
|
||||
'depth': '0',
|
||||
})
|
||||
|
||||
# Should show form again with errors
|
||||
self.assertEqual(response.status_code, 200)
|
||||
# Check for validation error (URL regex should fail)
|
||||
self.assertContains(response, 'error')
|
||||
|
||||
def test_add_success_message_without_schedule(self):
|
||||
"""Test that success message is shown without schedule link."""
|
||||
response = self.client.post(self.add_url, {
|
||||
'url': 'https://example.com\nhttps://example.org',
|
||||
'depth': '0',
|
||||
}, follow=True)
|
||||
|
||||
# Check success message mentions crawl creation
|
||||
messages = list(response.context['messages'])
|
||||
self.assertEqual(len(messages), 1)
|
||||
message_text = str(messages[0])
|
||||
|
||||
self.assertIn('Created crawl with 2 starting URL', message_text)
|
||||
self.assertIn('View Crawl', message_text)
|
||||
self.assertNotIn('scheduled to repeat', message_text)
|
||||
|
||||
def test_add_success_message_with_schedule(self):
|
||||
"""Test that success message includes schedule link."""
|
||||
response = self.client.post(self.add_url, {
|
||||
'url': 'https://example.com',
|
||||
'depth': '0',
|
||||
'schedule': 'weekly',
|
||||
}, follow=True)
|
||||
|
||||
# Check success message mentions schedule
|
||||
messages = list(response.context['messages'])
|
||||
self.assertEqual(len(messages), 1)
|
||||
message_text = str(messages[0])
|
||||
|
||||
self.assertIn('Created crawl', message_text)
|
||||
self.assertIn('scheduled to repeat weekly', message_text)
|
||||
self.assertIn('View Crawl', message_text)
|
||||
|
||||
def test_add_crawl_creates_source_file(self):
|
||||
"""Test that crawl creation saves URLs to sources file."""
|
||||
response = self.client.post(self.add_url, {
|
||||
'url': 'https://example.com',
|
||||
'depth': '0',
|
||||
})
|
||||
|
||||
self.assertEqual(response.status_code, 302)
|
||||
|
||||
# Check that source file was created in sources/ directory
|
||||
from archivebox.config import CONSTANTS
|
||||
sources_dir = CONSTANTS.SOURCES_DIR
|
||||
|
||||
# Should have created a source file
|
||||
source_files = list(sources_dir.glob('*__web_ui_add_by_user_*.txt'))
|
||||
self.assertGreater(len(source_files), 0)
|
||||
|
||||
def test_multiple_tags_are_saved(self):
|
||||
"""Test that multiple comma-separated tags are saved."""
|
||||
response = self.client.post(self.add_url, {
|
||||
'url': 'https://example.com',
|
||||
'depth': '0',
|
||||
'tag': 'tag1,tag2,tag3',
|
||||
})
|
||||
|
||||
self.assertEqual(response.status_code, 302)
|
||||
|
||||
crawl = Crawl.objects.first()
|
||||
self.assertEqual(crawl.tags_str, 'tag1,tag2,tag3')
|
||||
|
||||
def test_crawl_redirects_to_admin_change_page(self):
|
||||
"""Test that successful submission redirects to crawl admin page."""
|
||||
response = self.client.post(self.add_url, {
|
||||
'url': 'https://example.com',
|
||||
'depth': '0',
|
||||
})
|
||||
|
||||
crawl = Crawl.objects.first()
|
||||
expected_redirect = f'/admin/crawls/crawl/{crawl.id}/change/'
|
||||
|
||||
self.assertRedirects(response, expected_redirect, fetch_redirect_response=False)
|
||||
|
||||
Reference in New Issue
Block a user