From 5f0cfe525190ce5e65f206dbd7292c34a1a49691 Mon Sep 17 00:00:00 2001 From: Nick Sweeting Date: Sun, 15 Mar 2026 18:46:45 -0700 Subject: [PATCH] add new persona tests --- archivebox/tests/test_persona_runtime.py | 146 +++++++++++++++++++++++ 1 file changed, 146 insertions(+) create mode 100644 archivebox/tests/test_persona_runtime.py diff --git a/archivebox/tests/test_persona_runtime.py b/archivebox/tests/test_persona_runtime.py new file mode 100644 index 00000000..09773d18 --- /dev/null +++ b/archivebox/tests/test_persona_runtime.py @@ -0,0 +1,146 @@ +#!/usr/bin/env python3 +"""Tests for per-crawl Persona runtime profile management.""" + +import json +import textwrap + +from .conftest import run_python_cwd + + +def test_persona_prepare_runtime_for_crawl_clones_and_cleans_profile(initialized_archive): + script = textwrap.dedent( + """ + import json + import os + from pathlib import Path + + os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'archivebox.core.settings') + import django + django.setup() + + from archivebox.crawls.models import Crawl + from archivebox.personas.models import Persona + + persona, _ = Persona.objects.get_or_create(name='Default') + persona.ensure_dirs() + + template_dir = Path(persona.CHROME_USER_DATA_DIR) + (template_dir / 'SingletonLock').write_text('locked') + (template_dir / 'chrome.log').write_text('noise') + (template_dir / 'Default' / 'GPUCache').mkdir(parents=True, exist_ok=True) + (template_dir / 'Default' / 'GPUCache' / 'blob').write_text('cached') + (template_dir / 'Default' / 'Preferences').write_text('{"ok": true}') + + crawl = Crawl.objects.create(urls='https://example.com', persona_id=persona.id) + overrides = persona.prepare_runtime_for_crawl( + crawl, + chrome_binary='/Applications/Chromium.app/Contents/MacOS/Chromium', + ) + + runtime_root = persona.runtime_root_for_crawl(crawl) + runtime_profile = Path(overrides['CHROME_USER_DATA_DIR']) + runtime_downloads = Path(overrides['CHROME_DOWNLOADS_DIR']) + + print(json.dumps({ + 'runtime_root_exists': runtime_root.exists(), + 'runtime_profile_exists': runtime_profile.exists(), + 'runtime_downloads_exists': runtime_downloads.exists(), + 'preferences_copied': (runtime_profile / 'Default' / 'Preferences').exists(), + 'singleton_removed': not (runtime_profile / 'SingletonLock').exists(), + 'cache_removed': not (runtime_profile / 'Default' / 'GPUCache').exists(), + 'log_removed': not (runtime_profile / 'chrome.log').exists(), + 'persona_name_recorded': (runtime_root / 'persona_name.txt').read_text().strip(), + 'template_dir_recorded': (runtime_root / 'template_dir.txt').read_text().strip(), + 'chrome_binary_recorded': (runtime_root / 'chrome_binary.txt').read_text().strip(), + })) + """ + ) + + stdout, stderr, code = run_python_cwd(script, cwd=initialized_archive, timeout=60) + assert code == 0, stderr + + payload = json.loads(stdout.strip().splitlines()[-1]) + assert payload['runtime_root_exists'] is True + assert payload['runtime_profile_exists'] is True + assert payload['runtime_downloads_exists'] is True + assert payload['preferences_copied'] is True + assert payload['singleton_removed'] is True + assert payload['cache_removed'] is True + assert payload['log_removed'] is True + assert payload['persona_name_recorded'] == 'Default' + assert payload['template_dir_recorded'].endswith('/personas/Default/chrome_user_data') + assert payload['chrome_binary_recorded'] == '/Applications/Chromium.app/Contents/MacOS/Chromium' + + +def test_persona_cleanup_runtime_for_crawl_removes_only_runtime_copy(initialized_archive): + script = textwrap.dedent( + """ + import json + import os + from pathlib import Path + + os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'archivebox.core.settings') + import django + django.setup() + + from archivebox.crawls.models import Crawl + from archivebox.personas.models import Persona + + persona, _ = Persona.objects.get_or_create(name='Default') + persona.ensure_dirs() + template_dir = Path(persona.CHROME_USER_DATA_DIR) + (template_dir / 'Default').mkdir(parents=True, exist_ok=True) + (template_dir / 'Default' / 'Preferences').write_text('{"kept": true}') + + crawl = Crawl.objects.create(urls='https://example.com', persona_id=persona.id) + persona.prepare_runtime_for_crawl(crawl) + runtime_root = persona.runtime_root_for_crawl(crawl) + + persona.cleanup_runtime_for_crawl(crawl) + + print(json.dumps({ + 'runtime_removed': not runtime_root.exists(), + 'template_still_exists': (template_dir / 'Default' / 'Preferences').exists(), + })) + """ + ) + + stdout, stderr, code = run_python_cwd(script, cwd=initialized_archive, timeout=60) + assert code == 0, stderr + + payload = json.loads(stdout.strip().splitlines()[-1]) + assert payload['runtime_removed'] is True + assert payload['template_still_exists'] is True + + +def test_crawl_resolve_persona_raises_for_missing_persona_id(initialized_archive): + script = textwrap.dedent( + """ + import json + import os + from uuid import uuid4 + + os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'archivebox.core.settings') + import django + django.setup() + + from archivebox.crawls.models import Crawl + from archivebox.personas.models import Persona + + crawl = Crawl.objects.create(urls='https://example.com', persona_id=uuid4()) + + try: + crawl.resolve_persona() + except Persona.DoesNotExist as err: + print(json.dumps({'raised': True, 'message': str(err)})) + else: + raise SystemExit('resolve_persona unexpectedly succeeded') + """ + ) + + stdout, stderr, code = run_python_cwd(script, cwd=initialized_archive, timeout=60) + assert code == 0, stderr + + payload = json.loads(stdout.strip().splitlines()[-1]) + assert payload['raised'] is True + assert 'references missing Persona' in payload['message']