WIP: checkpoint working tree before rebasing onto dev

This commit is contained in:
Nick Sweeting
2026-03-22 20:23:45 -07:00
parent a6548df8d0
commit f400a2cd67
87 changed files with 12607 additions and 1808 deletions

View File

@@ -0,0 +1,195 @@
import re
import pytest
from django.contrib.auth import get_user_model
from django.urls import reverse
from archivebox.config.common import SERVER_CONFIG, SEARCH_BACKEND_CONFIG
from archivebox.core.models import Tag
from archivebox.crawls.models import Crawl
pytestmark = pytest.mark.django_db
User = get_user_model()
WEB_HOST = 'web.archivebox.localhost:8000'
ADMIN_HOST = 'admin.archivebox.localhost:8000'
@pytest.fixture
def admin_user(db):
    """Superuser account used by the authenticated /add/ view tests."""
    credentials = {
        'username': 'addviewadmin',
        'email': 'addviewadmin@test.com',
        'password': 'testpassword',
    }
    return User.objects.create_superuser(**credentials)
def test_add_view_renders_tag_editor_and_url_filter_fields(client, admin_user, monkeypatch):
    """The /add/ page should render the tag editor, URL filter inputs, and URL-detection UI."""
    monkeypatch.setattr(SERVER_CONFIG, 'PUBLIC_ADD_VIEW', True)
    response = client.get(reverse('add'), HTTP_HOST=WEB_HOST)
    body = response.content.decode()
    assert response.status_code == 200
    # Tag editor widget and crawl-scoping filter inputs
    assert 'tag-editor-container' in body
    assert 'name="url_filters_allowlist"' in body
    assert 'name="url_filters_denylist"' in body
    assert 'Same domain only' in body
    assert 'name="persona"' in body
    # Legacy overwrite/retry options must no longer be rendered
    assert 'Overwrite existing snapshots' not in body
    assert 'Update/retry previously failed URLs' not in body
    # BUG FIX: the apostrophe in "don't" terminated the original single-quoted
    # literal (SyntaxError); use double quotes so the string parses.
    assert "Index only dry run (add crawl but don't archive yet)" in body
    assert 'name="notes"' in body
    assert '<input type="text" name="notes"' in body
    # Persona field must appear before the Crawl Plugins section
    assert body.index('name="persona"') < body.index('<h3>Crawl Plugins</h3>')
    # URL detection/highlighting UI elements
    assert 'data-url-regex=' in body
    assert 'id="url-highlight-layer"' in body
    assert 'id="detected-urls-list"' in body
    assert 'detected-url-toggle-btn' in body
def test_add_view_checks_configured_search_backend_by_default(client, monkeypatch):
    """The configured search backend's plugin checkbox should be pre-checked on /add/."""
    monkeypatch.setattr(SERVER_CONFIG, 'PUBLIC_ADD_VIEW', True)
    monkeypatch.setattr(SEARCH_BACKEND_CONFIG, 'SEARCH_BACKEND_ENGINE', 'sqlite')
    response = client.get(reverse('add'), HTTP_HOST=WEB_HOST)
    body = response.content.decode()
    assert response.status_code == 200
    # The checkbox for the active backend must render with the `checked` attribute
    assert re.search(
        r'<input type="checkbox" name="search_plugins" value="search_backend_sqlite"[^>]* checked\b',
        body,
    )
    # Inline JS should pin the required search plugin to the same backend name
    assert "const requiredSearchPlugin = 'search_backend_sqlite';" in body
def test_add_view_creates_crawl_with_tag_and_url_filter_overrides(client, admin_user, monkeypatch):
    """Posting /add/ with tags and URL filters should persist them on the new Crawl."""
    monkeypatch.setattr(SERVER_CONFIG, 'PUBLIC_ADD_VIEW', True)
    client.force_login(admin_user)
    form_data = {
        'url': 'https://example.com\nhttps://cdn.example.com/asset.js',
        'tag': 'alpha,beta',
        'depth': '1',
        'url_filters_allowlist': 'example.com\n*.example.com',
        'url_filters_denylist': 'cdn.example.com',
        'notes': 'Created from /add/',
        'schedule': '',
        'persona': 'Default',
        'index_only': '',
        'config': '{}',
    }
    response = client.post(reverse('add'), data=form_data, HTTP_HOST=WEB_HOST)
    assert response.status_code == 302
    crawl = Crawl.objects.order_by('-created_at').first()
    assert crawl is not None
    # Tags/notes are stored directly; filters and persona land in crawl.config
    assert crawl.tags_str == 'alpha,beta'
    assert crawl.notes == 'Created from /add/'
    assert crawl.config.get('DEFAULT_PERSONA') == 'Default'
    assert crawl.config['URL_ALLOWLIST'] == 'example.com\n*.example.com'
    assert crawl.config['URL_DENYLIST'] == 'cdn.example.com'
    # Legacy flags must not leak into config when the form fields are unset
    assert 'OVERWRITE' not in crawl.config
    assert 'ONLY_NEW' not in crawl.config
def test_add_view_extracts_urls_from_mixed_text_input(client, admin_user, monkeypatch):
    """URLs embedded in CSV, markdown, JSON, and free text should all be extracted, in order."""
    monkeypatch.setattr(SERVER_CONFIG, 'PUBLIC_ADD_VIEW', True)
    client.force_login(admin_user)
    response = client.post(
        reverse('add'),
        data={
            # Mixed formats: comma-separated, markdown links (incl. parens in path),
            # JSON arrays, and CSV cells
            'url': '\n'.join([
                'https://sweeting.me,https://google.com',
                'Notes: [ArchiveBox](https://github.com/ArchiveBox/ArchiveBox), https://news.ycombinator.com',
                '[Wiki](https://en.wikipedia.org/wiki/Classification_(machine_learning))',
                '{"items":["https://example.com/three"]}',
                'csv,https://example.com/four',
            ]),
            'tag': '',
            'depth': '0',
            'url_filters_allowlist': '',
            'url_filters_denylist': '',
            'notes': '',
            'schedule': '',
            'persona': 'Default',
            'index_only': '',
            'config': '{}',
        },
        HTTP_HOST=WEB_HOST,
    )
    assert response.status_code == 302
    crawl = Crawl.objects.order_by('-created_at').first()
    assert crawl is not None
    # Extraction preserves input order and deduplicates nothing here
    assert crawl.urls == '\n'.join([
        'https://sweeting.me',
        'https://google.com',
        'https://github.com/ArchiveBox/ArchiveBox',
        'https://news.ycombinator.com',
        'https://en.wikipedia.org/wiki/Classification_(machine_learning)',
        'https://example.com/three',
        'https://example.com/four',
    ])
def test_add_view_exposes_api_token_for_tag_widget_autocomplete(client, admin_user, monkeypatch):
    """Logged-in /add/ page should embed an API key for the tag autocomplete widget."""
    monkeypatch.setattr(SERVER_CONFIG, 'PUBLIC_ADD_VIEW', True)
    client.force_login(admin_user)
    page = client.get(reverse('add'), HTTP_HOST=WEB_HOST)
    assert page.status_code == 200
    assert b'window.ARCHIVEBOX_API_KEY' in page.content
def test_tags_autocomplete_requires_auth_when_public_snapshots_list_disabled(client, settings):
    """Anonymous autocomplete requests are rejected when public listings are off."""
    settings.PUBLIC_SNAPSHOTS_LIST = False
    settings.PUBLIC_INDEX = False
    Tag.objects.create(name='archive')
    endpoint = reverse('api-1:tags_autocomplete')
    resp = client.get(endpoint, {'q': 'a'}, HTTP_HOST=ADMIN_HOST)
    assert resp.status_code == 401
def test_tags_autocomplete_allows_public_access_when_public_snapshots_list_enabled(client, settings):
    """Anonymous autocomplete is allowed when PUBLIC_SNAPSHOTS_LIST is on."""
    settings.PUBLIC_SNAPSHOTS_LIST = True
    settings.PUBLIC_INDEX = False
    Tag.objects.create(name='archive')
    endpoint = reverse('api-1:tags_autocomplete')
    resp = client.get(endpoint, {'q': 'a'}, HTTP_HOST=ADMIN_HOST)
    assert resp.status_code == 200
    assert resp.json()['tags'][0]['name'] == 'archive'
def test_tags_autocomplete_allows_authenticated_user_when_public_snapshots_list_disabled(client, admin_user, settings):
    """A logged-in user can use autocomplete even with public listings disabled."""
    settings.PUBLIC_SNAPSHOTS_LIST = False
    settings.PUBLIC_INDEX = False
    Tag.objects.create(name='archive')
    client.force_login(admin_user)
    endpoint = reverse('api-1:tags_autocomplete')
    resp = client.get(endpoint, {'q': 'a'}, HTTP_HOST=ADMIN_HOST)
    assert resp.status_code == 200
    assert resp.json()['tags'][0]['name'] == 'archive'

View File

@@ -0,0 +1,151 @@
from archivebox.base_models.admin import KeyValueWidget
def test_key_value_widget_renders_enum_autocomplete_metadata(monkeypatch):
    """KeyValueWidget should embed enum options and per-field JS helpers for known keys."""
    monkeypatch.setattr(
        KeyValueWidget,
        '_get_config_options',
        lambda self: {
            'CHROME_WAIT_FOR': {
                'plugin': 'chrome',
                'type': 'string',
                'default': 'networkidle2',
                'description': 'Page load completion condition',
                'enum': ['domcontentloaded', 'load', 'networkidle0', 'networkidle2'],
            },
        },
    )
    html = str(
        KeyValueWidget().render(
            'config',
            {'CHROME_WAIT_FOR': 'load'},
            attrs={'id': 'id_config'},
        )
    )
    # Enum list is serialized into the widget's metadata JSON
    assert '"enum": ["domcontentloaded", "load", "networkidle0", "networkidle2"]' in html
    assert 'class="kv-value-options"' in html
    assert 'class="kv-help"' in html
    # JS helper functions are namespaced by the field id ('id_config')
    assert 'configureValueInput_id_config' in html
    assert 'describeMeta_id_config' in html
    assert 'validateValueAgainstMeta_id_config' in html
def test_key_value_widget_renders_numeric_and_pattern_constraints(monkeypatch):
    """Numeric min/max and regex pattern constraints should be serialized into the widget."""
    monkeypatch.setattr(
        KeyValueWidget,
        '_get_config_options',
        lambda self: {
            'TIMEOUT': {
                'plugin': 'base',
                'type': 'integer',
                'default': 60,
                'description': 'Timeout in seconds',
                'minimum': 5,
                'maximum': 120,
            },
            'CHROME_RESOLUTION': {
                'plugin': 'chrome',
                'type': 'string',
                'default': '1440,2000',
                'description': 'Viewport resolution',
                'pattern': '^\\d+,\\d+$',
            },
        },
    )
    html = str(KeyValueWidget().render('config', {}, attrs={'id': 'id_config'}))
    assert '"minimum": 5' in html
    assert '"maximum": 120' in html
    # Backslashes are escaped once more by JSON serialization in the rendered HTML
    assert '"pattern": "^\\\\d+,\\\\d+$"' in html
    assert 'Expected: ' in html
    assert 'Example: ' in html
    assert 'setValueValidationState_id_config' in html
    assert 'coerceValueForStorage_id_config' in html
def test_key_value_widget_accepts_common_boolean_spellings(monkeypatch):
    """Boolean config keys should offer True/False options with tolerant JS parsing."""
    monkeypatch.setattr(
        KeyValueWidget,
        '_get_config_options',
        lambda self: {
            'DEBUG': {
                'plugin': 'base',
                'type': 'boolean',
                'default': False,
                'description': 'Enable debug mode',
            },
        },
    )
    html = str(KeyValueWidget().render('config', {'DEBUG': 'True'}, attrs={'id': 'id_config'}))
    assert "enumValues = ['True', 'False']" in html
    # Widget JS normalizes case and accepts '1'/'0' spellings as booleans
    assert "raw.toLowerCase()" in html
    assert "lowered === 'true' || raw === '1'" in html
    assert "lowered === 'false' || raw === '0'" in html
def test_key_value_widget_shows_array_and_object_examples_and_binary_rules(monkeypatch):
    """Array/object keys get JSON examples; *_BINARY keys get path validation rules."""
    monkeypatch.setattr(
        KeyValueWidget,
        '_get_config_options',
        lambda self: {
            'WGET_ARGS_EXTRA': {
                'plugin': 'wget',
                'type': 'array',
                'default': [],
                'description': 'Extra arguments to append to wget command',
            },
            'SAVE_ALLOWLIST': {
                'plugin': 'base',
                'type': 'object',
                'default': {},
                'description': 'Regex allowlist mapped to enabled methods',
            },
            'WGET_BINARY': {
                'plugin': 'wget',
                'type': 'string',
                'default': 'wget',
                'description': 'Path to wget binary',
            },
        },
    )
    html = str(KeyValueWidget().render('config', {}, attrs={'id': 'id_config'}))
    # Type-appropriate inline examples for array/object/binary values
    assert 'Example: ["--extra-arg"]' in html
    assert 'Example: {"^https://example\\\\.com": ["wget"]}' in html
    assert 'Example: wget or /usr/bin/wget' in html
    # Binary keys are validated via a dedicated JS helper keyed by name suffix
    assert 'validateBinaryValue_id_config' in html
    assert "meta.key.endsWith('_BINARY')" in html
    assert "Binary paths cannot contain quotes" in html
def test_key_value_widget_falls_back_to_binary_validation_for_unknown_binary_keys(monkeypatch):
    """Keys ending in _BINARY but absent from config options still get binary validation."""
    monkeypatch.setattr(
        KeyValueWidget,
        '_get_config_options',
        lambda self: {
            'CHROME_BINARY': {
                'plugin': 'base',
                'type': 'string',
                'default': '',
                'description': 'Resolved Chromium/Chrome binary path shared across plugins',
            },
        },
    )
    # NODE_BINARY is not in the mocked options — the widget must still treat it
    # as a binary path via the endsWith('_BINARY') fallback.
    html = str(
        KeyValueWidget().render(
            'config',
            {'NODE_BINARY': '/opt/homebrew/bin/node'},
            attrs={'id': 'id_config'},
        )
    )
    assert 'function getMetaForKey_id_config' in html
    assert "if (key.endsWith('_BINARY'))" in html
    assert 'Path to binary executable' in html

View File

@@ -0,0 +1,127 @@
import pytest
from django.contrib.admin.sites import AdminSite
from uuid import uuid4
pytestmark = pytest.mark.django_db
def _create_snapshot():
    """Create a minimal STARTED Snapshot (plus its parent Crawl) for admin tests."""
    from archivebox.base_models.models import get_or_create_system_user_pk
    from archivebox.crawls.models import Crawl
    from archivebox.core.models import Snapshot
    owner_pk = get_or_create_system_user_pk()
    parent_crawl = Crawl.objects.create(
        urls="https://example.com",
        created_by_id=owner_pk,
    )
    snapshot = Snapshot.objects.create(
        url="https://example.com",
        crawl=parent_crawl,
        status=Snapshot.StatusChoices.STARTED,
    )
    return snapshot
def _create_machine():
    """Create a throwaway Machine row with unique guid/hw_uuid per call."""
    from archivebox.machine.models import Machine
    fields = dict(
        guid=f'test-guid-{uuid4()}',
        hostname='test-host',
        hw_in_docker=False,
        hw_in_vm=False,
        hw_manufacturer='Test',
        hw_product='Test Product',
        hw_uuid=f'test-hw-{uuid4()}',
        os_arch='arm64',
        os_family='darwin',
        os_platform='macOS',
        os_release='14.0',
        os_kernel='Darwin',
        stats={},
        config={},
    )
    return Machine.objects.create(**fields)
def _create_iface(machine):
    """Attach a fixed-value NetworkInterface row to *machine* for link tests."""
    from archivebox.machine.models import NetworkInterface
    fields = dict(
        machine=machine,
        mac_address='00:11:22:33:44:66',
        ip_public='203.0.113.11',
        ip_local='10.0.0.11',
        dns_server='1.1.1.1',
        hostname='test-host',
        iface='en0',
        isp='Test ISP',
        city='Test City',
        region='Test Region',
        country='Test Country',
    )
    return NetworkInterface.objects.create(**fields)
def test_archiveresult_admin_links_plugin_and_process():
    """ArchiveResult admin columns should link to the plugin page and the Process row."""
    from archivebox.core.admin_archiveresults import ArchiveResultAdmin
    from archivebox.core.models import ArchiveResult
    from archivebox.machine.models import Process
    snapshot = _create_snapshot()
    iface = _create_iface(_create_machine())
    process = Process.objects.create(
        machine=iface.machine,
        iface=iface,
        process_type=Process.TypeChoices.HOOK,
        pwd=str(snapshot.output_dir / 'wget'),
        cmd=['/tmp/on_Snapshot__06_wget.finite.bg.py', '--url=https://example.com'],
        status=Process.StatusChoices.EXITED,
    )
    result = ArchiveResult.objects.create(
        snapshot=snapshot,
        plugin='wget',
        hook_name='on_Snapshot__06_wget.finite.bg.py',
        process=process,
        status=ArchiveResult.StatusChoices.SUCCEEDED,
    )
    # Render the two custom list-display callables directly off the ModelAdmin
    admin = ArchiveResultAdmin(ArchiveResult, AdminSite())
    plugin_html = str(admin.plugin_with_icon(result))
    process_html = str(admin.process_link(result))
    assert '/admin/environment/plugins/builtin.wget/' in plugin_html
    assert f'/admin/machine/process/{process.id}/change' in process_html
def test_process_admin_links_binary_and_iface():
    """Process admin columns should link to the Binary and NetworkInterface rows."""
    from archivebox.machine.admin import ProcessAdmin
    from archivebox.machine.models import Binary, Process
    machine = _create_machine()
    iface = _create_iface(machine)
    binary = Binary.objects.create(
        machine=machine,
        name='wget',
        abspath='/usr/local/bin/wget',
        version='1.21.2',
        binprovider='env',
        binproviders='env',
        status=Binary.StatusChoices.INSTALLED,
    )
    process = Process.objects.create(
        machine=machine,
        iface=iface,
        binary=binary,
        process_type=Process.TypeChoices.HOOK,
        pwd='/tmp/wget',
        cmd=['/tmp/on_Snapshot__06_wget.finite.bg.py', '--url=https://example.com'],
        status=Process.StatusChoices.EXITED,
    )
    # Render the custom link callables directly off the ModelAdmin
    admin = ProcessAdmin(Process, AdminSite())
    binary_html = str(admin.binary_link(process))
    iface_html = str(admin.iface_link(process))
    assert f'/admin/machine/binary/{binary.id}/change' in binary_html
    assert f'/admin/machine/networkinterface/{iface.id}/change' in iface_html

View File

@@ -9,11 +9,13 @@ Tests cover:
"""
import pytest
import uuid
from typing import cast
from django.test import override_settings
from django.urls import reverse
from django.contrib.auth import get_user_model
from django.contrib.auth.models import UserManager
from django.utils import timezone
pytestmark = pytest.mark.django_db
@@ -195,6 +197,232 @@ class TestAdminSnapshotListView:
assert b'snapshot-view-list' in response.content
assert b'snapshot-view-grid' in response.content
def test_binary_change_view_renders(self, client, admin_user, db):
    """Binary admin change form should load without FieldError."""
    from archivebox.machine.models import Machine, Binary
    # Minimal Machine row; unique guid/hw_uuid avoid collisions across tests
    machine = Machine.objects.create(
        guid=f'test-guid-{uuid.uuid4()}',
        hostname='test-host',
        hw_in_docker=False,
        hw_in_vm=False,
        hw_manufacturer='Test',
        hw_product='Test Product',
        hw_uuid=f'test-hw-{uuid.uuid4()}',
        os_arch='x86_64',
        os_family='darwin',
        os_platform='darwin',
        os_release='test',
        os_kernel='test-kernel',
        stats={},
    )
    binary = Binary.objects.create(
        machine=machine,
        name='gallery-dl',
        binproviders='env',
        binprovider='env',
        abspath='/opt/homebrew/bin/gallery-dl',
        version='1.26.9',
        sha256='abc123',
        status=Binary.StatusChoices.INSTALLED,
    )
    client.login(username='testadmin', password='testpassword')
    url = f'/admin/machine/binary/{binary.pk}/change/'
    response = client.get(url, HTTP_HOST=ADMIN_HOST)
    assert response.status_code == 200
    assert b'gallery-dl' in response.content
def test_change_view_renders_real_redo_failed_action(self, client, admin_user, snapshot):
    """Snapshot change page should link to the per-object redo-failed action URL."""
    client.login(username='testadmin', password='testpassword')
    change_url = reverse('admin:core_snapshot_change', args=[snapshot.pk])
    page = client.get(change_url, HTTP_HOST=ADMIN_HOST)
    assert page.status_code == 200
    expected_action = f'/admin/core/snapshot/{snapshot.pk}/redo-failed/'.encode()
    assert expected_action in page.content
def test_redo_failed_action_requeues_snapshot(self, client, admin_user, snapshot, monkeypatch):
    """POSTing redo-failed should enqueue the snapshot once and redirect back to it."""
    import archivebox.core.admin_snapshots as admin_snapshots
    calls = []

    def record_call(obj, overwrite=False, methods=None):
        # Capture the exact arguments the admin action passes through
        calls.append((str(obj.pk), overwrite, methods))
        return 1

    monkeypatch.setattr(admin_snapshots, 'bg_archive_snapshot', record_call)
    client.login(username='testadmin', password='testpassword')
    action_url = reverse('admin:core_snapshot_redo_failed', args=[snapshot.pk])
    response = client.post(action_url, HTTP_HOST=ADMIN_HOST)
    assert response.status_code == 302
    assert calls == [(str(snapshot.pk), False, None)]
    assert response['Location'].endswith(f'/admin/core/snapshot/{snapshot.pk}/change/')
class TestArchiveResultAdminListView:
    """Tests for the ArchiveResult admin changelist rendering."""

    def test_list_view_renders_readonly_tags_and_noresults_status(self, client, admin_user, snapshot):
        """Changelist should show snapshot tags read-only and the 'No Results' status label."""
        from archivebox.core.models import ArchiveResult, Tag
        tag = Tag.objects.create(name='Alpha Research')
        snapshot.tags.add(tag)
        ArchiveResult.objects.create(
            snapshot=snapshot,
            plugin='title',
            status=ArchiveResult.StatusChoices.NORESULTS,
            output_str='No title found',
        )
        client.login(username='testadmin', password='testpassword')
        response = client.get(reverse('admin:core_archiveresult_changelist'), HTTP_HOST=ADMIN_HOST)
        assert response.status_code == 200
        assert b'Alpha Research' in response.content
        # Tags render via the read-only inline tag editor, not an editable widget
        assert b'tag-editor-inline readonly' in response.content
        assert b'No Results' in response.content

    def test_archiveresult_model_has_no_retry_at_field(self):
        """retry_at was removed from ArchiveResult; guard against its reintroduction."""
        from archivebox.core.models import ArchiveResult
        assert 'retry_at' not in {field.name for field in ArchiveResult._meta.fields}
class TestLiveProgressView:
    """Tests for /live_progress merging Process rows with ArchiveResult rows."""

    def test_live_progress_routes_crawl_process_rows_to_crawl_setup(self, client, admin_user, snapshot, db):
        """on_Crawl__* hook processes should appear under the crawl's setup_plugins."""
        import archivebox.machine.models as machine_models
        from archivebox.machine.models import Machine, Process
        # Reset the cached current-machine singleton so Machine.current() re-resolves
        machine_models._CURRENT_MACHINE = None
        machine = Machine.current()
        Process.objects.create(
            machine=machine,
            process_type=Process.TypeChoices.HOOK,
            status=Process.StatusChoices.RUNNING,
            pid=43210,
            cmd=['/plugins/chrome/on_Crawl__91_chrome_wait.js', '--url=https://example.com'],
            env={
                'CRAWL_ID': str(snapshot.crawl_id),
                'SNAPSHOT_ID': str(snapshot.id),
            },
            started_at=timezone.now(),
        )
        client.login(username='testadmin', password='testpassword')
        response = client.get(reverse('live_progress'), HTTP_HOST=ADMIN_HOST)
        assert response.status_code == 200
        payload = response.json()
        active_crawl = next(crawl for crawl in payload['active_crawls'] if crawl['id'] == str(snapshot.crawl_id))
        setup_entry = next(item for item in active_crawl['setup_plugins'] if item['source'] == 'process')
        active_snapshot = next(item for item in active_crawl['active_snapshots'] if item['id'] == str(snapshot.id))
        # Crawl-level hook shows up in setup, not in the snapshot's plugin list
        assert setup_entry['label'] == 'chrome wait'
        assert setup_entry['status'] == 'started'
        assert active_crawl['worker_pid'] == 43210
        assert active_snapshot['all_plugins'] == []

    def test_live_progress_uses_snapshot_process_rows_before_archiveresults(self, client, admin_user, snapshot, db):
        """Snapshot hook processes should surface even before any ArchiveResult exists."""
        import archivebox.machine.models as machine_models
        from archivebox.machine.models import Machine, Process
        machine_models._CURRENT_MACHINE = None
        machine = Machine.current()
        Process.objects.create(
            machine=machine,
            process_type=Process.TypeChoices.HOOK,
            status=Process.StatusChoices.RUNNING,
            pid=43211,
            cmd=['/plugins/title/on_Snapshot__10_title.py', '--url=https://example.com'],
            env={
                'CRAWL_ID': str(snapshot.crawl_id),
                'SNAPSHOT_ID': str(snapshot.id),
            },
            started_at=timezone.now(),
        )
        client.login(username='testadmin', password='testpassword')
        response = client.get(reverse('live_progress'), HTTP_HOST=ADMIN_HOST)
        assert response.status_code == 200
        payload = response.json()
        active_crawl = next(crawl for crawl in payload['active_crawls'] if crawl['id'] == str(snapshot.crawl_id))
        active_snapshot = next(item for item in active_crawl['active_snapshots'] if item['id'] == str(snapshot.id))
        # The entry is sourced from the live Process row, not a projected result
        assert active_snapshot['all_plugins'][0]['source'] == 'process'
        assert active_snapshot['all_plugins'][0]['label'] == 'title'
        assert active_snapshot['all_plugins'][0]['status'] == 'started'
        assert active_snapshot['worker_pid'] == 43211

    def test_live_progress_merges_process_rows_with_archiveresults_when_present(self, client, admin_user, snapshot, db):
        """Process rows and ArchiveResults for the same snapshot should both be listed."""
        import archivebox.machine.models as machine_models
        from archivebox.core.models import ArchiveResult
        from archivebox.machine.models import Machine, Process
        machine_models._CURRENT_MACHINE = None
        machine = Machine.current()
        Process.objects.create(
            machine=machine,
            process_type=Process.TypeChoices.HOOK,
            status=Process.StatusChoices.RUNNING,
            pid=54321,
            cmd=['/plugins/chrome/on_Snapshot__11_chrome_wait.js', '--url=https://example.com'],
            env={
                'CRAWL_ID': str(snapshot.crawl_id),
                'SNAPSHOT_ID': str(snapshot.id),
            },
            started_at=timezone.now(),
        )
        ArchiveResult.objects.create(
            snapshot=snapshot,
            plugin='title',
            status=ArchiveResult.StatusChoices.STARTED,
        )
        client.login(username='testadmin', password='testpassword')
        response = client.get(reverse('live_progress'), HTTP_HOST=ADMIN_HOST)
        assert response.status_code == 200
        payload = response.json()
        active_crawl = next(crawl for crawl in payload['active_crawls'] if crawl['id'] == str(snapshot.crawl_id))
        active_snapshot = next(item for item in active_crawl['active_snapshots'] if item['id'] == str(snapshot.id))
        sources = {item['source'] for item in active_snapshot['all_plugins']}
        plugins = {item['plugin'] for item in active_snapshot['all_plugins']}
        assert sources == {'archiveresult', 'process'}
        assert 'title' in plugins
        assert 'chrome' in plugins

    def test_live_progress_omits_pid_for_exited_process_rows(self, client, admin_user, snapshot, db):
        """Exited (exit_code=0) processes report 'succeeded' and drop the pid key."""
        import archivebox.machine.models as machine_models
        from archivebox.machine.models import Machine, Process
        machine_models._CURRENT_MACHINE = None
        machine = Machine.current()
        Process.objects.create(
            machine=machine,
            process_type=Process.TypeChoices.HOOK,
            status=Process.StatusChoices.EXITED,
            exit_code=0,
            pid=99999,
            cmd=['/plugins/title/on_Snapshot__10_title.py', '--url=https://example.com'],
            env={
                'CRAWL_ID': str(snapshot.crawl_id),
                'SNAPSHOT_ID': str(snapshot.id),
            },
            started_at=timezone.now(),
            ended_at=timezone.now(),
        )
        client.login(username='testadmin', password='testpassword')
        response = client.get(reverse('live_progress'), HTTP_HOST=ADMIN_HOST)
        assert response.status_code == 200
        payload = response.json()
        active_crawl = next(crawl for crawl in payload['active_crawls'] if crawl['id'] == str(snapshot.crawl_id))
        active_snapshot = next(item for item in active_crawl['active_snapshots'] if item['id'] == str(snapshot.id))
        process_entry = next(item for item in active_snapshot['all_plugins'] if item['source'] == 'process')
        assert process_entry['status'] == 'succeeded'
        assert 'pid' not in process_entry
class TestAdminSnapshotSearch:
"""Tests for admin snapshot search functionality."""

View File

@@ -0,0 +1,305 @@
from pathlib import Path
from uuid import uuid4
import pytest
from django.db import connection
from abx_dl.events import ProcessCompletedEvent, ProcessStartedEvent
from abx_dl.orchestrator import create_bus
pytestmark = pytest.mark.django_db
def _cleanup_machine_process_rows() -> None:
    """Purge all machine_process rows so projection tests don't leak state."""
    with connection.cursor() as cur:
        cur.execute("DELETE FROM machine_process")
def _create_snapshot():
    """Create a minimal STARTED Snapshot (plus its parent Crawl) for projection tests."""
    from archivebox.base_models.models import get_or_create_system_user_pk
    from archivebox.crawls.models import Crawl
    from archivebox.core.models import Snapshot
    owner_pk = get_or_create_system_user_pk()
    parent_crawl = Crawl.objects.create(
        urls="https://example.com",
        created_by_id=owner_pk,
    )
    snapshot = Snapshot.objects.create(
        url="https://example.com",
        crawl=parent_crawl,
        status=Snapshot.StatusChoices.STARTED,
    )
    return snapshot
def _create_machine():
    """Create a throwaway Machine row with unique guid/hw_uuid per call."""
    from archivebox.machine.models import Machine
    fields = dict(
        guid=f'test-guid-{uuid4()}',
        hostname='test-host',
        hw_in_docker=False,
        hw_in_vm=False,
        hw_manufacturer='Test',
        hw_product='Test Product',
        hw_uuid=f'test-hw-{uuid4()}',
        os_arch='arm64',
        os_family='darwin',
        os_platform='macOS',
        os_release='14.0',
        os_kernel='Darwin',
        stats={},
        config={},
    )
    return Machine.objects.create(**fields)
def _create_iface(machine):
    """Attach a fixed-value NetworkInterface row to *machine* for hydration tests."""
    from archivebox.machine.models import NetworkInterface
    fields = dict(
        machine=machine,
        mac_address='00:11:22:33:44:55',
        ip_public='203.0.113.10',
        ip_local='10.0.0.10',
        dns_server='1.1.1.1',
        hostname='test-host',
        iface='en0',
        isp='Test ISP',
        city='Test City',
        region='Test Region',
        country='Test Country',
    )
    return NetworkInterface.objects.create(**fields)
def test_process_completed_projects_inline_archiveresult():
    """A ProcessCompletedEvent carrying inline JSONL should project a SUCCEEDED ArchiveResult."""
    from archivebox.core.models import ArchiveResult
    from archivebox.services.archive_result_service import ArchiveResultService, _collect_output_metadata
    from archivebox.services.process_service import ProcessService
    snapshot = _create_snapshot()
    # Fake the hook's on-disk output so _collect_output_metadata finds a file
    plugin_dir = Path(snapshot.output_dir) / "wget"
    plugin_dir.mkdir(parents=True, exist_ok=True)
    (plugin_dir / "index.html").write_text("<html>ok</html>")
    bus = create_bus(name="test_inline_archiveresult")
    process_service = ProcessService(bus)
    service = ArchiveResultService(bus, process_service=process_service)
    event = ProcessCompletedEvent(
        plugin_name="wget",
        hook_name="on_Snapshot__06_wget.finite.bg",
        stdout='{"snapshot_id":"%s","type":"ArchiveResult","status":"succeeded","output_str":"wget/index.html"}\n' % snapshot.id,
        stderr="",
        exit_code=0,
        output_dir=str(plugin_dir),
        output_files=["index.html"],
        process_id="proc-inline",
        snapshot_id=str(snapshot.id),
        start_ts="2026-03-22T12:00:00+00:00",
        end_ts="2026-03-22T12:00:01+00:00",
    )
    output_files, output_size, output_mimetypes = _collect_output_metadata(plugin_dir)
    # Call the projection directly with the parsed inline record
    service._project_from_process_completed(
        event,
        {
            "snapshot_id": str(snapshot.id),
            "plugin": "wget",
            "hook_name": "on_Snapshot__06_wget.finite.bg",
            "status": "succeeded",
            "output_str": "wget/index.html",
        },
        output_files,
        output_size,
        output_mimetypes,
    )
    result = ArchiveResult.objects.get(snapshot=snapshot, plugin="wget", hook_name="on_Snapshot__06_wget.finite.bg")
    assert result.status == ArchiveResult.StatusChoices.SUCCEEDED
    assert result.output_str == "wget/index.html"
    assert "index.html" in result.output_files
    _cleanup_machine_process_rows()
def test_process_completed_projects_synthetic_failed_archiveresult():
    """A failed process with no inline JSONL should project a synthetic FAILED ArchiveResult."""
    from archivebox.core.models import ArchiveResult
    from archivebox.services.archive_result_service import ArchiveResultService, _collect_output_metadata
    from archivebox.services.process_service import ProcessService
    snapshot = _create_snapshot()
    plugin_dir = Path(snapshot.output_dir) / "chrome"
    plugin_dir.mkdir(parents=True, exist_ok=True)
    bus = create_bus(name="test_synthetic_archiveresult")
    process_service = ProcessService(bus)
    service = ArchiveResultService(bus, process_service=process_service)
    # exit_code=-1 + empty stdout simulates a hook killed by the timeout
    event = ProcessCompletedEvent(
        plugin_name="chrome",
        hook_name="on_Snapshot__11_chrome_wait",
        stdout="",
        stderr="Hook timed out after 60 seconds",
        exit_code=-1,
        output_dir=str(plugin_dir),
        output_files=[],
        process_id="proc-failed",
        snapshot_id=str(snapshot.id),
        start_ts="2026-03-22T12:00:00+00:00",
        end_ts="2026-03-22T12:01:00+00:00",
    )
    output_files, output_size, output_mimetypes = _collect_output_metadata(plugin_dir)
    service._project_from_process_completed(
        event,
        {
            "plugin": "chrome",
            "hook_name": "on_Snapshot__11_chrome_wait",
            "status": "failed",
            "output_str": "Hook timed out after 60 seconds",
            "error": "Hook timed out after 60 seconds",
        },
        output_files,
        output_size,
        output_mimetypes,
    )
    result = ArchiveResult.objects.get(snapshot=snapshot, plugin="chrome", hook_name="on_Snapshot__11_chrome_wait")
    assert result.status == ArchiveResult.StatusChoices.FAILED
    assert result.output_str == "Hook timed out after 60 seconds"
    # The error text is also preserved in the result's notes field
    assert "Hook timed out" in result.notes
    _cleanup_machine_process_rows()
def test_process_completed_projects_noresults_archiveresult():
    """An inline JSONL record with status=noresults should project a NORESULTS ArchiveResult."""
    from archivebox.core.models import ArchiveResult
    from archivebox.services.archive_result_service import ArchiveResultService, _collect_output_metadata
    from archivebox.services.process_service import ProcessService
    snapshot = _create_snapshot()
    plugin_dir = Path(snapshot.output_dir) / "title"
    plugin_dir.mkdir(parents=True, exist_ok=True)
    bus = create_bus(name="test_noresults_archiveresult")
    process_service = ProcessService(bus)
    service = ArchiveResultService(bus, process_service=process_service)
    event = ProcessCompletedEvent(
        plugin_name="title",
        hook_name="on_Snapshot__54_title.js",
        stdout='{"snapshot_id":"%s","type":"ArchiveResult","status":"noresults","output_str":"No title found"}\n' % snapshot.id,
        stderr="",
        exit_code=0,
        output_dir=str(plugin_dir),
        output_files=[],
        process_id="proc-noresults",
        snapshot_id=str(snapshot.id),
        start_ts="2026-03-22T12:00:00+00:00",
        end_ts="2026-03-22T12:00:01+00:00",
    )
    output_files, output_size, output_mimetypes = _collect_output_metadata(plugin_dir)
    service._project_from_process_completed(
        event,
        {
            "snapshot_id": str(snapshot.id),
            "plugin": "title",
            "hook_name": "on_Snapshot__54_title.js",
            "status": "noresults",
            "output_str": "No title found",
        },
        output_files,
        output_size,
        output_mimetypes,
    )
    result = ArchiveResult.objects.get(snapshot=snapshot, plugin="title", hook_name="on_Snapshot__54_title.js")
    assert result.status == ArchiveResult.StatusChoices.NORESULTS
    assert result.output_str == "No title found"
    _cleanup_machine_process_rows()
def test_process_started_hydrates_binary_and_iface_from_existing_binary_records(monkeypatch):
    """ProcessStartedEvent should link the new Process row to a matching Binary and iface."""
    from archivebox.machine.models import Binary, NetworkInterface
    from archivebox.services.process_service import ProcessService
    machine = _create_machine()
    iface = _create_iface(machine)
    # Avoid a real network probe; pin the 'current' interface to our fixture
    monkeypatch.setattr(NetworkInterface, 'current', classmethod(lambda cls, refresh=False: iface))
    binary = Binary.objects.create(
        machine=machine,
        name='postlight-parser',
        abspath='/tmp/postlight-parser',
        version='2.2.3',
        binprovider='npm',
        binproviders='npm',
        status=Binary.StatusChoices.INSTALLED,
    )
    bus = create_bus(name="test_process_started_binary_hydration")
    service = ProcessService(bus)
    # MERCURY_BINARY matches the Binary row's abspath; NODE_BINARY has no row
    event = ProcessStartedEvent(
        plugin_name="mercury",
        hook_name="on_Snapshot__57_mercury.py",
        hook_path="/plugins/mercury/on_Snapshot__57_mercury.py",
        hook_args=["--url=https://example.com"],
        output_dir="/tmp/mercury",
        env={
            "MERCURY_BINARY": binary.abspath,
            "NODE_BINARY": "/tmp/node",
        },
        timeout=60,
        pid=4321,
        process_id="proc-mercury",
        snapshot_id="",
        start_ts="2026-03-22T12:00:00+00:00",
    )
    service._project_started(event)
    process = service._get_or_create_process(event)
    assert process.binary_id == binary.id
    assert process.iface_id == iface.id
def test_process_started_uses_node_binary_for_js_hooks_without_plugin_binary(monkeypatch):
    """JS hooks with no plugin-specific binary should fall back to the node Binary row."""
    from archivebox.machine.models import Binary, NetworkInterface
    from archivebox.services.process_service import ProcessService
    machine = _create_machine()
    iface = _create_iface(machine)
    # Avoid a real network probe; pin the 'current' interface to our fixture
    monkeypatch.setattr(NetworkInterface, 'current', classmethod(lambda cls, refresh=False: iface))
    node = Binary.objects.create(
        machine=machine,
        name='node',
        abspath='/tmp/node',
        version='22.0.0',
        binprovider='env',
        binproviders='env',
        status=Binary.StatusChoices.INSTALLED,
    )
    bus = create_bus(name="test_process_started_node_fallback")
    service = ProcessService(bus)
    # Only NODE_BINARY in env: the .js hook has no dedicated plugin binary
    event = ProcessStartedEvent(
        plugin_name="parse_dom_outlinks",
        hook_name="on_Snapshot__75_parse_dom_outlinks.js",
        hook_path="/plugins/parse_dom_outlinks/on_Snapshot__75_parse_dom_outlinks.js",
        hook_args=["--url=https://example.com"],
        output_dir="/tmp/parse-dom-outlinks",
        env={
            "NODE_BINARY": node.abspath,
        },
        timeout=60,
        pid=9876,
        process_id="proc-parse-dom-outlinks",
        snapshot_id="",
        start_ts="2026-03-22T12:00:00+00:00",
    )
    service._project_started(event)
    process = service._get_or_create_process(event)
    assert process.binary_id == node.id
    assert process.iface_id == iface.id

View File

@@ -44,6 +44,27 @@ def test_add_single_url_creates_snapshot_in_db(tmp_path, process, disable_extrac
assert snapshots[0][0] == 'https://example.com'
def test_add_bg_creates_root_snapshot_rows_immediately(tmp_path, process, disable_extractors_dict):
    """Background add should create root snapshots immediately so the queue is visible in the DB."""
    os.chdir(tmp_path)
    result = subprocess.run(
        ['archivebox', 'add', '--bg', '--depth=0', 'https://example.com'],
        capture_output=True,
        env=disable_extractors_dict,
    )
    # Surface the CLI's stderr on failure instead of a bare returncode mismatch
    assert result.returncode == 0, f"archivebox add --bg failed: {result.stderr}"
    conn = sqlite3.connect("index.sqlite3")
    try:
        # Ensure the connection is closed even if the query raises
        c = conn.cursor()
        snapshots = c.execute("SELECT url, status FROM core_snapshot").fetchall()
    finally:
        conn.close()
    # Exactly one root snapshot, already 'queued' before any worker picks it up
    assert len(snapshots) == 1
    assert snapshots[0][0] == 'https://example.com'
    assert snapshots[0][1] == 'queued'
def test_add_creates_crawl_record(tmp_path, process, disable_extractors_dict):
"""Test that add command creates a Crawl record in the database."""
os.chdir(tmp_path)
@@ -217,6 +238,32 @@ def test_add_records_selected_persona_on_crawl(tmp_path, process, disable_extrac
assert persona_id
assert default_persona == 'Default'
assert (tmp_path / "personas" / "Default" / "chrome_user_data").is_dir()
def test_add_records_url_filter_overrides_on_crawl(tmp_path, process, disable_extractors_dict):
    """--domain-allowlist/--domain-denylist CLI flags should be persisted into crawl config."""
    os.chdir(tmp_path)
    result = subprocess.run(
        [
            'archivebox', 'add', '--index-only', '--depth=0',
            '--domain-allowlist=example.com,*.example.com',
            '--domain-denylist=static.example.com',
            'https://example.com',
        ],
        capture_output=True,
        env=disable_extractors_dict,
    )
    assert result.returncode == 0
    # Read the persisted config JSON straight out of the crawl row
    conn = sqlite3.connect("index.sqlite3")
    c = conn.cursor()
    allowlist, denylist = c.execute(
        "SELECT json_extract(config, '$.URL_ALLOWLIST'), json_extract(config, '$.URL_DENYLIST') FROM crawls_crawl LIMIT 1"
    ).fetchone()
    conn.close()
    assert allowlist == 'example.com,*.example.com'
    assert denylist == 'static.example.com'
    # NOTE(review): this assert checks persona chrome_extensions setup, which looks
    # unrelated to URL filters — confirm it belongs in this test.
    assert (tmp_path / "personas" / "Default" / "chrome_extensions").is_dir()

View File

@@ -16,6 +16,13 @@ from archivebox.tests.conftest import (
create_test_url,
)
PROJECTOR_TEST_ENV = {
'PLUGINS': 'favicon',
'SAVE_FAVICON': 'True',
'USE_COLOR': 'False',
'SHOW_PROGRESS': 'False',
}
class TestArchiveResultCreate:
"""Tests for `archivebox archiveresult create`."""
@@ -38,13 +45,14 @@ class TestArchiveResultCreate:
assert code == 0, f"Command failed: {stderr}"
records = parse_jsonl_output(stdout2)
# Should have the Snapshot passed through and ArchiveResult created
# Should have the Snapshot passed through and an ArchiveResult request emitted
types = [r.get('type') for r in records]
assert 'Snapshot' in types
assert 'ArchiveResult' in types
ar = next(r for r in records if r['type'] == 'ArchiveResult')
assert ar['plugin'] == 'title'
assert 'id' not in ar
def test_create_with_specific_plugin(self, initialized_archive):
"""Create archive result for specific plugin."""
@@ -122,15 +130,33 @@ class TestArchiveResultList:
def test_list_filter_by_status(self, initialized_archive):
"""Filter archive results by status."""
# Create snapshot and archive result
# Create snapshot and materialize an archive result via the runner
url = create_test_url()
stdout1, _, _ = run_archivebox_cmd(['snapshot', 'create', url], data_dir=initialized_archive)
snapshot = parse_jsonl_output(stdout1)[0]
run_archivebox_cmd(
['archiveresult', 'create', '--plugin=title'],
stdout2, _, _ = run_archivebox_cmd(
['archiveresult', 'create', '--plugin=favicon'],
stdin=json.dumps(snapshot),
data_dir=initialized_archive,
)
run_archivebox_cmd(
['run'],
stdin=stdout2,
data_dir=initialized_archive,
timeout=120,
env=PROJECTOR_TEST_ENV,
)
created = parse_jsonl_output(
run_archivebox_cmd(
['archiveresult', 'list', '--plugin=favicon'],
data_dir=initialized_archive,
)[0]
)[0]
run_archivebox_cmd(
['archiveresult', 'update', '--status=queued'],
stdin=json.dumps(created),
data_dir=initialized_archive,
)
stdout, stderr, code = run_archivebox_cmd(
['archiveresult', 'list', '--status=queued'],
@@ -147,21 +173,28 @@ class TestArchiveResultList:
url = create_test_url()
stdout1, _, _ = run_archivebox_cmd(['snapshot', 'create', url], data_dir=initialized_archive)
snapshot = parse_jsonl_output(stdout1)[0]
run_archivebox_cmd(
['archiveresult', 'create', '--plugin=title'],
stdout2, _, _ = run_archivebox_cmd(
['archiveresult', 'create', '--plugin=favicon'],
stdin=json.dumps(snapshot),
data_dir=initialized_archive,
)
run_archivebox_cmd(
['run'],
stdin=stdout2,
data_dir=initialized_archive,
timeout=120,
env=PROJECTOR_TEST_ENV,
)
stdout, stderr, code = run_archivebox_cmd(
['archiveresult', 'list', '--plugin=title'],
['archiveresult', 'list', '--plugin=favicon'],
data_dir=initialized_archive,
)
assert code == 0
records = parse_jsonl_output(stdout)
for r in records:
assert r['plugin'] == 'title'
assert r['plugin'] == 'favicon'
def test_list_with_limit(self, initialized_archive):
"""Limit number of results."""
@@ -170,11 +203,18 @@ class TestArchiveResultList:
url = create_test_url()
stdout1, _, _ = run_archivebox_cmd(['snapshot', 'create', url], data_dir=initialized_archive)
snapshot = parse_jsonl_output(stdout1)[0]
run_archivebox_cmd(
['archiveresult', 'create', '--plugin=title'],
stdout2, _, _ = run_archivebox_cmd(
['archiveresult', 'create', '--plugin=favicon'],
stdin=json.dumps(snapshot),
data_dir=initialized_archive,
)
run_archivebox_cmd(
['run'],
stdin=stdout2,
data_dir=initialized_archive,
timeout=120,
env=PROJECTOR_TEST_ENV,
)
stdout, stderr, code = run_archivebox_cmd(
['archiveresult', 'list', '--limit=2'],
@@ -196,11 +236,22 @@ class TestArchiveResultUpdate:
snapshot = parse_jsonl_output(stdout1)[0]
stdout2, _, _ = run_archivebox_cmd(
['archiveresult', 'create', '--plugin=title'],
['archiveresult', 'create', '--plugin=favicon'],
stdin=json.dumps(snapshot),
data_dir=initialized_archive,
)
ar = next(r for r in parse_jsonl_output(stdout2) if r.get('type') == 'ArchiveResult')
stdout_run, _, _ = run_archivebox_cmd(
['run'],
stdin=stdout2,
data_dir=initialized_archive,
timeout=120,
env=PROJECTOR_TEST_ENV,
)
stdout_list, _, _ = run_archivebox_cmd(
['archiveresult', 'list', '--plugin=favicon'],
data_dir=initialized_archive,
)
ar = parse_jsonl_output(stdout_list)[0]
stdout3, stderr, code = run_archivebox_cmd(
['archiveresult', 'update', '--status=failed'],
@@ -225,11 +276,22 @@ class TestArchiveResultDelete:
snapshot = parse_jsonl_output(stdout1)[0]
stdout2, _, _ = run_archivebox_cmd(
['archiveresult', 'create', '--plugin=title'],
['archiveresult', 'create', '--plugin=favicon'],
stdin=json.dumps(snapshot),
data_dir=initialized_archive,
)
ar = next(r for r in parse_jsonl_output(stdout2) if r.get('type') == 'ArchiveResult')
stdout_run, _, _ = run_archivebox_cmd(
['run'],
stdin=stdout2,
data_dir=initialized_archive,
timeout=120,
env=PROJECTOR_TEST_ENV,
)
stdout_list, _, _ = run_archivebox_cmd(
['archiveresult', 'list', '--plugin=favicon'],
data_dir=initialized_archive,
)
ar = parse_jsonl_output(stdout_list)[0]
stdout, stderr, code = run_archivebox_cmd(
['archiveresult', 'delete'],
@@ -247,11 +309,22 @@ class TestArchiveResultDelete:
snapshot = parse_jsonl_output(stdout1)[0]
stdout2, _, _ = run_archivebox_cmd(
['archiveresult', 'create', '--plugin=title'],
['archiveresult', 'create', '--plugin=favicon'],
stdin=json.dumps(snapshot),
data_dir=initialized_archive,
)
ar = next(r for r in parse_jsonl_output(stdout2) if r.get('type') == 'ArchiveResult')
stdout_run, _, _ = run_archivebox_cmd(
['run'],
stdin=stdout2,
data_dir=initialized_archive,
timeout=120,
env=PROJECTOR_TEST_ENV,
)
stdout_list, _, _ = run_archivebox_cmd(
['archiveresult', 'list', '--plugin=favicon'],
data_dir=initialized_archive,
)
ar = parse_jsonl_output(stdout_list)[0]
stdout, stderr, code = run_archivebox_cmd(
['archiveresult', 'delete', '--yes'],

View File

@@ -83,7 +83,7 @@ class TestCrawlCreate:
assert code == 0
records = parse_jsonl_output(stdout)
assert 'test-tag' in records[0].get('tags_str', '')
assert 'test-tag' in records[0].get('tags', '')
def test_create_pass_through_other_types(self, initialized_archive):
"""Pass-through records of other types unchanged."""

View File

@@ -173,6 +173,20 @@ def test_collect_urls_from_plugins_reads_only_parser_outputs(tmp_path):
assert collect_urls_from_plugins(tmp_path / "nonexistent") == []
def test_collect_urls_from_plugins_trims_markdown_suffixes(tmp_path):
from archivebox.hooks import collect_urls_from_plugins
(tmp_path / "parse_html_urls").mkdir()
(tmp_path / "parse_html_urls" / "urls.jsonl").write_text(
'{"url":"https://docs.sweeting.me/s/youtube-favorites)**"}\n',
encoding="utf-8",
)
urls = collect_urls_from_plugins(tmp_path)
assert len(urls) == 1
assert urls[0]["url"] == "https://docs.sweeting.me/s/youtube-favorites"
def test_crawl_create_stdout_pipes_into_run(initialized_archive):
"""`archivebox crawl create | archivebox run` should queue and materialize snapshots."""
url = create_test_url()
@@ -269,8 +283,13 @@ def test_archiveresult_list_stdout_pipes_into_run(initialized_archive):
)
assert ar_create_code == 0, ar_create_stderr
created_records = parse_jsonl_output(ar_create_stdout)
archiveresult = next(record for record in created_records if record.get("type") == "ArchiveResult")
run_archivebox_cmd(
["run"],
stdin=ar_create_stdout,
data_dir=initialized_archive,
timeout=120,
env=PIPE_TEST_ENV,
)
list_stdout, list_stderr, list_code = run_archivebox_cmd(
["archiveresult", "list", "--plugin=favicon"],
@@ -278,6 +297,8 @@ def test_archiveresult_list_stdout_pipes_into_run(initialized_archive):
)
assert list_code == 0, list_stderr
_assert_stdout_is_jsonl_only(list_stdout)
listed_records = parse_jsonl_output(list_stdout)
archiveresult = next(record for record in listed_records if record.get("type") == "ArchiveResult")
run_stdout, run_stderr, run_code = run_archivebox_cmd(
["run"],

View File

@@ -8,6 +8,9 @@ Tests cover:
"""
import json
import sys
import pytest
from archivebox.tests.conftest import (
run_archivebox_cmd,
@@ -266,3 +269,182 @@ class TestRunEmpty:
assert code == 0
assert 'No records to process' in stderr
class TestRunDaemonMode:
def test_run_daemon_processes_stdin_before_runner(self, monkeypatch):
from archivebox.cli import archivebox_run
class FakeStdin:
def isatty(self):
return False
monkeypatch.setattr(sys, "stdin", FakeStdin())
calls = []
monkeypatch.setattr(
archivebox_run,
"process_stdin_records",
lambda: calls.append("stdin") or 0,
)
monkeypatch.setattr(
archivebox_run,
"run_runner",
lambda daemon=False: calls.append(f"runner:{daemon}") or 0,
)
with pytest.raises(SystemExit) as exit_info:
archivebox_run.main.callback(daemon=True, crawl_id=None, snapshot_id=None, binary_id=None)
assert exit_info.value.code == 0
assert calls == ["stdin", "runner:True"]
def test_run_daemon_skips_runner_if_stdin_processing_fails(self, monkeypatch):
from archivebox.cli import archivebox_run
class FakeStdin:
def isatty(self):
return False
monkeypatch.setattr(sys, "stdin", FakeStdin())
monkeypatch.setattr(archivebox_run, "process_stdin_records", lambda: 1)
monkeypatch.setattr(
archivebox_run,
"run_runner",
lambda daemon=False: (_ for _ in ()).throw(AssertionError("runner should not start after stdin failure")),
)
with pytest.raises(SystemExit) as exit_info:
archivebox_run.main.callback(daemon=True, crawl_id=None, snapshot_id=None, binary_id=None)
assert exit_info.value.code == 1
@pytest.mark.django_db
class TestRecoverOrphanedCrawls:
def test_recover_orphaned_crawl_requeues_started_crawl_without_active_processes(self):
from archivebox.base_models.models import get_or_create_system_user_pk
from archivebox.crawls.models import Crawl
from archivebox.core.models import Snapshot
from archivebox.services.runner import recover_orphaned_crawls
crawl = Crawl.objects.create(
urls='https://example.com',
created_by_id=get_or_create_system_user_pk(),
status=Crawl.StatusChoices.STARTED,
retry_at=None,
)
Snapshot.objects.create(
url='https://example.com',
crawl=crawl,
status=Snapshot.StatusChoices.QUEUED,
retry_at=None,
)
recovered = recover_orphaned_crawls()
crawl.refresh_from_db()
assert recovered == 1
assert crawl.status == Crawl.StatusChoices.STARTED
assert crawl.retry_at is not None
def test_recover_orphaned_crawl_skips_active_child_processes(self):
import archivebox.machine.models as machine_models
from django.utils import timezone
from archivebox.base_models.models import get_or_create_system_user_pk
from archivebox.crawls.models import Crawl
from archivebox.core.models import Snapshot
from archivebox.machine.models import Machine, Process
from archivebox.services.runner import recover_orphaned_crawls
crawl = Crawl.objects.create(
urls='https://example.com',
created_by_id=get_or_create_system_user_pk(),
status=Crawl.StatusChoices.STARTED,
retry_at=None,
)
snapshot = Snapshot.objects.create(
url='https://example.com',
crawl=crawl,
status=Snapshot.StatusChoices.QUEUED,
retry_at=None,
)
machine_models._CURRENT_MACHINE = None
machine = Machine.current()
Process.objects.create(
machine=machine,
process_type=Process.TypeChoices.HOOK,
status=Process.StatusChoices.RUNNING,
cmd=['/plugins/chrome/on_Crawl__91_chrome_wait.js'],
env={
'CRAWL_ID': str(crawl.id),
'SNAPSHOT_ID': str(snapshot.id),
},
started_at=timezone.now(),
)
recovered = recover_orphaned_crawls()
crawl.refresh_from_db()
assert recovered == 0
assert crawl.retry_at is None
def test_recover_orphaned_crawl_seals_when_all_snapshots_are_already_sealed(self):
from archivebox.base_models.models import get_or_create_system_user_pk
from archivebox.crawls.models import Crawl
from archivebox.core.models import Snapshot
from archivebox.services.runner import recover_orphaned_crawls
crawl = Crawl.objects.create(
urls='https://example.com',
created_by_id=get_or_create_system_user_pk(),
status=Crawl.StatusChoices.STARTED,
retry_at=None,
)
Snapshot.objects.create(
url='https://example.com',
crawl=crawl,
status=Snapshot.StatusChoices.SEALED,
retry_at=None,
)
recovered = recover_orphaned_crawls()
crawl.refresh_from_db()
assert recovered == 1
assert crawl.status == Crawl.StatusChoices.SEALED
assert crawl.retry_at is None
@pytest.mark.django_db
class TestRecoverOrphanedSnapshots:
def test_recover_orphaned_snapshot_requeues_started_snapshot_without_active_processes(self):
from archivebox.base_models.models import get_or_create_system_user_pk
from archivebox.crawls.models import Crawl
from archivebox.core.models import Snapshot
from archivebox.services.runner import recover_orphaned_snapshots
crawl = Crawl.objects.create(
urls='https://example.com',
created_by_id=get_or_create_system_user_pk(),
status=Crawl.StatusChoices.SEALED,
retry_at=None,
)
snapshot = Snapshot.objects.create(
url='https://example.com',
crawl=crawl,
status=Snapshot.StatusChoices.STARTED,
retry_at=None,
)
recovered = recover_orphaned_snapshots()
snapshot.refresh_from_db()
crawl.refresh_from_db()
assert recovered == 1
assert snapshot.status == Snapshot.StatusChoices.QUEUED
assert snapshot.retry_at is not None
assert crawl.status == Crawl.StatusChoices.QUEUED
assert crawl.retry_at is not None

View File

@@ -6,6 +6,15 @@ Verify server can start (basic smoke tests only, no full server testing).
import os
import subprocess
import sys
from unittest.mock import Mock
def test_sqlite_connections_use_explicit_30_second_busy_timeout():
from archivebox.core.settings import SQLITE_CONNECTION_OPTIONS
assert SQLITE_CONNECTION_OPTIONS["OPTIONS"]["timeout"] == 30
assert "PRAGMA busy_timeout = 30000;" in SQLITE_CONNECTION_OPTIONS["OPTIONS"]["init_command"]
def test_server_shows_usage_info(tmp_path, process):
@@ -39,3 +48,64 @@ def test_server_init_flag(tmp_path, process):
assert result.returncode == 0
assert '--init' in result.stdout or 'init' in result.stdout.lower()
def test_runner_worker_uses_current_interpreter():
"""The supervised runner should use the active Python environment, not PATH."""
from archivebox.workers.supervisord_util import RUNNER_WORKER
assert RUNNER_WORKER["command"] == f"{sys.executable} -m archivebox run --daemon"
def test_reload_workers_use_current_interpreter_and_supervisord_managed_runner():
from archivebox.workers.supervisord_util import RUNNER_WATCH_WORKER, RUNSERVER_WORKER
runserver = RUNSERVER_WORKER("127.0.0.1", "8000", reload=True, pidfile="/tmp/runserver.pid")
watcher = RUNNER_WATCH_WORKER("/tmp/runserver.pid")
assert runserver["name"] == "worker_runserver"
assert runserver["command"] == f"{sys.executable} -m archivebox manage runserver 127.0.0.1:8000"
assert 'ARCHIVEBOX_RUNSERVER="1"' in runserver["environment"]
assert 'ARCHIVEBOX_AUTORELOAD="1"' in runserver["environment"]
assert 'ARCHIVEBOX_RUNSERVER_PIDFILE="/tmp/runserver.pid"' in runserver["environment"]
assert watcher["name"] == "worker_runner_watch"
assert watcher["command"] == f"{sys.executable} -m archivebox manage runner_watch --pidfile=/tmp/runserver.pid"
def test_stop_existing_background_runner_cleans_up_and_stops_orchestrators():
from archivebox.cli.archivebox_server import stop_existing_background_runner
runner_a = Mock()
runner_a.kill_tree = Mock()
runner_a.terminate = Mock()
runner_b = Mock()
runner_b.kill_tree = Mock(side_effect=RuntimeError("boom"))
runner_b.terminate = Mock()
process_model = Mock()
process_model.StatusChoices.RUNNING = "running"
process_model.TypeChoices.ORCHESTRATOR = "orchestrator"
queryset = Mock()
queryset.order_by.return_value = [runner_a, runner_b]
process_model.objects.filter.return_value = queryset
supervisor = Mock()
stop_worker = Mock()
log = Mock()
stopped = stop_existing_background_runner(
machine=Mock(),
process_model=process_model,
supervisor=supervisor,
stop_worker_fn=stop_worker,
log=log,
)
assert stopped == 2
assert process_model.cleanup_stale_running.call_count == 2
stop_worker.assert_any_call(supervisor, "worker_runner")
stop_worker.assert_any_call(supervisor, "worker_runner_watch")
runner_a.kill_tree.assert_called_once_with(graceful_timeout=2.0)
runner_b.terminate.assert_called_once_with(graceful_timeout=2.0)
log.assert_called_once()

View File

@@ -74,7 +74,7 @@ class TestSnapshotCreate:
assert code == 0
records = parse_jsonl_output(stdout)
assert 'test-tag' in records[0].get('tags_str', '')
assert 'test-tag' in records[0].get('tags', '')
def test_create_pass_through_other_types(self, initialized_archive):
"""Pass-through records of other types unchanged."""

View File

@@ -0,0 +1,326 @@
from datetime import timedelta
from types import SimpleNamespace
import pytest
from django.test import RequestFactory
from django.utils import timezone
from archivebox.config import views as config_views
from archivebox.core import views as core_views
from archivebox.machine.models import Binary
pytestmark = pytest.mark.django_db
def test_get_db_binaries_by_name_collapses_youtube_dl_aliases(monkeypatch):
now = timezone.now()
records = [
SimpleNamespace(
name='youtube-dl',
version='',
binprovider='',
abspath='/usr/bin/youtube-dl',
status=Binary.StatusChoices.INSTALLED,
modified_at=now,
),
SimpleNamespace(
name='yt-dlp',
version='2026.03.01',
binprovider='pip',
abspath='/usr/bin/yt-dlp',
status=Binary.StatusChoices.INSTALLED,
modified_at=now + timedelta(seconds=1),
),
]
monkeypatch.setattr(config_views.Binary, 'objects', SimpleNamespace(all=lambda: records))
binaries = config_views.get_db_binaries_by_name()
assert 'yt-dlp' in binaries
assert 'youtube-dl' not in binaries
assert binaries['yt-dlp'].version == '2026.03.01'
def test_binaries_list_view_uses_db_version_and_hides_youtube_dl_alias(monkeypatch):
request = RequestFactory().get('/admin/environment/binaries/')
request.user = SimpleNamespace(is_superuser=True)
db_binary = SimpleNamespace(
name='youtube-dl',
version='2026.03.01',
binprovider='pip',
abspath='/usr/bin/yt-dlp',
status=Binary.StatusChoices.INSTALLED,
sha256='',
modified_at=timezone.now(),
)
monkeypatch.setattr(config_views, 'get_db_binaries_by_name', lambda: {'yt-dlp': db_binary})
context = config_views.binaries_list_view.__wrapped__(request)
assert len(context['table']['Binary Name']) == 1
assert str(context['table']['Binary Name'][0].link_item) == 'yt-dlp'
assert context['table']['Found Version'][0] == '✅ 2026.03.01'
assert context['table']['Provided By'][0] == 'pip'
assert context['table']['Found Abspath'][0] == '/usr/bin/yt-dlp'
def test_binaries_list_view_only_shows_persisted_records(monkeypatch):
request = RequestFactory().get('/admin/environment/binaries/')
request.user = SimpleNamespace(is_superuser=True)
monkeypatch.setattr(config_views, 'get_db_binaries_by_name', lambda: {})
context = config_views.binaries_list_view.__wrapped__(request)
assert context['table']['Binary Name'] == []
assert context['table']['Found Version'] == []
assert context['table']['Provided By'] == []
assert context['table']['Found Abspath'] == []
def test_binary_detail_view_uses_canonical_db_record(monkeypatch):
request = RequestFactory().get('/admin/environment/binaries/youtube-dl/')
request.user = SimpleNamespace(is_superuser=True)
db_binary = SimpleNamespace(
id='019d14cc-6c40-7793-8ff1-0f8bb050e8a3',
name='yt-dlp',
version='2026.03.01',
binprovider='pip',
abspath='/usr/bin/yt-dlp',
sha256='abc123',
status=Binary.StatusChoices.INSTALLED,
modified_at=timezone.now(),
)
monkeypatch.setattr(config_views, 'get_db_binaries_by_name', lambda: {'yt-dlp': db_binary})
context = config_views.binary_detail_view.__wrapped__(request, key='youtube-dl')
section = context['data'][0]
assert context['title'] == 'yt-dlp'
assert section['fields']['name'] == 'yt-dlp'
assert section['fields']['version'] == '2026.03.01'
assert section['fields']['binprovider'] == 'pip'
assert section['fields']['abspath'] == '/usr/bin/yt-dlp'
assert '/admin/machine/binary/019d14cc-6c40-7793-8ff1-0f8bb050e8a3/change/?_changelist_filters=q%3Dyt-dlp' in section['description']
def test_binary_detail_view_marks_unrecorded_binary(monkeypatch):
request = RequestFactory().get('/admin/environment/binaries/wget/')
request.user = SimpleNamespace(is_superuser=True)
monkeypatch.setattr(config_views, 'get_db_binaries_by_name', lambda: {})
context = config_views.binary_detail_view.__wrapped__(request, key='wget')
section = context['data'][0]
assert section['description'] == 'No persisted Binary record found'
assert section['fields']['status'] == 'unrecorded'
assert section['fields']['binprovider'] == 'not recorded'
def test_plugin_detail_view_renders_config_in_dedicated_sections(monkeypatch):
request = RequestFactory().get('/admin/environment/plugins/builtin.example/')
request.user = SimpleNamespace(is_superuser=True)
plugin_config = {
'title': 'Example Plugin',
'description': 'Example config used to verify plugin metadata rendering.',
'type': 'object',
'required_plugins': ['chrome'],
'required_binaries': ['example-cli'],
'output_mimetypes': ['text/plain', 'application/json'],
'properties': {
'EXAMPLE_ENABLED': {
'type': 'boolean',
'description': 'Enable the example plugin.',
'x-fallback': 'CHECK_SSL_VALIDITY',
},
'EXAMPLE_BINARY': {
'type': 'string',
'default': 'gallery-dl',
'description': 'Filesystem path for example output.',
'x-aliases': ['USE_EXAMPLE_BINARY'],
},
},
}
monkeypatch.setattr(config_views, 'get_filesystem_plugins', lambda: {
'builtin.example': {
'id': 'builtin.example',
'name': 'example',
'source': 'builtin',
'path': '/plugins/example',
'hooks': ['on_Snapshot__01_example.py'],
'config': plugin_config,
}
})
monkeypatch.setattr(config_views, 'get_machine_admin_url', lambda: '/admin/machine/machine/test-machine/change/')
context = config_views.plugin_detail_view.__wrapped__(request, key='builtin.example')
assert context['title'] == 'example'
assert len(context['data']) == 5
summary_section, hooks_section, metadata_section, config_section, properties_section = context['data']
assert summary_section['fields'] == {
'id': 'builtin.example',
'name': 'example',
'source': 'builtin',
}
assert '/plugins/example' in summary_section['description']
assert 'https://archivebox.github.io/abx-plugins/#example' in summary_section['description']
assert hooks_section['name'] == 'Hooks'
assert hooks_section['fields'] == {}
assert 'https://github.com/ArchiveBox/abx-plugins/tree/main/abx_plugins/plugins/example/on_Snapshot__01_example.py' in hooks_section['description']
assert 'on_Snapshot__01_example.py' in hooks_section['description']
assert metadata_section['name'] == 'Plugin Metadata'
assert metadata_section['fields'] == {}
assert 'Example Plugin' in metadata_section['description']
assert 'Example config used to verify plugin metadata rendering.' in metadata_section['description']
assert 'https://archivebox.github.io/abx-plugins/#chrome' in metadata_section['description']
assert '/admin/environment/binaries/example-cli/' in metadata_section['description']
assert 'text/plain' in metadata_section['description']
assert 'application/json' in metadata_section['description']
assert config_section['name'] == 'config.json'
assert config_section['fields'] == {}
assert '<pre style=' in config_section['description']
assert 'EXAMPLE_ENABLED' in config_section['description']
assert '<span style="color: #0550ae;">"properties"</span>' in config_section['description']
assert properties_section['name'] == 'Config Properties'
assert properties_section['fields'] == {}
assert '/admin/machine/machine/test-machine/change/' in properties_section['description']
assert '/admin/machine/binary/' in properties_section['description']
assert '/admin/environment/binaries/' in properties_section['description']
assert 'EXAMPLE_ENABLED' in properties_section['description']
assert 'boolean' in properties_section['description']
assert 'Enable the example plugin.' in properties_section['description']
assert '/admin/environment/config/EXAMPLE_ENABLED/' in properties_section['description']
assert '/admin/environment/config/CHECK_SSL_VALIDITY/' in properties_section['description']
assert '/admin/environment/config/USE_EXAMPLE_BINARY/' in properties_section['description']
assert '/admin/environment/binaries/gallery-dl/' in properties_section['description']
assert 'EXAMPLE_BINARY' in properties_section['description']
def test_get_config_definition_link_keeps_core_config_search_link(monkeypatch):
monkeypatch.setattr(core_views, 'find_plugin_for_config_key', lambda key: None)
url, label = core_views.get_config_definition_link('CHECK_SSL_VALIDITY')
assert 'github.com/search' in url
assert 'CHECK_SSL_VALIDITY' in url
assert label == 'archivebox/config'
def test_get_config_definition_link_uses_plugin_config_json_for_plugin_options(monkeypatch):
plugin_dir = core_views.BUILTIN_PLUGINS_DIR / 'parse_dom_outlinks'
monkeypatch.setattr(core_views, 'find_plugin_for_config_key', lambda key: 'parse_dom_outlinks')
monkeypatch.setattr(core_views, 'iter_plugin_dirs', lambda: [plugin_dir])
url, label = core_views.get_config_definition_link('PARSE_DOM_OUTLINKS_ENABLED')
assert url == 'https://github.com/ArchiveBox/abx-plugins/tree/main/abx_plugins/plugins/parse_dom_outlinks/config.json'
assert label == 'abx_plugins/plugins/parse_dom_outlinks/config.json'
def test_live_config_value_view_renames_source_field_and_uses_plugin_definition_link(monkeypatch):
request = RequestFactory().get('/admin/environment/config/PARSE_DOM_OUTLINKS_ENABLED/')
request.user = SimpleNamespace(is_superuser=True)
monkeypatch.setattr(core_views, 'get_all_configs', lambda: {})
monkeypatch.setattr(core_views, 'get_flat_config', lambda: {})
monkeypatch.setattr(core_views, 'get_config', lambda: {'PARSE_DOM_OUTLINKS_ENABLED': True})
monkeypatch.setattr(core_views, 'find_config_default', lambda key: 'True')
monkeypatch.setattr(core_views, 'find_config_type', lambda key: 'bool')
monkeypatch.setattr(core_views, 'find_config_source', lambda key, merged: 'Default')
monkeypatch.setattr(core_views, 'key_is_safe', lambda key: True)
monkeypatch.setattr(core_views.CONSTANTS, 'CONFIG_FILE', SimpleNamespace(exists=lambda: False))
from archivebox.machine.models import Machine
from archivebox.config.configset import BaseConfigSet
monkeypatch.setattr(Machine, 'current', classmethod(lambda cls: SimpleNamespace(id='machine-id', config={})))
monkeypatch.setattr(BaseConfigSet, 'load_from_file', classmethod(lambda cls, path: {}))
monkeypatch.setattr(
core_views,
'get_config_definition_link',
lambda key: (
'https://github.com/ArchiveBox/abx-plugins/tree/main/abx_plugins/plugins/parse_dom_outlinks/config.json',
'abx_plugins/plugins/parse_dom_outlinks/config.json',
),
)
context = core_views.live_config_value_view.__wrapped__(request, key='PARSE_DOM_OUTLINKS_ENABLED')
section = context['data'][0]
assert 'Currently read from' in section['fields']
assert 'Source' not in section['fields']
assert section['fields']['Currently read from'] == 'Default'
assert 'abx_plugins/plugins/parse_dom_outlinks/config.json' in section['help_texts']['Type']
def test_find_config_source_prefers_environment_over_machine_and_file(monkeypatch):
monkeypatch.setenv('CHECK_SSL_VALIDITY', 'false')
from archivebox.machine.models import Machine
from archivebox.config.configset import BaseConfigSet
monkeypatch.setattr(
Machine,
'current',
classmethod(lambda cls: SimpleNamespace(id='machine-id', config={'CHECK_SSL_VALIDITY': 'true'})),
)
monkeypatch.setattr(
BaseConfigSet,
'load_from_file',
classmethod(lambda cls, path: {'CHECK_SSL_VALIDITY': 'true'}),
)
assert core_views.find_config_source('CHECK_SSL_VALIDITY', {'CHECK_SSL_VALIDITY': False}) == 'Environment'
def test_live_config_value_view_priority_text_matches_runtime_precedence(monkeypatch):
request = RequestFactory().get('/admin/environment/config/CHECK_SSL_VALIDITY/')
request.user = SimpleNamespace(is_superuser=True)
monkeypatch.setattr(core_views, 'get_all_configs', lambda: {})
monkeypatch.setattr(core_views, 'get_flat_config', lambda: {'CHECK_SSL_VALIDITY': True})
monkeypatch.setattr(core_views, 'get_config', lambda: {'CHECK_SSL_VALIDITY': False})
monkeypatch.setattr(core_views, 'find_config_default', lambda key: 'True')
monkeypatch.setattr(core_views, 'find_config_type', lambda key: 'bool')
monkeypatch.setattr(core_views, 'key_is_safe', lambda key: True)
from archivebox.machine.models import Machine
from archivebox.config.configset import BaseConfigSet
monkeypatch.setattr(
Machine,
'current',
classmethod(lambda cls: SimpleNamespace(id='machine-id', config={'CHECK_SSL_VALIDITY': 'true'})),
)
monkeypatch.setattr(
BaseConfigSet,
'load_from_file',
classmethod(lambda cls, path: {'CHECK_SSL_VALIDITY': 'true'}),
)
monkeypatch.setattr(core_views.CONSTANTS, 'CONFIG_FILE', SimpleNamespace(exists=lambda: True))
monkeypatch.setenv('CHECK_SSL_VALIDITY', 'false')
context = core_views.live_config_value_view.__wrapped__(request, key='CHECK_SSL_VALIDITY')
section = context['data'][0]
assert section['fields']['Currently read from'] == 'Environment'
help_text = section['help_texts']['Currently read from']
assert help_text.index('Environment') < help_text.index('Machine') < help_text.index('Config File') < help_text.index('Default')
assert 'Configuration Sources (highest priority first):' in section['help_texts']['Value']

View File

@@ -0,0 +1,220 @@
from typing import cast
import pytest
from django.contrib.auth import get_user_model
from django.contrib.auth.models import UserManager
from django.urls import reverse
from archivebox.crawls.admin import CrawlAdminForm
from archivebox.crawls.models import Crawl
from archivebox.core.models import Snapshot
pytestmark = pytest.mark.django_db
User = get_user_model()
ADMIN_HOST = 'admin.archivebox.localhost:8000'
@pytest.fixture
def admin_user(db):
return cast(UserManager, User.objects).create_superuser(
username='crawladmin',
email='crawladmin@test.com',
password='testpassword',
)
@pytest.fixture
def crawl(admin_user):
return Crawl.objects.create(
urls='https://example.com\nhttps://example.org',
tags_str='alpha,beta',
created_by=admin_user,
)
def test_crawl_admin_change_view_renders_tag_editor_widget(client, admin_user, crawl):
client.login(username='crawladmin', password='testpassword')
response = client.get(
reverse('admin:crawls_crawl_change', args=[crawl.pk]),
HTTP_HOST=ADMIN_HOST,
)
assert response.status_code == 200
assert b'name="tags_editor"' in response.content
assert b'tag-editor-container' in response.content
assert b'alpha' in response.content
assert b'beta' in response.content
def test_crawl_admin_add_view_renders_url_filter_alias_fields(client, admin_user):
client.login(username='crawladmin', password='testpassword')
response = client.get(
reverse('admin:crawls_crawl_add'),
HTTP_HOST=ADMIN_HOST,
)
assert response.status_code == 200
assert b'name="url_filters_allowlist"' in response.content
assert b'name="url_filters_denylist"' in response.content
assert b'Same domain only' in response.content
def test_crawl_admin_form_saves_tags_editor_to_tags_str(crawl, admin_user):
form = CrawlAdminForm(
data={
'created_at': crawl.created_at.strftime('%Y-%m-%d %H:%M:%S'),
'urls': crawl.urls,
'config': '{}',
'max_depth': '0',
'tags_editor': 'alpha, beta, Alpha, gamma',
'url_filters_allowlist': 'example.com\n*.example.com',
'url_filters_denylist': 'static.example.com',
'persona_id': '',
'label': '',
'notes': '',
'schedule': '',
'status': crawl.status,
'retry_at': crawl.retry_at.strftime('%Y-%m-%d %H:%M:%S'),
'created_by': str(admin_user.pk),
'num_uses_failed': '0',
'num_uses_succeeded': '0',
},
instance=crawl,
)
assert form.is_valid(), form.errors
updated = form.save()
updated.refresh_from_db()
assert updated.tags_str == 'alpha,beta,gamma'
assert updated.config['URL_ALLOWLIST'] == 'example.com\n*.example.com'
assert updated.config['URL_DENYLIST'] == 'static.example.com'
def test_crawl_admin_delete_snapshot_action_removes_snapshot_and_url(client, admin_user):
crawl = Crawl.objects.create(
urls='https://example.com/remove-me',
created_by=admin_user,
)
snapshot = Snapshot.objects.create(
crawl=crawl,
url='https://example.com/remove-me',
)
client.login(username='crawladmin', password='testpassword')
response = client.post(
reverse('admin:crawls_crawl_snapshot_delete', args=[crawl.pk, snapshot.pk]),
HTTP_HOST=ADMIN_HOST,
)
assert response.status_code == 200
assert response.json()['ok'] is True
assert not Snapshot.objects.filter(pk=snapshot.pk).exists()
crawl.refresh_from_db()
assert 'https://example.com/remove-me' not in crawl.urls
def test_crawl_admin_exclude_domain_action_prunes_urls_and_pending_snapshots(client, admin_user):
    """Excluding a domain should denylist it, drop its URLs from the crawl, and delete only pending snapshots."""
    crawl = Crawl.objects.create(
        urls='\n'.join([
            'https://cdn.example.com/asset.js',
            'https://cdn.example.com/second.js',
            'https://example.com/root',
        ]),
        created_by=admin_user,
    )
    # queued snapshot on the excluded domain — expected to be removed
    queued_snapshot = Snapshot.objects.create(
        crawl=crawl,
        url='https://cdn.example.com/asset.js',
        status=Snapshot.StatusChoices.QUEUED,
    )
    # sealed snapshot on a different domain — expected to survive
    preserved_snapshot = Snapshot.objects.create(
        crawl=crawl,
        url='https://example.com/root',
        status=Snapshot.StatusChoices.SEALED,
    )
    client.login(username='crawladmin', password='testpassword')
    response = client.post(
        reverse('admin:crawls_crawl_snapshot_exclude_domain', args=[crawl.pk, queued_snapshot.pk]),
        HTTP_HOST=ADMIN_HOST,
    )
    assert response.status_code == 200
    payload = response.json()
    assert payload['ok'] is True
    assert payload['domain'] == 'cdn.example.com'
    crawl.refresh_from_db()
    # the domain is denylisted on the crawl itself, not via the effective (merged) config
    assert crawl.get_url_denylist(use_effective_config=False) == ['cdn.example.com']
    # every URL on the excluded domain is pruned, others are kept
    assert 'https://cdn.example.com/asset.js' not in crawl.urls
    assert 'https://cdn.example.com/second.js' not in crawl.urls
    assert 'https://example.com/root' in crawl.urls
    assert not Snapshot.objects.filter(pk=queued_snapshot.pk).exists()
    assert Snapshot.objects.filter(pk=preserved_snapshot.pk).exists()
def test_snapshot_from_json_trims_markdown_suffixes_on_discovered_urls(crawl):
    """Trailing markdown punctuation on an extracted URL should be stripped."""
    dirty_url = 'https://docs.sweeting.me/s/youtube-favorites)**'
    snapshot = Snapshot.from_json(
        {'url': dirty_url},
        overrides={'crawl': crawl},
        queue_for_extraction=False,
    )
    assert snapshot is not None
    assert snapshot.url == 'https://docs.sweeting.me/s/youtube-favorites'
def test_create_snapshots_from_urls_respects_url_allowlist_and_denylist(admin_user):
    """Only URLs matching the allowlist and not the denylist should produce snapshots."""
    crawl = Crawl.objects.create(
        urls='\n'.join([
            'https://example.com/root',           # matches allowlist
            'https://static.example.com/app.js',  # matches allowlist but also denylist
            'https://other.test/page',            # matches neither list
        ]),
        created_by=admin_user,
        config={
            'URL_ALLOWLIST': 'example.com',
            'URL_DENYLIST': 'static.example.com',
        },
    )
    created = crawl.create_snapshots_from_urls()
    # only the allowed, non-denylisted URL gets a snapshot
    assert [snapshot.url for snapshot in created] == ['https://example.com/root']
def test_url_filter_regex_lists_preserve_commas_and_split_on_newlines_only(admin_user):
    """Filter lists must split on newlines only, so regex patterns containing commas stay intact."""
    crawl = Crawl.objects.create(
        urls='\n'.join([
            'https://example.com/root',
            'https://example.com/path,with,commas',
            'https://other.test/page',
        ]),
        created_by=admin_user,
        config={
            # two allowlist patterns joined by a newline; the commas are part of the regexes
            'URL_ALLOWLIST': r'^https://example\.com/(root|path,with,commas)$' + '\n' + r'^https://other\.test/page$',
            'URL_DENYLIST': r'^https://example\.com/path,with,commas$',
        },
    )
    # patterns must come back whole — a comma-split would shred them
    assert crawl.get_url_allowlist(use_effective_config=False) == [
        r'^https://example\.com/(root|path,with,commas)$',
        r'^https://other\.test/page$',
    ]
    assert crawl.get_url_denylist(use_effective_config=False) == [
        r'^https://example\.com/path,with,commas$',
    ]
    created = crawl.create_snapshots_from_urls()
    # the comma-containing URL is denylisted; the other two pass the allowlist
    assert [snapshot.url for snapshot in created] == [
        'https://example.com/root',
        'https://other.test/page',
    ]

View File

@@ -14,7 +14,7 @@ Tests cover:
import os
from datetime import timedelta
from typing import cast
from unittest.mock import patch
from unittest.mock import Mock, patch
import pytest
from django.test import TestCase
@@ -89,11 +89,45 @@ class TestMachineModel(TestCase):
assert result is not None
self.assertEqual(result.config.get('WGET_BINARY'), '/usr/bin/wget')
def test_machine_from_jsonl_strips_legacy_chromium_version(self):
    """Machine.from_json() should ignore legacy browser version keys."""
    Machine.current()  # ensure a current Machine row exists before importing
    record = {
        'config': {
            'WGET_BINARY': '/usr/bin/wget',
            'CHROMIUM_VERSION': '123.4.5',  # legacy key that must be dropped on import
        },
    }
    result = Machine.from_json(record)
    self.assertIsNotNone(result)
    assert result is not None  # narrow Optional for type checkers
    # valid config keys survive, the legacy key does not
    self.assertEqual(result.config.get('WGET_BINARY'), '/usr/bin/wget')
    self.assertNotIn('CHROMIUM_VERSION', result.config)
def test_machine_from_jsonl_invalid(self):
    """Machine.from_json() should return None for invalid records."""
    self.assertIsNone(Machine.from_json({'invalid': 'record'}))
def test_machine_current_strips_legacy_chromium_version(self):
    """Machine.current() should clean legacy browser version keys from persisted config."""
    import archivebox.machine.models as models
    machine = Machine.current()
    machine.config = {
        'CHROME_BINARY': '/tmp/chromium',
        'CHROMIUM_VERSION': '123.4.5',  # legacy key persisted in the DB
    }
    machine.save(update_fields=['config'])
    # seed the module-level reference — presumably what current() consults first
    models._CURRENT_MACHINE = machine
    refreshed = Machine.current()
    # valid key survives, legacy key is scrubbed
    self.assertEqual(refreshed.config.get('CHROME_BINARY'), '/tmp/chromium')
    self.assertNotIn('CHROMIUM_VERSION', refreshed.config)
def test_machine_manager_current(self):
"""Machine.objects.current() should return current machine."""
machine = Machine.current()
@@ -131,6 +165,36 @@ class TestNetworkInterfaceModel(TestCase):
interface = NetworkInterface.current()
self.assertIsNotNone(interface)
def test_networkinterface_current_refresh_creates_new_interface_when_properties_change(self):
    """Refreshing should persist a new NetworkInterface row when the host network fingerprint changes."""
    import archivebox.machine.models as models
    first = {
        'mac_address': 'aa:bb:cc:dd:ee:01',
        'ip_public': '1.1.1.1',
        'ip_local': '192.168.1.10',
        'dns_server': '8.8.8.8',
        'hostname': 'host-a',
        'iface': 'en0',
        'isp': 'ISP A',
        'city': 'City',
        'region': 'Region',
        'country': 'Country',
    }
    # same host fingerprint except for changed public/local IPs
    second = {
        **first,
        'ip_public': '2.2.2.2',
        'ip_local': '10.0.0.5',
    }
    # feed the two fingerprints to consecutive refresh calls
    with patch.object(models, 'get_host_network', side_effect=[first, second]):
        interface1 = NetworkInterface.current(refresh=True)
        interface2 = NetworkInterface.current(refresh=True)
    # a distinct row is created for the changed network, on the same machine
    self.assertNotEqual(interface1.id, interface2.id)
    self.assertEqual(interface1.machine_id, interface2.machine_id)
    self.assertEqual(NetworkInterface.objects.filter(machine=interface1.machine).count(), 2)
class TestBinaryModel(TestCase):
"""Test the Binary model."""
@@ -360,6 +424,8 @@ class TestProcessCurrent(TestCase):
self.assertEqual(proc.pid, os.getpid())
self.assertEqual(proc.status, Process.StatusChoices.RUNNING)
self.assertIsNotNone(proc.machine)
self.assertIsNotNone(proc.iface)
self.assertEqual(proc.iface.machine_id, proc.machine_id)
self.assertIsNotNone(proc.started_at)
def test_process_current_caches(self):
@@ -375,6 +441,12 @@ class TestProcessCurrent(TestCase):
result = Process._detect_process_type()
self.assertEqual(result, Process.TypeChoices.ORCHESTRATOR)
def test_process_detect_type_runner_watch(self):
    """runner_watch should be classified as a worker, not the orchestrator itself."""
    fake_argv = ['archivebox', 'manage', 'runner_watch', '--pidfile=/tmp/runserver.pid']
    with patch('sys.argv', fake_argv):
        detected = Process._detect_process_type()
    self.assertEqual(detected, Process.TypeChoices.WORKER)
def test_process_detect_type_cli(self):
"""_detect_process_type should detect CLI commands."""
with patch('sys.argv', ['archivebox', 'add', 'http://example.com']):
@@ -387,6 +459,27 @@ class TestProcessCurrent(TestCase):
result = Process._detect_process_type()
self.assertEqual(result, Process.TypeChoices.BINARY)
def test_process_proc_allows_interpreter_wrapped_script(self):
    """Process.proc should accept a script recorded in DB when wrapped by an interpreter in psutil."""
    # DB row records the bare script path as the command...
    proc = Process.objects.create(
        machine=Machine.current(),
        cmd=['/tmp/on_Crawl__90_chrome_launch.daemon.bg.js', '--url=https://example.com/'],
        pid=12345,
        status=Process.StatusChoices.RUNNING,
        started_at=timezone.now(),
    )
    # ...while psutil reports the same start time but argv prefixed with the interpreter
    os_proc = Mock()
    os_proc.create_time.return_value = proc.started_at.timestamp()
    os_proc.cmdline.return_value = [
        'node',
        '/tmp/on_Crawl__90_chrome_launch.daemon.bg.js',
        '--url=https://example.com/',
    ]
    with patch('archivebox.machine.models.psutil.Process', return_value=os_proc):
        # the mismatch in argv[0] must not cause proc to be rejected
        self.assertIs(proc.proc, os_proc)
class TestProcessHierarchy(TestCase):
"""Test Process parent/child relationships."""

View File

@@ -0,0 +1,191 @@
import pytest
from typing import cast
from django.contrib.auth import get_user_model
from django.contrib.auth.models import UserManager
from django.urls import reverse
from archivebox.personas.importers import (
PersonaImportResult,
discover_persona_template_profiles,
import_persona_from_source,
resolve_browser_profile_source,
resolve_custom_import_source,
)
pytestmark = pytest.mark.django_db
User = get_user_model()
ADMIN_HOST = "admin.archivebox.localhost:8000"
@pytest.fixture
def admin_user(db):
    """Superuser account used to exercise the personas admin views."""
    manager = cast(UserManager, User.objects)
    return manager.create_superuser(
        username="personaadmin",
        email="personaadmin@test.com",
        password="testpassword",
    )
def _make_profile_source(tmp_path):
    """Create a minimal on-disk Chrome profile under tmp_path and resolve it into an import source."""
    user_data_dir = tmp_path / "Chrome User Data"
    default_profile = user_data_dir / "Default"
    default_profile.mkdir(parents=True)
    # a Preferences file is what marks the directory as a real profile
    (default_profile / "Preferences").write_text("{}")
    return resolve_browser_profile_source(
        browser="chrome",
        user_data_dir=user_data_dir,
        profile_dir="Default",
        browser_binary="/Applications/Google Chrome.app/Contents/MacOS/Google Chrome",
    )
def test_resolve_custom_import_source_accepts_exact_profile_dir(tmp_path):
    """Passing the path of a specific profile directory should resolve to a browser-profile source."""
    user_data_dir = tmp_path / "Brave User Data"
    profile_path = user_data_dir / "Profile 2"
    profile_path.mkdir(parents=True)
    (profile_path / "Preferences").write_text("{}")
    resolved = resolve_custom_import_source(str(profile_path))
    assert resolved.kind == "browser-profile"
    assert resolved.user_data_dir == user_data_dir.resolve()
    assert resolved.profile_dir == "Profile 2"
def test_resolve_custom_import_source_accepts_cdp_url():
    """A websocket devtools URL should resolve to a CDP import source."""
    cdp_url = "ws://127.0.0.1:9222/devtools/browser/test-session"
    resolved = resolve_custom_import_source(cdp_url)
    assert resolved.kind == "cdp"
    assert resolved.cdp_url == cdp_url
def test_discover_persona_template_profiles_finds_chrome_profile_dirs(tmp_path):
    """A personas/<name>/chrome_profile/<Profile>/Preferences layout should be discovered."""
    personas_dir = tmp_path / "personas"
    chrome_profile = personas_dir / "ExistingPersona" / "chrome_profile"
    default_profile = chrome_profile / "Default"
    default_profile.mkdir(parents=True)
    (default_profile / "Preferences").write_text("{}")  # marker file for a real profile
    discovered = discover_persona_template_profiles(personas_dir=personas_dir)
    assert len(discovered) == 1
    assert discovered[0].browser == "persona"
    assert discovered[0].source_name == "ExistingPersona"
    assert discovered[0].profile_dir == "Default"
    assert discovered[0].user_data_dir == chrome_profile.resolve()
def test_discover_persona_template_profiles_finds_home_abx_personas(monkeypatch, tmp_path):
    """Discovery should also find personas under ~/.config/abx/personas."""
    from archivebox.config.constants import CONSTANTS
    # point the configured personas dir at a nonexistent path so only the home dir can match
    monkeypatch.setattr(CONSTANTS, "PERSONAS_DIR", tmp_path / "missing-data-personas")
    monkeypatch.setattr("archivebox.personas.importers.Path.home", lambda: tmp_path)
    chrome_profile = tmp_path / ".config" / "abx" / "personas" / "HomePersona" / "chrome_profile"
    default_profile = chrome_profile / "Default"
    default_profile.mkdir(parents=True)
    (default_profile / "Preferences").write_text("{}")  # marker file for a real profile
    discovered = discover_persona_template_profiles()
    assert len(discovered) == 1
    assert discovered[0].browser == "persona"
    assert discovered[0].source_name == "HomePersona"
    assert discovered[0].profile_dir == "Default"
    assert discovered[0].user_data_dir == chrome_profile.resolve()
def test_persona_admin_add_view_renders_import_ui(client, admin_user, monkeypatch, tmp_path):
    """The persona admin add page should render the browser-profile import UI."""
    source = _make_profile_source(tmp_path)
    # both the form and the admin module call the discovery helper; stub them consistently
    monkeypatch.setattr("archivebox.personas.forms.discover_local_browser_profiles", lambda: [source])
    monkeypatch.setattr("archivebox.personas.admin.discover_local_browser_profiles", lambda: [source])
    client.login(username="personaadmin", password="testpassword")
    response = client.get(reverse("admin:personas_persona_add"), HTTP_HOST=ADMIN_HOST)
    assert response.status_code == 200
    assert b"Bootstrap a persona from a real browser session" in response.content
    assert b"Google Chrome / Default" in response.content  # discovered profile is listed
    assert b"auth.json" in response.content
def test_import_persona_from_source_copies_user_agent_to_persona_config(admin_user, monkeypatch, tmp_path):
    """A user agent reported by the browser export should be saved into the persona's config."""
    from archivebox.personas.models import Persona
    source = _make_profile_source(tmp_path)
    persona = Persona.objects.create(name="AgentPersona", created_by=admin_user)

    def fake_export_browser_state(**kwargs):
        # presumably (ok, state, message) — shape mirrored from the real exporter; verify if it changes
        return True, {"user_agent": "Mozilla/5.0 Test Imported UA"}, "ok"

    monkeypatch.setattr("archivebox.personas.importers.export_browser_state", fake_export_browser_state)
    # all copy/cookie/storage steps disabled: only the UA import path is exercised
    result = import_persona_from_source(
        persona,
        source,
        copy_profile=False,
        import_cookies=False,
        capture_storage=False,
    )
    persona.refresh_from_db()
    assert result.user_agent_imported is True
    assert persona.config["USER_AGENT"] == "Mozilla/5.0 Test Imported UA"
def test_persona_admin_add_post_runs_shared_importer(client, admin_user, monkeypatch, tmp_path):
    """Submitting the admin add form with import options should delegate to the shared importer."""
    from archivebox.personas.models import Persona
    source = _make_profile_source(tmp_path)
    monkeypatch.setattr("archivebox.personas.forms.discover_local_browser_profiles", lambda: [source])
    monkeypatch.setattr("archivebox.personas.admin.discover_local_browser_profiles", lambda: [source])
    calls = {}  # records how the form invoked the importer

    def fake_import(persona, selected_source, **kwargs):
        calls["persona_name"] = persona.name
        calls["source"] = selected_source
        calls["kwargs"] = kwargs
        # simulate the importer writing its output files into the persona dir
        (persona.path / "cookies.txt").parent.mkdir(parents=True, exist_ok=True)
        (persona.path / "cookies.txt").write_text("# Netscape HTTP Cookie File\n")
        (persona.path / "auth.json").write_text('{"TYPE":"auth","cookies":[],"localStorage":{},"sessionStorage":{}}\n')
        return PersonaImportResult(
            source=selected_source,
            profile_copied=True,
            cookies_imported=True,
            storage_captured=True,
        )

    monkeypatch.setattr("archivebox.personas.forms.import_persona_from_source", fake_import)
    client.login(username="personaadmin", password="testpassword")
    response = client.post(
        reverse("admin:personas_persona_add"),
        {
            "name": "ImportedPersona",
            "created_by": str(admin_user.pk),
            "config": "{}",
            "import_mode": "discovered",
            "import_discovered_profile": source.choice_value,
            # all three import option checkboxes enabled
            "import_copy_profile": "on",
            "import_extract_cookies": "on",
            "import_capture_storage": "on",
            "_save": "Save",
        },
        HTTP_HOST=ADMIN_HOST,
    )
    assert response.status_code == 302  # successful admin save redirects
    persona = Persona.objects.get(name="ImportedPersona")
    # the form called the shared importer with the selected source and all options on
    assert calls["persona_name"] == "ImportedPersona"
    assert calls["source"].profile_dir == "Default"
    assert calls["kwargs"] == {
        "copy_profile": True,
        "import_cookies": True,
        "capture_storage": True,
    }
    # the persona now points at the files the importer produced
    assert persona.COOKIES_FILE.endswith("cookies.txt")
    assert persona.AUTH_STORAGE_FILE.endswith("auth.json")

View File

@@ -0,0 +1,640 @@
import asyncio
import subprocess
from types import SimpleNamespace
import pytest
from django.test import RequestFactory
pytestmark = pytest.mark.django_db
class _DummyBus:
def __init__(self, name: str):
self.name = name
async def stop(self):
return None
class _DummyService:
def __init__(self, *args, **kwargs):
pass
class _DummyAbxServices:
def __init__(self):
self.process = SimpleNamespace(wait_for_background_monitors=self._wait)
async def _wait(self):
return None
async def _call_sync(func, *args, **kwargs):
return func(*args, **kwargs)
def test_run_snapshot_uses_isolated_bus_per_snapshot(monkeypatch):
    """Each concurrently-running snapshot must get its own event bus, not share the crawl's."""
    from archivebox.base_models.models import get_or_create_system_user_pk
    from archivebox.crawls.models import Crawl
    from archivebox.core.models import Snapshot
    from archivebox.services import runner as runner_module
    crawl = Crawl.objects.create(
        urls='https://blog.sweeting.me\nhttps://sweeting.me',
        created_by_id=get_or_create_system_user_pk(),
    )
    snapshot_a = Snapshot.objects.create(
        url='https://blog.sweeting.me',
        crawl=crawl,
        status=Snapshot.StatusChoices.QUEUED,
    )
    snapshot_b = Snapshot.objects.create(
        url='https://sweeting.me',
        crawl=crawl,
        status=Snapshot.StatusChoices.QUEUED,
    )
    created_buses: list[_DummyBus] = []

    def fake_create_bus(*, name, total_timeout=3600.0, **kwargs):
        # record every bus the runner creates so we can count them afterwards
        bus = _DummyBus(name)
        created_buses.append(bus)
        return bus

    monkeypatch.setattr(runner_module, 'create_bus', fake_create_bus)
    monkeypatch.setattr(runner_module, 'discover_plugins', lambda: {})
    # neuter every service the runner wires up — only bus creation matters here
    monkeypatch.setattr(runner_module, 'ProcessService', _DummyService)
    monkeypatch.setattr(runner_module, 'MachineService', _DummyService)
    monkeypatch.setattr(runner_module, 'BinaryService', _DummyService)
    monkeypatch.setattr(runner_module, 'TagService', _DummyService)
    monkeypatch.setattr(runner_module, 'CrawlService', _DummyService)
    monkeypatch.setattr(runner_module, 'SnapshotService', _DummyService)
    monkeypatch.setattr(runner_module, 'ArchiveResultService', _DummyService)
    monkeypatch.setattr(runner_module, 'setup_abx_services', lambda *args, **kwargs: _DummyAbxServices())
    download_calls = []

    async def fake_download(*, url, bus, config_overrides, snapshot, **kwargs):
        # capture which bus and config each snapshot download received
        download_calls.append(
            {
                'url': url,
                'bus': bus,
                'snapshot_id': config_overrides['SNAPSHOT_ID'],
                'source_url': config_overrides['SOURCE_URL'],
                'abx_snapshot_id': snapshot.id,
            }
        )
        await asyncio.sleep(0)
        return []

    monkeypatch.setattr(runner_module, 'download', fake_download)
    crawl_runner = runner_module.CrawlRunner(crawl)
    # pre-baked run data keyed by snapshot id, bypassing the DB load inside _run_snapshot
    snapshot_data = {
        str(snapshot_a.id): {
            'id': str(snapshot_a.id),
            'url': snapshot_a.url,
            'title': snapshot_a.title,
            'timestamp': snapshot_a.timestamp,
            'bookmarked_at': snapshot_a.bookmarked_at.isoformat() if snapshot_a.bookmarked_at else "",
            'created_at': snapshot_a.created_at.isoformat() if snapshot_a.created_at else "",
            'tags': snapshot_a.tags_str(),
            'depth': snapshot_a.depth,
            'parent_snapshot_id': str(snapshot_a.parent_snapshot_id) if snapshot_a.parent_snapshot_id else None,
            'output_dir': str(snapshot_a.output_dir),
            'config': crawl_runner._snapshot_config(snapshot_a),
        },
        str(snapshot_b.id): {
            'id': str(snapshot_b.id),
            'url': snapshot_b.url,
            'title': snapshot_b.title,
            'timestamp': snapshot_b.timestamp,
            'bookmarked_at': snapshot_b.bookmarked_at.isoformat() if snapshot_b.bookmarked_at else "",
            'created_at': snapshot_b.created_at.isoformat() if snapshot_b.created_at else "",
            'tags': snapshot_b.tags_str(),
            'depth': snapshot_b.depth,
            'parent_snapshot_id': str(snapshot_b.parent_snapshot_id) if snapshot_b.parent_snapshot_id else None,
            'output_dir': str(snapshot_b.output_dir),
            'config': crawl_runner._snapshot_config(snapshot_b),
        },
    }
    monkeypatch.setattr(crawl_runner, '_load_snapshot_run_data', lambda snapshot_id: snapshot_data[snapshot_id])

    async def run_both():
        # run both snapshots concurrently to exercise bus isolation
        await asyncio.gather(
            crawl_runner._run_snapshot(str(snapshot_a.id)),
            crawl_runner._run_snapshot(str(snapshot_b.id)),
        )

    asyncio.run(run_both())
    assert len(download_calls) == 2
    assert {call['snapshot_id'] for call in download_calls} == {str(snapshot_a.id), str(snapshot_b.id)}
    assert {call['source_url'] for call in download_calls} == {snapshot_a.url, snapshot_b.url}
    # the two downloads must have received two distinct bus objects
    assert len({id(call['bus']) for call in download_calls}) == 2
    assert len(created_buses) == 3  # 1 crawl bus + 2 isolated snapshot buses
def test_ensure_background_runner_starts_when_none_running(monkeypatch):
    """With no orchestrator process recorded, ensure_background_runner should spawn a daemon."""
    import archivebox.machine.models as machine_models
    from archivebox.services import runner as runner_module
    popen_calls = []

    class DummyPopen:
        # capture the spawn arguments instead of launching anything
        def __init__(self, args, **kwargs):
            popen_calls.append((args, kwargs))

    monkeypatch.setattr(machine_models.Process, 'cleanup_stale_running', classmethod(lambda cls, machine=None: 0))
    monkeypatch.setattr(machine_models.Machine, 'current', classmethod(lambda cls: SimpleNamespace(id='machine-1')))
    # pretend no matching Process rows exist, so a runner must be started
    monkeypatch.setattr(
        machine_models.Process.objects,
        'filter',
        lambda **kwargs: SimpleNamespace(exists=lambda: False),
    )
    monkeypatch.setattr(runner_module.subprocess, 'Popen', DummyPopen)
    started = runner_module.ensure_background_runner(allow_under_pytest=True)
    assert started is True
    assert len(popen_calls) == 1
    # spawned detached: `archivebox run --daemon` with stdin closed off
    assert popen_calls[0][0] == [runner_module.sys.executable, '-m', 'archivebox', 'run', '--daemon']
    assert popen_calls[0][1]['stdin'] is subprocess.DEVNULL
def test_ensure_background_runner_skips_when_orchestrator_running(monkeypatch):
    """When an orchestrator process already exists, no new runner should be spawned."""
    import archivebox.machine.models as machine_models
    from archivebox.services import runner as runner_module
    monkeypatch.setattr(machine_models.Process, 'cleanup_stale_running', classmethod(lambda cls, machine=None: 0))
    monkeypatch.setattr(machine_models.Machine, 'current', classmethod(lambda cls: SimpleNamespace(id='machine-1')))
    # pretend a matching Process row exists
    monkeypatch.setattr(
        machine_models.Process.objects,
        'filter',
        lambda **kwargs: SimpleNamespace(exists=lambda: True),
    )
    # any attempt to spawn fails the test immediately
    monkeypatch.setattr(
        runner_module.subprocess,
        'Popen',
        lambda *args, **kwargs: (_ for _ in ()).throw(AssertionError('runner should not be spawned')),
    )
    started = runner_module.ensure_background_runner(allow_under_pytest=True)
    assert started is False
def test_runner_prepare_refreshes_network_interface_and_attaches_current_process(monkeypatch):
    """CrawlRunner._prepare should refresh the NetworkInterface and attach it to the current Process."""
    from archivebox.base_models.models import get_or_create_system_user_pk
    from archivebox.crawls.models import Crawl
    from archivebox.services import runner as runner_module
    crawl = Crawl.objects.create(
        urls='https://example.com',
        created_by_id=get_or_create_system_user_pk(),
    )

    class _Iface:
        # minimal NetworkInterface stand-in
        id = 'iface-1'
        machine = SimpleNamespace(id='machine-1')
        machine_id = 'machine-1'

    saved_updates = []

    class _Proc:
        # minimal Process stand-in; records which fields _prepare saves
        iface_id = None
        machine_id = 'machine-1'
        iface = None
        machine = None

        def save(self, *, update_fields):
            saved_updates.append(tuple(update_fields))

    proc = _Proc()
    monkeypatch.setattr(runner_module, 'discover_plugins', lambda: {})
    monkeypatch.setattr(runner_module, 'create_bus', lambda **kwargs: _DummyBus(kwargs['name']))
    monkeypatch.setattr(runner_module, 'ProcessService', _DummyService)
    monkeypatch.setattr(runner_module, 'MachineService', _DummyService)
    monkeypatch.setattr(runner_module, 'BinaryService', _DummyService)
    monkeypatch.setattr(runner_module, 'TagService', _DummyService)
    monkeypatch.setattr(runner_module, 'CrawlService', _DummyService)
    monkeypatch.setattr(runner_module, 'SnapshotService', _DummyService)
    monkeypatch.setattr(runner_module, 'ArchiveResultService', _DummyService)
    from archivebox.machine.models import NetworkInterface, Process
    from archivebox.config import configset as configset_module
    refresh_calls = []
    # record the refresh flag passed to NetworkInterface.current and return the stub iface
    monkeypatch.setattr(NetworkInterface, 'current', classmethod(lambda cls, refresh=False: refresh_calls.append(refresh) or _Iface()))
    monkeypatch.setattr(Process, 'current', classmethod(lambda cls: proc))
    monkeypatch.setattr(configset_module, 'get_config', lambda **kwargs: {})
    crawl_runner = runner_module.CrawlRunner(crawl)
    crawl_runner._prepare()
    # exactly one refresh=True lookup, then iface/machine attached and persisted
    assert refresh_calls == [True]
    assert proc.iface is not None
    assert proc.machine == proc.iface.machine
    assert saved_updates == [('iface', 'machine', 'modified_at')]
def test_create_crawl_api_queues_crawl_without_spawning_runner(monkeypatch):
    """The create-crawl API should queue the crawl for later pickup rather than run it inline."""
    from django.contrib.auth import get_user_model
    from archivebox.api.v1_crawls import CrawlCreateSchema, create_crawl
    user = get_user_model().objects.create_superuser(
        username='runner-api-admin',
        email='runner-api-admin@example.com',
        password='testpassword',
    )
    request = RequestFactory().post('/api/v1/crawls')
    request.user = user
    crawl = create_crawl(
        request,
        CrawlCreateSchema(
            urls=['https://example.com'],
            max_depth=0,
            tags=[],
            tags_str='',
            label='',
            notes='',
            config={},
        ),
    )
    assert str(crawl.id)
    # queued with a retry_at timestamp, so a background runner will pick it up
    assert crawl.status == 'queued'
    assert crawl.retry_at is not None
def test_crawl_runner_does_not_seal_unfinished_crawl(monkeypatch):
    """CrawlRunner.run() must not seal a crawl that is_finished() reports as unfinished."""
    import asgiref.sync
    from archivebox.base_models.models import get_or_create_system_user_pk
    from archivebox.crawls.models import Crawl
    from archivebox.core.models import Snapshot
    from archivebox.services import runner as runner_module
    crawl = Crawl.objects.create(
        urls='https://example.com',
        created_by_id=get_or_create_system_user_pk(),
        status=Crawl.StatusChoices.STARTED,
    )
    snapshot = Snapshot.objects.create(
        url='https://example.com',
        crawl=crawl,
        status=Snapshot.StatusChoices.STARTED,
    )
    monkeypatch.setattr(runner_module, '_attach_bus_trace', lambda bus: None)
    monkeypatch.setattr(runner_module, '_stop_bus_trace', lambda bus: asyncio.sleep(0))
    monkeypatch.setattr(runner_module, 'setup_abx_services', lambda *args, **kwargs: _DummyAbxServices())
    monkeypatch.setenv("DJANGO_ALLOW_ASYNC_UNSAFE", "true")
    # replace sync_to_async with a direct synchronous call shim
    monkeypatch.setattr(
        asgiref.sync,
        'sync_to_async',
        lambda func, thread_sensitive=True: (lambda *args, **kwargs: _call_sync(func, *args, **kwargs)),
    )
    monkeypatch.setattr(Crawl.objects, 'get', lambda id: crawl)
    monkeypatch.setattr(crawl, 'is_finished', lambda: False)  # the crawl still has work left
    monkeypatch.setattr(crawl, 'save', lambda *args, **kwargs: None)
    # stub every phase of the runner so only the finalization logic executes
    monkeypatch.setattr(runner_module.CrawlRunner, '_prepare', lambda self: None)
    monkeypatch.setattr(runner_module.CrawlRunner, '_create_live_ui', lambda self: None)
    monkeypatch.setattr(runner_module.CrawlRunner, '_initial_snapshot_ids', lambda self: [str(snapshot.id)])
    monkeypatch.setattr(runner_module.CrawlRunner, '_run_crawl_setup', lambda self, snapshot_id: asyncio.sleep(0))
    monkeypatch.setattr(runner_module.CrawlRunner, 'enqueue_snapshot', lambda self, snapshot_id: asyncio.sleep(0))
    monkeypatch.setattr(runner_module.CrawlRunner, '_wait_for_snapshot_tasks', lambda self: asyncio.sleep(0))
    monkeypatch.setattr(runner_module.CrawlRunner, '_run_crawl_cleanup', lambda self, snapshot_id: asyncio.sleep(0))
    monkeypatch.setattr(runner_module.CrawlRunner, '_cleanup_persona', lambda self: None)
    asyncio.run(runner_module.CrawlRunner(crawl, snapshot_ids=[str(snapshot.id)]).run())
    # unfinished crawl stays un-sealed and remains scheduled for retry
    assert crawl.status != Crawl.StatusChoices.SEALED
    assert crawl.retry_at is not None
def test_crawl_runner_finalizes_with_sync_to_async_for_is_finished(monkeypatch):
    """The finalization step must call crawl.is_finished() through sync_to_async (ORM off the event loop)."""
    import asgiref.sync
    from archivebox.base_models.models import get_or_create_system_user_pk
    from archivebox.crawls.models import Crawl
    from archivebox.core.models import Snapshot
    from archivebox.services import runner as runner_module
    crawl = Crawl.objects.create(
        urls='https://example.com',
        created_by_id=get_or_create_system_user_pk(),
        status=Crawl.StatusChoices.STARTED,
    )
    snapshot = Snapshot.objects.create(
        url='https://example.com',
        crawl=crawl,
        status=Snapshot.StatusChoices.STARTED,
    )
    monkeypatch.setattr(runner_module, 'create_bus', lambda *args, **kwargs: _DummyBus('runner'))
    monkeypatch.setattr(runner_module, 'discover_plugins', lambda: {})
    monkeypatch.setattr(runner_module, 'ProcessService', _DummyService)
    monkeypatch.setattr(runner_module, 'MachineService', _DummyService)
    monkeypatch.setattr(runner_module, 'BinaryService', _DummyService)
    monkeypatch.setattr(runner_module, 'TagService', _DummyService)
    monkeypatch.setattr(runner_module, 'CrawlService', _DummyService)
    monkeypatch.setattr(runner_module, 'SnapshotService', _DummyService)
    monkeypatch.setattr(runner_module, 'ArchiveResultService', _DummyService)
    monkeypatch.setattr(runner_module, '_attach_bus_trace', lambda bus: None)
    monkeypatch.setattr(runner_module, '_stop_bus_trace', lambda bus: asyncio.sleep(0))
    monkeypatch.setattr(runner_module, 'setup_abx_services', lambda *args, **kwargs: _DummyAbxServices())
    monkeypatch.setattr(Crawl.objects, 'get', lambda id: crawl)
    monkeypatch.setattr(crawl, 'save', lambda *args, **kwargs: None)
    monkeypatch.setattr(crawl, 'cleanup', lambda: None)
    # stub every runner phase so only finalization runs
    monkeypatch.setattr(runner_module.CrawlRunner, '_prepare', lambda self: None)
    monkeypatch.setattr(runner_module.CrawlRunner, '_create_live_ui', lambda self: None)
    monkeypatch.setattr(runner_module.CrawlRunner, '_initial_snapshot_ids', lambda self: [str(snapshot.id)])
    monkeypatch.setattr(runner_module.CrawlRunner, '_run_crawl_setup', lambda self, snapshot_id: asyncio.sleep(0))
    monkeypatch.setattr(runner_module.CrawlRunner, 'enqueue_snapshot', lambda self, snapshot_id: asyncio.sleep(0))
    monkeypatch.setattr(runner_module.CrawlRunner, '_wait_for_snapshot_tasks', lambda self: asyncio.sleep(0))
    monkeypatch.setattr(runner_module.CrawlRunner, '_run_crawl_cleanup', lambda self, snapshot_id: asyncio.sleep(0))
    monkeypatch.setattr(runner_module.CrawlRunner, '_cleanup_persona', lambda self: None)
    sync_to_async_wrapped: list[str] = []
    sync_to_async_active = False

    def fake_sync_to_async(func, thread_sensitive=True):
        # instrumented sync_to_async: flags when a wrapped function is executing
        async def wrapper(*args, **kwargs):
            nonlocal sync_to_async_active
            sync_to_async_wrapped.append(getattr(func, '__name__', repr(func)))
            previous = sync_to_async_active
            sync_to_async_active = True
            try:
                return func(*args, **kwargs)
            finally:
                sync_to_async_active = previous
        return wrapper

    def guarded_is_finished():
        # fails unless called via the instrumented sync_to_async wrapper
        assert sync_to_async_active is True
        return False

    monkeypatch.setattr(asgiref.sync, 'sync_to_async', fake_sync_to_async)
    monkeypatch.setattr(crawl, 'is_finished', guarded_is_finished)
    asyncio.run(runner_module.CrawlRunner(crawl, snapshot_ids=[str(snapshot.id)]).run())
    crawl.refresh_from_db()
    assert crawl.status == Crawl.StatusChoices.STARTED
    assert crawl.retry_at is not None
    assert 'guarded_is_finished' in sync_to_async_wrapped
def test_wait_for_snapshot_tasks_surfaces_already_failed_task():
    """A snapshot task that already failed must propagate its exception out of the wait."""
    from archivebox.base_models.models import get_or_create_system_user_pk
    from archivebox.crawls.models import Crawl
    from archivebox.services import runner as runner_module
    crawl = Crawl.objects.create(
        urls='https://example.com',
        created_by_id=get_or_create_system_user_pk(),
    )
    runner = runner_module.CrawlRunner(crawl)

    async def run_test():
        failed = asyncio.get_running_loop().create_future()
        failed.set_exception(RuntimeError('snapshot failed'))
        runner.snapshot_tasks['snap-1'] = failed
        with pytest.raises(RuntimeError, match='snapshot failed'):
            await runner._wait_for_snapshot_tasks()

    asyncio.run(run_test())
def test_wait_for_snapshot_tasks_returns_after_completed_tasks_are_pruned():
    """Completed snapshot tasks should be pruned and the wait should return promptly."""
    from archivebox.base_models.models import get_or_create_system_user_pk
    from archivebox.crawls.models import Crawl
    from archivebox.services import runner as runner_module
    crawl = Crawl.objects.create(
        urls='https://example.com',
        created_by_id=get_or_create_system_user_pk(),
    )
    runner = runner_module.CrawlRunner(crawl)

    async def finish_snapshot() -> None:
        await asyncio.sleep(0)

    async def run_test():
        pending = asyncio.create_task(finish_snapshot())
        runner.snapshot_tasks['snap-1'] = pending
        # must return well within the timeout once the task completes
        await asyncio.wait_for(runner._wait_for_snapshot_tasks(), timeout=0.5)
        assert runner.snapshot_tasks == {}

    asyncio.run(run_test())
def test_crawl_runner_calls_crawl_cleanup_after_snapshot_phase(monkeypatch):
    """crawl.cleanup() must run before the abx crawl-cleanup hook after the snapshot phase."""
    import asgiref.sync
    from archivebox.base_models.models import get_or_create_system_user_pk
    from archivebox.crawls.models import Crawl
    from archivebox.core.models import Snapshot
    from archivebox.services import runner as runner_module
    crawl = Crawl.objects.create(
        urls='https://example.com',
        created_by_id=get_or_create_system_user_pk(),
        status=Crawl.StatusChoices.STARTED,
    )
    snapshot = Snapshot.objects.create(
        url='https://example.com',
        crawl=crawl,
        status=Snapshot.StatusChoices.STARTED,
    )
    monkeypatch.setattr(runner_module, '_attach_bus_trace', lambda bus: None)
    monkeypatch.setattr(runner_module, '_stop_bus_trace', lambda bus: asyncio.sleep(0))
    monkeypatch.setattr(runner_module, 'setup_abx_services', lambda *args, **kwargs: _DummyAbxServices())
    monkeypatch.setenv("DJANGO_ALLOW_ASYNC_UNSAFE", "true")
    # replace sync_to_async with a direct synchronous call shim
    monkeypatch.setattr(
        asgiref.sync,
        'sync_to_async',
        lambda func, thread_sensitive=True: (lambda *args, **kwargs: _call_sync(func, *args, **kwargs)),
    )
    monkeypatch.setattr(Crawl.objects, 'get', lambda id: crawl)
    monkeypatch.setattr(crawl, 'is_finished', lambda: False)
    monkeypatch.setattr(crawl, 'save', lambda *args, **kwargs: None)
    # stub all other runner phases so only the cleanup ordering is observed
    monkeypatch.setattr(runner_module.CrawlRunner, '_prepare', lambda self: None)
    monkeypatch.setattr(runner_module.CrawlRunner, '_create_live_ui', lambda self: None)
    monkeypatch.setattr(runner_module.CrawlRunner, '_initial_snapshot_ids', lambda self: [str(snapshot.id)])
    monkeypatch.setattr(runner_module.CrawlRunner, '_run_crawl_setup', lambda self, snapshot_id: asyncio.sleep(0))
    monkeypatch.setattr(runner_module.CrawlRunner, 'enqueue_snapshot', lambda self, snapshot_id: asyncio.sleep(0))
    monkeypatch.setattr(runner_module.CrawlRunner, '_wait_for_snapshot_tasks', lambda self: asyncio.sleep(0))
    monkeypatch.setattr(runner_module.CrawlRunner, '_cleanup_persona', lambda self: None)
    cleanup_calls = []
    # both cleanup hooks append a marker so their relative order can be asserted
    monkeypatch.setattr(runner_module.CrawlRunner, '_run_crawl_cleanup', lambda self, snapshot_id: cleanup_calls.append('abx_cleanup') or asyncio.sleep(0))
    monkeypatch.setattr(crawl, 'cleanup', lambda: cleanup_calls.append('crawl_cleanup'))
    asyncio.run(runner_module.CrawlRunner(crawl, snapshot_ids=[str(snapshot.id)]).run())
    assert cleanup_calls == ['crawl_cleanup', 'abx_cleanup']
def test_abx_process_service_background_monitor_finishes_after_process_exit(monkeypatch, tmp_path):
    """The background-process monitor should finish promptly once the daemon exits, even with an open stdout stream."""
    from abx_dl.models import Process as AbxProcess, now_iso
    from abx_dl.services.process_service import ProcessService
    from abx_dl.events import ProcessCompletedEvent
    # build a bare ProcessService instance without running its __init__
    service = object.__new__(ProcessService)
    service.emit_jsonl = False
    emitted_events = []

    async def fake_emit_event(event, *, detach_from_parent):
        emitted_events.append((event, detach_from_parent))

    async def fake_stream_stdout(**kwargs):
        # simulate a stdout reader that blocks forever until cancelled
        try:
            await asyncio.Event().wait()
        except asyncio.CancelledError:
            return ["daemon output\n"]

    service._emit_event = fake_emit_event
    monkeypatch.setattr(service, '_stream_stdout', fake_stream_stdout)

    class FakeAsyncProcess:
        # asyncio-subprocess stand-in whose wait() resolves immediately with rc 0
        def __init__(self):
            self.pid = 42424
            self.returncode = None

        async def wait(self):
            await asyncio.sleep(0)
            self.returncode = 0
            return 0

    plugin_output_dir = tmp_path / 'chrome'
    plugin_output_dir.mkdir()
    stdout_file = plugin_output_dir / 'on_Crawl__90_chrome_launch.daemon.bg.stdout.log'
    stderr_file = plugin_output_dir / 'on_Crawl__90_chrome_launch.daemon.bg.stderr.log'
    stderr_file.write_text('')
    pid_file = plugin_output_dir / 'on_Crawl__90_chrome_launch.daemon.bg.pid'
    pid_file.write_text('12345')
    proc = AbxProcess(
        cmd=['hook'],
        pwd=str(plugin_output_dir),
        timeout=60,
        started_at=now_iso(),
        plugin='chrome',
        hook_name='on_Crawl__90_chrome_launch.daemon.bg',
    )
    process = FakeAsyncProcess()
    event = SimpleNamespace(
        plugin_name='chrome',
        hook_name='on_Crawl__90_chrome_launch.daemon.bg',
        hook_path='hook',
        hook_args=['--url=https://example.org/'],
        env={},
        output_dir=str(plugin_output_dir),
        timeout=60,
        snapshot_id='snap-1',
        is_background=True,
    )

    async def run_test():
        # must complete well within the timeout despite the blocked stdout reader
        await asyncio.wait_for(
            service._monitor_background_process(
                event=event,
                proc=proc,
                process=process,
                plugin_output_dir=plugin_output_dir,
                stdout_file=stdout_file,
                stderr_file=stderr_file,
                pid_file=pid_file,
                files_before=set(),
            ),
            timeout=0.5,
        )

    asyncio.run(run_test())
    # pid file is cleaned up and a completion event was emitted
    assert pid_file.exists() is False
    assert any(isinstance(event, ProcessCompletedEvent) for event, _ in emitted_events)
def test_run_pending_crawls_runs_due_snapshot_in_place(monkeypatch):
    """A due QUEUED snapshot in a SEALED crawl is dispatched inline via run_crawl."""
    from archivebox.base_models.models import get_or_create_system_user_pk
    from archivebox.crawls.models import Crawl
    from archivebox.core.models import Snapshot
    from archivebox.services import runner as runner_module

    crawl = Crawl.objects.create(
        urls='https://example.com',
        created_by_id=get_or_create_system_user_pk(),
        status=Crawl.StatusChoices.SEALED,
    )
    snapshot = Snapshot.objects.create(
        url='https://example.com',
        crawl=crawl,
        status=Snapshot.StatusChoices.QUEUED,
        retry_at=runner_module.timezone.now(),
    )

    # Make both processing locks always claimable so the scheduler picks the
    # snapshot up immediately.
    monkeypatch.setattr(type(snapshot), 'claim_processing_lock', lambda self, lock_seconds=60: True)
    monkeypatch.setattr(type(crawl), 'claim_processing_lock', lambda self, lock_seconds=60: True)

    recorded: list[tuple[str, list[str] | None, bool]] = []

    def fake_run_crawl(crawl_id, snapshot_ids=None, selected_plugins=None, process_discovered_snapshots_inline=True):
        # Record the dispatch, then mark the snapshot finished so the
        # scheduler loop terminates instead of re-queueing it.
        recorded.append((crawl_id, snapshot_ids, process_discovered_snapshots_inline))
        snapshot.status = Snapshot.StatusChoices.SEALED
        snapshot.retry_at = None
        snapshot.save(update_fields=['status', 'retry_at', 'modified_at'])

    monkeypatch.setattr(runner_module, 'run_crawl', fake_run_crawl)

    outcome = runner_module.run_pending_crawls(daemon=False)

    assert outcome == 0
    assert recorded == [(str(crawl.id), [str(snapshot.id)], False)]
def test_run_pending_crawls_prioritizes_new_queued_crawl_before_snapshot_backlog(monkeypatch):
    """A freshly QUEUED crawl is scheduled before an older crawl's snapshot backlog."""
    from archivebox.base_models.models import get_or_create_system_user_pk
    from archivebox.crawls.models import Crawl
    from archivebox.core.models import Snapshot
    from archivebox.services import runner as runner_module

    older_crawl = Crawl.objects.create(
        urls='https://older.example.com',
        created_by_id=get_or_create_system_user_pk(),
        status=Crawl.StatusChoices.STARTED,
    )
    older_snapshot = Snapshot.objects.create(
        url='https://older.example.com',
        crawl=older_crawl,
        status=Snapshot.StatusChoices.QUEUED,
        retry_at=runner_module.timezone.now(),
    )
    newer_crawl = Crawl.objects.create(
        urls='https://newer.example.com',
        created_by_id=get_or_create_system_user_pk(),
        status=Crawl.StatusChoices.QUEUED,
        retry_at=runner_module.timezone.now(),
    )

    # Every lock is claimable, so scheduling order alone decides what runs first.
    for candidate in (older_snapshot, older_crawl, newer_crawl):
        monkeypatch.setattr(type(candidate), 'claim_processing_lock', lambda self, lock_seconds=60: True)

    class _StopScheduling(Exception):
        """Raised by the fake run_crawl to halt the scheduler after one dispatch."""

    recorded: list[tuple[str, list[str] | None, bool]] = []

    def fake_run_crawl(crawl_id, snapshot_ids=None, selected_plugins=None, process_discovered_snapshots_inline=True):
        recorded.append((crawl_id, snapshot_ids, process_discovered_snapshots_inline))
        raise _StopScheduling

    monkeypatch.setattr(runner_module, 'run_crawl', fake_run_crawl)

    with pytest.raises(_StopScheduling):
        runner_module.run_pending_crawls(daemon=False)

    # The first (and only) dispatch must be the newer QUEUED crawl, not the backlog.
    assert recorded == [(str(newer_crawl.id), None, False)]

View File

@@ -0,0 +1,205 @@
import json
from datetime import datetime
from typing import cast
import pytest
from django.contrib.auth import get_user_model
from django.contrib.auth.models import UserManager
from django.urls import reverse
from django.utils import timezone

# Every test in this module touches the database.
pytestmark = pytest.mark.django_db

# Configured user model (swappable via AUTH_USER_MODEL).
User = get_user_model()

# Host header that routes requests to the admin subdomain of the dev server.
ADMIN_HOST = 'admin.archivebox.localhost:8000'
@pytest.fixture
def admin_user(db):
    """Superuser account used to authenticate admin views in these tests."""
    manager = cast(UserManager, User.objects)
    return manager.create_superuser(
        username='tagadmin',
        email='tagadmin@test.com',
        password='testpassword',
    )
@pytest.fixture
def api_token(admin_user):
    """API key string for the admin user, created on demand."""
    from archivebox.api.auth import get_or_create_api_token
    token_obj = get_or_create_api_token(admin_user)
    assert token_obj is not None
    return token_obj.token
@pytest.fixture
def crawl(admin_user):
    """Minimal crawl owned by the admin user for snapshots to attach to."""
    from archivebox.crawls.models import Crawl
    new_crawl = Crawl.objects.create(urls='https://example.com', created_by=admin_user)
    return new_crawl
@pytest.fixture
def tagged_data(crawl, admin_user):
    """One tag attached to two snapshots; yields (tag, [snapshot, snapshot])."""
    from archivebox.core.models import Snapshot, Tag

    tag = Tag.objects.create(name='Alpha Research', created_by=admin_user)
    snapshots = [
        Snapshot.objects.create(url='https://example.com/one', title='Example One', crawl=crawl),
        Snapshot.objects.create(url='https://example.com/two', title='Example Two', crawl=crawl),
    ]
    for snapshot in snapshots:
        snapshot.tags.add(tag)
    return tag, snapshots
def test_tag_admin_changelist_renders_custom_ui(client, admin_user, tagged_data):
    """The tag changelist ships the live-search/filter widgets and tag cards."""
    client.login(username='tagadmin', password='testpassword')
    response = client.get(reverse('admin:core_tag_changelist'), HTTP_HOST=ADMIN_HOST)
    assert response.status_code == 200
    expected_fragments = (
        b'id="tag-live-search"',
        b'id="tag-sort-select"',
        b'id="tag-created-by-select"',
        b'id="tag-year-select"',
        b'id="tag-has-snapshots-select"',
        b'Alpha Research',
        b'class="tag-card"',
    )
    for fragment in expected_fragments:
        assert fragment in response.content
def test_tag_admin_add_view_renders_similar_tag_reference(client, admin_user):
    """The tag add form includes the similar-tags helper widget."""
    client.login(username='tagadmin', password='testpassword')
    response = client.get(reverse('admin:core_tag_add'), HTTP_HOST=ADMIN_HOST)
    assert response.status_code == 200
    body = response.content
    assert b'Similar Tags' in body
    assert b'data-tag-name-input="1"' in body
def test_tag_search_api_returns_card_payload(client, api_token, tagged_data):
    """search_tags returns default filter state plus a full card per matching tag."""
    tag, snapshots = tagged_data
    response = client.get(
        reverse('api-1:search_tags'),
        {'q': 'Alpha', 'api_key': api_token},
        HTTP_HOST=ADMIN_HOST,
    )
    assert response.status_code == 200
    payload = response.json()

    # Defaults reported back for an otherwise-unfiltered query.
    assert payload['sort'] == 'created_desc'
    assert payload['created_by'] == ''
    assert payload['year'] == ''
    assert payload['has_snapshots'] == 'all'

    card = payload['tags'][0]
    assert card['id'] == tag.id
    assert card['name'] == 'Alpha Research'
    assert card['num_snapshots'] == 2
    assert card['snapshots'][0]['title'] in {'Example One', 'Example Two'}
    assert card['export_jsonl_url'].endswith(f'/api/v1/core/tag/{tag.id}/snapshots.jsonl')
    assert card['filter_url'].endswith(f'/admin/core/snapshot/?tags__id__exact={tag.id}')
    assert {entry['url'] for entry in card['snapshots']} == {snap.url for snap in snapshots}
def test_tag_search_api_respects_sort_and_filters(client, api_token, admin_user, crawl, tagged_data):
    """sort/created_by/year/has_snapshots query params narrow the tag results."""
    from archivebox.core.models import Snapshot, Tag

    other_user = cast(UserManager, User.objects).create_user(
        username='tagother',
        email='tagother@test.com',
        password='unused',
    )
    tag_with_snapshots = tagged_data[0]
    empty_tag = Tag.objects.create(name='Zulu Empty', created_by=other_user)
    alpha_tag = Tag.objects.create(name='Alpha Empty', created_by=other_user)
    Snapshot.objects.create(
        url='https://example.com/three',
        title='Example Three',
        crawl=crawl,
    ).tags.add(alpha_tag)

    # Backdate created_at so the year filter can tell the tags apart.
    backdates = {
        empty_tag.pk: datetime(2024, 1, 1, 12, 0, 0),
        alpha_tag.pk: datetime(2025, 1, 1, 12, 0, 0),
        tag_with_snapshots.pk: datetime(2026, 1, 1, 12, 0, 0),
    }
    for pk, naive_created_at in backdates.items():
        Tag.objects.filter(pk=pk).update(created_at=timezone.make_aware(naive_created_at))

    query = {
        'sort': 'name_desc',
        'created_by': str(other_user.pk),
        'year': '2024',
        'has_snapshots': 'no',
        'api_key': api_token,
    }
    response = client.get(reverse('api-1:search_tags'), query, HTTP_HOST=ADMIN_HOST)
    assert response.status_code == 200

    payload = response.json()
    assert payload['sort'] == 'name_desc'
    assert payload['created_by'] == str(other_user.pk)
    assert payload['year'] == '2024'
    assert payload['has_snapshots'] == 'no'
    # Only the snapshot-less 2024 tag owned by other_user should survive the filters.
    assert [entry['name'] for entry in payload['tags']] == ['Zulu Empty']
def test_tag_rename_api_updates_slug(client, api_token, tagged_data):
    """Renaming a tag via the API also regenerates its slug."""
    tag, _ = tagged_data
    rename_url = f"{reverse('api-1:rename_tag', args=[tag.id])}?api_key={api_token}"
    response = client.post(
        rename_url,
        data=json.dumps({'name': 'Alpha Archive'}),
        content_type='application/json',
        HTTP_HOST=ADMIN_HOST,
    )
    assert response.status_code == 200
    tag.refresh_from_db()
    assert (tag.name, tag.slug) == ('Alpha Archive', 'alpha-archive')
def test_tag_snapshots_export_returns_jsonl(client, api_token, tagged_data):
    """The tag snapshot export endpoint serves NDJSON with tag names inlined."""
    tag, _ = tagged_data
    response = client.get(
        reverse('api-1:tag_snapshots_export', args=[tag.id]),
        {'api_key': api_token},
        HTTP_HOST=ADMIN_HOST,
    )
    assert response.status_code == 200
    assert response['Content-Type'].startswith('application/x-ndjson')
    assert f'tag-{tag.slug}-snapshots.jsonl' in response['Content-Disposition']
    exported = response.content.decode()
    assert '"type": "Snapshot"' in exported
    assert '"tags": "Alpha Research"' in exported
def test_tag_urls_export_returns_plain_text_urls(client, api_token, tagged_data):
    """The tag URL export endpoint returns one snapshot URL per non-empty line."""
    tag, snapshots = tagged_data
    response = client.get(
        reverse('api-1:tag_urls_export', args=[tag.id]),
        {'api_key': api_token},
        HTTP_HOST=ADMIN_HOST,
    )
    assert response.status_code == 200
    assert response['Content-Type'].startswith('text/plain')
    assert f'tag-{tag.slug}-urls.txt' in response['Content-Disposition']
    exported_urls = {line for line in response.content.decode().splitlines() if line}
    assert exported_urls == {snapshot.url for snapshot in snapshots}

View File

@@ -55,6 +55,7 @@ def _build_script(body: str) -> str:
get_admin_host,
get_api_host,
get_web_host,
get_public_host,
get_snapshot_host,
get_original_host,
get_listen_subdomain,
@@ -198,6 +199,7 @@ class TestUrlRouting:
web_host = get_web_host()
admin_host = get_admin_host()
api_host = get_api_host()
public_host = get_public_host()
snapshot_host = get_snapshot_host(snapshot_id)
original_host = get_original_host(domain)
base_host = SERVER_CONFIG.LISTEN_HOST
@@ -208,6 +210,7 @@ class TestUrlRouting:
assert web_host == "web.archivebox.localhost:8000"
assert admin_host == "admin.archivebox.localhost:8000"
assert api_host == "api.archivebox.localhost:8000"
assert public_host == "public.archivebox.localhost:8000"
assert snapshot_host == f"{snapshot_id}.archivebox.localhost:8000"
assert original_host == f"{domain}.archivebox.localhost:8000"
assert get_listen_subdomain(web_host) == "web"
@@ -302,6 +305,20 @@ class TestUrlRouting:
assert resp.status_code == 200
assert response_body(resp) == response_file.read_bytes()
resp = client.get("/index.html", HTTP_HOST=snapshot_host)
assert resp.status_code == 200
snapshot_html = response_body(resp).decode("utf-8", "ignore")
assert f"http://{snapshot_host}/" in snapshot_html
assert "See all files..." in snapshot_html
assert ">WARC<" not in snapshot_html
assert ">Media<" not in snapshot_html
assert ">Git<" not in snapshot_html
resp = client.get("/?files=1", HTTP_HOST=snapshot_host)
assert resp.status_code == 200
files_html = response_body(resp).decode("utf-8", "ignore")
assert output_rel.split("/", 1)[0] in files_html
print("OK")
"""
)
@@ -479,6 +496,7 @@ class TestUrlRouting:
snapshot_host = get_snapshot_host(snapshot_id)
admin_host = get_admin_host()
web_host = get_web_host()
public_host = get_public_host()
client = Client()
@@ -491,10 +509,17 @@ class TestUrlRouting:
assert resp.status_code == 200
live_html = response_body(resp).decode("utf-8", "ignore")
assert f"http://{snapshot_host}/" in live_html
assert "http://web.archivebox.localhost:8000" in live_html
assert f"http://{public_host}/static/archive.png" in live_html
assert ">WARC<" not in live_html
assert ">Media<" not in live_html
assert ">Git<" not in live_html
static_html = Path(snapshot.output_dir, "index.html").read_text(encoding="utf-8", errors="ignore")
assert f"http://{snapshot_host}/" in static_html
assert f"http://{public_host}/static/archive.png" in static_html
assert ">WARC<" not in static_html
assert ">Media<" not in static_html
assert ">Git<" not in static_html
client.login(username="testadmin", password="testpassword")
resp = client.get(f"/admin/core/snapshot/{snapshot_id}/change/", HTTP_HOST=admin_host)