Files
ArchiveBox/archivebox/tests/test_title.py
Nick Sweeting b749b26c5d wip
2026-03-23 03:58:32 -07:00

57 lines
1.8 KiB
Python

import os
import sqlite3
import subprocess
from .fixtures import disable_extractors_dict, process
# Module-level tuple that references the fixtures imported from .fixtures.
# NOTE(review): presumably this exists only so linters do not flag the imports
# as unused (the tests below receive them by parameter name) -- confirm.
FIXTURES = (disable_extractors_dict, process)
def test_title_is_extracted(tmp_path, process, disable_extractors_dict):
    """Test that title is extracted from the page.

    Runs ``archivebox add --plugins=title`` against https://example.com, then
    reads the ``title`` column of ``core_snapshot`` directly from
    ``index.sqlite3`` inside ``tmp_path``.

    Args:
        tmp_path: directory the test changes into before opening
            ``index.sqlite3``.
        process: fixture imported from ``.fixtures``; not used directly here.
            NOTE(review): presumably requested for its setup side effects
            (initialising the archive) -- confirm against the fixture.
        disable_extractors_dict: mapping passed as the subprocess environment;
            ``SAVE_TITLE`` is set to ``"true"`` on it before the CLI runs.
    """
    disable_extractors_dict.update({"SAVE_TITLE": "true"})
    add_process = subprocess.run(
        ["archivebox", "add", "--plugins=title", "https://example.com"],
        capture_output=True,
        text=True,
        env=disable_extractors_dict,
    )
    # Surface the CLI's own output as the failure message.
    assert add_process.returncode == 0, add_process.stderr or add_process.stdout

    os.chdir(tmp_path)
    conn = sqlite3.connect("index.sqlite3")
    try:
        conn.row_factory = sqlite3.Row
        snapshot = conn.execute("SELECT title FROM core_snapshot").fetchone()
    finally:
        # Always release the DB handle, even if the query raises
        # (e.g. the table does not exist because the add step misbehaved).
        conn.close()

    # fetchone() returns None when the table is empty; fail with a clear
    # message instead of a TypeError from subscripting None.
    assert snapshot is not None, "core_snapshot has no rows after `archivebox add`"
    assert snapshot[0] is not None
    assert "Example" in snapshot[0]
def test_title_is_htmlencoded_in_index_html(tmp_path, process, disable_extractors_dict):
    """
    Regression test for https://github.com/ArchiveBox/ArchiveBox/issues/330

    Rendering unencoded content facilitates XSS injections and breaks the
    layout, so it should never reach the HTML output.

    NOTE(review): as written, the only check on the ``archivebox search --html``
    output is that the snapshot URL appears in it; no assertion about escaped
    or unescaped tags is made here.
    """
    disable_extractors_dict["SAVE_TITLE"] = "true"

    add_cmd = ["archivebox", "add", "--plugins=title", "https://example.com"]
    add_result = subprocess.run(
        add_cmd,
        env=disable_extractors_dict,
        text=True,
        capture_output=True,
    )
    assert add_result.returncode == 0, add_result.stderr or add_result.stdout

    # No env= is passed here, so the search command inherits the test
    # process's own environment rather than disable_extractors_dict.
    search_cmd = ["archivebox", "search", "--html"]
    search_result = subprocess.run(search_cmd, text=True, capture_output=True)
    assert search_result.returncode == 0, search_result.stderr or search_result.stdout

    # The archived URL must be present in the rendered HTML listing.
    assert "https://example.com" in search_result.stdout