This commit is contained in:
Nick Sweeting
2025-12-28 17:51:54 -08:00
parent 54f91c1339
commit f0aa19fa7d
157 changed files with 6774 additions and 5061 deletions

View File

@@ -250,68 +250,13 @@ def process_records(
yield result
def get_or_create_snapshot(record: Dict[str, Any], created_by_id: Optional[int] = None):
    """
    Get or create a Snapshot from a JSONL record.

    The record's 'url', 'title' and 'tags' are handed to
    Snapshot.objects.create_or_update_from_dict(); afterwards 'depth',
    'parent_snapshot_id', 'bookmarked_at' and 'crawl_id' are written onto the
    resulting snapshot wherever they differ from its current values.

    Args:
        record: Parsed JSONL record. Must contain a non-empty 'url'.
        created_by_id: User id passed to create_or_update_from_dict(). When
            falsy, the value of get_or_create_system_user_pk() is used.

    Returns the Snapshot instance.

    Raises:
        ValueError: If the record has no 'url' or the value is empty.
    """
    # Validate first so an unusable record is rejected before any model
    # import or database access (including the system-user lookup) happens.
    url = record.get('url')
    if not url:
        raise ValueError("Record missing required 'url' field")

    from core.models import Snapshot
    from archivebox.base_models.models import get_or_create_system_user_pk
    from archivebox.misc.util import parse_date

    created_by_id = created_by_id or get_or_create_system_user_pk()

    # Extract the remaining fields from the record
    title = record.get('title')
    tags_str = record.get('tags', '')
    bookmarked_at = record.get('bookmarked_at')
    # NOTE(review): a record without 'depth' yields 0 here, so the
    # `depth is not None` guard below can reset an existing snapshot's depth
    # to 0 -- confirm this is intended.
    depth = record.get('depth', 0)
    crawl_id = record.get('crawl_id')
    parent_snapshot_id = record.get('parent_snapshot_id')

    # Parse bookmarked_at if it arrived as a string
    if bookmarked_at and isinstance(bookmarked_at, str):
        bookmarked_at = parse_date(bookmarked_at)

    # Use the manager's create_or_update_from_dict method
    snapshot = Snapshot.objects.create_or_update_from_dict(
        {'url': url, 'title': title, 'tags': tags_str},
        created_by_id=created_by_id
    )

    # Update additional fields if provided, tracking which ones changed so
    # only those columns are written.
    update_fields = []
    if depth is not None and snapshot.depth != depth:
        snapshot.depth = depth
        update_fields.append('depth')
    # ids are compared via str() so the record value and the model attribute
    # do not need to share a type
    if parent_snapshot_id and str(snapshot.parent_snapshot_id) != str(parent_snapshot_id):
        snapshot.parent_snapshot_id = parent_snapshot_id
        update_fields.append('parent_snapshot_id')
    if bookmarked_at and snapshot.bookmarked_at != bookmarked_at:
        snapshot.bookmarked_at = bookmarked_at
        update_fields.append('bookmarked_at')
    if crawl_id and str(snapshot.crawl_id) != str(crawl_id):
        snapshot.crawl_id = crawl_id
        update_fields.append('crawl_id')

    if update_fields:
        # 'modified_at' is saved alongside the changed fields -- presumably so
        # the refreshed timestamp is persisted too; verify against the model.
        snapshot.save(update_fields=update_fields + ['modified_at'])

    return snapshot
def get_or_create_tag(record: Dict[str, Any]):
"""
Get or create a Tag from a JSONL record.
Returns the Tag instance.
"""
from core.models import Tag
from archivebox.core.models import Tag
name = record.get('name')
if not name:
@@ -353,8 +298,11 @@ def process_jsonl_records(records: Iterator[Dict[str, Any]], created_by_id: Opti
elif record_type == TYPE_SNAPSHOT or 'url' in record:
try:
snapshot = get_or_create_snapshot(record, created_by_id=created_by_id)
results['snapshots'].append(snapshot)
from archivebox.core.models import Snapshot
overrides = {'created_by_id': created_by_id} if created_by_id else {}
snapshot = Snapshot.from_jsonl(record, overrides=overrides)
if snapshot:
results['snapshots'].append(snapshot)
except ValueError:
continue

View File

@@ -17,7 +17,7 @@ from dataclasses import dataclass
from typing import Any, Optional, List, Dict, Union, Iterable, IO, TYPE_CHECKING
if TYPE_CHECKING:
from core.models import Snapshot
from archivebox.core.models import Snapshot
from rich import print
from rich.panel import Panel
@@ -257,7 +257,7 @@ def log_archiving_paused(num_links: int, idx: int, timestamp: str):
def log_archiving_finished(num_links: int):
from core.models import Snapshot
from archivebox.core.models import Snapshot
end_ts = datetime.now(timezone.utc)
_LAST_RUN_STATS.archiving_end_ts = end_ts
@@ -395,7 +395,7 @@ def log_list_started(filter_patterns: Optional[List[str]], filter_type: str):
print(' {}'.format(' '.join(filter_patterns or ())))
def log_list_finished(snapshots):
from core.models import Snapshot
from archivebox.core.models import Snapshot
print()
print('---------------------------------------------------------------------------------------------------')
print(Snapshot.objects.filter(pk__in=[s.pk for s in snapshots]).to_csv(cols=['timestamp', 'is_archived', 'num_outputs', 'url'], header=True, ljust=16, separator=' | '))

View File

@@ -1,335 +0,0 @@
__package__ = 'abx.archivebox'
# from django.test import TestCase
# from .toml_util import convert, TOML_HEADER
# TEST_INPUT = """
# [SERVER_CONFIG]
# IS_TTY=False
# USE_COLOR=False
# SHOW_PROGRESS=False
# IN_DOCKER=False
# IN_QEMU=False
# PUID=501
# PGID=20
# CONFIG_FILE=/opt/archivebox/data/ArchiveBox.conf
# ONLY_NEW=True
# TIMEOUT=60
# MEDIA_TIMEOUT=3600
# OUTPUT_PERMISSIONS=644
# RESTRICT_FILE_NAMES=windows
# URL_DENYLIST=\.(css|js|otf|ttf|woff|woff2|gstatic\.com|googleapis\.com/css)(\?.*)?$
# URL_ALLOWLIST=None
# ADMIN_USERNAME=None
# ADMIN_PASSWORD=None
# ENFORCE_ATOMIC_WRITES=True
# TAG_SEPARATOR_PATTERN=[,]
# SECRET_KEY=xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
# BIND_ADDR=127.0.0.1:8000
# ALLOWED_HOSTS=*
# DEBUG=False
# PUBLIC_INDEX=True
# PUBLIC_SNAPSHOTS=True
# PUBLIC_ADD_VIEW=False
# FOOTER_INFO=Content is hosted for personal archiving purposes only. Contact server owner for any takedown requests.
# SNAPSHOTS_PER_PAGE=40
# CUSTOM_TEMPLATES_DIR=None
# TIME_ZONE=UTC
# TIMEZONE=UTC
# REVERSE_PROXY_USER_HEADER=Remote-User
# REVERSE_PROXY_WHITELIST=
# LOGOUT_REDIRECT_URL=/
# PREVIEW_ORIGINALS=True
# LDAP=False
# LDAP_SERVER_URI=None
# LDAP_BIND_DN=None
# LDAP_BIND_PASSWORD=None
# LDAP_USER_BASE=None
# LDAP_USER_FILTER=None
# LDAP_USERNAME_ATTR=None
# LDAP_FIRSTNAME_ATTR=None
# LDAP_LASTNAME_ATTR=None
# LDAP_EMAIL_ATTR=None
# LDAP_CREATE_SUPERUSER=False
# SAVE_TITLE=True
# SAVE_FAVICON=True
# SAVE_WGET=True
# SAVE_WGET_REQUISITES=True
# SAVE_SINGLEFILE=True
# SAVE_READABILITY=True
# SAVE_MERCURY=True
# SAVE_HTMLTOTEXT=True
# SAVE_PDF=True
# SAVE_SCREENSHOT=True
# SAVE_DOM=True
# SAVE_HEADERS=True
# SAVE_WARC=True
# SAVE_GIT=True
# SAVE_MEDIA=True
# SAVE_ARCHIVE_DOT_ORG=True
# RESOLUTION=1440,2000
# GIT_DOMAINS=github.com,bitbucket.org,gitlab.com,gist.github.com,codeberg.org,gitea.com,git.sr.ht
# CHECK_SSL_VALIDITY=True
# MEDIA_MAX_SIZE=750m
# USER_AGENT=None
# CURL_USER_AGENT=Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36 ArchiveBox/0.8.0 (+https://github.com/ArchiveBox/ArchiveBox/) curl/curl 8.4.0 (x86_64-apple-darwin23.0)
# WGET_USER_AGENT=Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36 ArchiveBox/0.8.0 (+https://github.com/ArchiveBox/ArchiveBox/) wget/GNU Wget 1.24.5
# CHROME_USER_AGENT=Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36 ArchiveBox/0.8.0 (+https://github.com/ArchiveBox/ArchiveBox/)
# COOKIES_FILE=None
# CHROME_USER_DATA_DIR=None
# CHROME_TIMEOUT=0
# CHROME_HEADLESS=True
# CHROME_SANDBOX=True
# CHROME_EXTRA_ARGS=[]
# YOUTUBEDL_ARGS=['--restrict-filenames', '--trim-filenames', '128', '--write-description', '--write-info-json', '--write-annotations', '--write-thumbnail', '--no-call-home', '--write-sub', '--write-auto-subs', '--convert-subs=srt', '--yes-playlist', '--continue', '--no-abort-on-error', '--ignore-errors', '--geo-bypass', '--add-metadata', '--format=(bv*+ba/b)[filesize<=750m][filesize_approx<=?750m]/(bv*+ba/b)']
# YOUTUBEDL_EXTRA_ARGS=[]
# WGET_ARGS=['--no-verbose', '--adjust-extension', '--convert-links', '--force-directories', '--backup-converted', '--span-hosts', '--no-parent', '-e', 'robots=off']
# WGET_EXTRA_ARGS=[]
# CURL_ARGS=['--silent', '--location', '--compressed']
# CURL_EXTRA_ARGS=[]
# GIT_ARGS=['--recursive']
# SINGLEFILE_ARGS=[]
# SINGLEFILE_EXTRA_ARGS=[]
# MERCURY_ARGS=['--format=text']
# MERCURY_EXTRA_ARGS=[]
# FAVICON_PROVIDER=https://www.google.com/s2/favicons?domain={}
# USE_INDEXING_BACKEND=True
# USE_SEARCHING_BACKEND=True
# SEARCH_BACKEND_ENGINE=ripgrep
# SEARCH_BACKEND_HOST_NAME=localhost
# SEARCH_BACKEND_PORT=1491
# SEARCH_BACKEND_PASSWORD=SecretPassword
# SEARCH_PROCESS_HTML=True
# SONIC_COLLECTION=archivebox
# SONIC_BUCKET=snapshots
# SEARCH_BACKEND_TIMEOUT=90
# FTS_SEPARATE_DATABASE=True
# FTS_TOKENIZERS=porter unicode61 remove_diacritics 2
# FTS_SQLITE_MAX_LENGTH=1000000000
# USE_CURL=True
# USE_WGET=True
# USE_SINGLEFILE=True
# USE_READABILITY=True
# USE_MERCURY=True
# USE_GIT=True
# USE_CHROME=True
# USE_NODE=True
# USE_YOUTUBEDL=True
# USE_RIPGREP=True
# CURL_BINARY=curl
# GIT_BINARY=git
# WGET_BINARY=wget
# SINGLEFILE_BINARY=single-file
# READABILITY_BINARY=readability-extractor
# MERCURY_BINARY=postlight-parser
# YOUTUBEDL_BINARY=yt-dlp
# NODE_BINARY=node
# RIPGREP_BINARY=rg
# CHROME_BINARY=chrome
# POCKET_CONSUMER_KEY=None
# USER=squash
# PACKAGE_DIR=/opt/archivebox/archivebox
# TEMPLATES_DIR=/opt/archivebox/archivebox/templates
# ARCHIVE_DIR=/opt/archivebox/data/archive
# SOURCES_DIR=/opt/archivebox/data/sources
# LOGS_DIR=/opt/archivebox/data/logs
# PERSONAS_DIR=/opt/archivebox/data/personas
# URL_DENYLIST_PTN=re.compile('\\.(css|js|otf|ttf|woff|woff2|gstatic\\.com|googleapis\\.com/css)(\\?.*)?$', re.IGNORECASE|re.MULTILINE)
# URL_ALLOWLIST_PTN=None
# DIR_OUTPUT_PERMISSIONS=755
# ARCHIVEBOX_BINARY=/opt/archivebox/.venv/bin/archivebox
# VERSION=0.8.0
# COMMIT_HASH=102e87578c6036bb0132dd1ebd17f8f05ffc880f
# BUILD_TIME=2024-05-15 03:28:05 1715768885
# VERSIONS_AVAILABLE=None
# CAN_UPGRADE=False
# PYTHON_BINARY=/opt/archivebox/.venv/bin/python3.10
# PYTHON_VERSION=3.10.14
# DJANGO_BINARY=/opt/archivebox/.venv/lib/python3.10/site-packages/django/__init__.py
# DJANGO_VERSION=5.0.6 final (0)
# SQLITE_BINARY=/opt/homebrew/Cellar/python@3.10/3.10.14/Frameworks/Python.framework/Versions/3.10/lib/python3.10/sqlite3/dbapi2.py
# SQLITE_VERSION=2.6.0
# CURL_VERSION=curl 8.4.0 (x86_64-apple-darwin23.0)
# WGET_VERSION=GNU Wget 1.24.5
# WGET_AUTO_COMPRESSION=True
# RIPGREP_VERSION=ripgrep 14.1.0
# SINGLEFILE_VERSION=None
# READABILITY_VERSION=None
# MERCURY_VERSION=None
# GIT_VERSION=git version 2.44.0
# YOUTUBEDL_VERSION=2024.04.09
# CHROME_VERSION=Google Chrome 124.0.6367.207
# NODE_VERSION=v21.7.3
# """
# EXPECTED_OUTPUT = TOML_HEADER + '''[SERVER_CONFIG]
# IS_TTY = false
# USE_COLOR = false
# SHOW_PROGRESS = false
# IN_DOCKER = false
# IN_QEMU = false
# PUID = 501
# PGID = 20
# CONFIG_FILE = "/opt/archivebox/data/ArchiveBox.conf"
# ONLY_NEW = true
# TIMEOUT = 60
# MEDIA_TIMEOUT = 3600
# OUTPUT_PERMISSIONS = 644
# RESTRICT_FILE_NAMES = "windows"
# URL_DENYLIST = "\\\\.(css|js|otf|ttf|woff|woff2|gstatic\\\\.com|googleapis\\\\.com/css)(\\\\?.*)?$"
# URL_ALLOWLIST = null
# ADMIN_USERNAME = null
# ADMIN_PASSWORD = null
# ENFORCE_ATOMIC_WRITES = true
# TAG_SEPARATOR_PATTERN = "[,]"
# SECRET_KEY = "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"
# BIND_ADDR = "127.0.0.1:8000"
# ALLOWED_HOSTS = "*"
# DEBUG = false
# PUBLIC_INDEX = true
# PUBLIC_SNAPSHOTS = true
# PUBLIC_ADD_VIEW = false
# FOOTER_INFO = "Content is hosted for personal archiving purposes only. Contact server owner for any takedown requests."
# SNAPSHOTS_PER_PAGE = 40
# CUSTOM_TEMPLATES_DIR = null
# TIME_ZONE = "UTC"
# TIMEZONE = "UTC"
# REVERSE_PROXY_USER_HEADER = "Remote-User"
# REVERSE_PROXY_WHITELIST = ""
# LOGOUT_REDIRECT_URL = "/"
# PREVIEW_ORIGINALS = true
# LDAP = false
# LDAP_SERVER_URI = null
# LDAP_BIND_DN = null
# LDAP_BIND_PASSWORD = null
# LDAP_USER_BASE = null
# LDAP_USER_FILTER = null
# LDAP_USERNAME_ATTR = null
# LDAP_FIRSTNAME_ATTR = null
# LDAP_LASTNAME_ATTR = null
# LDAP_EMAIL_ATTR = null
# LDAP_CREATE_SUPERUSER = false
# SAVE_TITLE = true
# SAVE_FAVICON = true
# SAVE_WGET = true
# SAVE_WGET_REQUISITES = true
# SAVE_SINGLEFILE = true
# SAVE_READABILITY = true
# SAVE_MERCURY = true
# SAVE_HTMLTOTEXT = true
# SAVE_PDF = true
# SAVE_SCREENSHOT = true
# SAVE_DOM = true
# SAVE_HEADERS = true
# SAVE_WARC = true
# SAVE_GIT = true
# SAVE_MEDIA = true
# SAVE_ARCHIVE_DOT_ORG = true
# RESOLUTION = [1440, 2000]
# GIT_DOMAINS = "github.com,bitbucket.org,gitlab.com,gist.github.com,codeberg.org,gitea.com,git.sr.ht"
# CHECK_SSL_VALIDITY = true
# MEDIA_MAX_SIZE = "750m"
# USER_AGENT = null
# CURL_USER_AGENT = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36 ArchiveBox/0.8.0 (+https://github.com/ArchiveBox/ArchiveBox/) curl/curl 8.4.0 (x86_64-apple-darwin23.0)"
# WGET_USER_AGENT = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36 ArchiveBox/0.8.0 (+https://github.com/ArchiveBox/ArchiveBox/) wget/GNU Wget 1.24.5"
# CHROME_USER_AGENT = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36 ArchiveBox/0.8.0 (+https://github.com/ArchiveBox/ArchiveBox/)"
# COOKIES_FILE = null
# CHROME_USER_DATA_DIR = null
# CHROME_TIMEOUT = false
# CHROME_HEADLESS = true
# CHROME_SANDBOX = true
# CHROME_EXTRA_ARGS = []
# YOUTUBEDL_ARGS = ["--restrict-filenames", "--trim-filenames", "128", "--write-description", "--write-info-json", "--write-annotations", "--write-thumbnail", "--no-call-home", "--write-sub", "--write-auto-subs", "--convert-subs=srt", "--yes-playlist", "--continue", "--no-abort-on-error", "--ignore-errors", "--geo-bypass", "--add-metadata", "--format=(bv*+ba/b)[filesize<=750m][filesize_approx<=?750m]/(bv*+ba/b)"]
# YOUTUBEDL_EXTRA_ARGS = []
# WGET_ARGS = ["--no-verbose", "--adjust-extension", "--convert-links", "--force-directories", "--backup-converted", "--span-hosts", "--no-parent", "-e", "robots=off"]
# WGET_EXTRA_ARGS = []
# CURL_ARGS = ["--silent", "--location", "--compressed"]
# CURL_EXTRA_ARGS = []
# GIT_ARGS = ["--recursive"]
# SINGLEFILE_ARGS = []
# SINGLEFILE_EXTRA_ARGS = []
# MERCURY_ARGS = ["--format=text"]
# MERCURY_EXTRA_ARGS = []
# FAVICON_PROVIDER = "https://www.google.com/s2/favicons?domain={}"
# USE_INDEXING_BACKEND = true
# USE_SEARCHING_BACKEND = true
# SEARCH_BACKEND_ENGINE = "ripgrep"
# SEARCH_BACKEND_HOST_NAME = "localhost"
# SEARCH_BACKEND_PORT = 1491
# SEARCH_BACKEND_PASSWORD = "SecretPassword"
# SEARCH_PROCESS_HTML = true
# SONIC_COLLECTION = "archivebox"
# SONIC_BUCKET = "snapshots"
# SEARCH_BACKEND_TIMEOUT = 90
# FTS_SEPARATE_DATABASE = true
# FTS_TOKENIZERS = "porter unicode61 remove_diacritics 2"
# FTS_SQLITE_MAX_LENGTH = 1000000000
# USE_CURL = true
# USE_WGET = true
# USE_SINGLEFILE = true
# USE_READABILITY = true
# USE_MERCURY = true
# USE_GIT = true
# USE_CHROME = true
# USE_NODE = true
# USE_YOUTUBEDL = true
# USE_RIPGREP = true
# CURL_BINARY = "curl"
# GIT_BINARY = "git"
# WGET_BINARY = "wget"
# SINGLEFILE_BINARY = "single-file"
# READABILITY_BINARY = "readability-extractor"
# MERCURY_BINARY = "postlight-parser"
# YOUTUBEDL_BINARY = "yt-dlp"
# NODE_BINARY = "node"
# RIPGREP_BINARY = "rg"
# CHROME_BINARY = "chrome"
# POCKET_CONSUMER_KEY = null
# USER = "squash"
# PACKAGE_DIR = "/opt/archivebox/archivebox"
# TEMPLATES_DIR = "/opt/archivebox/archivebox/templates"
# ARCHIVE_DIR = "/opt/archivebox/data/archive"
# SOURCES_DIR = "/opt/archivebox/data/sources"
# LOGS_DIR = "/opt/archivebox/data/logs"
# PERSONAS_DIR = "/opt/archivebox/data/personas"
# URL_DENYLIST_PTN = "re.compile(\'\\\\.(css|js|otf|ttf|woff|woff2|gstatic\\\\.com|googleapis\\\\.com/css)(\\\\?.*)?$\', re.IGNORECASE|re.MULTILINE)"
# URL_ALLOWLIST_PTN = null
# DIR_OUTPUT_PERMISSIONS = 755
# ARCHIVEBOX_BINARY = "/opt/archivebox/.venv/bin/archivebox"
# VERSION = "0.8.0"
# COMMIT_HASH = "102e87578c6036bb0132dd1ebd17f8f05ffc880f"
# BUILD_TIME = "2024-05-15 03:28:05 1715768885"
# VERSIONS_AVAILABLE = null
# CAN_UPGRADE = false
# PYTHON_BINARY = "/opt/archivebox/.venv/bin/python3.10"
# PYTHON_VERSION = "3.10.14"
# DJANGO_BINARY = "/opt/archivebox/.venv/lib/python3.10/site-packages/django/__init__.py"
# DJANGO_VERSION = "5.0.6 final (0)"
# SQLITE_BINARY = "/opt/homebrew/Cellar/python@3.10/3.10.14/Frameworks/Python.framework/Versions/3.10/lib/python3.10/sqlite3/dbapi2.py"
# SQLITE_VERSION = "2.6.0"
# CURL_VERSION = "curl 8.4.0 (x86_64-apple-darwin23.0)"
# WGET_VERSION = "GNU Wget 1.24.5"
# WGET_AUTO_COMPRESSION = true
# RIPGREP_VERSION = "ripgrep 14.1.0"
# SINGLEFILE_VERSION = null
# READABILITY_VERSION = null
# MERCURY_VERSION = null
# GIT_VERSION = "git version 2.44.0"
# YOUTUBEDL_VERSION = "2024.04.09"
# CHROME_VERSION = "Google Chrome 124.0.6367.207"
# NODE_VERSION = "v21.7.3"'''
# class IniToTomlTests(TestCase):
# def test_convert(self):
# first_output = convert(TEST_INPUT) # make sure ini -> toml parses correctly
# second_output = convert(first_output) # make sure toml -> toml parses/dumps consistently
# assert first_output == second_output == EXPECTED_OUTPUT # make sure parsing is idempotent
# # DEBUGGING
# import sys
# import difflib
# sys.stdout.writelines(difflib.context_diff(first_output, second_output, fromfile='first', tofile='second'))
# print(repr(second_output))

View File

@@ -478,62 +478,6 @@ for url_str, num_urls in _test_url_strs.items():
### Chrome Helpers
def chrome_args(**options) -> List[str]:
    """Assemble the argument list for launching Chrome.

    Settings are looked up in ``options`` by their upper-case config name.
    CHROME_BINARY, CHECK_SSL_VALIDITY, CHROME_USER_AGENT and RESOLUTION fall
    back to the matching values imported from ``archivebox.config``
    (USER_AGENT for the user agent); CHROME_HEADLESS and CHROME_SANDBOX
    default to True, CHROME_TIMEOUT to 0 and CHROME_USER_DATA_DIR to None.

    Returns the command as a list whose first element is the Chrome binary.

    Raises:
        Exception: If no Chrome binary is configured.
    """
    import shutil
    from archivebox.config import CHECK_SSL_VALIDITY, RESOLUTION, USER_AGENT, CHROME_BINARY

    binary = options.get('CHROME_BINARY', CHROME_BINARY)
    if not binary:
        raise Exception('Could not find any CHROME_BINARY installed on your system')

    argv = [binary]

    if options.get('CHROME_HEADLESS', True):
        argv.append("--headless=new")

    if not options.get('CHROME_SANDBOX', True):
        # running in docker or other sandboxed environment
        argv.extend([
            "--no-sandbox",
            "--no-zygote",
            "--disable-dev-shm-usage",
            "--disable-software-rasterizer",
            "--run-all-compositor-stages-before-draw",
            "--hide-scrollbars",
            "--autoplay-policy=no-user-gesture-required",
            "--no-first-run",
            "--use-fake-ui-for-media-stream",
            "--use-fake-device-for-media-stream",
            "--disable-sync",
        ])

    if not options.get('CHECK_SSL_VALIDITY', CHECK_SSL_VALIDITY):
        argv.extend(['--disable-web-security', '--ignore-certificate-errors'])

    agent = options.get('CHROME_USER_AGENT', USER_AGENT)
    if agent:
        argv.append(f'--user-agent={agent}')

    window_size = options.get('RESOLUTION', RESOLUTION)
    if window_size:
        argv.append(f'--window-size={window_size}')

    timeout_value = options.get('CHROME_TIMEOUT', 0)
    if timeout_value:
        # the option value is multiplied by 1000 before being passed on
        argv.append(f'--timeout={timeout_value * 1000}')

    profile_dir = options.get('CHROME_USER_DATA_DIR', None)
    if profile_dir:
        argv.append(f'--user-data-dir={profile_dir}')

    return argv
def chrome_cleanup():
"""
Cleans up any state or runtime files that chrome leaves behind when killed by