diff --git a/archivebox/cli/archivebox_extract.py b/archivebox/cli/archivebox_extract.py index 6747e74e..7dc043ae 100644 --- a/archivebox/cli/archivebox_extract.py +++ b/archivebox/cli/archivebox_extract.py @@ -100,6 +100,9 @@ def run_plugins( is_tty = sys.stdout.isatty() + # Parse comma-separated plugins list once (reused in creation and filtering) + plugins_list = [p.strip() for p in plugins.split(',') if p.strip()] if plugins else [] + # Collect all input records records = list(read_args_or_stdin(args)) @@ -147,10 +150,7 @@ def run_plugins( continue # Create pending ArchiveResults if needed - if plugins: - # Parse comma-separated plugins list - plugins_list = [p.strip() for p in plugins.split(',') if p.strip()] - + if plugins_list: # Only create for specific plugins for plugin_name in plugins_list: result, created = ArchiveResult.objects.get_or_create( @@ -195,9 +195,7 @@ def run_plugins( try: snapshot = Snapshot.objects.get(id=snapshot_id) results = snapshot.archiveresult_set.all() - if plugins: - # Parse comma-separated plugins list - plugins_list = [p.strip() for p in plugins.split(',') if p.strip()] + if plugins_list: results = results.filter(plugin__in=plugins_list) for result in results: diff --git a/archivebox/plugins/chrome/config.json b/archivebox/plugins/chrome/config.json index 56316089..4ff40faa 100644 --- a/archivebox/plugins/chrome/config.json +++ b/archivebox/plugins/chrome/config.json @@ -63,6 +63,31 @@ "default": [], "x-aliases": ["CHROME_EXTRA_ARGS"], "description": "Extra arguments to append to Chrome command" + }, + "CHROME_PAGELOAD_TIMEOUT": { + "type": "integer", + "default": 60, + "minimum": 5, + "x-fallback": "CHROME_TIMEOUT", + "description": "Timeout for page navigation/load in seconds" + }, + "CHROME_WAIT_FOR": { + "type": "string", + "default": "networkidle2", + "enum": ["domcontentloaded", "load", "networkidle0", "networkidle2"], + "description": "Page load completion condition (domcontentloaded, load, networkidle0, networkidle2)" + }, + "CHROME_DELAY_AFTER_LOAD": { + "type": "number", + "default": 0, + "minimum": 0, + "description": "Extra delay in seconds after page load completes before archiving (useful for JS-heavy SPAs)" + }, + "CHROME_CHECK_SSL_VALIDITY": { + "type": "boolean", + "default": true, + "x-fallback": "CHECK_SSL_VALIDITY", + "description": "Whether to verify SSL certificates (disable for self-signed certs)" } } }