more migration id/uuid and config propagation fixes

This commit is contained in:
Nick Sweeting
2026-01-04 16:16:26 -08:00
parent 839ae744cf
commit 456aaee287
16 changed files with 789 additions and 94 deletions

View File

@@ -118,12 +118,12 @@ class BaseConfigSet(BaseSettings):
def get_config(
scope: str = "global",
defaults: Optional[Dict] = None,
persona: Any = None,
user: Any = None,
crawl: Any = None,
snapshot: Any = None,
machine: Any = None,
) -> Dict[str, Any]:
"""
Get merged config from all sources.
@@ -134,17 +134,18 @@ def get_config(
3. Per-user config (user.config JSON field)
4. Per-persona config (persona.get_derived_config() - includes CHROME_USER_DATA_DIR etc.)
5. Environment variables
6. Config file (ArchiveBox.conf)
7. Plugin schema defaults (config.json)
8. Core config defaults
6. Per-machine config (machine.config JSON field - resolved binary paths)
7. Config file (ArchiveBox.conf)
8. Plugin schema defaults (config.json)
9. Core config defaults
Args:
scope: Config scope ('global', 'crawl', 'snapshot', etc.)
defaults: Default values to start with
persona: Persona object (provides derived paths like CHROME_USER_DATA_DIR)
user: User object with config JSON field
crawl: Crawl object with config JSON field
snapshot: Snapshot object with config JSON field
machine: Machine object with config JSON field (defaults to Machine.current())
Returns:
Merged config dict
@@ -184,6 +185,18 @@ def get_config(
file_config = BaseConfigSet.load_from_file(config_file)
config.update(file_config)
# Apply machine config overrides (cached binary paths, etc.)
if machine is None:
# Default to current machine if not provided
try:
from archivebox.machine.models import Machine
machine = Machine.current()
except Exception:
pass # Machine might not be available during early init
if machine and hasattr(machine, "config") and machine.config:
config.update(machine.config)
# Override with environment variables
for key in config:
env_val = os.environ.get(key)
@@ -221,8 +234,8 @@ def get_config(
config.update(crawl.config)
# Add CRAWL_OUTPUT_DIR for snapshot hooks to find shared Chrome session
if crawl and hasattr(crawl, "OUTPUT_DIR"):
config['CRAWL_OUTPUT_DIR'] = str(crawl.OUTPUT_DIR)
if crawl and hasattr(crawl, "output_dir"):
config['CRAWL_OUTPUT_DIR'] = str(crawl.output_dir)
# Apply snapshot config overrides (highest priority)
if snapshot and hasattr(snapshot, "config") and snapshot.config:
@@ -260,7 +273,7 @@ def get_flat_config() -> Dict[str, Any]:
Replaces abx.pm.hook.get_FLAT_CONFIG()
"""
return get_config(scope="global")
return get_config()
def get_all_configs() -> Dict[str, BaseConfigSet]: