From 1fc860e90167ad53e88b7e2a9a5abefea36f60f5 Mon Sep 17 00:00:00 2001 From: Nick Sweeting Date: Sun, 15 Mar 2026 11:45:04 -0700 Subject: [PATCH] Remove legacy binary override coercion --- archivebox/machine/models.py | 28 ++--------------- .../machine/tests/test_machine_models.py | 31 +++++++++++++++++++ 2 files changed, 33 insertions(+), 26 deletions(-) diff --git a/archivebox/machine/models.py b/archivebox/machine/models.py index 244303ff..1abdbfc4 100755 --- a/archivebox/machine/models.py +++ b/archivebox/machine/models.py @@ -344,7 +344,8 @@ class Binary(ModelWithHealthStats, ModelWithStateMachine): machine = Machine.current() overrides = overrides or {} - normalized_overrides = Binary._normalize_overrides(record.get('overrides', {})) + binary_overrides = record.get('overrides', {}) + normalized_overrides = binary_overrides if isinstance(binary_overrides, dict) else {} # Case 1: Already installed (from on_Crawl hooks) - has abspath AND binproviders # This happens when on_Crawl hooks detect already-installed binaries @@ -413,31 +414,6 @@ class Binary(ModelWithHealthStats, ModelWithStateMachine): self.modified_at = timezone.now() self.save() - @staticmethod - def _normalize_overrides(overrides: dict | None) -> dict: - """Normalize hook-emitted binary overrides to the canonical install_args shape.""" - if not isinstance(overrides, dict): - return {} - - normalized: dict = {} - reserved_keys = {'custom_cmd', 'cmd', 'command'} - - for provider, value in overrides.items(): - if provider in reserved_keys: - normalized[provider] = value - continue - - if isinstance(value, dict): - normalized[provider] = value - elif isinstance(value, (list, tuple)): - normalized[provider] = {'install_args': list(value)} - elif isinstance(value, str) and value.strip(): - normalized[provider] = {'install_args': value.strip()} - else: - normalized[provider] = value - - return normalized - def _allowed_binproviders(self) -> set[str] | None: """Return the allowed binproviders for this binary, or None for wildcard.""" providers = str(self.binproviders or '').strip() diff --git a/archivebox/machine/tests/test_machine_models.py b/archivebox/machine/tests/test_machine_models.py index b36fd7a2..4904f1a8 100644 --- a/archivebox/machine/tests/test_machine_models.py +++ b/archivebox/machine/tests/test_machine_models.py @@ -198,6 +198,37 @@ class TestBinaryModel(TestCase): self.assertEqual(binary.status, Binary.StatusChoices.QUEUED) self.assertGreater(binary.modified_at, old_modified) + def test_binary_from_json_preserves_install_args_overrides(self): + """Binary.from_json() should persist canonical install_args overrides unchanged.""" + overrides = { + 'apt': {'install_args': ['chromium']}, + 'npm': {'install_args': 'puppeteer'}, + 'custom': {'install_args': ['bash', '-lc', 'echo ok']}, + } + + binary = Binary.from_json({ + 'name': 'chrome', + 'binproviders': 'apt,npm,custom', + 'overrides': overrides, + }) + + self.assertEqual(binary.overrides, overrides) + + def test_binary_from_json_does_not_coerce_legacy_override_shapes(self): + """Binary.from_json() should no longer translate legacy non-dict provider overrides.""" + overrides = { + 'apt': ['chromium'], + 'npm': 'puppeteer', + } + + binary = Binary.from_json({ + 'name': 'chrome', + 'binproviders': 'apt,npm', + 'overrides': overrides, + }) + + self.assertEqual(binary.overrides, overrides) + class TestBinaryStateMachine(TestCase): """Test the BinaryMachine state machine."""