mirror of
https://github.com/ArchiveBox/ArchiveBox.git
synced 2026-01-03 01:15:57 +10:00
Update install hooks to respect XYZ_BINARY env vars
- All install hooks now respect their respective XYZ_BINARY env vars (e.g., WGET_BINARY, CHROME_BINARY, YTDLP_BINARY, etc.)
- Support both absolute paths (/usr/bin/wget2) and binary names (wget2)
- Dynamic bin_name used in Dependency JSONL output
- Updated 11 install hooks to follow the new pattern
- Mark checklist items as complete in TODO_hook_architecture.md
This commit is contained in:
@@ -118,7 +118,7 @@ def run(self):
|
||||
self.save()
|
||||
```
|
||||
|
||||
### Validation Hook Pattern (on_Crawl__00_validate_*.py)
|
||||
### Install Hook Pattern (on_Crawl__00_install_*.py)
|
||||
|
||||
**Purpose**: Check if binary exists, emit Dependency if not found.
|
||||
|
||||
@@ -831,11 +831,11 @@ const cmd = ['wget', '-p', '-k', url]; // Ignores WGET_BINARY
|
||||
|
||||
#### Install Hook Checklist
|
||||
|
||||
- [ ] Renamed from `on_Crawl__*_validate_*` to `on_Crawl__*_install_*`
|
||||
- [ ] Reads `XYZ_BINARY` env var and handles both absolute paths + bin names
|
||||
- [ ] Emits `{"type": "Dependency", ...}` JSONL (NOT hardcoded to always check for 'wget')
|
||||
- [ ] Does NOT call npm/apt/brew/pip directly
|
||||
- [ ] Follows standard pattern from section 4.1
|
||||
- [x] Renamed from `on_Crawl__*_validate_*` to `on_Crawl__*_install_*`
|
||||
- [x] Reads `XYZ_BINARY` env var and handles both absolute paths + bin names
|
||||
- [x] Emits `{"type": "Dependency", ...}` JSONL (uses configured bin_name)
|
||||
- [x] Does NOT call npm/apt/brew/pip directly
|
||||
- [x] Follows standard pattern from section 4.1
|
||||
|
||||
#### Snapshot Hook Checklist
|
||||
|
||||
@@ -1973,4 +1973,4 @@ All phases of the hook architecture implementation are now complete:
|
||||
- ✅ Phase 6: ArchiveResult.run() updated
|
||||
- ✅ Phase 7: Background hook support
|
||||
|
||||
Total hooks updated: **32 hooks** across 6 dependency providers, 11 validate hooks, 8 Python snapshot hooks, and 14 JS snapshot hooks (3 of which are background hooks).
|
||||
Total hooks updated: **32 hooks** across 6 dependency providers, 13 install hooks (renamed from validate), 8 Python snapshot hooks, and 14 JS snapshot hooks (3 of which are background hooks).
|
||||
|
||||
@@ -1,23 +1,34 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Validation hook for Chrome/Chromium binary.
|
||||
Install hook for Chrome/Chromium binary.
|
||||
|
||||
Runs at crawl start to verify Chrome is available.
|
||||
Outputs JSONL for InstalledBinary and Machine config updates.
|
||||
Respects CHROME_BINARY env var for custom binary paths.
|
||||
"""
|
||||
|
||||
import os
|
||||
import sys
|
||||
import json
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
def find_chrome() -> dict | None:
|
||||
"""Find Chrome/Chromium binary."""
|
||||
"""Find Chrome/Chromium binary, respecting CHROME_BINARY env var."""
|
||||
try:
|
||||
from abx_pkg import Binary, AptProvider, BrewProvider, EnvProvider
|
||||
|
||||
# Try common Chrome/Chromium binary names
|
||||
for name in ['google-chrome', 'chromium', 'chromium-browser', 'google-chrome-stable', 'chrome']:
|
||||
binary = Binary(name=name, binproviders=[AptProvider(), BrewProvider(), EnvProvider()])
|
||||
# Check if user has configured a custom binary
|
||||
configured_binary = os.environ.get('CHROME_BINARY', '').strip()
|
||||
|
||||
if configured_binary:
|
||||
# User specified a custom binary path or name
|
||||
if '/' in configured_binary:
|
||||
bin_name = Path(configured_binary).name
|
||||
else:
|
||||
bin_name = configured_binary
|
||||
|
||||
binary = Binary(name=bin_name, binproviders=[EnvProvider()])
|
||||
loaded = binary.load()
|
||||
if loaded and loaded.abspath:
|
||||
return {
|
||||
@@ -27,6 +38,19 @@ def find_chrome() -> dict | None:
|
||||
'sha256': loaded.sha256 if hasattr(loaded, 'sha256') else None,
|
||||
'binprovider': loaded.binprovider.name if loaded.binprovider else 'env',
|
||||
}
|
||||
else:
|
||||
# Try common Chrome/Chromium binary names
|
||||
for name in ['google-chrome', 'chromium', 'chromium-browser', 'google-chrome-stable', 'chrome']:
|
||||
binary = Binary(name=name, binproviders=[AptProvider(), BrewProvider(), EnvProvider()])
|
||||
loaded = binary.load()
|
||||
if loaded and loaded.abspath:
|
||||
return {
|
||||
'name': 'chrome',
|
||||
'abspath': str(loaded.abspath),
|
||||
'version': str(loaded.version) if loaded.version else None,
|
||||
'sha256': loaded.sha256 if hasattr(loaded, 'sha256') else None,
|
||||
'binprovider': loaded.binprovider.name if loaded.binprovider else 'env',
|
||||
}
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
|
||||
@@ -1,25 +1,39 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Validation hook for forum-dl.
|
||||
Install hook for forum-dl.
|
||||
|
||||
Runs at crawl start to verify forum-dl binary is available.
|
||||
Outputs JSONL for InstalledBinary and Machine config updates.
|
||||
Respects FORUMDL_BINARY env var for custom binary paths.
|
||||
"""
|
||||
|
||||
import os
|
||||
import sys
|
||||
import json
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
def find_forumdl() -> dict | None:
|
||||
"""Find forum-dl binary."""
|
||||
"""Find forum-dl binary, respecting FORUMDL_BINARY env var."""
|
||||
try:
|
||||
from abx_pkg import Binary, PipProvider, EnvProvider
|
||||
|
||||
binary = Binary(name='forum-dl', binproviders=[PipProvider(), EnvProvider()])
|
||||
# Check if user has configured a custom binary
|
||||
configured_binary = os.environ.get('FORUMDL_BINARY', '').strip()
|
||||
|
||||
if configured_binary:
|
||||
if '/' in configured_binary:
|
||||
bin_name = Path(configured_binary).name
|
||||
else:
|
||||
bin_name = configured_binary
|
||||
else:
|
||||
bin_name = 'forum-dl'
|
||||
|
||||
binary = Binary(name=bin_name, binproviders=[PipProvider(), EnvProvider()])
|
||||
loaded = binary.load()
|
||||
if loaded and loaded.abspath:
|
||||
return {
|
||||
'name': 'forum-dl',
|
||||
'name': bin_name,
|
||||
'abspath': str(loaded.abspath),
|
||||
'version': str(loaded.version) if loaded.version else None,
|
||||
'sha256': loaded.sha256 if hasattr(loaded, 'sha256') else None,
|
||||
@@ -32,6 +46,15 @@ def find_forumdl() -> dict | None:
|
||||
|
||||
|
||||
def main():
|
||||
# Determine binary name from config
|
||||
configured_binary = os.environ.get('FORUMDL_BINARY', '').strip()
|
||||
if configured_binary and '/' in configured_binary:
|
||||
bin_name = Path(configured_binary).name
|
||||
elif configured_binary:
|
||||
bin_name = configured_binary
|
||||
else:
|
||||
bin_name = 'forum-dl'
|
||||
|
||||
# Check for forum-dl (required)
|
||||
forumdl_result = find_forumdl()
|
||||
|
||||
@@ -67,7 +90,7 @@ def main():
|
||||
# Provide overrides to install with chardet instead
|
||||
print(json.dumps({
|
||||
'type': 'Dependency',
|
||||
'bin_name': 'forum-dl',
|
||||
'bin_name': bin_name,
|
||||
'bin_providers': 'pip,env',
|
||||
'overrides': {
|
||||
'pip': {
|
||||
@@ -77,7 +100,7 @@ def main():
|
||||
}
|
||||
}
|
||||
}))
|
||||
missing_deps.append('forum-dl')
|
||||
missing_deps.append(bin_name)
|
||||
|
||||
if missing_deps:
|
||||
print(f"Missing dependencies: {', '.join(missing_deps)}", file=sys.stderr)
|
||||
|
||||
@@ -1,25 +1,39 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Validation hook for gallery-dl.
|
||||
Install hook for gallery-dl.
|
||||
|
||||
Runs at crawl start to verify gallery-dl binary is available.
|
||||
Outputs JSONL for InstalledBinary and Machine config updates.
|
||||
Respects GALLERYDL_BINARY env var for custom binary paths.
|
||||
"""
|
||||
|
||||
import os
|
||||
import sys
|
||||
import json
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
def find_gallerydl() -> dict | None:
|
||||
"""Find gallery-dl binary."""
|
||||
"""Find gallery-dl binary, respecting GALLERYDL_BINARY env var."""
|
||||
try:
|
||||
from abx_pkg import Binary, PipProvider, EnvProvider
|
||||
|
||||
binary = Binary(name='gallery-dl', binproviders=[PipProvider(), EnvProvider()])
|
||||
# Check if user has configured a custom binary
|
||||
configured_binary = os.environ.get('GALLERYDL_BINARY', '').strip()
|
||||
|
||||
if configured_binary:
|
||||
if '/' in configured_binary:
|
||||
bin_name = Path(configured_binary).name
|
||||
else:
|
||||
bin_name = configured_binary
|
||||
else:
|
||||
bin_name = 'gallery-dl'
|
||||
|
||||
binary = Binary(name=bin_name, binproviders=[PipProvider(), EnvProvider()])
|
||||
loaded = binary.load()
|
||||
if loaded and loaded.abspath:
|
||||
return {
|
||||
'name': 'gallery-dl',
|
||||
'name': bin_name,
|
||||
'abspath': str(loaded.abspath),
|
||||
'version': str(loaded.version) if loaded.version else None,
|
||||
'sha256': loaded.sha256 if hasattr(loaded, 'sha256') else None,
|
||||
@@ -32,6 +46,15 @@ def find_gallerydl() -> dict | None:
|
||||
|
||||
|
||||
def main():
|
||||
# Determine binary name from config
|
||||
configured_binary = os.environ.get('GALLERYDL_BINARY', '').strip()
|
||||
if configured_binary and '/' in configured_binary:
|
||||
bin_name = Path(configured_binary).name
|
||||
elif configured_binary:
|
||||
bin_name = configured_binary
|
||||
else:
|
||||
bin_name = 'gallery-dl'
|
||||
|
||||
# Check for gallery-dl (required)
|
||||
gallerydl_result = find_gallerydl()
|
||||
|
||||
@@ -65,10 +88,10 @@ def main():
|
||||
else:
|
||||
print(json.dumps({
|
||||
'type': 'Dependency',
|
||||
'bin_name': 'gallery-dl',
|
||||
'bin_name': bin_name,
|
||||
'bin_providers': 'pip,env',
|
||||
}))
|
||||
missing_deps.append('gallery-dl')
|
||||
missing_deps.append(bin_name)
|
||||
|
||||
if missing_deps:
|
||||
print(f"Missing dependencies: {', '.join(missing_deps)}", file=sys.stderr)
|
||||
|
||||
@@ -1,25 +1,39 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Validation hook for git binary.
|
||||
Install hook for git binary.
|
||||
|
||||
Runs at crawl start to verify git is available.
|
||||
Outputs JSONL for InstalledBinary and Machine config updates.
|
||||
Respects GIT_BINARY env var for custom binary paths.
|
||||
"""
|
||||
|
||||
import os
|
||||
import sys
|
||||
import json
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
def find_git() -> dict | None:
|
||||
"""Find git binary."""
|
||||
"""Find git binary, respecting GIT_BINARY env var."""
|
||||
try:
|
||||
from abx_pkg import Binary, EnvProvider
|
||||
|
||||
binary = Binary(name='git', binproviders=[EnvProvider()])
|
||||
# Check if user has configured a custom binary
|
||||
configured_binary = os.environ.get('GIT_BINARY', '').strip()
|
||||
|
||||
if configured_binary:
|
||||
if '/' in configured_binary:
|
||||
bin_name = Path(configured_binary).name
|
||||
else:
|
||||
bin_name = configured_binary
|
||||
else:
|
||||
bin_name = 'git'
|
||||
|
||||
binary = Binary(name=bin_name, binproviders=[EnvProvider()])
|
||||
loaded = binary.load()
|
||||
if loaded and loaded.abspath:
|
||||
return {
|
||||
'name': 'git',
|
||||
'name': bin_name,
|
||||
'abspath': str(loaded.abspath),
|
||||
'version': str(loaded.version) if loaded.version else None,
|
||||
'sha256': loaded.sha256 if hasattr(loaded, 'sha256') else None,
|
||||
@@ -32,6 +46,15 @@ def find_git() -> dict | None:
|
||||
|
||||
|
||||
def main():
|
||||
# Determine binary name from config
|
||||
configured_binary = os.environ.get('GIT_BINARY', '').strip()
|
||||
if configured_binary and '/' in configured_binary:
|
||||
bin_name = Path(configured_binary).name
|
||||
elif configured_binary:
|
||||
bin_name = configured_binary
|
||||
else:
|
||||
bin_name = 'git'
|
||||
|
||||
result = find_git()
|
||||
|
||||
if result and result.get('abspath'):
|
||||
@@ -63,10 +86,10 @@ def main():
|
||||
else:
|
||||
print(json.dumps({
|
||||
'type': 'Dependency',
|
||||
'bin_name': 'git',
|
||||
'bin_name': bin_name,
|
||||
'bin_providers': 'apt,brew,env',
|
||||
}))
|
||||
print(f"git binary not found", file=sys.stderr)
|
||||
print(f"{bin_name} binary not found", file=sys.stderr)
|
||||
sys.exit(1)
|
||||
|
||||
|
||||
|
||||
@@ -1,25 +1,39 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Validation hook for yt-dlp and its dependencies (node, ffmpeg).
|
||||
Install hook for yt-dlp and its dependencies (node, ffmpeg).
|
||||
|
||||
Runs at crawl start to verify yt-dlp and required binaries are available.
|
||||
Outputs JSONL for InstalledBinary and Machine config updates.
|
||||
Respects YTDLP_BINARY, NODE_BINARY, FFMPEG_BINARY env vars.
|
||||
"""
|
||||
|
||||
import os
|
||||
import sys
|
||||
import json
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
def get_bin_name(env_var: str, default: str) -> str:
    """Return the binary name configured via ``env_var``, else ``default``.

    The environment variable may hold either a bare binary name
    (``wget2``) or a path (``/usr/bin/wget2``). For a path, only the final
    component is returned.

    Args:
        env_var: Name of the environment variable to read
            (e.g. ``YTDLP_BINARY``).
        default: Binary name to use when the variable is unset, blank, or
            holds a path with no final component (e.g. ``/``).

    Returns:
        The binary name to look up; never an empty string unless
        ``default`` itself is empty.
    """
    configured = os.environ.get(env_var, '').strip()
    if not configured:
        return default
    if '/' not in configured:
        # Bare binary name: use it verbatim.
        return configured
    # Path('/').name and Path('./').name are '' -- an empty binary name is
    # never usable, so fall back to the default instead of returning it.
    return Path(configured).name or default
|
||||
|
||||
|
||||
def find_ytdlp() -> dict | None:
|
||||
"""Find yt-dlp binary."""
|
||||
"""Find yt-dlp binary, respecting YTDLP_BINARY env var."""
|
||||
try:
|
||||
from abx_pkg import Binary, PipProvider, BrewProvider, AptProvider, EnvProvider
|
||||
|
||||
binary = Binary(name='yt-dlp', binproviders=[PipProvider(), BrewProvider(), AptProvider(), EnvProvider()])
|
||||
bin_name = get_bin_name('YTDLP_BINARY', 'yt-dlp')
|
||||
binary = Binary(name=bin_name, binproviders=[PipProvider(), BrewProvider(), AptProvider(), EnvProvider()])
|
||||
loaded = binary.load()
|
||||
if loaded and loaded.abspath:
|
||||
return {
|
||||
'name': 'yt-dlp',
|
||||
'name': bin_name,
|
||||
'abspath': str(loaded.abspath),
|
||||
'version': str(loaded.version) if loaded.version else None,
|
||||
'sha256': loaded.sha256 if hasattr(loaded, 'sha256') else None,
|
||||
@@ -32,15 +46,16 @@ def find_ytdlp() -> dict | None:
|
||||
|
||||
|
||||
def find_node() -> dict | None:
|
||||
"""Find node binary."""
|
||||
"""Find node binary, respecting NODE_BINARY env var."""
|
||||
try:
|
||||
from abx_pkg import Binary, AptProvider, BrewProvider, EnvProvider
|
||||
|
||||
binary = Binary(name='node', binproviders=[AptProvider(), BrewProvider(), EnvProvider()])
|
||||
bin_name = get_bin_name('NODE_BINARY', 'node')
|
||||
binary = Binary(name=bin_name, binproviders=[AptProvider(), BrewProvider(), EnvProvider()])
|
||||
loaded = binary.load()
|
||||
if loaded and loaded.abspath:
|
||||
return {
|
||||
'name': 'node',
|
||||
'name': bin_name,
|
||||
'abspath': str(loaded.abspath),
|
||||
'version': str(loaded.version) if loaded.version else None,
|
||||
'sha256': loaded.sha256 if hasattr(loaded, 'sha256') else None,
|
||||
@@ -53,15 +68,16 @@ def find_node() -> dict | None:
|
||||
|
||||
|
||||
def find_ffmpeg() -> dict | None:
|
||||
"""Find ffmpeg binary."""
|
||||
"""Find ffmpeg binary, respecting FFMPEG_BINARY env var."""
|
||||
try:
|
||||
from abx_pkg import Binary, AptProvider, BrewProvider, EnvProvider
|
||||
|
||||
binary = Binary(name='ffmpeg', binproviders=[AptProvider(), BrewProvider(), EnvProvider()])
|
||||
bin_name = get_bin_name('FFMPEG_BINARY', 'ffmpeg')
|
||||
binary = Binary(name=bin_name, binproviders=[AptProvider(), BrewProvider(), EnvProvider()])
|
||||
loaded = binary.load()
|
||||
if loaded and loaded.abspath:
|
||||
return {
|
||||
'name': 'ffmpeg',
|
||||
'name': bin_name,
|
||||
'abspath': str(loaded.abspath),
|
||||
'version': str(loaded.version) if loaded.version else None,
|
||||
'sha256': loaded.sha256 if hasattr(loaded, 'sha256') else None,
|
||||
@@ -85,6 +101,11 @@ def main():
|
||||
|
||||
missing_deps = []
|
||||
|
||||
# Get configured binary names
|
||||
ytdlp_bin_name = get_bin_name('YTDLP_BINARY', 'yt-dlp')
|
||||
node_bin_name = get_bin_name('NODE_BINARY', 'node')
|
||||
ffmpeg_bin_name = get_bin_name('FFMPEG_BINARY', 'ffmpeg')
|
||||
|
||||
# Emit results for yt-dlp
|
||||
if ytdlp_result and ytdlp_result.get('abspath'):
|
||||
print(json.dumps({
|
||||
@@ -113,10 +134,10 @@ def main():
|
||||
else:
|
||||
print(json.dumps({
|
||||
'type': 'Dependency',
|
||||
'bin_name': 'yt-dlp',
|
||||
'bin_name': ytdlp_bin_name,
|
||||
'bin_providers': 'pip,brew,apt,env',
|
||||
}))
|
||||
missing_deps.append('yt-dlp')
|
||||
missing_deps.append(ytdlp_bin_name)
|
||||
|
||||
# Emit results for node
|
||||
if node_result and node_result.get('abspath'):
|
||||
@@ -147,13 +168,13 @@ def main():
|
||||
# node is installed as 'nodejs' package on apt
|
||||
print(json.dumps({
|
||||
'type': 'Dependency',
|
||||
'bin_name': 'node',
|
||||
'bin_name': node_bin_name,
|
||||
'bin_providers': 'apt,brew,env',
|
||||
'overrides': {
|
||||
'apt': {'packages': ['nodejs']}
|
||||
}
|
||||
}))
|
||||
missing_deps.append('node')
|
||||
missing_deps.append(node_bin_name)
|
||||
|
||||
# Emit results for ffmpeg
|
||||
if ffmpeg_result and ffmpeg_result.get('abspath'):
|
||||
@@ -183,10 +204,10 @@ def main():
|
||||
else:
|
||||
print(json.dumps({
|
||||
'type': 'Dependency',
|
||||
'bin_name': 'ffmpeg',
|
||||
'bin_name': ffmpeg_bin_name,
|
||||
'bin_providers': 'apt,brew,env',
|
||||
}))
|
||||
missing_deps.append('ffmpeg')
|
||||
missing_deps.append(ffmpeg_bin_name)
|
||||
|
||||
if missing_deps:
|
||||
print(f"Missing dependencies: {', '.join(missing_deps)}", file=sys.stderr)
|
||||
|
||||
@@ -1,25 +1,39 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Validation hook for postlight-parser binary.
|
||||
Install hook for postlight-parser binary.
|
||||
|
||||
Runs at crawl start to verify postlight-parser is available.
|
||||
Outputs JSONL for InstalledBinary and Machine config updates.
|
||||
Respects MERCURY_BINARY env var for custom binary paths.
|
||||
"""
|
||||
|
||||
import os
|
||||
import sys
|
||||
import json
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
def find_mercury() -> dict | None:
|
||||
"""Find postlight-parser binary."""
|
||||
"""Find postlight-parser binary, respecting MERCURY_BINARY env var."""
|
||||
try:
|
||||
from abx_pkg import Binary, NpmProvider, EnvProvider
|
||||
|
||||
binary = Binary(name='postlight-parser', binproviders=[NpmProvider(), EnvProvider()])
|
||||
# Check if user has configured a custom binary
|
||||
configured_binary = os.environ.get('MERCURY_BINARY', '').strip()
|
||||
|
||||
if configured_binary:
|
||||
if '/' in configured_binary:
|
||||
bin_name = Path(configured_binary).name
|
||||
else:
|
||||
bin_name = configured_binary
|
||||
else:
|
||||
bin_name = 'postlight-parser'
|
||||
|
||||
binary = Binary(name=bin_name, binproviders=[NpmProvider(), EnvProvider()])
|
||||
loaded = binary.load()
|
||||
if loaded and loaded.abspath:
|
||||
return {
|
||||
'name': 'postlight-parser',
|
||||
'name': bin_name,
|
||||
'abspath': str(loaded.abspath),
|
||||
'version': str(loaded.version) if loaded.version else None,
|
||||
'sha256': loaded.sha256 if hasattr(loaded, 'sha256') else None,
|
||||
@@ -32,6 +46,15 @@ def find_mercury() -> dict | None:
|
||||
|
||||
|
||||
def main():
|
||||
# Determine binary name from config
|
||||
configured_binary = os.environ.get('MERCURY_BINARY', '').strip()
|
||||
if configured_binary and '/' in configured_binary:
|
||||
bin_name = Path(configured_binary).name
|
||||
elif configured_binary:
|
||||
bin_name = configured_binary
|
||||
else:
|
||||
bin_name = 'postlight-parser'
|
||||
|
||||
result = find_mercury()
|
||||
|
||||
if result and result.get('abspath'):
|
||||
@@ -64,13 +87,13 @@ def main():
|
||||
# postlight-parser is installed as @postlight/parser in npm
|
||||
print(json.dumps({
|
||||
'type': 'Dependency',
|
||||
'bin_name': 'postlight-parser',
|
||||
'bin_name': bin_name,
|
||||
'bin_providers': 'npm,env',
|
||||
'overrides': {
|
||||
'npm': {'packages': ['@postlight/parser']}
|
||||
}
|
||||
}))
|
||||
print(f"postlight-parser binary not found", file=sys.stderr)
|
||||
print(f"{bin_name} binary not found", file=sys.stderr)
|
||||
sys.exit(1)
|
||||
|
||||
|
||||
|
||||
@@ -1,25 +1,39 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Validation hook for papers-dl.
|
||||
Install hook for papers-dl.
|
||||
|
||||
Runs at crawl start to verify papers-dl binary is available.
|
||||
Outputs JSONL for InstalledBinary and Machine config updates.
|
||||
Respects PAPERSDL_BINARY env var for custom binary paths.
|
||||
"""
|
||||
|
||||
import os
|
||||
import sys
|
||||
import json
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
def find_papersdl() -> dict | None:
|
||||
"""Find papers-dl binary."""
|
||||
"""Find papers-dl binary, respecting PAPERSDL_BINARY env var."""
|
||||
try:
|
||||
from abx_pkg import Binary, PipProvider, EnvProvider
|
||||
|
||||
binary = Binary(name='papers-dl', binproviders=[PipProvider(), EnvProvider()])
|
||||
# Check if user has configured a custom binary
|
||||
configured_binary = os.environ.get('PAPERSDL_BINARY', '').strip()
|
||||
|
||||
if configured_binary:
|
||||
if '/' in configured_binary:
|
||||
bin_name = Path(configured_binary).name
|
||||
else:
|
||||
bin_name = configured_binary
|
||||
else:
|
||||
bin_name = 'papers-dl'
|
||||
|
||||
binary = Binary(name=bin_name, binproviders=[PipProvider(), EnvProvider()])
|
||||
loaded = binary.load()
|
||||
if loaded and loaded.abspath:
|
||||
return {
|
||||
'name': 'papers-dl',
|
||||
'name': bin_name,
|
||||
'abspath': str(loaded.abspath),
|
||||
'version': str(loaded.version) if loaded.version else None,
|
||||
'sha256': loaded.sha256 if hasattr(loaded, 'sha256') else None,
|
||||
@@ -32,6 +46,15 @@ def find_papersdl() -> dict | None:
|
||||
|
||||
|
||||
def main():
|
||||
# Determine binary name from config
|
||||
configured_binary = os.environ.get('PAPERSDL_BINARY', '').strip()
|
||||
if configured_binary and '/' in configured_binary:
|
||||
bin_name = Path(configured_binary).name
|
||||
elif configured_binary:
|
||||
bin_name = configured_binary
|
||||
else:
|
||||
bin_name = 'papers-dl'
|
||||
|
||||
# Check for papers-dl (required)
|
||||
papersdl_result = find_papersdl()
|
||||
|
||||
@@ -65,10 +88,10 @@ def main():
|
||||
else:
|
||||
print(json.dumps({
|
||||
'type': 'Dependency',
|
||||
'bin_name': 'papers-dl',
|
||||
'bin_name': bin_name,
|
||||
'bin_providers': 'pip,env',
|
||||
}))
|
||||
missing_deps.append('papers-dl')
|
||||
missing_deps.append(bin_name)
|
||||
|
||||
if missing_deps:
|
||||
print(f"Missing dependencies: {', '.join(missing_deps)}", file=sys.stderr)
|
||||
|
||||
@@ -1,25 +1,39 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Validation hook for readability-extractor binary.
|
||||
Install hook for readability-extractor binary.
|
||||
|
||||
Runs at crawl start to verify readability-extractor is available.
|
||||
Outputs JSONL for InstalledBinary and Machine config updates.
|
||||
Respects READABILITY_BINARY env var for custom binary paths.
|
||||
"""
|
||||
|
||||
import os
|
||||
import sys
|
||||
import json
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
def find_readability() -> dict | None:
|
||||
"""Find readability-extractor binary."""
|
||||
"""Find readability-extractor binary, respecting READABILITY_BINARY env var."""
|
||||
try:
|
||||
from abx_pkg import Binary, NpmProvider, EnvProvider
|
||||
|
||||
binary = Binary(name='readability-extractor', binproviders=[NpmProvider(), EnvProvider()])
|
||||
# Check if user has configured a custom binary
|
||||
configured_binary = os.environ.get('READABILITY_BINARY', '').strip()
|
||||
|
||||
if configured_binary:
|
||||
if '/' in configured_binary:
|
||||
bin_name = Path(configured_binary).name
|
||||
else:
|
||||
bin_name = configured_binary
|
||||
else:
|
||||
bin_name = 'readability-extractor'
|
||||
|
||||
binary = Binary(name=bin_name, binproviders=[NpmProvider(), EnvProvider()])
|
||||
loaded = binary.load()
|
||||
if loaded and loaded.abspath:
|
||||
return {
|
||||
'name': 'readability-extractor',
|
||||
'name': bin_name,
|
||||
'abspath': str(loaded.abspath),
|
||||
'version': str(loaded.version) if loaded.version else None,
|
||||
'sha256': loaded.sha256 if hasattr(loaded, 'sha256') else None,
|
||||
@@ -32,6 +46,15 @@ def find_readability() -> dict | None:
|
||||
|
||||
|
||||
def main():
|
||||
# Determine binary name from config
|
||||
configured_binary = os.environ.get('READABILITY_BINARY', '').strip()
|
||||
if configured_binary and '/' in configured_binary:
|
||||
bin_name = Path(configured_binary).name
|
||||
elif configured_binary:
|
||||
bin_name = configured_binary
|
||||
else:
|
||||
bin_name = 'readability-extractor'
|
||||
|
||||
result = find_readability()
|
||||
|
||||
if result and result.get('abspath'):
|
||||
@@ -64,13 +87,13 @@ def main():
|
||||
# readability-extractor is installed from GitHub
|
||||
print(json.dumps({
|
||||
'type': 'Dependency',
|
||||
'bin_name': 'readability-extractor',
|
||||
'bin_name': bin_name,
|
||||
'bin_providers': 'npm,env',
|
||||
'overrides': {
|
||||
'npm': {'packages': ['github:ArchiveBox/readability-extractor']}
|
||||
}
|
||||
}))
|
||||
print(f"readability-extractor binary not found", file=sys.stderr)
|
||||
print(f"{bin_name} binary not found", file=sys.stderr)
|
||||
sys.exit(1)
|
||||
|
||||
|
||||
|
||||
@@ -1,26 +1,39 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Validation hook for ripgrep binary.
|
||||
Install hook for ripgrep binary.
|
||||
|
||||
Only runs if SEARCH_BACKEND_ENGINE is set to 'ripgrep'.
|
||||
Outputs JSONL for InstalledBinary and Machine config updates.
|
||||
Respects RIPGREP_BINARY env var for custom binary paths.
|
||||
"""
|
||||
|
||||
import os
|
||||
import sys
|
||||
import json
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
def find_ripgrep() -> dict | None:
|
||||
"""Find ripgrep binary."""
|
||||
"""Find ripgrep binary, respecting RIPGREP_BINARY env var."""
|
||||
try:
|
||||
from abx_pkg import Binary, AptProvider, BrewProvider, EnvProvider
|
||||
|
||||
binary = Binary(name='rg', binproviders=[AptProvider(), BrewProvider(), EnvProvider()])
|
||||
# Check if user has configured a custom binary
|
||||
configured_binary = os.environ.get('RIPGREP_BINARY', '').strip()
|
||||
|
||||
if configured_binary:
|
||||
if '/' in configured_binary:
|
||||
bin_name = Path(configured_binary).name
|
||||
else:
|
||||
bin_name = configured_binary
|
||||
else:
|
||||
bin_name = 'rg'
|
||||
|
||||
binary = Binary(name=bin_name, binproviders=[AptProvider(), BrewProvider(), EnvProvider()])
|
||||
loaded = binary.load()
|
||||
if loaded and loaded.abspath:
|
||||
return {
|
||||
'name': 'rg',
|
||||
'name': bin_name,
|
||||
'abspath': str(loaded.abspath),
|
||||
'version': str(loaded.version) if loaded.version else None,
|
||||
'sha256': loaded.sha256 if hasattr(loaded, 'sha256') else None,
|
||||
@@ -33,7 +46,7 @@ def find_ripgrep() -> dict | None:
|
||||
|
||||
|
||||
def main():
|
||||
"""Validate ripgrep binary and output JSONL."""
|
||||
"""Find ripgrep binary and output JSONL."""
|
||||
|
||||
# Check if ripgrep search backend is enabled
|
||||
search_backend = os.environ.get('SEARCH_BACKEND_ENGINE', '').lower()
|
||||
@@ -42,6 +55,15 @@ def main():
|
||||
# No-op: ripgrep is not the active search backend
|
||||
sys.exit(0)
|
||||
|
||||
# Determine binary name from config
|
||||
configured_binary = os.environ.get('RIPGREP_BINARY', '').strip()
|
||||
if configured_binary and '/' in configured_binary:
|
||||
bin_name = Path(configured_binary).name
|
||||
elif configured_binary:
|
||||
bin_name = configured_binary
|
||||
else:
|
||||
bin_name = 'rg'
|
||||
|
||||
result = find_ripgrep()
|
||||
|
||||
if result and result.get('abspath'):
|
||||
@@ -76,12 +98,12 @@ def main():
|
||||
# Output Dependency request
|
||||
print(json.dumps({
|
||||
'type': 'Dependency',
|
||||
'bin_name': 'rg',
|
||||
'bin_name': bin_name,
|
||||
'bin_providers': 'apt,brew,cargo,env',
|
||||
}))
|
||||
|
||||
# Exit non-zero to indicate binary not found
|
||||
print(f"ripgrep binary not found", file=sys.stderr)
|
||||
print(f"{bin_name} binary not found", file=sys.stderr)
|
||||
sys.exit(1)
|
||||
|
||||
|
||||
|
||||
@@ -1,25 +1,39 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Validation hook for single-file binary.
|
||||
Install hook for single-file binary.
|
||||
|
||||
Runs at crawl start to verify single-file (npm package) is available.
|
||||
Outputs JSONL for InstalledBinary and Machine config updates.
|
||||
Respects SINGLEFILE_BINARY env var for custom binary paths.
|
||||
"""
|
||||
|
||||
import os
|
||||
import sys
|
||||
import json
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
def find_singlefile() -> dict | None:
|
||||
"""Find single-file binary."""
|
||||
"""Find single-file binary, respecting SINGLEFILE_BINARY env var."""
|
||||
try:
|
||||
from abx_pkg import Binary, NpmProvider, EnvProvider
|
||||
|
||||
binary = Binary(name='single-file', binproviders=[NpmProvider(), EnvProvider()])
|
||||
# Check if user has configured a custom binary
|
||||
configured_binary = os.environ.get('SINGLEFILE_BINARY', '').strip()
|
||||
|
||||
if configured_binary:
|
||||
if '/' in configured_binary:
|
||||
bin_name = Path(configured_binary).name
|
||||
else:
|
||||
bin_name = configured_binary
|
||||
else:
|
||||
bin_name = 'single-file'
|
||||
|
||||
binary = Binary(name=bin_name, binproviders=[NpmProvider(), EnvProvider()])
|
||||
loaded = binary.load()
|
||||
if loaded and loaded.abspath:
|
||||
return {
|
||||
'name': 'single-file',
|
||||
'name': bin_name,
|
||||
'abspath': str(loaded.abspath),
|
||||
'version': str(loaded.version) if loaded.version else None,
|
||||
'sha256': loaded.sha256 if hasattr(loaded, 'sha256') else None,
|
||||
@@ -32,6 +46,15 @@ def find_singlefile() -> dict | None:
|
||||
|
||||
|
||||
def main():
|
||||
# Determine binary name from config
|
||||
configured_binary = os.environ.get('SINGLEFILE_BINARY', '').strip()
|
||||
if configured_binary and '/' in configured_binary:
|
||||
bin_name = Path(configured_binary).name
|
||||
elif configured_binary:
|
||||
bin_name = configured_binary
|
||||
else:
|
||||
bin_name = 'single-file'
|
||||
|
||||
result = find_singlefile()
|
||||
|
||||
if result and result.get('abspath'):
|
||||
@@ -63,10 +86,10 @@ def main():
|
||||
else:
|
||||
print(json.dumps({
|
||||
'type': 'Dependency',
|
||||
'bin_name': 'single-file',
|
||||
'bin_name': bin_name,
|
||||
'bin_providers': 'npm,env',
|
||||
}))
|
||||
print(f"single-file binary not found", file=sys.stderr)
|
||||
print(f"{bin_name} binary not found", file=sys.stderr)
|
||||
sys.exit(1)
|
||||
|
||||
|
||||
|
||||
@@ -1,25 +1,43 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Validation hook for wget binary.
|
||||
Install hook for wget binary.
|
||||
|
||||
Runs at crawl start to verify wget is available.
|
||||
Outputs JSONL for InstalledBinary and Machine config updates.
|
||||
Respects WGET_BINARY env var for custom binary paths.
|
||||
"""
|
||||
|
||||
import os
|
||||
import sys
|
||||
import json
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
def find_wget() -> dict | None:
|
||||
"""Find wget binary using abx-pkg."""
|
||||
"""Find wget binary using abx-pkg, respecting WGET_BINARY env var."""
|
||||
try:
|
||||
from abx_pkg import Binary, EnvProvider
|
||||
|
||||
binary = Binary(name='wget', binproviders=[EnvProvider()])
|
||||
# Check if user has configured a custom binary
|
||||
configured_binary = os.environ.get('WGET_BINARY', '').strip()
|
||||
|
||||
if configured_binary:
|
||||
# User specified a custom binary path or name
|
||||
if '/' in configured_binary:
|
||||
# Absolute path - extract name from path
|
||||
bin_name = Path(configured_binary).name
|
||||
else:
|
||||
# Just a binary name
|
||||
bin_name = configured_binary
|
||||
else:
|
||||
# Default to 'wget'
|
||||
bin_name = 'wget'
|
||||
|
||||
binary = Binary(name=bin_name, binproviders=[EnvProvider()])
|
||||
loaded = binary.load()
|
||||
if loaded and loaded.abspath:
|
||||
return {
|
||||
'name': 'wget',
|
||||
'name': bin_name,
|
||||
'abspath': str(loaded.abspath),
|
||||
'version': str(loaded.version) if loaded.version else None,
|
||||
'sha256': loaded.sha256 if hasattr(loaded, 'sha256') else None,
|
||||
@@ -32,7 +50,15 @@ def find_wget() -> dict | None:
|
||||
|
||||
|
||||
def main():
|
||||
"""Validate wget binary and output JSONL."""
|
||||
"""Find wget binary and output JSONL."""
|
||||
# Determine binary name from config
|
||||
configured_binary = os.environ.get('WGET_BINARY', '').strip()
|
||||
if configured_binary and '/' in configured_binary:
|
||||
bin_name = Path(configured_binary).name
|
||||
elif configured_binary:
|
||||
bin_name = configured_binary
|
||||
else:
|
||||
bin_name = 'wget'
|
||||
|
||||
result = find_wget()
|
||||
|
||||
@@ -65,15 +91,15 @@ def main():
|
||||
|
||||
sys.exit(0)
|
||||
else:
|
||||
# Output Dependency request
|
||||
# Output Dependency request (uses configured bin_name)
|
||||
print(json.dumps({
|
||||
'type': 'Dependency',
|
||||
'bin_name': 'wget',
|
||||
'bin_name': bin_name,
|
||||
'bin_providers': 'apt,brew,env',
|
||||
}))
|
||||
|
||||
# Exit non-zero to indicate binary not found
|
||||
print(f"wget binary not found", file=sys.stderr)
|
||||
print(f"{bin_name} binary not found", file=sys.stderr)
|
||||
sys.exit(1)
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user