diff --git a/README.md b/README.md
index 66545085..2da5f877 100644
--- a/README.md
+++ b/README.md
@@ -764,7 +764,7 @@ The configuration is documented here: **[Configuration Wiki](https://github.com/
TIMEOUT=240 # default: 60 add more seconds on slower networks
CHECK_SSL_VALIDITY=False # default: True False = allow saving URLs w/ bad SSL
SAVE_ARCHIVEDOTORG=False # default: True False = disable Archive.org saving
-MAX_MEDIA_SIZE=1500m # default: 750m raise/lower youtubedl output size
+YTDLP_MAX_SIZE=1500m # default: 750m raise/lower yt-dlp output size
PUBLIC_INDEX=True # default: True whether anon users can view index
PUBLIC_SNAPSHOTS=True # default: True whether anon users can view pages
@@ -1089,7 +1089,7 @@ Because ArchiveBox is designed to ingest a large volume of URLs with multiple co
-SAVE_MEDIA=True and whether you lower MEDIA_MAX_SIZE=750mb.
+YTDLP_ENABLED=True and whether you lower YTDLP_MAX_SIZE=750m.
fdupes or rdfind.
data/archive/ folder.
diff --git a/TODO_hook_concurrency.md b/TODO_hook_concurrency.md
index 877b295b..c076cc70 100644
--- a/TODO_hook_concurrency.md
+++ b/TODO_hook_concurrency.md
@@ -451,12 +451,12 @@ except subprocess.TimeoutExpired:
### Background Hook (Long-Running Download)
```python
#!/usr/bin/env python3
-# archivebox/plugins/media/on_Snapshot__63_media.bg.py
+# archivebox/plugins/ytdlp/on_Snapshot__63_ytdlp.bg.py
# Runs at step 6, doesn't block step progression
-# Gets full MEDIA_TIMEOUT (e.g., 3600s) regardless of when step 99 completes
+# Gets full YTDLP_TIMEOUT (e.g., 3600s) regardless of when step 99 completes
-timeout = get_env_int('YTDLP_TIMEOUT') or get_env_int('MEDIA_TIMEOUT') or get_env_int('TIMEOUT', 3600)
+timeout = get_env_int('YTDLP_TIMEOUT') or get_env_int('TIMEOUT', 3600)
try:
result = subprocess.run(['yt-dlp', url], capture_output=True, timeout=timeout)
diff --git a/archivebox/cli/tests_piping.py b/archivebox/cli/tests_piping.py
index 23967550..26125935 100644
--- a/archivebox/cli/tests_piping.py
+++ b/archivebox/cli/tests_piping.py
@@ -44,7 +44,7 @@ TEST_CONFIG = {
'SAVE_READABILITY': 'False',
'SAVE_MERCURY': 'False',
'SAVE_GIT': 'False',
- 'SAVE_MEDIA': 'False',
+ 'SAVE_YTDLP': 'False',
'SAVE_HEADERS': 'False',
'USE_CURL': 'False',
'USE_WGET': 'False',
diff --git a/archivebox/plugins/gallerydl/on_Snapshot__64_gallerydl.bg.py b/archivebox/plugins/gallerydl/on_Snapshot__64_gallerydl.bg.py
index 6244e31e..86ff868b 100755
--- a/archivebox/plugins/gallerydl/on_Snapshot__64_gallerydl.bg.py
+++ b/archivebox/plugins/gallerydl/on_Snapshot__64_gallerydl.bg.py
@@ -59,7 +59,6 @@ def get_env_int(name: str, default: int = 0) -> int:
STATICFILE_DIR = '../staticfile'
-MEDIA_DIR = '../media'
def has_staticfile_output() -> bool:
"""Check if staticfile extractor already downloaded this URL."""
@@ -67,12 +66,6 @@ def has_staticfile_output() -> bool:
return staticfile_dir.exists() and any(staticfile_dir.iterdir())
-def has_media_output() -> bool:
- """Check if media extractor already downloaded this URL."""
- media_dir = Path(MEDIA_DIR)
- return media_dir.exists() and any(media_dir.iterdir())
-
-
# Default gallery-dl args
def get_gallerydl_default_args() -> list[str]:
"""Build default gallery-dl arguments."""
@@ -185,7 +178,7 @@ def main(url: str, snapshot_id: str):
# Temporary failure (config disabled) - NO JSONL emission
sys.exit(0)
- # Check if staticfile or media extractors already handled this (permanent skip)
+ # Check if staticfile extractor already handled this (permanent skip)
if has_staticfile_output():
print(f'Skipping gallery-dl - staticfile extractor already downloaded this', file=sys.stderr)
print(json.dumps({
@@ -195,15 +188,6 @@ def main(url: str, snapshot_id: str):
}))
sys.exit(0)
- if has_media_output():
- print(f'Skipping gallery-dl - media extractor already downloaded this', file=sys.stderr)
- print(json.dumps({
- 'type': 'ArchiveResult',
- 'status': 'skipped',
- 'output_str': 'media already handled',
- }))
- sys.exit(0)
-
# Get binary from environment
binary = get_env('GALLERYDL_BINARY', 'gallery-dl')
diff --git a/archivebox/plugins/media/binaries.jsonl b/archivebox/plugins/ytdlp/binaries.jsonl
similarity index 100%
rename from archivebox/plugins/media/binaries.jsonl
rename to archivebox/plugins/ytdlp/binaries.jsonl
diff --git a/archivebox/plugins/media/config.json b/archivebox/plugins/ytdlp/config.json
similarity index 66%
rename from archivebox/plugins/media/config.json
rename to archivebox/plugins/ytdlp/config.json
index 2c18b233..be6f89a4 100644
--- a/archivebox/plugins/media/config.json
+++ b/archivebox/plugins/ytdlp/config.json
@@ -3,36 +3,38 @@
"type": "object",
"additionalProperties": false,
"properties": {
- "MEDIA_ENABLED": {
+ "YTDLP_ENABLED": {
"type": "boolean",
"default": true,
- "x-aliases": ["SAVE_MEDIA", "USE_MEDIA", "USE_YTDLP", "FETCH_MEDIA"],
- "description": "Enable media downloading with yt-dlp"
+ "x-aliases": ["MEDIA_ENABLED", "SAVE_MEDIA", "USE_MEDIA", "USE_YTDLP", "FETCH_MEDIA", "SAVE_YTDLP"],
+ "description": "Enable video/audio downloading with yt-dlp"
},
"YTDLP_BINARY": {
"type": "string",
"default": "yt-dlp",
- "x-aliases": ["YOUTUBEDL_BINARY", "YOUTUBE_DL_BINARY", "MEDIA_BINARY"],
+ "x-aliases": ["MEDIA_BINARY", "YOUTUBEDL_BINARY", "YOUTUBE_DL_BINARY"],
"description": "Path to yt-dlp binary"
},
- "MEDIA_TIMEOUT": {
+ "YTDLP_TIMEOUT": {
"type": "integer",
"default": 3600,
"minimum": 30,
"x-fallback": "TIMEOUT",
- "description": "Timeout for media downloads in seconds"
+ "x-aliases": ["MEDIA_TIMEOUT"],
+ "description": "Timeout for yt-dlp downloads in seconds"
},
- "MEDIA_MAX_SIZE": {
+ "YTDLP_MAX_SIZE": {
"type": "string",
"default": "750m",
"pattern": "^\\d+[kmgKMG]?$",
- "description": "Maximum file size for media downloads"
+ "x-aliases": ["MEDIA_MAX_SIZE"],
+ "description": "Maximum file size for yt-dlp downloads"
},
- "MEDIA_CHECK_SSL_VALIDITY": {
+ "YTDLP_CHECK_SSL_VALIDITY": {
"type": "boolean",
"default": true,
"x-fallback": "CHECK_SSL_VALIDITY",
- "x-aliases": ["YTDLP_CHECK_SSL_VALIDITY"],
+ "x-aliases": ["MEDIA_CHECK_SSL_VALIDITY"],
"description": "Whether to verify SSL certificates"
},
"YTDLP_ARGS": {
diff --git a/archivebox/plugins/media/on_Snapshot__63_media.bg.py b/archivebox/plugins/ytdlp/on_Snapshot__63_ytdlp.bg.py
similarity index 79%
rename from archivebox/plugins/media/on_Snapshot__63_media.bg.py
rename to archivebox/plugins/ytdlp/on_Snapshot__63_ytdlp.bg.py
index 94339540..6a5282e0 100644
--- a/archivebox/plugins/media/on_Snapshot__63_media.bg.py
+++ b/archivebox/plugins/ytdlp/on_Snapshot__63_ytdlp.bg.py
@@ -1,25 +1,23 @@
#!/usr/bin/env python3
"""
-Download media from a URL using yt-dlp.
+Download video/audio from a URL using yt-dlp.
-Usage: on_Snapshot__media.py --url=