WIP: improve the plugin hooks system

This commit is contained in:
Nick Sweeting
2025-12-28 03:39:59 -08:00
parent a38624a4dd
commit 50e527ec65
156 changed files with 10275 additions and 7149 deletions

View File

@@ -0,0 +1,427 @@
#!/usr/bin/env node
/**
* Detect and download static files using CDP during initial request.
*
* This hook sets up CDP listeners BEFORE chrome_navigate to capture the
* Content-Type from the initial response. If it's a static file (PDF, image, etc.),
* it downloads the content directly using CDP.
*
* Usage: on_Snapshot__26_chrome_staticfile.bg.js --url=<url> --snapshot-id=<uuid>
* Output: Downloads static file + writes hook.pid
*/
const fs = require('fs');
const path = require('path');
const puppeteer = require('puppeteer-core');
// Hook identity and filesystem layout (the hook runs inside its own output dir).
const EXTRACTOR_NAME = 'staticfile';
const OUTPUT_DIR = '.';
const PID_FILE = 'hook.pid';
const CHROME_SESSION_DIR = '../chrome'; // session files written by the chrome plugin hook
// Content-Types that indicate static files
const STATIC_CONTENT_TYPES = new Set([
  // Documents
  'application/pdf',
  'application/msword',
  'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
  'application/vnd.ms-excel',
  'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
  'application/vnd.ms-powerpoint',
  'application/vnd.openxmlformats-officedocument.presentationml.presentation',
  'application/rtf',
  'application/epub+zip',
  // Images
  'image/png',
  'image/jpeg',
  'image/gif',
  'image/webp',
  'image/svg+xml',
  'image/x-icon',
  'image/bmp',
  'image/tiff',
  'image/avif',
  'image/heic',
  'image/heif',
  // Audio
  'audio/mpeg',
  'audio/mp3',
  'audio/wav',
  'audio/flac',
  'audio/aac',
  'audio/ogg',
  'audio/webm',
  'audio/m4a',
  'audio/opus',
  // Video
  'video/mp4',
  'video/webm',
  'video/x-matroska',
  'video/avi',
  'video/quicktime',
  'video/x-ms-wmv',
  'video/x-flv',
  // Archives
  'application/zip',
  'application/x-tar',
  'application/gzip',
  'application/x-bzip2',
  'application/x-xz',
  'application/x-7z-compressed',
  'application/x-rar-compressed',
  'application/vnd.rar',
  // Data
  'application/json',
  'application/xml',
  'text/csv',
  'text/xml',
  'application/x-yaml',
  // Executables/Binaries
  'application/octet-stream',
  'application/x-executable',
  'application/x-msdos-program',
  'application/x-apple-diskimage',
  'application/vnd.debian.binary-package',
  'application/x-rpm',
  // Other
  'application/x-bittorrent',
  'application/wasm',
]);
// Category prefixes checked after exact matches in isStaticContentType().
// NOTE(review): the image/audio/video entries above are redundant with these
// prefixes -- presumably kept for documentation value; confirm before pruning.
const STATIC_CONTENT_TYPE_PREFIXES = [
  'image/',
  'audio/',
  'video/',
  'application/zip',
  'application/x-',
];
// Global state
// Mutable module-level state shared between the CDP response listener
// (setupStaticFileListener) and the signal handler (handleShutdown), which
// reads it to build the final ArchiveResult JSON.
let originalUrl = '';
let detectedContentType = null; // main-document MIME type, parameters stripped
let isStaticFile = false;       // true once a static Content-Type is seen
let downloadedFilePath = null;  // relative filename on successful download
let downloadError = null;       // human-readable error string on failure
let page = null;
let browser = null;
/**
 * Parse `--key=value` CLI flags into an object keyed by snake_cased names.
 * A bare `--flag` (no value, or an empty value) maps to boolean true.
 * @returns {Object<string, string|true>} parsed flags
 */
function parseArgs() {
  const parsed = {};
  for (const token of process.argv.slice(2)) {
    if (!token.startsWith('--')) continue;
    const eq = token.indexOf('=', 2);
    const rawKey = eq === -1 ? token.slice(2) : token.slice(2, eq);
    const value = eq === -1 ? '' : token.slice(eq + 1);
    parsed[rawKey.replace(/-/g, '_')] = value || true;
  }
  return parsed;
}
/** Read env var `name` as a trimmed string, falling back to `defaultValue`. */
function getEnv(name, defaultValue = '') {
  const raw = process.env[name];
  // Empty string also falls back, matching truthiness semantics.
  return (raw ? raw : defaultValue).trim();
}

/** Read env var as a boolean flag: true/1/yes/on vs false/0/no/off (case-insensitive). */
function getEnvBool(name, defaultValue = false) {
  const normalized = getEnv(name, '').toLowerCase();
  if (['true', '1', 'yes', 'on'].includes(normalized)) return true;
  if (['false', '0', 'no', 'off'].includes(normalized)) return false;
  return defaultValue;
}

/** Read env var as a base-10 integer, falling back on missing or unparseable values. */
function getEnvInt(name, defaultValue = 0) {
  const parsed = parseInt(getEnv(name, String(defaultValue)), 10);
  return Number.isNaN(parsed) ? defaultValue : parsed;
}
/**
 * Poll for the chrome plugin's session files (cdp_url.txt and target_id.txt)
 * until both exist or the timeout elapses.
 * @param {number} timeoutMs - maximum time to wait, in milliseconds
 * @returns {Promise<boolean>} true when both files appeared in time
 */
async function waitForChromeTabOpen(timeoutMs = 60000) {
  const wanted = [
    path.join(CHROME_SESSION_DIR, 'cdp_url.txt'),
    path.join(CHROME_SESSION_DIR, 'target_id.txt'),
  ];
  const deadline = Date.now() + timeoutMs;
  while (Date.now() < deadline) {
    if (wanted.every((f) => fs.existsSync(f))) return true;
    // Re-check every 100ms.
    await new Promise((resolve) => setTimeout(resolve, 100));
  }
  return false;
}
/** Return the Chrome DevTools websocket URL recorded by the chrome plugin, or null. */
function getCdpUrl() {
  const cdpFile = path.join(CHROME_SESSION_DIR, 'cdp_url.txt');
  if (!fs.existsSync(cdpFile)) return null;
  return fs.readFileSync(cdpFile, 'utf8').trim();
}
/** Return the CDP target id recorded by the chrome plugin, or null when absent. */
function getPageId() {
  const targetIdFile = path.join(CHROME_SESSION_DIR, 'target_id.txt');
  if (!fs.existsSync(targetIdFile)) return null;
  return fs.readFileSync(targetIdFile, 'utf8').trim();
}
/**
 * Decide whether a Content-Type header denotes a downloadable static asset.
 * Parameters (e.g. `; charset=utf-8`) are stripped before matching against
 * the exact-type set and the category prefixes.
 * @param {string|null|undefined} contentType - raw header value
 * @returns {boolean}
 */
function isStaticContentType(contentType) {
  if (!contentType) return false;
  const mime = contentType.split(';')[0].trim().toLowerCase();
  return (
    STATIC_CONTENT_TYPES.has(mime) ||
    STATIC_CONTENT_TYPE_PREFIXES.some((prefix) => mime.startsWith(prefix))
  );
}
/**
 * Make a string safe to use as a filename: every character outside
 * [a-zA-Z0-9._-] becomes '_', and the result is capped at `maxLen` chars.
 */
function sanitizeFilename(str, maxLen = 200) {
  const safe = str.replace(/[^a-zA-Z0-9._-]/g, '_');
  return safe.slice(0, maxLen);
}
/**
 * Derive a sanitized filename from the last path segment of `url`.
 * Falls back to 'downloaded_file' for unparseable URLs or empty paths.
 */
function getFilenameFromUrl(url) {
  let basename;
  try {
    const { pathname } = new URL(url);
    basename = path.basename(pathname) || 'downloaded_file';
  } catch (e) {
    return 'downloaded_file';
  }
  return sanitizeFilename(basename);
}
/**
 * Connect to the already-running Chrome session (started by the chrome plugin)
 * and attach a 'response' listener BEFORE chrome_navigate runs, so the
 * Content-Type of the main document response can be inspected and, when it is
 * a static file, its body downloaded via CDP into OUTPUT_DIR.
 *
 * Results are communicated through the module-level globals
 * (detectedContentType / isStaticFile / downloadedFilePath / downloadError)
 * that handleShutdown() later reads.
 *
 * @returns {Promise<{browser: object, page: object}>}
 * @throws {Error} if the chrome session files never appear, or no page exists
 */
async function setupStaticFileListener() {
  // Wait for chrome tab to be open (up to 60s)
  const tabOpen = await waitForChromeTabOpen(60000);
  if (!tabOpen) {
    throw new Error('Chrome tab not open after 60s (chrome plugin must run first)');
  }
  const cdpUrl = getCdpUrl();
  if (!cdpUrl) {
    throw new Error('No Chrome session found');
  }
  browser = await puppeteer.connect({ browserWSEndpoint: cdpUrl });
  // Find our page by target id.
  // NOTE(review): _targetId is a private puppeteer field; confirm it still
  // exists when upgrading puppeteer-core.
  const pages = await browser.pages();
  const targetId = getPageId();
  if (targetId) {
    page = pages.find(p => {
      const target = p.target();
      return target && target._targetId === targetId;
    });
  }
  if (!page) {
    page = pages[pages.length - 1];
  }
  if (!page) {
    throw new Error('No page found');
  }
  // Track the first response to check Content-Type.
  // NOTE(review): the handler only matches responses whose URL equals the
  // original URL exactly, so if the server redirects, the final document
  // response is never inspected -- confirm whether redirects should be followed.
  let firstResponseHandled = false;
  page.on('response', async (response) => {
    if (firstResponseHandled) return;
    try {
      const url = response.url();
      const headers = response.headers();
      const contentType = headers['content-type'] || '';
      const status = response.status();
      // Only process the main document response
      if (url !== originalUrl) return;
      if (status < 200 || status >= 300) return;
      firstResponseHandled = true;
      detectedContentType = contentType.split(';')[0].trim();
      console.error(`Detected Content-Type: ${detectedContentType}`);
      // Check if it's a static file
      if (!isStaticContentType(detectedContentType)) {
        console.error('Not a static file, skipping download');
        return;
      }
      isStaticFile = true;
      console.error('Static file detected, downloading...');
      // Download the file, enforcing the configured size cap.
      const maxSize = getEnvInt('STATICFILE_MAX_SIZE', 1024 * 1024 * 1024); // 1GB default
      const buffer = await response.buffer();
      if (buffer.length > maxSize) {
        downloadError = `File too large: ${buffer.length} bytes > ${maxSize} max`;
        return;
      }
      // Determine filename (URL path, overridden by Content-Disposition).
      let filename = getFilenameFromUrl(url);
      const contentDisp = headers['content-disposition'] || '';
      if (contentDisp.includes('filename=')) {
        const match = contentDisp.match(/filename[*]?=["']?([^"';\n]+)/);
        if (match) {
          filename = sanitizeFilename(match[1].trim());
        }
      }
      const outputPath = path.join(OUTPUT_DIR, filename);
      fs.writeFileSync(outputPath, buffer);
      downloadedFilePath = filename;
      // FIX: original log line contained a garbled template placeholder
      // ('$(unknown)') instead of interpolating the output path.
      console.error(`Static file downloaded (${buffer.length} bytes): ${outputPath}`);
    } catch (e) {
      downloadError = `${e.name}: ${e.message}`;
      console.error(`Error downloading static file: ${downloadError}`);
    }
  });
  return { browser, page };
}
/**
 * Block until chrome_navigate signals completion by writing
 * ../chrome/page_loaded.txt, polling every 100ms for up to 2 minutes.
 * After the marker appears, wait an extra 500ms so the response handler
 * installed by setupStaticFileListener has time to finish.
 * @throws {Error} when the marker never appears within the deadline
 */
async function waitForNavigation() {
  const pageLoadedMarker = path.join('../chrome', 'page_loaded.txt');
  const maxWaitMs = 120000; // 2 minutes
  const pollIntervalMs = 100;
  for (
    let elapsed = 0;
    elapsed < maxWaitMs && !fs.existsSync(pageLoadedMarker);
    elapsed += pollIntervalMs
  ) {
    await new Promise((resolve) => setTimeout(resolve, pollIntervalMs));
  }
  if (!fs.existsSync(pageLoadedMarker)) {
    throw new Error('Timeout waiting for navigation (chrome_navigate did not complete)');
  }
  // Grace period for the in-flight response handler.
  await new Promise((resolve) => setTimeout(resolve, 500));
}
/**
 * Signal handler: emit the final ArchiveResult JSON on stdout and exit 0.
 * Invoked on SIGTERM/SIGINT when the orchestrator tears this hook down;
 * derives the outcome from the module-level state set by the CDP listener.
 */
function handleShutdown(signal) {
  console.error(`\nReceived ${signal}, emitting final results...`);

  // Decide status + message from the recorded state, most-specific first.
  let status;
  let outputStr;
  if (!detectedContentType) {
    // No Content-Type was ever observed (shouldn't happen, but handle it).
    status = 'skipped';
    outputStr = 'No Content-Type detected';
  } else if (!isStaticFile) {
    // Normal case for HTML pages: nothing for this hook to do.
    status = 'skipped';
    outputStr = `Not a static file (Content-Type: ${detectedContentType})`;
  } else if (downloadError) {
    status = 'failed';
    outputStr = downloadError;
  } else if (downloadedFilePath) {
    status = 'succeeded';
    outputStr = downloadedFilePath;
  } else {
    // Static file detected but the download never finished (unexpected).
    status = 'failed';
    outputStr = 'Static file detected but download did not complete';
  }

  const result = {
    type: 'ArchiveResult',
    status,
    output_str: outputStr,
    extractor: EXTRACTOR_NAME,
  };
  // content_type is only present once a Content-Type was observed
  // (matches the original per-branch result shapes).
  if (detectedContentType) {
    result.content_type = detectedContentType;
  }

  console.log(JSON.stringify(result));
  process.exit(0);
}
/**
 * CLI entry point. Parses --url/--snapshot-id, honors SAVE_STATICFILE,
 * attaches the CDP response listener before navigation, then blocks forever
 * waiting for the orchestrator's SIGTERM/SIGINT (handleShutdown emits the
 * final ArchiveResult JSON and exits).
 */
async function main() {
  const args = parseArgs();
  const url = args.url;
  const snapshotId = args.snapshot_id;
  if (!url || !snapshotId) {
    console.error('Usage: on_Snapshot__26_chrome_staticfile.bg.js --url=<url> --snapshot-id=<uuid>');
    process.exit(1);
  }
  // Record the URL for the response listener's main-document match.
  originalUrl = url;
  if (!getEnvBool('SAVE_STATICFILE', true)) {
    console.error('Skipping (SAVE_STATICFILE=False)');
    console.log(JSON.stringify({type: 'ArchiveResult', status: 'skipped', output_str: 'SAVE_STATICFILE=False'}));
    process.exit(0);
  }
  // Register signal handlers for graceful shutdown
  process.on('SIGTERM', () => handleShutdown('SIGTERM'));
  process.on('SIGINT', () => handleShutdown('SIGINT'));
  try {
    // Set up static file listener BEFORE navigation
    await setupStaticFileListener();
    // Write PID file so the orchestrator can signal/kill this hook.
    fs.writeFileSync(path.join(OUTPUT_DIR, PID_FILE), String(process.pid));
    // Wait for chrome_navigate to complete (BLOCKING)
    await waitForNavigation();
    // Keep process alive until killed by cleanup
    console.error('Static file detection complete, waiting for cleanup signal...');
    // Keep the process alive indefinitely (resolution comes via signals).
    await new Promise(() => {}); // Never resolves
  } catch (e) {
    const error = `${e.name}: ${e.message}`;
    console.error(`ERROR: ${error}`);
    // NOTE(review): unlike handleShutdown's results, this failure JSON omits
    // the 'extractor' field -- confirm downstream consumers tolerate that.
    console.log(JSON.stringify({
      type: 'ArchiveResult',
      status: 'failed',
      output_str: error,
    }));
    process.exit(1);
  }
}
// Top-level invocation: any error escaping main() is fatal and reported on
// stderr (main() itself emits result JSON on its handled failure path).
main().catch(e => {
  console.error(`Fatal error: ${e.message}`);
  process.exit(1);
});

View File

@@ -1,336 +0,0 @@
#!/usr/bin/env python3
"""
Download static files (PDFs, images, archives, etc.) directly.
This extractor runs AFTER chrome_session and checks the Content-Type header
from chrome_session/response_headers.json to determine if the URL points to
a static file that should be downloaded directly.
Other extractors check for the presence of this extractor's output directory
to know if they should skip (since Chrome-based extractors can't meaningfully
process static files like PDFs, images, etc.).
Usage: on_Snapshot__21_staticfile.py --url=<url> --snapshot-id=<uuid>
Output: Downloads file to staticfile/<filename>
Environment variables:
STATICFILE_TIMEOUT: Timeout in seconds (default: 300)
STATICFILE_MAX_SIZE: Maximum file size in bytes (default: 1GB)
USER_AGENT: User agent string (optional)
CHECK_SSL_VALIDITY: Whether to check SSL certificates (default: True)
"""
import json
import os
import sys
from datetime import datetime, timezone
from pathlib import Path
from urllib.parse import urlparse, unquote
import rich_click as click
# Extractor metadata
EXTRACTOR_NAME = 'staticfile'
OUTPUT_DIR = '.'  # hook already runs inside its own output directory
CHROME_SESSION_DIR = '../chrome_session'  # sibling hook's output directory
# Content-Types that indicate static files
# These can't be meaningfully processed by Chrome-based extractors
STATIC_CONTENT_TYPES = {
    # Documents
    'application/pdf',
    'application/msword',
    'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
    'application/vnd.ms-excel',
    'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
    'application/vnd.ms-powerpoint',
    'application/vnd.openxmlformats-officedocument.presentationml.presentation',
    'application/rtf',
    'application/epub+zip',
    # Images
    'image/png',
    'image/jpeg',
    'image/gif',
    'image/webp',
    'image/svg+xml',
    'image/x-icon',
    'image/bmp',
    'image/tiff',
    'image/avif',
    'image/heic',
    'image/heif',
    # Audio
    'audio/mpeg',
    'audio/mp3',
    'audio/wav',
    'audio/flac',
    'audio/aac',
    'audio/ogg',
    'audio/webm',
    'audio/m4a',
    'audio/opus',
    # Video
    'video/mp4',
    'video/webm',
    'video/x-matroska',
    'video/avi',
    'video/quicktime',
    'video/x-ms-wmv',
    'video/x-flv',
    # Archives
    'application/zip',
    'application/x-tar',
    'application/gzip',
    'application/x-bzip2',
    'application/x-xz',
    'application/x-7z-compressed',
    'application/x-rar-compressed',
    'application/vnd.rar',
    # Data
    'application/json',
    'application/xml',
    'text/csv',
    'text/xml',
    'application/x-yaml',
    # Executables/Binaries
    'application/octet-stream',  # Generic binary
    'application/x-executable',
    'application/x-msdos-program',
    'application/x-apple-diskimage',
    'application/vnd.debian.binary-package',
    'application/x-rpm',
    # Other
    'application/x-bittorrent',
    'application/wasm',
}
# Also check Content-Type prefixes for categories
# NOTE(review): the image/audio/video entries above are redundant with these
# prefixes -- presumably kept for documentation value; confirm before pruning.
STATIC_CONTENT_TYPE_PREFIXES = (
    'image/',
    'audio/',
    'video/',
    'application/zip',
    'application/x-',
)
def get_env(name: str, default: str = '') -> str:
    """Return environment variable *name* stripped of whitespace, or *default*."""
    return os.environ.get(name, default).strip()


def get_env_bool(name: str, default: bool = False) -> bool:
    """Parse an env var as a boolean flag (true/1/yes/on vs false/0/no/off)."""
    flag = get_env(name, '').lower()
    if flag in {'true', '1', 'yes', 'on'}:
        return True
    if flag in {'false', '0', 'no', 'off'}:
        return False
    return default


def get_env_int(name: str, default: int = 0) -> int:
    """Parse an env var as a base-10 int, falling back to *default* on bad input."""
    raw = get_env(name, str(default))
    try:
        return int(raw)
    except ValueError:
        return default
def get_content_type_from_chrome_session() -> str | None:
    """Read the main-document Content-Type recorded by chrome_session.

    Returns the lowercased MIME type with parameters (charset etc.) stripped,
    or None when the headers file is missing or unreadable.
    """
    headers_file = Path(CHROME_SESSION_DIR) / 'response_headers.json'
    if not headers_file.exists():
        return None
    try:
        headers = json.loads(headers_file.read_text())
        # Headers might be nested or flat depending on chrome_session format
        content_type = headers.get('content-type') or headers.get('Content-Type') or ''
        return content_type.split(';')[0].strip().lower()
    except Exception:
        # Best-effort: any parse/shape problem is treated as "no content type".
        return None
def is_static_content_type(content_type: str) -> bool:
    """Return True when *content_type* denotes a directly-downloadable static file.

    Matches against the exact-type set first, then the category prefixes.
    """
    if not content_type:
        return False
    return (
        content_type in STATIC_CONTENT_TYPES
        or content_type.startswith(STATIC_CONTENT_TYPE_PREFIXES)
    )
def get_filename_from_url(url: str) -> str:
    """Derive a filename from the last path segment of *url*.

    Percent-escapes are decoded; empty segments fall back to
    'downloaded_file'; the result is capped at 200 characters.
    """
    decoded_path = unquote(urlparse(url).path)
    name = decoded_path.rsplit('/', 1)[-1] or 'downloaded_file'
    # Guard against path separators surviving in the segment.
    name = name.replace('/', '_').replace('\\', '_')
    return name[:200]
def download_file(url: str) -> tuple[bool, str | None, str]:
    """
    Download a static file.
    Returns: (success, output_path, error_message)
    """
    # Local import: requests is only needed when a download actually happens.
    import requests
    # Tunables come from the environment (see module docstring).
    timeout = get_env_int('STATICFILE_TIMEOUT', 300)
    max_size = get_env_int('STATICFILE_MAX_SIZE', 1024 * 1024 * 1024)  # 1GB default
    user_agent = get_env('USER_AGENT', 'Mozilla/5.0 (compatible; ArchiveBox/1.0)')
    check_ssl = get_env_bool('CHECK_SSL_VALIDITY', True)
    headers = {'User-Agent': user_agent}
    try:
        # Stream download to handle large files
        response = requests.get(
            url,
            headers=headers,
            timeout=timeout,
            stream=True,
            verify=check_ssl,
            allow_redirects=True,
        )
        response.raise_for_status()
        # Check content length if available (early reject before streaming).
        content_length = response.headers.get('content-length')
        if content_length and int(content_length) > max_size:
            return False, None, f'File too large: {int(content_length)} bytes > {max_size} max'
        # Output directory is current directory (hook already runs in output dir)
        output_dir = Path(OUTPUT_DIR)
        # Determine filename
        filename = get_filename_from_url(url)
        # Check content-disposition header for better filename
        # NOTE(review): unlike the URL-derived filename, this value is not
        # sanitized and could contain path separators -- confirm and sanitize.
        content_disp = response.headers.get('content-disposition', '')
        if 'filename=' in content_disp:
            import re
            match = re.search(r'filename[*]?=["\']?([^"\';\n]+)', content_disp)
            if match:
                filename = match.group(1).strip()
        output_path = output_dir / filename
        # Download in chunks, enforcing max_size on the actual bytes received
        # (content-length can be absent or wrong).
        downloaded_size = 0
        with open(output_path, 'wb') as f:
            for chunk in response.iter_content(chunk_size=8192):
                if chunk:
                    downloaded_size += len(chunk)
                    if downloaded_size > max_size:
                        f.close()
                        output_path.unlink()
                        return False, None, f'File too large: exceeded {max_size} bytes'
                    f.write(chunk)
        return True, str(output_path), ''
    except requests.exceptions.Timeout:
        return False, None, f'Timed out after {timeout} seconds'
    except requests.exceptions.SSLError as e:
        return False, None, f'SSL error: {e}'
    except requests.exceptions.RequestException as e:
        return False, None, f'Download failed: {e}'
    except Exception as e:
        return False, None, f'{type(e).__name__}: {e}'
@click.command()
@click.option('--url', required=True, help='URL to download')
@click.option('--snapshot-id', required=True, help='Snapshot UUID')
def main(url: str, snapshot_id: str):
    """Download static files based on Content-Type from chrome_session.

    Exits 0 on success or permanent skip; exits 1 when the download failed.
    Results are emitted as KEY=VALUE lines plus a final RESULT_JSON line.
    """
    start_ts = datetime.now(timezone.utc)
    output = None
    status = 'failed'
    error = ''
    # Check Content-Type from chrome_session's response headers
    content_type = get_content_type_from_chrome_session()
    # If chrome_session didn't run or no Content-Type, skip
    if not content_type:
        # NOTE(review): several of these f-strings have no placeholders.
        print(f'No Content-Type found (chrome_session may not have run)')
        print(f'START_TS={start_ts.isoformat()}')
        print(f'END_TS={datetime.now(timezone.utc).isoformat()}')
        print(f'STATUS=skipped')
        print(f'RESULT_JSON={json.dumps({"extractor": EXTRACTOR_NAME, "status": "skipped", "url": url, "snapshot_id": snapshot_id})}')
        sys.exit(0)  # Permanent skip - can't determine content type
    # If not a static file type, skip (this is the normal case for HTML pages)
    if not is_static_content_type(content_type):
        print(f'Not a static file (Content-Type: {content_type})')
        print(f'START_TS={start_ts.isoformat()}')
        print(f'END_TS={datetime.now(timezone.utc).isoformat()}')
        print(f'STATUS=skipped')
        print(f'RESULT_JSON={json.dumps({"extractor": EXTRACTOR_NAME, "status": "skipped", "url": url, "snapshot_id": snapshot_id, "content_type": content_type})}')
        sys.exit(0)  # Permanent skip - not a static file
    try:
        # Download the file
        print(f'Static file detected (Content-Type: {content_type}), downloading...')
        success, output, error = download_file(url)
        status = 'succeeded' if success else 'failed'
        if success and output:
            size = Path(output).stat().st_size
            print(f'Static file downloaded ({size} bytes): {output}')
    except Exception as e:
        error = f'{type(e).__name__}: {e}'
        status = 'failed'
    # Print results
    end_ts = datetime.now(timezone.utc)
    duration = (end_ts - start_ts).total_seconds()
    print(f'START_TS={start_ts.isoformat()}')
    print(f'END_TS={end_ts.isoformat()}')
    print(f'DURATION={duration:.2f}')
    if output:
        print(f'OUTPUT={output}')
    print(f'STATUS={status}')
    if error:
        print(f'ERROR={error}', file=sys.stderr)
    # Print JSON result
    result_json = {
        'extractor': EXTRACTOR_NAME,
        'url': url,
        'snapshot_id': snapshot_id,
        'status': status,
        'content_type': content_type,
        'start_ts': start_ts.isoformat(),
        'end_ts': end_ts.isoformat(),
        'duration': round(duration, 2),
        'output': output,
        'error': error or None,
    }
    print(f'RESULT_JSON={json.dumps(result_json)}')
    sys.exit(0 if status == 'succeeded' else 1)


if __name__ == '__main__':
    main()

View File

@@ -1 +0,0 @@
📁