mirror of
https://github.com/ArchiveBox/ArchiveBox.git
synced 2026-01-03 09:25:42 +10:00
add mcp server support
This commit is contained in:
@@ -3,7 +3,10 @@
|
||||
"allow": [
|
||||
"Bash(python -m archivebox:*)",
|
||||
"Bash(ls:*)",
|
||||
"Bash(xargs:*)"
|
||||
"Bash(xargs:*)",
|
||||
"Bash(python -c:*)",
|
||||
"Bash(printf:*)",
|
||||
"Bash(pkill:*)"
|
||||
]
|
||||
}
|
||||
}
|
||||
|
||||
49
archivebox/cli/archivebox_mcp.py
Normal file
49
archivebox/cli/archivebox_mcp.py
Normal file
@@ -0,0 +1,49 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
archivebox mcp
|
||||
|
||||
Start the Model Context Protocol (MCP) server in stdio mode.
|
||||
Exposes all ArchiveBox CLI commands as MCP tools for AI agents.
|
||||
"""
|
||||
|
||||
__package__ = 'archivebox.cli'
|
||||
__command__ = 'archivebox mcp'
|
||||
|
||||
import rich_click as click
|
||||
|
||||
from archivebox.misc.util import docstring, enforce_types
|
||||
|
||||
|
||||
@enforce_types
def mcp():
    """
    Start the MCP server in stdio mode for AI agent control.

    The MCP (Model Context Protocol) server exposes all ArchiveBox CLI commands
    as tools that AI agents can discover and execute. It communicates via JSON-RPC
    2.0 over stdin/stdout.

    Example usage with an MCP client:
        archivebox mcp < requests.jsonl > responses.jsonl

    Or interactively:
        archivebox mcp
        {"jsonrpc":"2.0","id":1,"method":"initialize","params":{}}
        {"jsonrpc":"2.0","id":2,"method":"tools/list","params":{}}
    """

    # Imported inside the function so the server module is only loaded when
    # the command actually runs. The fully-qualified `archivebox.mcp` path is
    # used (matching the other `archivebox.*` imports in this file) so that an
    # unrelated top-level `mcp` package on sys.path cannot shadow it.
    from archivebox.mcp.server import run_mcp_server

    # Run the stdio server (blocks until stdin closes)
    run_mcp_server()
|
||||
|
||||
|
||||
@click.command()
@docstring(mcp.__doc__)
def main(**kwargs):
    """Start the MCP server in stdio mode"""
    # The command declares no options of its own; any keyword arguments are
    # accepted and deliberately ignored before delegating to mcp().
    mcp()


# Allow running this module directly as a script.
if __name__ == '__main__':
    main()
|
||||
138
archivebox/mcp/README.md
Normal file
138
archivebox/mcp/README.md
Normal file
@@ -0,0 +1,138 @@
|
||||
# ArchiveBox MCP Server
|
||||
|
||||
Model Context Protocol (MCP) server for ArchiveBox that exposes all CLI commands as tools for AI agents.
|
||||
|
||||
## Overview
|
||||
|
||||
This is a lightweight, stateless MCP server that dynamically introspects ArchiveBox's Click CLI commands and exposes them as MCP tools. It requires **zero manual schema definitions** - everything is auto-generated from the existing CLI metadata.
|
||||
|
||||
## Features
|
||||
|
||||
- ✅ **Auto-discovery**: Dynamically discovers all 19+ ArchiveBox CLI commands
|
||||
- ✅ **Zero duplication**: Reuses existing Click command definitions, types, and help text
|
||||
- ✅ **Auto-sync**: Changes to CLI commands automatically reflected in MCP tools
|
||||
- ✅ **Stateless**: No database models or state management required
|
||||
- ✅ **Lightweight**: ~350 lines of code
|
||||
|
||||
## Usage
|
||||
|
||||
### Start the MCP Server
|
||||
|
||||
```bash
|
||||
archivebox mcp
|
||||
```
|
||||
|
||||
The server runs in stdio mode, reading JSON-RPC 2.0 requests from stdin and writing responses to stdout.
|
||||
|
||||
### Example Client
|
||||
|
||||
```python
|
||||
import subprocess
|
||||
import json
|
||||
|
||||
# Start MCP server
|
||||
proc = subprocess.Popen(
|
||||
['archivebox', 'mcp'],
|
||||
stdin=subprocess.PIPE,
|
||||
stdout=subprocess.PIPE,
|
||||
text=True
|
||||
)
|
||||
|
||||
# Send initialize request
|
||||
request = {"jsonrpc": "2.0", "id": 1, "method": "initialize", "params": {}}
|
||||
proc.stdin.write(json.dumps(request) + '\n')
|
||||
proc.stdin.flush()
|
||||
|
||||
# Read response
|
||||
response = json.loads(proc.stdout.readline())
|
||||
print(response)
|
||||
```
|
||||
|
||||
### Example Requests
|
||||
|
||||
**Initialize:**
|
||||
```json
|
||||
{"jsonrpc":"2.0","id":1,"method":"initialize","params":{}}
|
||||
```
|
||||
|
||||
**List all available tools:**
|
||||
```json
|
||||
{"jsonrpc":"2.0","id":2,"method":"tools/list","params":{}}
|
||||
```
|
||||
|
||||
**Call a tool:**
|
||||
```json
|
||||
{
|
||||
"jsonrpc":"2.0",
|
||||
"id":3,
|
||||
"method":"tools/call",
|
||||
"params":{
|
||||
"name":"version",
|
||||
"arguments":{"quiet":true}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
## Supported MCP Methods
|
||||
|
||||
- `initialize` - Handshake and capability negotiation
|
||||
- `tools/list` - List all available CLI commands as MCP tools
|
||||
- `tools/call` - Execute a CLI command with arguments
|
||||
|
||||
## Available Tools
|
||||
|
||||
The server exposes all ArchiveBox CLI commands:
|
||||
|
||||
**Meta**: `help`, `version`, `mcp`
|
||||
**Setup**: `init`, `install`
|
||||
**Archive**: `add`, `remove`, `update`, `search`, `status`, `config`
|
||||
**Workers**: `orchestrator`, `worker`
|
||||
**Tasks**: `crawl`, `snapshot`, `extract`
|
||||
**Server**: `server`, `schedule`
|
||||
**Utilities**: `shell`, `manage`
|
||||
|
||||
## Architecture
|
||||
|
||||
### Dynamic Introspection
|
||||
|
||||
Instead of manually defining schemas, the server uses Click's introspection API to automatically generate MCP tool definitions:
|
||||
|
||||
```python
|
||||
# Auto-discover commands
|
||||
from archivebox.cli import ArchiveBoxGroup
|
||||
cli_group = ArchiveBoxGroup()
|
||||
all_commands = cli_group.all_subcommands
|
||||
|
||||
# Auto-generate schemas from Click metadata
|
||||
for cmd_name in all_commands:
|
||||
click_cmd = cli_group.get_command(None, cmd_name)
|
||||
# Extract params, types, help text, etc.
|
||||
tool_schema = click_command_to_mcp_tool(cmd_name, click_cmd)
|
||||
```
|
||||
|
||||
### Tool Execution
|
||||
|
||||
Commands are executed using Click's `CliRunner`:
|
||||
|
||||
```python
|
||||
from click.testing import CliRunner
|
||||
|
||||
runner = CliRunner()
|
||||
result = runner.invoke(click_command, args)
|
||||
```
|
||||
|
||||
## Files
|
||||
|
||||
- `server.py` (~350 lines) - Core MCP server with Click introspection
|
||||
- `archivebox/cli/archivebox_mcp.py` (~50 lines) - CLI entry point
|
||||
- `apps.py`, `__init__.py` - Django app boilerplate
|
||||
|
||||
## MCP Specification
|
||||
|
||||
Implements the `initialize`, `tools/list`, and `tools/call` methods of the [MCP 2025-11-25 specification](https://modelcontextprotocol.io/specification/2025-11-25).
|
||||
|
||||
## Sources
|
||||
|
||||
- [MCP Specification](https://modelcontextprotocol.io/specification/2025-11-25)
|
||||
- [MCP Introduction](https://www.anthropic.com/news/model-context-protocol)
|
||||
- [MCP GitHub](https://github.com/modelcontextprotocol/modelcontextprotocol)
|
||||
8
archivebox/mcp/__init__.py
Normal file
8
archivebox/mcp/__init__.py
Normal file
@@ -0,0 +1,8 @@
|
||||
__package__ = 'archivebox.mcp'
|
||||
|
||||
"""
|
||||
Model Context Protocol (MCP) server for ArchiveBox.
|
||||
|
||||
Exposes all ArchiveBox CLI commands as MCP tools via dynamic Click introspection.
|
||||
Provides a JSON-RPC 2.0 interface over stdio for AI agents to control ArchiveBox.
|
||||
"""
|
||||
9
archivebox/mcp/apps.py
Normal file
9
archivebox/mcp/apps.py
Normal file
@@ -0,0 +1,9 @@
|
||||
__package__ = 'archivebox.mcp'
|
||||
|
||||
from django.apps import AppConfig
|
||||
|
||||
|
||||
class MCPConfig(AppConfig):
    """Django application configuration for the ArchiveBox MCP server app."""

    # Module path/label under which Django registers this app.
    name = 'mcp'
    # Human-readable name for the app.
    verbose_name = 'Model Context Protocol Server'
    # Primary-key field type used for any models that do not declare one.
    default_auto_field = 'django.db.models.BigAutoField'
|
||||
353
archivebox/mcp/server.py
Normal file
353
archivebox/mcp/server.py
Normal file
@@ -0,0 +1,353 @@
|
||||
__package__ = 'archivebox.mcp'
|
||||
|
||||
"""
|
||||
Model Context Protocol (MCP) server implementation for ArchiveBox.
|
||||
|
||||
Dynamically exposes all ArchiveBox CLI commands as MCP tools by introspecting
|
||||
Click command metadata. Handles JSON-RPC 2.0 requests over stdio transport.
|
||||
"""
|
||||
|
||||
import sys
|
||||
import json
|
||||
import traceback
|
||||
from typing import Any, Dict, List, Optional
|
||||
from io import StringIO
|
||||
from contextlib import redirect_stdout, redirect_stderr
|
||||
|
||||
import click
|
||||
from click.testing import CliRunner
|
||||
|
||||
from archivebox.config.version import VERSION
|
||||
|
||||
|
||||
class MCPJSONEncoder(json.JSONEncoder):
    """Custom JSON encoder that handles Click sentinel values and other special types"""

    def default(self, obj):
        """
        Return a JSON-serializable replacement for ``obj``.

        Called by the ``json`` machinery only for objects it cannot encode
        natively. Click sentinel instances become ``None``, tuples become
        lists, and anything the base encoder rejects is rendered with ``str()``
        so that serialization never raises.
        """
        # The sentinel class is looked up defensively: it is a private Click
        # attribute and may not exist in the installed version.
        sentinel_cls = getattr(getattr(click, 'core', None), '_SentinelClass', None)
        if sentinel_cls is not None and isinstance(obj, sentinel_cls):
            return None

        # Handle tuples (convert to lists)
        if isinstance(obj, tuple):
            return list(obj)

        # Last resort: let the base class try, then fall back to the string form.
        try:
            return super().default(obj)
        except TypeError:
            return str(obj)
|
||||
|
||||
|
||||
# Type mapping from Click types to JSON Schema types
|
||||
def click_type_to_json_schema_type(click_type) -> dict:
    """
    Convert a Click parameter type to JSON Schema type definition.

    The first matching entry of an ordered lookup table wins; any type not
    listed there is described as a plain string. A fresh dict is returned on
    every call, so callers may freely add keys to it.
    """
    types = click.types

    # Built per call so that every returned schema (including nested dicts) is
    # a new object that the caller can mutate safely.
    schema_table = (
        (types.StringParamType, lambda t: {"type": "string"}),
        (types.IntParamType, lambda t: {"type": "integer"}),
        (types.FloatParamType, lambda t: {"type": "number"}),
        (types.BoolParamType, lambda t: {"type": "boolean"}),
        (types.Choice, lambda t: {"type": "string", "enum": t.choices}),
        (types.Path, lambda t: {"type": "string", "description": "File or directory path"}),
        (types.File, lambda t: {"type": "string", "description": "File path"}),
        # Multiple arguments of same type
        (types.Tuple, lambda t: {"type": "array", "items": {"type": "string"}}),
    )

    for type_cls, build_schema in schema_table:
        if isinstance(click_type, type_cls):
            return build_schema(click_type)

    # Default to string for unknown types
    return {"type": "string"}
|
||||
|
||||
|
||||
_JSON_SCALAR_TYPES = (bool, int, float, str)


def _json_schema_default(value):
    """Return ``value`` in a JSON-representable form, or None if it has none.

    Only scalars and non-empty list/tuples of scalars are advertised. Anything
    else (``None``, empty sequences, callables used as dynamic defaults, Click's
    internal "unset" markers, arbitrary objects) is dropped rather than being
    stringified into a misleading schema default.
    """
    if isinstance(value, _JSON_SCALAR_TYPES):
        return value
    if isinstance(value, (list, tuple)) and value:
        if all(isinstance(item, _JSON_SCALAR_TYPES) for item in value):
            return list(value)
    return None


def click_command_to_mcp_tool(cmd_name: str, click_command: click.Command) -> dict:
    """
    Convert a Click command to an MCP tool definition with JSON Schema.

    Introspects the Click command's parameters to automatically generate
    the input schema without manual definition.

    Args:
        cmd_name: Name the tool is exposed under.
        click_command: The Click command whose ``params`` are introspected.

    Returns:
        A dict with ``name``, ``description`` and ``inputSchema`` keys, where
        ``inputSchema`` is an object schema with one property per parameter.
    """

    properties = {}
    required = []

    # Extract parameters from Click command
    for param in click_command.params:
        # Skip internal parameters
        if param.name in ('help', 'version'):
            continue

        param_schema = click_type_to_json_schema_type(param.type)

        # Only options are guaranteed to carry help text; positional arguments
        # may not define the attribute at all, so read it defensively instead
        # of letting an AttributeError drop the whole command from the list.
        help_text = getattr(param, 'help', None)
        if help_text:
            param_schema["description"] = help_text

        # Repeatable options and variadic arguments (nargs=-1) both accept a
        # list of values (like multiple URLs).
        accepts_many = bool(getattr(param, 'multiple', False)) or getattr(param, 'nargs', 1) == -1
        if accepts_many:
            prop_schema = {
                "type": "array",
                "items": param_schema,
                "description": param_schema.get("description", f"Multiple {param.name} values"),
            }
        else:
            prop_schema = param_schema

        # The default describes the whole property, so it is attached to the
        # property schema (the array itself for multi-value parameters).
        default = _json_schema_default(param.default)
        if default is not None:
            prop_schema["default"] = default

        properties[param.name] = prop_schema

        # Mark as required if Click requires it
        if param.required:
            required.append(param.name)

    return {
        "name": cmd_name,
        "description": click_command.help or click_command.short_help or f"Run archivebox {cmd_name} command",
        "inputSchema": {
            "type": "object",
            "properties": properties,
            "required": required,
        },
    }
|
||||
|
||||
|
||||
def _pick_flag(candidates, fallback=None):
    """Return the preferred flag spelling: first long (``--x``) form, else the first one."""
    candidates = list(candidates or [])
    for flag in candidates:
        if flag.startswith('--'):
            return flag
    return candidates[0] if candidates else fallback


def _build_cli_args(click_command, arguments) -> list:
    """Translate an MCP ``arguments`` mapping into an argv-style list for Click.

    Each key is matched against the command's declared parameters so that
    positional arguments are passed positionally and options use the flag
    spelling Click actually declared. Keys that match no declared parameter
    keep the historical fallback: ``--key-with-dashes`` for scalars/booleans
    and positional values for lists.
    """
    params_by_name = {param.name: param for param in click_command.params}
    option_args = []
    positional_args = []

    for key, value in (arguments or {}).items():
        if value is None:
            continue

        param = params_by_name.get(key)

        # Positional arguments never take a `--name` prefix.
        if getattr(param, 'param_type_name', None) == 'argument':
            values = value if isinstance(value, (list, tuple)) else [value]
            positional_args.extend(str(item) for item in values)
            continue

        flag = _pick_flag(getattr(param, 'opts', None), f"--{key.replace('_', '-')}")

        if isinstance(value, bool):
            # Unknown keys are treated as plain flags, as before.
            if getattr(param, 'is_flag', True):
                if value:
                    option_args.append(flag)
                else:
                    # `--x/--no-x` style flags can be switched off explicitly.
                    negated = _pick_flag(getattr(param, 'secondary_opts', None))
                    if negated:
                        option_args.append(negated)
            else:
                # Boolean-typed option that expects an explicit value.
                option_args.extend([flag, 'true' if value else 'false'])
        elif isinstance(value, (list, tuple)):
            if param is None:
                positional_args.extend(str(item) for item in value)
            elif getattr(param, 'multiple', False):
                # Repeatable option: `--opt a --opt b`
                for item in value:
                    option_args.extend([flag, str(item)])
            else:
                # Fixed-arity option: `--opt a b`
                option_args.append(flag)
                option_args.extend(str(item) for item in value)
        else:
            option_args.extend([flag, str(value)])

    # Keep positional values that look like flags from being parsed as options.
    if any(item.startswith('-') for item in positional_args):
        positional_args.insert(0, '--')

    return option_args + positional_args


def execute_click_command(cmd_name: str, click_command: click.Command, arguments: dict) -> dict:
    """
    Execute a Click command programmatically with given arguments.

    Args:
        cmd_name: Tool/command name, used only in error messages.
        click_command: The Click command to invoke.
        arguments: Mapping of parameter name to value (``None`` is treated as
            an empty mapping).

    Returns:
        MCP-formatted result: ``{"content": [...], "isError": bool}`` with the
        captured stdout, any captured stderr (prefixed with ``[stderr]``), and
        ``isError`` set when the exit code is non-zero or an exception escaped.
    """

    # Use Click's test runner to invoke command programmatically
    runner = CliRunner()

    try:
        cli_args = _build_cli_args(click_command, arguments)
        result = runner.invoke(click_command, cli_args, catch_exceptions=False)
    except Exception as e:
        # Capture any exceptions during execution
        error_trace = traceback.format_exc()
        return {
            "content": [{
                "type": "text",
                "text": f"Error executing {cmd_name}: {str(e)}\n\n{error_trace}"
            }],
            "isError": True
        }

    # Format output as MCP content
    content = []

    # `stdout` (not `output`) is used so that stderr is not reported twice on
    # Click versions where `output` interleaves both streams.
    stdout_text = result.stdout
    if stdout_text:
        content.append({
            "type": "text",
            "text": stdout_text
        })

    if result.stderr_bytes:
        stderr_text = result.stderr_bytes.decode('utf-8', errors='replace')
        if stderr_text.strip():
            content.append({
                "type": "text",
                "text": f"[stderr]\n{stderr_text}"
            })

    # Check exit code
    is_error = result.exit_code != 0

    if is_error and not content:
        content.append({
            "type": "text",
            "text": f"Command failed with exit code {result.exit_code}"
        })

    return {
        "content": content or [{"type": "text", "text": "(no output)"}],
        "isError": is_error
    }
|
||||
|
||||
|
||||
class MCPServer:
    """
    Model Context Protocol server for ArchiveBox.

    Provides JSON-RPC 2.0 interface over stdio, dynamically exposing
    all Click commands as MCP tools.
    """

    def __init__(self):
        # Import here to avoid circular imports
        from archivebox.cli import ArchiveBoxGroup

        self.cli_group = ArchiveBoxGroup()
        self.protocol_version = "2025-11-25"
        self._tool_cache = {}  # Cache loaded Click commands

    @staticmethod
    def _error_response(request_id, code: int, message: str, data=None) -> dict:
        """Build a JSON-RPC 2.0 error response; ``data`` is included only when given."""
        error = {"code": code, "message": message}
        if data is not None:
            error["data"] = data
        return {"jsonrpc": "2.0", "id": request_id, "error": error}

    def get_click_command(self, cmd_name: str) -> Optional["click.Command"]:
        """Get a Click command by name, with caching. Returns None for unknown names."""
        if cmd_name not in self._tool_cache:
            if cmd_name not in self.cli_group.all_subcommands:
                return None
            self._tool_cache[cmd_name] = self.cli_group.get_command(None, cmd_name)
        return self._tool_cache[cmd_name]

    def handle_initialize(self, params: dict) -> dict:
        """Handle MCP initialize request: report protocol version, capabilities and server info."""
        return {
            "protocolVersion": self.protocol_version,
            "capabilities": {
                "tools": {}
            },
            "serverInfo": {
                "name": "archivebox-mcp",
                "version": VERSION
            }
        }

    def handle_tools_list(self, params: dict) -> dict:
        """Handle MCP tools/list request - returns all available CLI commands as tools"""
        tools = []

        for cmd_name in self.cli_group.all_subcommands:
            click_cmd = self.get_click_command(cmd_name)
            if not click_cmd:
                continue
            try:
                tools.append(click_command_to_mcp_tool(cmd_name, click_cmd))
            except Exception as e:
                # Log but don't fail - skip problematic commands.
                # stderr is used because stdout carries the JSON-RPC stream.
                print(f"Warning: Could not generate tool for {cmd_name}: {e}", file=sys.stderr)

        return {"tools": tools}

    def handle_tools_call(self, params: dict) -> dict:
        """
        Handle MCP tools/call request - executes a CLI command.

        Raises:
            ValueError: if ``name`` is missing, names an unknown tool, or
                ``arguments`` is not an object.
        """
        tool_name = params.get('name')
        # An explicit null is treated the same as an omitted arguments object.
        arguments = params.get('arguments') or {}

        if not tool_name:
            raise ValueError("Missing required parameter: name")

        if not isinstance(arguments, dict):
            raise ValueError("Invalid parameter: arguments must be an object")

        click_cmd = self.get_click_command(tool_name)
        if not click_cmd:
            raise ValueError(f"Unknown tool: {tool_name}")

        # Execute the command and return MCP-formatted result
        return execute_click_command(tool_name, click_cmd, arguments)

    def handle_request(self, request: dict) -> dict:
        """
        Handle a JSON-RPC 2.0 request and return response.

        Supports MCP methods: initialize, tools/list, tools/call

        Always returns a response dict and never raises:
          * -32600 when ``request`` is not a JSON object
          * -32601 when the method is unknown
          * -32602 when params are malformed or a handler raises ValueError
          * -32603 (with traceback in ``data``) for any other handler failure
        """
        if not isinstance(request, dict):
            return self._error_response(None, -32600, "Invalid Request", "Request must be a JSON object")

        method = request.get('method')
        request_id = request.get('id')
        # `"params": null` is treated like omitted params.
        params = request.get('params')
        if params is None:
            params = {}

        handlers = {
            'initialize': self.handle_initialize,
            'tools/list': self.handle_tools_list,
            'tools/call': self.handle_tools_call,
        }
        # Guard against unhashable/non-string method values before the lookup.
        handler = handlers.get(method) if isinstance(method, str) else None
        if handler is None:
            return self._error_response(request_id, -32601, f"Method not found: {method}")

        if not isinstance(params, dict):
            return self._error_response(request_id, -32602, "Invalid params", "params must be an object")

        try:
            result = handler(params)
        except ValueError as e:
            # Raised by handlers for caller mistakes (missing name, unknown tool, ...)
            return self._error_response(request_id, -32602, str(e))
        except Exception as e:
            return self._error_response(request_id, -32603, str(e), traceback.format_exc())

        # Success response
        return {
            "jsonrpc": "2.0",
            "id": request_id,
            "result": result
        }

    def run_stdio_server(self):
        """
        Run the MCP server in stdio mode.

        Reads JSON-RPC requests from stdin (one per line),
        writes JSON-RPC responses to stdout (one per line).
        Blank lines are skipped, and notifications (request objects without an
        ``id``) are processed but never answered.
        """

        # Read requests from stdin line by line
        for line in sys.stdin:
            line = line.strip()
            if not line:
                continue

            try:
                request = json.loads(line)
            except json.JSONDecodeError as e:
                response = self._error_response(None, -32700, "Parse error", str(e))
            else:
                response = self.handle_request(request)
                # JSON-RPC 2.0: a notification must not receive any response,
                # not even an error.
                if isinstance(request, dict) and 'id' not in request:
                    continue

            # Write response to stdout (use custom encoder for Click types);
            # flushed per message so an interactive client is never left waiting.
            print(json.dumps(response, cls=MCPJSONEncoder), flush=True)
|
||||
|
||||
|
||||
def run_mcp_server():
    """Main entry point for MCP server"""
    # Build a server and serve requests over stdio; run_stdio_server() returns
    # once its loop over stdin ends.
    MCPServer().run_stdio_server()
|
||||
Reference in New Issue
Block a user