commit a3be1032322536c210247125ee3373c7e9ab9d35 Author: help4bis Date: Sun Apr 26 14:49:44 2026 +1000 initial: ollama-compatible facade for claude -p diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..e0e05ac --- /dev/null +++ b/.gitignore @@ -0,0 +1,4 @@ +venv/ +__pycache__/ +*.pyc +config.yaml.bak.* diff --git a/app.py b/app.py new file mode 100644 index 0000000..0b04134 --- /dev/null +++ b/app.py @@ -0,0 +1,258 @@ +""" +claude-code-proxy: Ollama-compatible HTTP facade for `claude -p`. + +Exposes a subset of the Ollama API on http://127.0.0.1:11435 and translates +each request into a `claude -p` subprocess invocation. This lets external +tools that already speak Ollama (Open WebUI, AnythingLLM, n8n nodes, etc.) +talk to Claude Code instead of a local Ollama instance. + +Endpoints: + GET / health check + GET /api/version Ollama version stub + GET /api/tags list "models" (so clients can validate) + POST /api/show model details stub + POST /api/generate single-shot prompt -> response + POST /api/chat multi-message conversation -> response + +Both /api/generate and /api/chat honour the `stream` flag in the request +body (Ollama default is True). When true, responses are emitted as +NDJSON chunks; when false, a single JSON object is returned. 
+ +Environment variables: + CLAUDE_BIN path to claude CLI (default: "claude") + CLAUDE_PROXY_CONCURRENCY max concurrent claude subprocesses (default: 3) + CLAUDE_PROXY_MODEL name advertised in /api/tags (default: "claude-code") + CLAUDE_PROXY_TIMEOUT per-request timeout in seconds (default: 300) + CLAUDE_CODE_OAUTH_TOKEN long-lived auth token, inherited by claude subprocess +""" + +from __future__ import annotations + +import asyncio +import json +import logging +import os +import time +from datetime import datetime, timezone +from typing import Any, AsyncIterator + +from fastapi import FastAPI, Request +from fastapi.responses import JSONResponse, StreamingResponse + +# --- Configuration ---------------------------------------------------------- + +CLAUDE_BIN = os.environ.get("CLAUDE_BIN", "claude") +CONCURRENCY = int(os.environ.get("CLAUDE_PROXY_CONCURRENCY", "3")) +DEFAULT_MODEL = os.environ.get("CLAUDE_PROXY_MODEL", "claude-code") +TIMEOUT_SECONDS = int(os.environ.get("CLAUDE_PROXY_TIMEOUT", "300")) + +logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s %(message)s") +log = logging.getLogger("claude-proxy") + +app = FastAPI(title="claude-code-proxy") +_semaphore = asyncio.Semaphore(CONCURRENCY) + + +# --- Helpers ---------------------------------------------------------------- + +def _now_iso() -> str: + return datetime.now(timezone.utc).isoformat().replace("+00:00", "Z") + + +async def _run_claude(prompt: str) -> str: + """Run `claude -p ` and return stdout as a string.""" + async with _semaphore: + log.info("claude -p invoked (prompt %d chars)", len(prompt)) + proc = await asyncio.create_subprocess_exec( + CLAUDE_BIN, "-p", prompt, + stdout=asyncio.subprocess.PIPE, + stderr=asyncio.subprocess.PIPE, + env=os.environ.copy(), + ) + try: + stdout, stderr = await asyncio.wait_for( + proc.communicate(), timeout=TIMEOUT_SECONDS + ) + except asyncio.TimeoutError: + proc.kill() + await proc.wait() + raise RuntimeError(f"claude -p timed out after 
{TIMEOUT_SECONDS}s") + + if proc.returncode != 0: + err = stderr.decode("utf-8", errors="replace")[:1000] + raise RuntimeError(f"claude -p exited {proc.returncode}: {err}") + + return stdout.decode("utf-8", errors="replace") + + +def _build_prompt_from_messages(messages: list[dict]) -> str: + """Flatten OpenAI/Ollama-style messages into a single prompt string.""" + system_parts = [m["content"] for m in messages if m.get("role") == "system"] + convo: list[str] = [] + for m in messages: + role = m.get("role", "user") + if role == "system": + continue + prefix = "User" if role == "user" else "Assistant" + convo.append(f"{prefix}: {m.get('content', '')}") + convo.append("Assistant:") + body = "\n\n".join(convo) + if system_parts: + return "[System]\n" + "\n\n".join(system_parts) + "\n\n" + body + return body + + +# --- Streaming generators --------------------------------------------------- + +async def _stream_generate(base: dict, text: str) -> AsyncIterator[bytes]: + """Emit Ollama-style NDJSON for /api/generate: incremental chunks then done.""" + chunk_size = 64 + started = time.time() + for i in range(0, len(text), chunk_size): + frame = {**base, "response": text[i:i + chunk_size], "done": False} + yield (json.dumps(frame) + "\n").encode("utf-8") + await asyncio.sleep(0) + final = { + **base, + "response": "", + "done": True, + "done_reason": "stop", + "total_duration": int((time.time() - started) * 1e9), + } + yield (json.dumps(final) + "\n").encode("utf-8") + + +async def _stream_chat(base: dict, text: str) -> AsyncIterator[bytes]: + """Emit Ollama-style NDJSON for /api/chat: each frame carries a message.""" + chunk_size = 64 + started = time.time() + for i in range(0, len(text), chunk_size): + frame = { + **base, + "message": {"role": "assistant", "content": text[i:i + chunk_size]}, + "done": False, + } + yield (json.dumps(frame) + "\n").encode("utf-8") + await asyncio.sleep(0) + final = { + **base, + "message": {"role": "assistant", "content": ""}, + "done": 
@app.post("/api/show")
async def show(req: Request) -> Any:
    """Return stub model details for ``POST /api/show``.

    The model name is read from the ``model`` key of the JSON body, then
    from the older ``name`` key, and finally falls back to
    ``DEFAULT_MODEL``. The name is only echoed into the ``modelfile``
    field; every other field is a fixed placeholder.

    Returns:
        A dict with ``modelfile``, ``parameters``, ``template`` and
        ``details``, or a 400 ``JSONResponse`` carrying an ``error`` message
        when the body is not a JSON object.
    """
    try:
        body = await req.json()
    except ValueError:
        # json.JSONDecodeError is a ValueError: empty or malformed body.
        return JSONResponse({"error": "invalid JSON body"}, status_code=400)
    if not isinstance(body, dict):
        return JSONResponse(
            {"error": "request body must be a JSON object"}, status_code=400
        )

    # Prefer "model"; keep honouring "name" for clients that still send it.
    name = body.get("model") or body.get("name") or DEFAULT_MODEL
    return {
        "modelfile": f"FROM {name}",
        "parameters": "",
        "template": "",
        "details": {
            "format": "claude",
            "family": "claude",
            "parameter_size": "unknown",
            "quantization_level": "none",
        },
    }
"done_reason": "stop", + "total_duration": int((time.time() - started) * 1e9), + } + + +@app.post("/api/chat") +async def chat(req: Request) -> Any: + body = await req.json() + model = body.get("model", DEFAULT_MODEL) + messages = body.get("messages", []) + stream = bool(body.get("stream", True)) + + prompt = _build_prompt_from_messages(messages) + + started = time.time() + try: + text = await _run_claude(prompt) + except Exception as e: + log.exception("claude invocation failed") + return JSONResponse({"error": str(e)}, status_code=500) + + base = {"model": model, "created_at": _now_iso()} + if stream: + return StreamingResponse( + _stream_chat(base, text), + media_type="application/x-ndjson", + ) + return { + **base, + "message": {"role": "assistant", "content": text}, + "done": True, + "done_reason": "stop", + "total_duration": int((time.time() - started) * 1e9), + } diff --git a/claude-code-proxy.service b/claude-code-proxy.service new file mode 100644 index 0000000..3a44c50 --- /dev/null +++ b/claude-code-proxy.service @@ -0,0 +1,25 @@ +[Unit] +Description=claude-code-proxy: Ollama-compatible HTTP facade for `claude -p` +Documentation=file:///home/help4bis/claude-proxy/README.md +After=network-online.target +Wants=network-online.target + +[Service] +Type=exec +User=help4bis +Group=help4bis +WorkingDirectory=/home/help4bis/claude-proxy +Environment=HOME=/home/help4bis +Environment=PATH=/home/help4bis/.local/bin:/home/help4bis/claude-proxy/venv/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin +Environment=CLAUDE_PROXY_CONCURRENCY=3 +ExecStart=/home/help4bis/claude-proxy/venv/bin/uvicorn app:app --host 127.0.0.1 --port 11435 --workers 1 --log-level info +Restart=always +RestartSec=5s + +# Hardening — kept minimal because claude CLI needs free access to $HOME +# for session state, auth cache (~/.claude, ~/.local/state/claude, ~/.cache/claude) +# and the $HOME path is an /mnt bind mount which doesn't play with ProtectHome. 
+NoNewPrivileges=true + +[Install] +WantedBy=multi-user.target diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..38465d9 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,3 @@ +fastapi>=0.115,<0.116 +uvicorn[standard]>=0.34,<0.35 +pydantic>=2.10,<3