From f337c94281bbb6d1ee126c56c95262df108659d9 Mon Sep 17 00:00:00 2001 From: hhofmann Date: Mon, 13 Apr 2026 16:38:35 +0200 Subject: [PATCH] Initial commit: mem0 docker with qdrant --- .env | 10 ++ Dockerfile | 28 +++++ docker-compose.yml | 50 +++++++++ example.env | 10 ++ main.py | 267 +++++++++++++++++++++++++++++++++++++++++++++ mem0_server.py | 162 +++++++++++++++++++++++++++ requirements.txt | 5 + 7 files changed, 532 insertions(+) create mode 100644 .env create mode 100644 Dockerfile create mode 100644 docker-compose.yml create mode 100644 example.env create mode 100644 main.py create mode 100755 mem0_server.py create mode 100644 requirements.txt diff --git a/.env b/.env new file mode 100644 index 0000000..7e31087 --- /dev/null +++ b/.env @@ -0,0 +1,10 @@ +# Mem0 Configuration +MEM0_PORT=8889 + +# Qdrant Configuration +QDRANT_HOST=qdrant +QDRANT_PORT=6333 + +# Embedding Configuration +EMBEDDING_URL=http://host.docker.internal:4700/embedding +EMBEDDING_DIMS=1024 diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..7d703e2 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,28 @@ +FROM python:3.11-slim + +WORKDIR /app + +# Install system dependencies +RUN apt-get update && apt-get install -y --no-install-recommends \ + curl \ + && rm -rf /var/lib/apt/lists/* + +# Copy requirements first for better caching +COPY requirements.txt . +RUN pip install --no-cache-dir -r requirements.txt + +# Copy application code +COPY main.py . 
+ +# Create history directory +RUN mkdir -p /app/history + +# Expose port +EXPOSE 8000 + +# Health check +HEALTHCHECK --interval=30s --timeout=10s --start-period=5s --retries=3 \ + CMD curl -f http://localhost:8000/health || exit 1 + +# Run the application +CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8000"] diff --git a/docker-compose.yml b/docker-compose.yml new file mode 100644 index 0000000..d0001b0 --- /dev/null +++ b/docker-compose.yml @@ -0,0 +1,50 @@ +name: mem0-selfhost + +services: + mem0: + build: + context: . + dockerfile: Dockerfile + ports: + - "${MEM0_PORT:-8889}:8000" + env_file: + - .env + networks: + - mem0_network + extra_hosts: + - "host.docker.internal:host-gateway" + depends_on: + - qdrant + environment: + - PYTHONDONTWRITEBYTECODE=1 + - PYTHONUNBUFFERED=1 + - QDRANT_HOST=${QDRANT_HOST} + - QDRANT_PORT=${QDRANT_PORT} + - EMBEDDING_URL=${EMBEDDING_URL} + - EMBEDDING_DIMS=${EMBEDDING_DIMS} + volumes: + - ./history:/app/history + restart: unless-stopped + + qdrant: + image: qdrant/qdrant:latest + ports: + - "6333:6333" + networks: + - mem0_network + volumes: + - qdrant_storage:/qdrant/storage + healthcheck: + test: ["CMD-SHELL", "wget -q --spider http://localhost:6333/ || exit 1"] + interval: 10s + timeout: 5s + retries: 3 + start_period: 10s + restart: unless-stopped + +volumes: + qdrant_storage: + +networks: + mem0_network: + driver: bridge diff --git a/example.env b/example.env new file mode 100644 index 0000000..7e31087 --- /dev/null +++ b/example.env @@ -0,0 +1,10 @@ +# Mem0 Configuration +MEM0_PORT=8889 + +# Qdrant Configuration +QDRANT_HOST=qdrant +QDRANT_PORT=6333 + +# Embedding Configuration +EMBEDDING_URL=http://host.docker.internal:4700/embedding +EMBEDDING_DIMS=1024 diff --git a/main.py b/main.py new file mode 100644 index 0000000..63d5933 --- /dev/null +++ b/main.py @@ -0,0 +1,267 @@ +""" +Mem0 API Server - Simple wrapper around mem0ai with llama.cpp embedding support +""" +import os +import requests +from fastapi 
class LlamaCppEmbedder:
    """Client for a llama.cpp ``/embedding`` HTTP endpoint.

    Attributes:
        base_url: Full URL the text is POSTed to.
        dims: Expected length of every returned vector.
        timeout: Seconds to wait for the endpoint before giving up.
    """

    def __init__(self, base_url: str, dims: int, timeout: float = 30.0):
        self.base_url = base_url
        self.dims = dims
        self.timeout = timeout

    def get_embedding(self, text: str) -> List[float]:
        """Return the embedding vector for *text*.

        Raises:
            requests.RequestException: on connection errors, timeouts, or a
                non-2xx status (via ``raise_for_status``).
            ValueError: if the response body does not have the expected
                shape or the vector length differs from ``dims``.
        """
        # Without an explicit timeout ``requests`` waits forever, so a hung
        # embedding server would block the calling worker indefinitely.
        response = requests.post(
            self.base_url,
            json={"content": text},
            headers={"Content-Type": "application/json"},
            timeout=self.timeout,
        )
        response.raise_for_status()
        return self._extract_vector(response.json())

    def _extract_vector(self, result: Any) -> List[float]:
        """Pull the vector out of a decoded response and validate it.

        The body is read as ``[{"embedding": [[...floats...]]}]``: the first
        item's ``embedding`` field, then its first row.
        """
        try:
            vector = result[0]["embedding"][0]
        except (KeyError, IndexError, TypeError) as exc:
            raise ValueError(
                "Unexpected embedding response shape; expected "
                '[{"embedding": [[...]]}]'
            ) from exc
        if not isinstance(vector, list):
            raise ValueError(
                "Unexpected embedding response shape; expected "
                '[{"embedding": [[...]]}]'
            )
        # Fail here with a clear message instead of letting the vector store
        # reject a wrongly sized vector later.
        if len(vector) != self.dims:
            raise ValueError(
                f"Embedding has {len(vector)} dimensions, expected {self.dims}"
            )
        return vector
@app.get("/health")
def health_check():
    """Report whether the embedding endpoint and Qdrant are reachable.

    Declared as a plain ``def`` because the probes use the blocking
    ``requests`` library; inside an ``async def`` they would stall the event
    loop for up to the probe timeout.

    Returns:
        A dict with ``status`` (``"healthy"``, ``"degraded"`` or
        ``"unhealthy"``), ``service``, and one boolean per dependency
        (``embedding_endpoint``, ``qdrant``). When a probe raised, ``status``
        is ``"unhealthy"`` and ``error`` carries the failure messages.
    """
    failures = []

    def reachable(name, url, is_ok):
        """Run one probe; record the failure and return False if it raises."""
        try:
            return bool(is_ok(requests.get(url, timeout=5)))
        except Exception as exc:  # a health endpoint must report, never raise
            failures.append(f"{name}: {exc}")
            return False

    # NOTE(review): the "gzip" body check is carried over unchanged;
    # presumably it tolerates a non-200 reply from the embedding server's
    # root page -- confirm against the server actually deployed.
    embedding_healthy = reachable(
        "embedding_endpoint",
        EMBEDDING_URL.replace("/embedding", "/"),
        lambda resp: resp.status_code == 200 or "gzip" in resp.text.lower(),
    )
    qdrant_healthy = reachable(
        "qdrant",
        f"http://{QDRANT_HOST}:{QDRANT_PORT}/",
        lambda resp: resp.status_code == 200,
    )

    if failures:
        status = "unhealthy"
    elif embedding_healthy and qdrant_healthy:
        status = "healthy"
    else:
        status = "degraded"

    report = {
        "status": status,
        "service": "mem0-api",
        "embedding_endpoint": embedding_healthy,
        "qdrant": qdrant_healthy,
    }
    if failures:
        report["error"] = "; ".join(failures)
    return report
@app.post("/search", response_model=SearchMemoryResponse)
def search_memory(request: SearchMemoryRequest):
    """Return the stored memories most similar to ``request.query``.

    Declared as a plain ``def`` because both the embedding call and the
    Qdrant client used here are blocking; inside an ``async def`` they would
    stall the event loop.

    Args:
        request: Query text, optional ``user_id`` filter and result limit.

    Returns:
        SearchMemoryResponse with one SearchResult per hit.

    Raises:
        HTTPException: status 500 carrying the underlying error message when
            embedding or the vector search fails.
    """
    try:
        query_embedding = embedder.get_embedding(request.query)

        # Restrict hits to one user unless the caller sent an empty user_id.
        query_filter = None
        if request.user_id:
            query_filter = Filter(
                must=[FieldCondition(key="user_id", match=MatchValue(value=request.user_id))]
            )

        # ``limit`` is Optional on the request model, so an explicit null
        # arrives as None; fall back to the model's default of 5.
        limit = request.limit if request.limit is not None else 5

        results = qdrant_client.query_points(
            collection_name=COLLECTION_NAME,
            query=query_embedding,
            limit=limit,
            query_filter=query_filter,
            with_payload=True,
            with_vectors=False,
        )

        formatted_results = []
        for hit in results.points:
            payload = hit.payload or {}
            formatted_results.append(
                SearchResult(
                    id=str(hit.id),
                    # ``or ""`` also covers keys stored with a null value,
                    # which would otherwise fail the ``str`` field validation.
                    text=payload.get("text") or "",
                    user_id=payload.get("user_id") or "",
                    score=hit.score,
                    metadata=payload.get("metadata"),
                )
            )

        return SearchMemoryResponse(results=formatted_results)
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e)) from e
@app.delete("/delete/{memory_id}", response_model=DeleteMemoryResponse)
def delete_memory(memory_id: str):
    """Delete the memory whose point id is ``memory_id``.

    Declared as a plain ``def`` because the Qdrant client call is blocking;
    inside an ``async def`` it would stall the event loop.

    Args:
        memory_id: Point id taken from the URL path.

    Returns:
        DeleteMemoryResponse echoing ``memory_id`` with ``success=True``.

    Raises:
        HTTPException: status 500 carrying the underlying error message when
            the Qdrant call fails.
    """
    try:
        qdrant_client.delete(
            collection_name=COLLECTION_NAME,
            points_selector=[memory_id],
        )
    except Exception as e:
        # Chain the cause so the original traceback survives in server logs.
        raise HTTPException(status_code=500, detail=str(e)) from e

    return DeleteMemoryResponse(
        success=True,
        memory_id=memory_id,
        message="Memory deleted successfully",
    )
class MemoryHandler(BaseHTTPRequestHandler):
    """HTTP handler exposing the ``hermes_memory`` Qdrant collection.

    Routes:
        GET  /health                 -> static status payload
        GET  /memory                 -> up to 10 stored memories for USER_ID
        GET  /memory/<query>/search  -> top 5 semantic matches for <query>
        POST /memory                 -> store ``{"text": "..."}`` as a memory

    Responses are JSON; 400 and 404 replies have no body.
    """

    COLLECTION = "hermes_memory"

    def log_message(self, format, *args):
        """Silence the base class's per-request logging."""

    # ----------------------------------------------------------- helpers

    def _send_json(self, status, payload=None):
        """Send *status* and, when given, *payload* encoded as JSON."""
        self.send_response(status)
        if payload is None:
            self.end_headers()
            return
        body = json.dumps(payload, default=str).encode()
        self.send_header("Content-Type", "application/json")
        self.send_header("Content-Length", str(len(body)))
        self.end_headers()
        self.wfile.write(body)

    @staticmethod
    def _point_id(text):
        """Return a deterministic point id (UUID string) for *text*.

        The built-in ``hash()`` of a string differs between interpreter
        processes and, reduced modulo 1,000,000, collides easily, which let
        unrelated memories overwrite each other. A name-based UUID keeps
        "same text -> same id" while being stable and collision-resistant.
        """
        import uuid

        return str(uuid.uuid5(uuid.NAMESPACE_URL, text))

    @staticmethod
    def _search_query(path):
        """Extract the URL-decoded query from ``/memory/<query>/search``.

        Returns None when *path* is not a search route, and an empty string
        when the route matches but carries no query.
        """
        from urllib.parse import unquote, urlsplit

        route = urlsplit(path).path
        prefix, suffix = "/memory/", "/search"
        if not (route.startswith(prefix) and route.endswith(suffix)):
            return None
        # Clients percent-encode spaces and non-ASCII text; embed what the
        # user actually typed rather than the escape sequences.
        return unquote(route[len(prefix):-len(suffix)])

    @staticmethod
    def _user_filter():
        """Build the Qdrant filter selecting points owned by USER_ID."""
        return models.Filter(
            must=[models.FieldCondition(key="user_id", match=models.MatchValue(value=USER_ID))]
        )

    @staticmethod
    def _as_row(point, score=None):
        """Convert a Qdrant point into the JSON row returned to clients."""
        payload = point.payload or {}
        row = {"id": point.id, "text": payload.get("text", "")}
        if score is not None:
            row["score"] = score
        row["timestamp"] = payload.get("timestamp", "")
        return row

    def _read_text(self):
        """Return the ``text`` field of the JSON request body, or None.

        None is returned for a missing or invalid Content-Length, a body
        that is not valid JSON, a body that is not a JSON object, or a
        ``text`` value that is not a string.
        """
        try:
            length = int(self.headers.get("Content-Length") or 0)
            if length <= 0:
                return None
            payload = json.loads(self.rfile.read(length).decode())
        except ValueError:  # also covers JSONDecodeError, UnicodeDecodeError
            return None
        if not isinstance(payload, dict):
            return None
        text = payload.get("text", "")
        return text if isinstance(text, str) else None

    # ------------------------------------------------------------ routes

    def do_GET(self):
        """Dispatch GET requests to health, listing, or search."""
        from urllib.parse import urlsplit

        route = urlsplit(self.path).path
        if route == "/health":
            self._send_json(200, {"status": "ok", "service": "mem0", "user": USER_ID})
            return
        if route == "/memory":
            self._list_memories()
            return

        query = self._search_query(self.path)
        if query is None:
            self._send_json(404)
        elif not query:
            self._send_json(400)
        else:
            self._search_memories(query)

    def _list_memories(self):
        """Reply with up to 10 memories belonging to USER_ID."""
        try:
            records, _next_offset = qdrant_client.scroll(
                collection_name=self.COLLECTION,
                # Same user restriction the search route applies.
                scroll_filter=self._user_filter(),
                limit=10,
                with_payload=True,
                with_vectors=False,
            )
            memories = [self._as_row(record) for record in records]
        except Exception as e:  # request boundary: report instead of crashing
            self._send_json(500, {"error": str(e)})
        else:
            self._send_json(200, memories)

    def _search_memories(self, query):
        """Reply with the 5 memories of USER_ID most similar to *query*."""
        try:
            results = qdrant_client.query_points(
                collection_name=self.COLLECTION,
                query=get_embedding(query),
                query_filter=self._user_filter(),
                limit=5,
                with_payload=True,
            )
            memories = [self._as_row(hit, score=hit.score) for hit in results.points]
        except Exception as e:  # request boundary: report instead of crashing
            self._send_json(500, {"error": str(e)})
        else:
            self._send_json(200, memories)

    def do_POST(self):
        """Store the posted text as a memory for USER_ID."""
        from datetime import datetime
        from urllib.parse import urlsplit

        if urlsplit(self.path).path != "/memory":
            self._send_json(404)
            return

        text = self._read_text()
        if not text:
            self._send_json(400)
            return

        try:
            embedding = get_embedding(text)
            qdrant_client.upsert(
                collection_name=self.COLLECTION,
                points=[
                    models.PointStruct(
                        id=self._point_id(text),
                        vector=embedding,
                        payload={
                            "text": text,
                            "user_id": USER_ID,
                            # Local time with UTC offset, second precision --
                            # the format ``date -Iseconds`` printed, without
                            # spawning a shell per request.
                            "timestamp": datetime.now().astimezone().isoformat(timespec="seconds"),
                        },
                    )
                ],
            )
        except Exception as e:  # request boundary: report instead of crashing
            self._send_json(500, {"error": str(e)})
        else:
            self._send_json(200, {"status": "ok", "text": text})