Initial commit: mem0 docker with qdrant

2026-04-13 16:38:35 +02:00
commit f337c94281
7 changed files with 532 additions and 0 deletions
@@ -0,0 +1,10 @@
 # Mem0 Configuration
 MEM0_PORT=8889
 # Qdrant Configuration
 QDRANT_HOST=qdrant
 QDRANT_PORT=6333
 # Embedding Configuration
 EMBEDDING_URL=http://host.docker.internal:4700/embedding
 EMBEDDING_DIMS=1024
@@ -0,0 +1,28 @@
 # Slim Python 3.11 base image
 FROM python:3.11-slim
 WORKDIR /app
 # Install system dependencies (curl is used by the HEALTHCHECK below)
 RUN apt-get update && apt-get install -y --no-install-recommends \
    curl \
    && rm -rf /var/lib/apt/lists/*
 # Copy requirements first so the pip layer is reused when only main.py changes
 COPY requirements.txt .
 RUN pip install --no-cache-dir -r requirements.txt
 # Copy application code
 COPY main.py .
 # Create history directory (docker-compose bind-mounts ./history onto it)
 RUN mkdir -p /app/history
 # Port uvicorn listens on inside the container
 EXPOSE 8000
 # Health check: fails when GET /health does not answer with a success status
 HEALTHCHECK --interval=30s --timeout=10s --start-period=5s --retries=3 \
    CMD curl -f http://localhost:8000/health || exit 1
 # Run the application
 CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8000"]
@@ -0,0 +1,50 @@
 name: mem0-selfhost
 services:
  # API container built from the local Dockerfile; published on MEM0_PORT (default 8889)
  mem0:
    build:
      context: .
      dockerfile: Dockerfile
    ports:
      - "${MEM0_PORT:-8889}:8000"
    env_file:
      - .env
    networks:
      - mem0_network
    # Lets the container reach the embedding server running on the Docker host
    extra_hosts:
      - "host.docker.internal:host-gateway"
    # main.py talks to Qdrant while it is being imported, so wait until Qdrant
    # passes its healthcheck instead of merely having been started.
    depends_on:
      qdrant:
        condition: service_healthy
    environment:
      - PYTHONDONTWRITEBYTECODE=1
      - PYTHONUNBUFFERED=1
      - QDRANT_HOST=${QDRANT_HOST}
      - QDRANT_PORT=${QDRANT_PORT}
      - EMBEDDING_URL=${EMBEDDING_URL}
      - EMBEDDING_DIMS=${EMBEDDING_DIMS}
    volumes:
      - ./history:/app/history
    restart: unless-stopped
  # Vector store; data lives in the named volume qdrant_storage
  qdrant:
    image: qdrant/qdrant:latest
    ports:
      - "6333:6333"
    networks:
      - mem0_network
    volumes:
      - qdrant_storage:/qdrant/storage
    healthcheck:
      # NOTE(review): the qdrant image does not appear to bundle wget or curl,
      # so probe the HTTP port with bash's /dev/tcp redirection instead.
      test: ["CMD-SHELL", "bash -c ':> /dev/tcp/127.0.0.1/6333' || exit 1"]
      interval: 10s
      timeout: 5s
      retries: 3
      start_period: 10s
    restart: unless-stopped
 volumes:
  qdrant_storage:
 networks:
  mem0_network:
    driver: bridge
@@ -0,0 +1,10 @@
 # Mem0 Configuration
 MEM0_PORT=8889
 # Qdrant Configuration
 QDRANT_HOST=qdrant
 QDRANT_PORT=6333
 # Embedding Configuration
 EMBEDDING_URL=http://host.docker.internal:4700/embedding
 EMBEDDING_DIMS=1024
@@ -0,0 +1,267 @@
 """
 Mem0 API Server - Simple wrapper around mem0ai with llama.cpp embedding support
 """
 import os
 import requests
 from fastapi import FastAPI, HTTPException
 from pydantic import BaseModel
 from typing import Optional, List, Any, Dict
 from qdrant_client import QdrantClient
 from qdrant_client.models import Distance, VectorParams, PointStruct, Filter, FieldCondition, MatchValue
 app = FastAPI(title="Mem0 API", version="1.0.0")
 # Runtime settings; each one can be overridden through an environment variable
 QDRANT_HOST = os.environ.get("QDRANT_HOST", "qdrant")
 QDRANT_PORT = int(os.environ.get("QDRANT_PORT", 6333))
 EMBEDDING_URL = os.environ.get(
    "EMBEDDING_URL", "http://host.docker.internal:4700/embedding"
 )
 EMBEDDING_DIMS = int(os.environ.get("EMBEDDING_DIMS", 1024))
 # Qdrant collection that holds every stored memory
 COLLECTION_NAME = "memories"
 class LlamaCppEmbedder:
    """Client for a llama.cpp embedding HTTP endpoint.

    Attributes:
        base_url: Full URL that embedding requests are POSTed to.
        dims: Vector size configured for this embedder (stored, not enforced here).
        timeout: Seconds to wait for the endpoint before giving up.
    """
    def __init__(self, base_url: str, dims: int, timeout: float = 30.0):
        self.base_url = base_url
        self.dims = dims
        self.timeout = timeout
    def get_embedding(self, text: str) -> List[float]:
        """Return the embedding vector for ``text``.

        Raises:
            requests.RequestException: on connection failure, timeout, or a
                non-success HTTP status.
        """
        # Without a timeout a stalled embedding server would hang the caller forever.
        response = requests.post(
            self.base_url,
            json={"content": text},
            headers={"Content-Type": "application/json"},
            timeout=self.timeout,
        )
        response.raise_for_status()
        result = response.json()
        # The vector is read from the first item's "embedding" list, first entry.
        embedding = result[0]["embedding"][0]
        return embedding
 # Module-level singletons used by the request handlers; both are created when this module is imported
 embedder = LlamaCppEmbedder(EMBEDDING_URL, EMBEDDING_DIMS)
 qdrant_client = QdrantClient(host=QDRANT_HOST, port=QDRANT_PORT)
 def init_collection():
    """Create the memories collection in Qdrant unless one with that name is already present"""
    existing = {entry.name for entry in qdrant_client.get_collections().collections}
    if COLLECTION_NAME in existing:
        return
    vector_settings = VectorParams(size=EMBEDDING_DIMS, distance=Distance.COSINE)
    qdrant_client.create_collection(
        collection_name=COLLECTION_NAME,
        vectors_config=vector_settings,
    )
 # Initialize collection on startup
 init_collection()
 # Request body for POST /add
 class AddMemoryRequest(BaseModel):
    # Text that is embedded and stored
    message: str
    # Owner of the memory; "default" when omitted
    user_id: Optional[str] = "default"
    # Optional extra data stored alongside the text
    metadata: Optional[dict] = None
 # Response body for POST /add
 class AddMemoryResponse(BaseModel):
    success: bool
    # Identifier assigned to the stored memory
    memory_id: Optional[str]
    # Human-readable outcome text
    message: str
 # Request body for POST /search
 class SearchMemoryRequest(BaseModel):
    # Text whose embedding is used as the search vector
    query: str
    # Restricts results to this user; "default" when omitted
    user_id: Optional[str] = "default"
    # Maximum number of hits to return
    limit: Optional[int] = 5
 # One hit returned by POST /search
 class SearchResult(BaseModel):
    # Point id, rendered as a string
    id: str
    text: str
    user_id: str
    # Score Qdrant reported for this hit
    score: float
    metadata: Optional[dict]
 # Response body for POST /search
 class SearchMemoryResponse(BaseModel):
    # Hits in the order Qdrant returned them
    results: List[SearchResult]
 # One stored memory as returned by GET /memories
 class MemoryItem(BaseModel):
    # Point id, rendered as a string
    id: str
    text: str
    user_id: str
    metadata: Optional[dict]
 # Response body for GET /memories
 class GetMemoriesResponse(BaseModel):
    memories: List[MemoryItem]
 # Response body for DELETE /delete/{memory_id}
 class DeleteMemoryResponse(BaseModel):
    success: bool
    # Echo of the id that was requested for deletion
    memory_id: str
    # Human-readable outcome text
    message: str
@app.get("/health")
async def health_check():
    """Report whether the embedding endpoint and Qdrant answer over HTTP.

    Returns:
        A dict whose "status" is "healthy" when both probes pass, "degraded"
        when at least one probe fails without raising, and "unhealthy" (with an
        "error" string) when a probe raises.
    """
    # Function-scope import keeps this handler self-contained.
    import asyncio

    try:
        # requests is blocking: run each probe in a worker thread so a slow
        # dependency (up to 5 s each) cannot stall the event loop.
        test_response = await asyncio.to_thread(
            requests.get, EMBEDDING_URL.replace("/embedding", "/"), timeout=5
        )
        # NOTE(review): the "gzip" substring fallback is preserved as-is; its
        # intent is not evident from this file - confirm.
        embedding_healthy = (
            test_response.status_code == 200 or "gzip" in test_response.text.lower()
        )
        # Test Qdrant
        qdrant_url = f"http://{QDRANT_HOST}:{QDRANT_PORT}/"
        qdrant_response = await asyncio.to_thread(requests.get, qdrant_url, timeout=5)
        qdrant_healthy = qdrant_response.status_code == 200
        return {
            "status": "healthy" if (embedding_healthy and qdrant_healthy) else "degraded",
            "service": "mem0-api",
            "embedding_endpoint": embedding_healthy,
            "qdrant": qdrant_healthy
        }
    except Exception as e:
        return {"status": "unhealthy", "service": "mem0-api", "error": str(e)}
@app.post("/add", response_model=AddMemoryResponse)
async def add_memory(request: AddMemoryRequest):
    """Embed ``request.message`` and store it in Qdrant under a new UUID.

    Returns:
        AddMemoryResponse carrying the generated memory id.

    Raises:
        HTTPException: status 500 with the underlying error text when the
            embedding call or the Qdrant upsert fails.
    """
    import asyncio
    import uuid

    try:
        memory_id = str(uuid.uuid4())
        # The embedder and the Qdrant client are blocking; run them in worker
        # threads so other requests keep being served meanwhile.
        embedding = await asyncio.to_thread(embedder.get_embedding, request.message)
        point = PointStruct(
            id=memory_id,
            vector=embedding,
            payload={
                "text": request.message,
                "user_id": request.user_id,
                "metadata": request.metadata or {}
            }
        )
        await asyncio.to_thread(
            qdrant_client.upsert, collection_name=COLLECTION_NAME, points=[point]
        )
        return AddMemoryResponse(
            success=True,
            memory_id=memory_id,
            message="Memory added successfully"
        )
    except Exception as e:
        # Chain the cause so the original traceback survives in server logs.
        raise HTTPException(status_code=500, detail=str(e)) from e
@app.post("/search", response_model=SearchMemoryResponse)
async def search_memory(request: SearchMemoryRequest):
    """Return the stored memories most similar to ``request.query``.

    Results are restricted to ``request.user_id`` when it is non-empty.

    Raises:
        HTTPException: status 500 with the underlying error text when the
            embedding call or the Qdrant query fails.
    """
    import asyncio

    try:
        # Blocking HTTP call: run it off the event loop.
        query_embedding = await asyncio.to_thread(embedder.get_embedding, request.query)
        # Build filter for user_id (an empty/None user_id searches across all users)
        query_filter = None
        if request.user_id:
            query_filter = Filter(
                must=[FieldCondition(key="user_id", match=MatchValue(value=request.user_id))]
            )
        # Search in Qdrant using query_points - pass vector directly
        results = await asyncio.to_thread(
            qdrant_client.query_points,
            collection_name=COLLECTION_NAME,
            query=query_embedding,
            limit=request.limit,
            query_filter=query_filter,
            with_payload=True,
            with_vectors=False,
        )
        formatted_results = [
            SearchResult(
                id=str(hit.id),
                text=hit.payload.get("text", ""),
                user_id=hit.payload.get("user_id", ""),
                score=hit.score,
                metadata=hit.payload.get("metadata")
            )
            for hit in results.points
        ]
        return SearchMemoryResponse(results=formatted_results)
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e)) from e
@app.get("/memories", response_model=GetMemoriesResponse)
async def get_memories(user_id: Optional[str] = "default"):
    """Return every stored memory for ``user_id``, paging through Qdrant 100 at a time.

    An empty ``user_id`` disables the filter and returns memories of all users.

    Raises:
        HTTPException: status 500 with the underlying error text when a Qdrant
            scroll request fails.
    """
    import asyncio

    try:
        # Build filter for user_id
        scroll_filter = None
        if user_id:
            scroll_filter = Filter(
                must=[FieldCondition(key="user_id", match=MatchValue(value=user_id))]
            )
        memories = []
        offset = None
        while True:
            # The Qdrant client is blocking; fetch each page in a worker thread.
            result, next_offset = await asyncio.to_thread(
                qdrant_client.scroll,
                collection_name=COLLECTION_NAME,
                limit=100,
                offset=offset,
                scroll_filter=scroll_filter,
                with_payload=True,
                with_vectors=False,
            )
            memories.extend(
                MemoryItem(
                    id=str(point.id),
                    text=point.payload.get("text", ""),
                    user_id=point.payload.get("user_id", ""),
                    metadata=point.payload.get("metadata")
                )
                for point in result
            )
            # Only a missing offset marks the last page; a falsy id such as 0 does not.
            if next_offset is None:
                break
            offset = next_offset
        return GetMemoriesResponse(memories=memories)
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e)) from e
@app.delete("/delete/{memory_id}", response_model=DeleteMemoryResponse)
async def delete_memory(memory_id: str):
    """Delete the memory whose point id is ``memory_id``.

    Raises:
        HTTPException: status 400 when ``memory_id`` is not a UUID (the only id
            form /add produces); status 500 with the underlying error text when
            the Qdrant delete fails.
    """
    import asyncio
    import uuid

    # Reject malformed ids up front instead of surfacing a backend error as a 500.
    try:
        uuid.UUID(memory_id)
    except ValueError:
        raise HTTPException(status_code=400, detail="memory_id must be a valid UUID") from None

    try:
        # Blocking client call: run it off the event loop.
        await asyncio.to_thread(
            qdrant_client.delete,
            collection_name=COLLECTION_NAME,
            points_selector=[memory_id],
        )
        return DeleteMemoryResponse(
            success=True,
            memory_id=memory_id,
            message="Memory deleted successfully"
        )
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e)) from e
 if __name__ == "__main__":
    # Direct execution: serve the app with uvicorn on all interfaces, port 8000
    from uvicorn import run as serve
    serve(app, port=8000, host="0.0.0.0")
@@ -0,0 +1,162 @@
 #!/usr/bin/env python3
 """
 mem0 Memory Server - Persistent Semantic Memory for Hermes Agent
 Direct integration with llama-embed on port 4700
 """
 import json
 import os
 import uuid
 from datetime import datetime
 from http.server import HTTPServer, BaseHTTPRequestHandler
 from urllib.parse import unquote
 import requests
 from qdrant_client import QdrantClient, models
 # Settings: environment overrides with local defaults
 QDRANT_URL = os.getenv("QDRANT_URL", "http://localhost:6333")
 EMBEDDING_URL = os.getenv("EMBEDDING_URL", "http://localhost:4700")
 PORT = int(os.getenv("MEM0_PORT", 8080))
 # Fixed user tag for this server
 USER_ID = "henry_hofmann"
 # Initialize Qdrant client
 qdrant_client = QdrantClient(url=QDRANT_URL)
 # Create the collection only when it is genuinely missing. An explicit
 # existence check replaces the former bare `except:`, which treated every
 # failure (connection errors, even KeyboardInterrupt) as "collection absent".
 if not qdrant_client.collection_exists("hermes_memory"):
    qdrant_client.create_collection(
        collection_name="hermes_memory",
        vectors_config=models.VectorParams(size=1024, distance=models.Distance.COSINE)
    )
 def get_embedding(text):
    """Return the embedding vector the llama-embed server produces for ``text``"""
    payload = {"input": text, "model": "BAAI/bge-m3"}
    reply = requests.post(f"{EMBEDDING_URL}/v1/embeddings", json=payload, timeout=30)
    reply.raise_for_status()
    first_item = reply.json()["data"][0]
    return first_item["embedding"]
 class MemoryHandler(BaseHTTPRequestHandler):
    """HTTP handler exposing the hermes_memory collection.

    Routes:
        GET  /health                 - static liveness document
        GET  /memory                 - up to 10 stored memories (no user filter)
        GET  /memory/<query>/search  - top-5 semantic search limited to USER_ID
        POST /memory                 - store the JSON body's "text" as a memory
    """
    def log_message(self, format, *args):
        pass  # Suppress logging
    def _send_json(self, status, payload=None):
        """Send ``status``; when ``payload`` is given, send it as a JSON body too."""
        self.send_response(status)
        if payload is not None:
            self.send_header("Content-Type", "application/json")
        self.end_headers()
        if payload is not None:
            self.wfile.write(json.dumps(payload, default=str).encode())
    @staticmethod
    def _point_id(text):
        """Return a deterministic UUID string derived from ``text``.

        The built-in hash() is salted per process for strings, so ids based on
        it change between restarts, and a modulo-1,000,000 id space lets
        unrelated texts overwrite each other.
        """
        return str(uuid.uuid5(uuid.NAMESPACE_URL, text))
    @staticmethod
    def _search_query(path):
        """Return the URL-decoded <query> segment of /memory/<query>/search."""
        return unquote(path.split("/")[2])
    def _list_memories(self):
        """Answer GET /memory with the first 10 records of the collection."""
        try:
            records, _next_offset = qdrant_client.scroll(
                collection_name="hermes_memory",
                limit=10,
                with_payload=True,
                with_vectors=False
            )
            memories = [
                {
                    "id": record.id,
                    "text": (record.payload or {}).get("text", ""),
                    "timestamp": (record.payload or {}).get("timestamp", "")
                }
                for record in records
            ]
        except Exception as e:
            self._send_json(500, {"error": str(e)})
            return
        self._send_json(200, memories)
    def _search_memories(self):
        """Answer GET /memory/<query>/search with the 5 closest memories of USER_ID."""
        try:
            query_vector = get_embedding(self._search_query(self.path))
            results = qdrant_client.query_points(
                collection_name="hermes_memory",
                query=query_vector,
                query_filter=models.Filter(
                    must=[models.FieldCondition(key="user_id", match=models.MatchValue(value=USER_ID))]
                ),
                limit=5,
                with_payload=True
            )
            memories = [
                {
                    "id": hit.id,
                    "text": (hit.payload or {}).get("text", ""),
                    "score": hit.score,
                    "timestamp": (hit.payload or {}).get("timestamp", "")
                }
                for hit in results.points
            ]
        except Exception as e:
            self._send_json(500, {"error": str(e)})
            return
        self._send_json(200, memories)
    def do_GET(self):
        """Dispatch GET requests by path; unknown paths get an empty 404."""
        if self.path == "/health":
            self._send_json(200, {"status": "ok", "service": "mem0", "user": USER_ID})
        elif self.path == "/memory":
            self._list_memories()
        elif self.path.startswith("/memory/") and self.path.endswith("/search"):
            self._search_memories()
        else:
            self._send_json(404)
    def do_POST(self):
        """Store the "text" field of a JSON body posted to /memory.

        Responds 400 (empty body) when the request is malformed or "text" is
        missing/empty, 500 with an "error" field when embedding or storage
        fails, and 200 with the stored text otherwise.
        """
        if self.path != "/memory":
            self._send_json(404)
            return
        try:
            content_length = int(self.headers.get("Content-Length", 0))
            if content_length < 0:
                # A negative length would make read() wait for end of stream.
                raise ValueError("negative Content-Length")
            post_data = json.loads(self.rfile.read(content_length).decode())
            text = post_data.get("text", "")
        except (ValueError, AttributeError):
            # ValueError: bad length, invalid UTF-8 or invalid JSON;
            # AttributeError: the JSON body is not an object.
            self._send_json(400)
            return
        if not isinstance(text, str) or not text:
            self._send_json(400)
            return
        try:
            embedding = get_embedding(text)
            qdrant_client.upsert(
                collection_name="hermes_memory",
                points=[
                    models.PointStruct(
                        id=self._point_id(text),
                        vector=embedding,
                        payload={
                            "text": text,
                            "user_id": USER_ID,
                            # Same ISO-8601 shape as `date -Iseconds`, without spawning a shell.
                            "timestamp": datetime.now().astimezone().isoformat(timespec="seconds")
                        }
                    )
                ]
            )
        except Exception as e:
            self._send_json(500, {"error": str(e)})
            return
        self._send_json(200, {"status": "ok", "text": text})
 if __name__ == "__main__":
    # Bind on all interfaces, announce the configuration, then serve until stopped
    httpd = HTTPServer(("0.0.0.0", PORT), MemoryHandler)
    startup_lines = (
        f"mem0 server running on port {PORT}",
        f"Qdrant: {QDRANT_URL}",
        f"Embedding: {EMBEDDING_URL}",
    )
    for line in startup_lines:
        print(line)
    httpd.serve_forever()
@@ -0,0 +1,5 @@
 mem0ai>=1.0.0
 qdrant-client>=1.12.0
 fastapi>=0.109.0
 uvicorn>=0.27.0
 pydantic>=2.0.0
 # main.py imports requests directly; declare it rather than rely on a transitive install
 requests>=2.31.0