Initial commit: mem0 docker with qdrant

This commit is contained in:
hhofmann
2026-04-13 16:38:35 +02:00
commit f337c94281
7 changed files with 532 additions and 0 deletions
+10
View File
@@ -0,0 +1,10 @@
# Mem0 Configuration
# Host port that docker-compose publishes for the API; inside the container the server listens on 8000.
MEM0_PORT=8889
# Qdrant Configuration
# "qdrant" is the name of the Qdrant service in docker-compose, reachable over the shared compose network.
QDRANT_HOST=qdrant
QDRANT_PORT=6333
# Embedding Configuration
# Embedding endpoint running on the Docker host; docker-compose maps host.docker.internal to host-gateway.
EMBEDDING_URL=http://host.docker.internal:4700/embedding
# Vector size main.py uses when it creates the Qdrant collection; should match the embedding model's output size.
EMBEDDING_DIMS=1024
+28
View File
@@ -0,0 +1,28 @@
# Image for the Mem0 API server (main.py served by uvicorn on port 8000).
FROM python:3.11-slim
WORKDIR /app
# Install system dependencies (curl is only needed by the HEALTHCHECK below)
RUN apt-get update && apt-get install -y --no-install-recommends \
curl \
&& rm -rf /var/lib/apt/lists/*
# Copy requirements first so the pip layer is rebuilt only when requirements.txt changes
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt
# Copy application code
COPY main.py .
# Create history directory (docker-compose bind-mounts ./history onto this path)
RUN mkdir -p /app/history
# Expose the port uvicorn listens on; docker-compose maps it to ${MEM0_PORT} on the host
EXPOSE 8000
# Health check
# NOTE: /health in main.py returns a normal JSON body even when it reports
# "degraded" or "unhealthy", so `curl -f` only fails when the server itself
# does not answer.
HEALTHCHECK --interval=30s --timeout=10s --start-period=5s --retries=3 \
CMD curl -f http://localhost:8000/health || exit 1
# Run the application
CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8000"]
+50
View File
@@ -0,0 +1,50 @@
name: mem0-selfhost

services:
  # Mem0 API server built from the local Dockerfile.
  mem0:
    build:
      context: .
      dockerfile: Dockerfile
    ports:
      - "${MEM0_PORT:-8889}:8000"
    env_file:
      - .env
    networks:
      - mem0_network
    extra_hosts:
      # Lets the container reach the embedding server running on the Docker host.
      - "host.docker.internal:host-gateway"
    depends_on:
      # main.py connects to Qdrant while it is being imported, so wait until
      # Qdrant accepts connections instead of only waiting for its container.
      qdrant:
        condition: service_healthy
    environment:
      - PYTHONDONTWRITEBYTECODE=1
      - PYTHONUNBUFFERED=1
      # Defaults keep these from expanding to empty strings when a variable is
      # not defined; an empty QDRANT_PORT / EMBEDDING_DIMS breaks int() in main.py.
      - QDRANT_HOST=${QDRANT_HOST:-qdrant}
      - QDRANT_PORT=${QDRANT_PORT:-6333}
      - EMBEDDING_URL=${EMBEDDING_URL:-http://host.docker.internal:4700/embedding}
      - EMBEDDING_DIMS=${EMBEDDING_DIMS:-1024}
    volumes:
      - ./history:/app/history
    restart: unless-stopped

  # Vector database holding the stored memories.
  qdrant:
    image: qdrant/qdrant:latest
    ports:
      - "6333:6333"
    networks:
      - mem0_network
    volumes:
      - qdrant_storage:/qdrant/storage
    healthcheck:
      # The check must not depend on wget/curl being present in the qdrant
      # image; bash's /dev/tcp redirection probes the HTTP port directly.
      test: ["CMD-SHELL", "bash -c ':> /dev/tcp/127.0.0.1/6333' || exit 1"]
      interval: 10s
      timeout: 5s
      retries: 3
      start_period: 10s
    restart: unless-stopped

volumes:
  qdrant_storage:

networks:
  mem0_network:
    driver: bridge
+10
View File
@@ -0,0 +1,10 @@
# Mem0 Configuration
# Host port that docker-compose publishes for the API; inside the container the server listens on 8000.
MEM0_PORT=8889
# Qdrant Configuration
# "qdrant" is the name of the Qdrant service in docker-compose, reachable over the shared compose network.
QDRANT_HOST=qdrant
QDRANT_PORT=6333
# Embedding Configuration
# Embedding endpoint running on the Docker host; docker-compose maps host.docker.internal to host-gateway.
EMBEDDING_URL=http://host.docker.internal:4700/embedding
# Vector size main.py uses when it creates the Qdrant collection; should match the embedding model's output size.
EMBEDDING_DIMS=1024
+267
View File
@@ -0,0 +1,267 @@
"""
Mem0 API Server - Simple wrapper around mem0ai with llama.cpp embedding support
"""
import os
import requests
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
from typing import Optional, List, Any, Dict
from qdrant_client import QdrantClient
from qdrant_client.models import Distance, VectorParams, PointStruct, Filter, FieldCondition, MatchValue
app = FastAPI(title="Mem0 API", version="1.0.0")
# Configuration from environment.
# `os.getenv(NAME) or default` treats an empty value like an unset one. An
# empty value (e.g. `QDRANT_PORT=` forwarded by docker-compose for an undefined
# variable) would otherwise reach int("") and abort the import with ValueError.
QDRANT_HOST = os.getenv("QDRANT_HOST") or "qdrant"
QDRANT_PORT = int(os.getenv("QDRANT_PORT") or 6333)
EMBEDDING_URL = os.getenv("EMBEDDING_URL") or "http://host.docker.internal:4700/embedding"
EMBEDDING_DIMS = int(os.getenv("EMBEDDING_DIMS") or 1024)
# Name of the Qdrant collection that holds every stored memory.
COLLECTION_NAME = "memories"
class LlamaCppEmbedder:
    """Custom embedder for a llama.cpp embedding endpoint.

    Args:
        base_url: Full URL of the embedding endpoint.
        dims: Expected size of the returned vectors (stored for reference).
        timeout: Seconds to wait for the endpoint before giving up. Without a
            timeout `requests` waits indefinitely on a hung server.
    """

    def __init__(self, base_url: str, dims: int, timeout: float = 30.0):
        self.base_url = base_url
        self.dims = dims
        self.timeout = timeout

    def get_embedding(self, text: str) -> List[float]:
        """Request the embedding vector for `text`.

        Args:
            text: Text to embed.

        Returns:
            The embedding as a list of floats.

        Raises:
            requests.RequestException: On connection errors, timeouts or a
                non-2xx response.
            ValueError: If the response body does not contain an embedding.
        """
        response = requests.post(
            self.base_url,
            json={"content": text},
            headers={"Content-Type": "application/json"},
            timeout=self.timeout,
        )
        response.raise_for_status()
        return self._extract_embedding(response.json())

    @staticmethod
    def _extract_embedding(result: Any) -> List[float]:
        """Pull the vector out of a decoded endpoint response.

        The expected shape is `[{"embedding": [[...]]}]`: a list of result
        objects whose "embedding" holds a list of vectors, of which the first
        is used. A bare result object and a flat "embedding" list are accepted
        as well (presumably what other server versions return - TODO confirm).

        Raises:
            ValueError: If no non-empty embedding can be found.
        """
        item = result[0] if isinstance(result, list) and result else result
        if not isinstance(item, dict) or "embedding" not in item:
            raise ValueError("Embedding endpoint response contains no 'embedding' field")

        embedding = item["embedding"]
        # Nested form: a list of vectors; keep the first one.
        if isinstance(embedding, list) and embedding and isinstance(embedding[0], list):
            embedding = embedding[0]
        if not isinstance(embedding, list) or not embedding:
            raise ValueError("Embedding endpoint returned an empty embedding")
        return embedding
# Shared embedder and Qdrant client used by every endpoint
embedder = LlamaCppEmbedder(base_url=EMBEDDING_URL, dims=EMBEDDING_DIMS)
qdrant_client = QdrantClient(host=QDRANT_HOST, port=QDRANT_PORT)


def init_collection():
    """Create the memories collection in Qdrant unless it already exists."""
    existing = {c.name for c in qdrant_client.get_collections().collections}
    if COLLECTION_NAME in existing:
        return
    # Cosine distance over vectors of the configured embedding size
    vector_settings = VectorParams(size=EMBEDDING_DIMS, distance=Distance.COSINE)
    qdrant_client.create_collection(
        collection_name=COLLECTION_NAME,
        vectors_config=vector_settings,
    )


# Runs at import time, so Qdrant has to be reachable when this module is loaded
init_collection()
class AddMemoryRequest(BaseModel):
    """Request body of POST /add."""

    # Text to embed and store.
    message: str
    # Owner of the memory; "default" when the client omits it.
    user_id: Optional[str] = "default"
    # Optional extra data; add_memory stores {} when this is missing or null.
    metadata: Optional[dict] = None
class AddMemoryResponse(BaseModel):
    """Response body of POST /add."""

    # Whether the memory was stored.
    success: bool
    # Id of the stored memory (a UUID string generated by add_memory).
    memory_id: Optional[str]
    # Human-readable outcome.
    message: str
class SearchMemoryRequest(BaseModel):
    """Request body of POST /search."""

    # Text whose embedding is compared against the stored memories.
    query: str
    # Restricts the search to this user's memories; an empty or null value
    # disables the restriction in search_memory.
    user_id: Optional[str] = "default"
    # Maximum number of hits, passed to Qdrant as-is.
    limit: Optional[int] = 5
class SearchResult(BaseModel):
    """One hit returned by POST /search."""

    # Point id in Qdrant, rendered as a string.
    id: str
    # Stored memory text.
    text: str
    # Owner recorded in the point payload.
    user_id: str
    # Score reported by Qdrant for this hit.
    score: float
    # Metadata stored with the memory, if any.
    metadata: Optional[dict]
class SearchMemoryResponse(BaseModel):
    """Response body of POST /search."""

    # Hits in the order Qdrant returned them.
    results: List[SearchResult]
class MemoryItem(BaseModel):
    """One stored memory as listed by GET /memories."""

    # Point id in Qdrant, rendered as a string.
    id: str
    # Stored memory text.
    text: str
    # Owner recorded in the point payload.
    user_id: str
    # Metadata stored with the memory, if any.
    metadata: Optional[dict]
class GetMemoriesResponse(BaseModel):
    """Response body of GET /memories."""

    # Every memory that matched the requested user.
    memories: List[MemoryItem]
class DeleteMemoryResponse(BaseModel):
    """Response body of DELETE /delete/{memory_id}."""

    # Whether the delete request was carried out.
    success: bool
    # Id that was passed in the URL.
    memory_id: str
    # Human-readable outcome.
    message: str
@app.get("/health")
async def health_check():
    """Report whether the embedding endpoint and Qdrant answer HTTP requests.

    Returns:
        A dict with "status" ("healthy", "degraded" or "unhealthy") and
        "service". Successful probes add the "embedding_endpoint" and "qdrant"
        booleans; a failed probe adds "error" instead. The dict is returned as
        a normal response in every case, so the outcome is carried by the
        "status" field rather than by the HTTP status code.
    """
    import asyncio

    def _probe() -> Dict[str, Any]:
        # Test embedding endpoint via the server root next to /embedding.
        # NOTE(review): the "gzip" substring check is kept from the original;
        # presumably it tolerates a non-200 reply from that root page - confirm.
        test_response = requests.get(EMBEDDING_URL.replace("/embedding", "/"), timeout=5)
        embedding_healthy = (
            test_response.status_code == 200 or "gzip" in test_response.text.lower()
        )

        # Test Qdrant
        qdrant_response = requests.get(f"http://{QDRANT_HOST}:{QDRANT_PORT}/", timeout=5)
        qdrant_healthy = qdrant_response.status_code == 200

        return {
            "status": "healthy" if (embedding_healthy and qdrant_healthy) else "degraded",
            "service": "mem0-api",
            "embedding_endpoint": embedding_healthy,
            "qdrant": qdrant_healthy,
        }

    try:
        # requests blocks; running the probes in a worker thread keeps the
        # event loop free to serve other requests while they wait.
        return await asyncio.to_thread(_probe)
    except Exception as e:
        return {"status": "unhealthy", "service": "mem0-api", "error": str(e)}
@app.post("/add", response_model=AddMemoryResponse)
async def add_memory(request: AddMemoryRequest):
    """Embed a message and store it as a new memory point in Qdrant.

    Args:
        request: Message text, owning user id and optional metadata.

    Returns:
        AddMemoryResponse carrying the freshly generated UUID of the memory.

    Raises:
        HTTPException: 500 with the underlying error text when the embedding
            request or the Qdrant upsert fails.
    """
    import asyncio
    import uuid

    try:
        memory_id = str(uuid.uuid4())

        # The embedding request and the Qdrant client are synchronous; run them
        # in a worker thread so they do not stall the event loop.
        embedding = await asyncio.to_thread(embedder.get_embedding, request.message)

        point = PointStruct(
            id=memory_id,
            vector=embedding,
            payload={
                "text": request.message,
                "user_id": request.user_id,
                "metadata": request.metadata or {},
            },
        )
        await asyncio.to_thread(
            qdrant_client.upsert,
            collection_name=COLLECTION_NAME,
            points=[point],
        )

        return AddMemoryResponse(
            success=True,
            memory_id=memory_id,
            message="Memory added successfully",
        )
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e)) from e
@app.post("/search", response_model=SearchMemoryResponse)
async def search_memory(request: SearchMemoryRequest):
    """Return the stored memories most similar to the query text.

    Args:
        request: Query text, optional user id filter and result limit.

    Returns:
        SearchMemoryResponse with one SearchResult per Qdrant hit.

    Raises:
        HTTPException: 500 with the underlying error text when the embedding
            request or the Qdrant query fails.
    """
    import asyncio

    try:
        # The embedding request and the Qdrant client are synchronous; run them
        # in a worker thread so they do not stall the event loop.
        query_embedding = await asyncio.to_thread(embedder.get_embedding, request.query)

        # Restrict the search to one user unless user_id is empty or null
        query_filter = None
        if request.user_id:
            query_filter = Filter(
                must=[FieldCondition(key="user_id", match=MatchValue(value=request.user_id))]
            )

        results = await asyncio.to_thread(
            qdrant_client.query_points,
            collection_name=COLLECTION_NAME,
            query=query_embedding,
            limit=request.limit,
            query_filter=query_filter,
            with_payload=True,
            with_vectors=False,
        )

        formatted_results = [
            SearchResult(
                id=str(hit.id),
                text=hit.payload.get("text", ""),
                user_id=hit.payload.get("user_id", ""),
                score=hit.score,
                metadata=hit.payload.get("metadata"),
            )
            for hit in results.points
        ]
        return SearchMemoryResponse(results=formatted_results)
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e)) from e
@app.get("/memories", response_model=GetMemoriesResponse)
async def get_memories(user_id: Optional[str] = "default"):
    """List every stored memory of a user.

    Args:
        user_id: Owner to filter by; an empty value lists all memories.

    Returns:
        GetMemoriesResponse with all matching memories, collected page by page.

    Raises:
        HTTPException: 500 with the underlying error text when a Qdrant
            scroll request fails.
    """
    import asyncio

    try:
        scroll_filter = None
        if user_id:
            scroll_filter = Filter(
                must=[FieldCondition(key="user_id", match=MatchValue(value=user_id))]
            )

        memories = []
        offset = None
        while True:
            # The Qdrant client is synchronous; fetch each page in a worker
            # thread so the event loop is not stalled.
            result, next_offset = await asyncio.to_thread(
                qdrant_client.scroll,
                collection_name=COLLECTION_NAME,
                limit=100,
                offset=offset,
                scroll_filter=scroll_filter,
                with_payload=True,
                with_vectors=False,
            )
            memories.extend(
                MemoryItem(
                    id=str(point.id),
                    text=point.payload.get("text", ""),
                    user_id=point.payload.get("user_id", ""),
                    metadata=point.payload.get("metadata"),
                )
                for point in result
            )
            # Only None marks the last page; a truthiness test would also stop
            # on a falsy next offset such as 0.
            if next_offset is None:
                break
            offset = next_offset

        return GetMemoriesResponse(memories=memories)
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e)) from e
@app.delete("/delete/{memory_id}", response_model=DeleteMemoryResponse)
async def delete_memory(memory_id: str):
    """Delete a memory by ID.

    Args:
        memory_id: UUID of the memory, as returned by POST /add.

    Returns:
        DeleteMemoryResponse echoing the id that was passed in.

    Raises:
        HTTPException: 400 when `memory_id` is not a UUID, 500 with the
            underlying error text when the Qdrant delete request fails.
    """
    import asyncio
    import uuid

    # add_memory only ever creates UUID ids, so anything else is a client
    # error. Validate outside the try block below: its `except Exception`
    # would otherwise turn this 400 into a 500.
    try:
        point_id = str(uuid.UUID(memory_id))
    except ValueError:
        raise HTTPException(
            status_code=400, detail=f"Invalid memory id: {memory_id!r}"
        ) from None

    try:
        # The Qdrant client is synchronous; run it in a worker thread so it
        # does not stall the event loop.
        await asyncio.to_thread(
            qdrant_client.delete,
            collection_name=COLLECTION_NAME,
            points_selector=[point_id],
        )
        return DeleteMemoryResponse(
            success=True,
            memory_id=memory_id,
            message="Memory deleted successfully",
        )
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e)) from e
if __name__ == "__main__":
    # Direct-run entry point: serve the app on all interfaces, port 8000.
    import uvicorn

    uvicorn.run(
        app,
        host="0.0.0.0",
        port=8000,
    )
Executable
+162
View File
@@ -0,0 +1,162 @@
#!/usr/bin/env python3
"""
mem0 Memory Server - Persistent Semantic Memory for Hermes Agent
Direct integration with llama-embed on port 4700
"""
import json
import os
import uuid
from datetime import datetime
from http.server import HTTPServer, BaseHTTPRequestHandler
from urllib.parse import unquote

import requests
from qdrant_client import QdrantClient, models
# Configuration (each value except USER_ID can be overridden through the environment)
QDRANT_URL = os.getenv("QDRANT_URL", "http://localhost:6333")
EMBEDDING_URL = os.getenv("EMBEDDING_URL", "http://localhost:4700")
PORT = int(os.getenv("MEM0_PORT", 8080))
# Every memory is stored under, and searched for, this single user id.
USER_ID = "henry_hofmann"
# Initialize Qdrant client
qdrant_client = QdrantClient(url=QDRANT_URL)

# Create the collection on first start.
# An explicit existence check replaces the former bare `except:`, which treated
# every failure of get_collection (including an unreachable Qdrant or a
# KeyboardInterrupt) as "collection missing" and then tried to create it.
if not qdrant_client.collection_exists("hermes_memory"):
    qdrant_client.create_collection(
        collection_name="hermes_memory",
        vectors_config=models.VectorParams(size=1024, distance=models.Distance.COSINE),
    )
def get_embedding(text):
    """Fetch the embedding vector for `text` from the llama-embed server.

    Posts to `{EMBEDDING_URL}/v1/embeddings` with a 30 second timeout and
    returns the "embedding" field of the first entry in the response's "data"
    list. HTTP error statuses raise through `raise_for_status()`.
    """
    endpoint = f"{EMBEDDING_URL}/v1/embeddings"
    request_body = {"input": text, "model": "BAAI/bge-m3"}
    reply = requests.post(endpoint, json=request_body, timeout=30)
    reply.raise_for_status()
    first_entry = reply.json()["data"][0]
    return first_entry["embedding"]
class MemoryHandler(BaseHTTPRequestHandler):
    """HTTP handler exposing the "hermes_memory" Qdrant collection.

    Routes:
        GET  /health                 - service info.
        GET  /memory                 - up to 10 stored memories of USER_ID.
        GET  /memory/<query>/search  - top 5 semantic matches for <query>.
        POST /memory                 - store the JSON body {"text": "..."}.

    Errors are reported as JSON objects with an "error" key; unknown paths get
    an empty 404 response.
    """

    def log_message(self, format, *args):
        pass  # Suppress logging

    def _send_json(self, status, payload):
        """Send `payload` as a JSON response with the given HTTP status."""
        # Serialize first so a serialization error cannot leave a half-sent response.
        body = json.dumps(payload, default=str).encode()
        self.send_response(status)
        self.send_header("Content-Type", "application/json")
        self.end_headers()
        self.wfile.write(body)

    def _send_empty(self, status):
        """Send a response that consists of the status line and headers only."""
        self.send_response(status)
        self.end_headers()

    @staticmethod
    def _user_filter():
        """Build the Qdrant filter matching points stored for USER_ID."""
        return models.Filter(
            must=[models.FieldCondition(key="user_id", match=models.MatchValue(value=USER_ID))]
        )

    @staticmethod
    def _memory_id(text):
        """Return a deterministic point id (UUID string) for `text`.

        The built-in hash() of a str is salted per interpreter process unless
        PYTHONHASHSEED is pinned, and reducing it modulo 1000000 makes
        unrelated texts collide and overwrite each other on upsert. A
        name-based UUID is stable across restarts, so storing the same text
        twice still updates one point.
        """
        return str(uuid.uuid5(uuid.NAMESPACE_OID, text))

    @staticmethod
    def _search_query(path):
        """Extract and URL-decode <query> from "/memory/<query>/search"."""
        return unquote(path[len("/memory/"):-len("/search")])

    def do_GET(self):
        """Dispatch GET requests to the health, list and search routes."""
        if self.path == "/health":
            self._send_json(200, {"status": "ok", "service": "mem0", "user": USER_ID})
        elif self.path == "/memory":
            self._list_memories()
        elif self.path.startswith("/memory/") and self.path.endswith("/search"):
            self._search_memories(self._search_query(self.path))
        else:
            self._send_empty(404)

    def _list_memories(self):
        """Answer GET /memory with up to 10 memories stored for USER_ID."""
        try:
            records, _next_offset = qdrant_client.scroll(
                collection_name="hermes_memory",
                # Same user restriction as the search route.
                scroll_filter=self._user_filter(),
                limit=10,
                with_payload=True,
                with_vectors=False,
            )
            memories = [
                {
                    "id": record.id,
                    "text": (record.payload or {}).get("text", ""),
                    "timestamp": (record.payload or {}).get("timestamp", ""),
                }
                for record in records
            ]
        except Exception as e:
            self._send_json(500, {"error": str(e)})
            return
        self._send_json(200, memories)

    def _search_memories(self, query):
        """Answer GET /memory/<query>/search with the 5 best matches."""
        if not query:
            self._send_json(400, {"error": "Search query must not be empty"})
            return
        try:
            query_vector = get_embedding(query)
            results = qdrant_client.query_points(
                collection_name="hermes_memory",
                query=query_vector,
                query_filter=self._user_filter(),
                limit=5,
                with_payload=True,
            )
            memories = [
                {
                    "id": hit.id,
                    "text": (hit.payload or {}).get("text", ""),
                    "score": hit.score,
                    "timestamp": (hit.payload or {}).get("timestamp", ""),
                }
                for hit in results.points
            ]
        except Exception as e:
            self._send_json(500, {"error": str(e)})
            return
        self._send_json(200, memories)

    def do_POST(self):
        """Store the "text" of a JSON body posted to /memory.

        Responds 400 when the body is missing, is not a JSON object or has no
        non-empty string "text"; 500 when embedding or storing fails; 404 for
        any other path.
        """
        if self.path != "/memory":
            self._send_empty(404)
            return

        try:
            content_length = int(self.headers.get("Content-Length") or 0)
            if content_length < 0:
                # read(-1) would wait for the client to close the connection.
                raise ValueError("negative Content-Length")
            post_data = json.loads(self.rfile.read(content_length).decode())
        except (TypeError, ValueError):
            # Covers a malformed Content-Length, undecodable bytes and invalid JSON.
            self._send_json(400, {"error": "Request body must be valid JSON"})
            return

        text = post_data.get("text", "") if isinstance(post_data, dict) else ""
        if not isinstance(text, str) or not text:
            self._send_json(400, {"error": "Field 'text' must be a non-empty string"})
            return

        try:
            embedding = get_embedding(text)
            memory_id = self._memory_id(text)
            qdrant_client.upsert(
                collection_name="hermes_memory",
                points=[
                    models.PointStruct(
                        id=memory_id,
                        vector=embedding,
                        payload={
                            "text": text,
                            "user_id": USER_ID,
                            # Local time with UTC offset, second precision - the
                            # format `date -Iseconds` prints, without a subprocess.
                            "timestamp": datetime.now().astimezone().isoformat(timespec="seconds"),
                        },
                    )
                ],
            )
        except Exception as e:
            self._send_json(500, {"error": str(e)})
            return
        self._send_json(200, {"status": "ok", "text": text, "id": memory_id})
if __name__ == "__main__":
    # Bind on all interfaces, print the effective configuration, then serve until interrupted.
    bind_address = ("0.0.0.0", PORT)
    server = HTTPServer(bind_address, MemoryHandler)
    startup_lines = (
        f"mem0 server running on port {PORT}",
        f"Qdrant: {QDRANT_URL}",
        f"Embedding: {EMBEDDING_URL}",
    )
    for line in startup_lines:
        print(line)
    server.serve_forever()
+5
View File
@@ -0,0 +1,5 @@
mem0ai>=1.0.0
qdrant-client>=1.12.0
fastapi>=0.109.0
uvicorn>=0.27.0
pydantic>=2.0.0
# main.py imports requests directly, so declare it instead of relying on a transitive install
requests>=2.31.0