Initial commit: mem0 docker with qdrant
This commit is contained in:
@@ -0,0 +1,10 @@
|
||||
# Mem0 Configuration
|
||||
MEM0_PORT=8889
|
||||
|
||||
# Qdrant Configuration
|
||||
QDRANT_HOST=qdrant
|
||||
QDRANT_PORT=6333
|
||||
|
||||
# Embedding Configuration
|
||||
EMBEDDING_URL=http://host.docker.internal:4700/embedding
|
||||
# Length of the vectors returned by the embedding endpoint. main.py uses this
# value as the vector size when it creates the "memories" Qdrant collection.
EMBEDDING_DIMS=1024
|
||||
+28
@@ -0,0 +1,28 @@
|
||||
# Image for the Mem0 API service (FastAPI app served by uvicorn on port 8000).
FROM python:3.11-slim

WORKDIR /app

# curl is the only OS package installed; the HEALTHCHECK below calls it.
RUN apt-get update \
    && apt-get install -y --no-install-recommends curl \
    && rm -rf /var/lib/apt/lists/*

# Install Python dependencies before copying the source so this layer is
# reused when only main.py changes.
COPY requirements.txt ./
RUN pip install --no-cache-dir -r requirements.txt

# Application source.
COPY main.py ./

# Directory that docker-compose bind-mounts as ./history.
RUN mkdir -p /app/history

# Port uvicorn listens on inside the container.
EXPOSE 8000

# Mark the container unhealthy when /health stops answering successfully.
HEALTHCHECK --interval=30s --timeout=10s --start-period=5s --retries=3 \
    CMD curl -f http://localhost:8000/health || exit 1

# Start the API server.
CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8000"]
|
||||
@@ -0,0 +1,50 @@
|
||||
name: mem0-selfhost

services:
  # FastAPI wrapper built from the local Dockerfile.
  mem0:
    build:
      context: .
      dockerfile: Dockerfile
    ports:
      - "${MEM0_PORT:-8889}:8000"
    env_file:
      - .env
    networks:
      - mem0_network
    extra_hosts:
      # Lets the container reach an embedding server running on the host.
      - "host.docker.internal:host-gateway"
    depends_on:
      # main.py talks to Qdrant while it is being imported, so wait for the
      # Qdrant health check instead of only for container start.
      qdrant:
        condition: service_healthy
    environment:
      - PYTHONDONTWRITEBYTECODE=1
      - PYTHONUNBUFFERED=1
      # Defaults match .env.example; without them an unset variable would be
      # passed as an empty string and break the int() parsing in main.py.
      - QDRANT_HOST=${QDRANT_HOST:-qdrant}
      - QDRANT_PORT=${QDRANT_PORT:-6333}
      - EMBEDDING_URL=${EMBEDDING_URL:-http://host.docker.internal:4700/embedding}
      - EMBEDDING_DIMS=${EMBEDDING_DIMS:-1024}
    volumes:
      - ./history:/app/history
    restart: unless-stopped

  # Vector database backing the memory store.
  qdrant:
    # NOTE(review): ":latest" is unpinned; consider pinning a tested version.
    image: qdrant/qdrant:latest
    ports:
      - "6333:6333"
    networks:
      - mem0_network
    volumes:
      - qdrant_storage:/qdrant/storage
    healthcheck:
      # NOTE(review): the qdrant image is believed to ship without wget/curl,
      # so probe the HTTP port with bash's /dev/tcp instead - confirm against
      # the image version in use.
      test: ["CMD-SHELL", "bash -c ':> /dev/tcp/127.0.0.1/6333' || exit 1"]
      interval: 10s
      timeout: 5s
      retries: 3
      start_period: 10s
    restart: unless-stopped

volumes:
  qdrant_storage:

networks:
  mem0_network:
    driver: bridge
|
||||
+10
@@ -0,0 +1,10 @@
|
||||
# Mem0 Configuration
|
||||
MEM0_PORT=8889
|
||||
|
||||
# Qdrant Configuration
|
||||
QDRANT_HOST=qdrant
|
||||
QDRANT_PORT=6333
|
||||
|
||||
# Embedding Configuration
|
||||
EMBEDDING_URL=http://host.docker.internal:4700/embedding
|
||||
# Length of the vectors returned by the embedding endpoint. main.py uses this
# value as the vector size when it creates the "memories" Qdrant collection.
EMBEDDING_DIMS=1024
|
||||
@@ -0,0 +1,267 @@
|
||||
"""
|
||||
Mem0 API Server - Simple wrapper around mem0ai with llama.cpp embedding support
|
||||
"""
|
||||
import os
|
||||
import requests
|
||||
from fastapi import FastAPI, HTTPException
|
||||
from pydantic import BaseModel
|
||||
from typing import Optional, List, Any, Dict
|
||||
from qdrant_client import QdrantClient
|
||||
from qdrant_client.models import Distance, VectorParams, PointStruct, Filter, FieldCondition, MatchValue
|
||||
|
||||
app = FastAPI(title="Mem0 API", version="1.0.0")
|
||||
|
||||
# Runtime settings; each one can be overridden through the environment.
QDRANT_HOST = os.environ.get("QDRANT_HOST", "qdrant")
QDRANT_PORT = int(os.environ.get("QDRANT_PORT", "6333"))
EMBEDDING_URL = os.environ.get(
    "EMBEDDING_URL",
    "http://host.docker.internal:4700/embedding",
)
EMBEDDING_DIMS = int(os.environ.get("EMBEDDING_DIMS", "1024"))

# Name of the single Qdrant collection this service reads and writes.
COLLECTION_NAME = "memories"
|
||||
|
||||
|
||||
class LlamaCppEmbedder:
    """Client for a llama.cpp embedding HTTP endpoint.

    Args:
        base_url: Full URL the text is POSTed to.
        dims: Expected embedding length. Stored for callers; not enforced here.
        timeout: Seconds to wait for the embedding server. Without a timeout
            ``requests`` waits indefinitely, so a stalled server would hang
            every request that needs an embedding.
    """

    def __init__(self, base_url: str, dims: int, timeout: float = 30.0):
        self.base_url = base_url
        self.dims = dims
        self.timeout = timeout

    def get_embedding(self, text: str) -> List[float]:
        """Return the embedding vector for ``text``.

        Raises:
            requests.HTTPError: The endpoint answered with an error status.
            requests.Timeout: No answer arrived within ``self.timeout`` seconds.
        """
        response = requests.post(
            self.base_url,
            json={"content": text},
            headers={"Content-Type": "application/json"},
            timeout=self.timeout,
        )
        response.raise_for_status()
        result = response.json()
        # The reply is read as a list of objects whose "embedding" entry holds
        # a list of vectors; the first vector of the first object is returned.
        return result[0]["embedding"][0]
|
||||
|
||||
|
||||
# Module-level singletons shared by every request handler.
embedder = LlamaCppEmbedder(base_url=EMBEDDING_URL, dims=EMBEDDING_DIMS)
qdrant_client = QdrantClient(host=QDRANT_HOST, port=QDRANT_PORT)
|
||||
|
||||
|
||||
def init_collection():
    """Create the COLLECTION_NAME collection in Qdrant when it is missing."""
    existing = {c.name for c in qdrant_client.get_collections().collections}
    if COLLECTION_NAME in existing:
        return

    # Vectors are sized by EMBEDDING_DIMS and compared by cosine distance.
    qdrant_client.create_collection(
        collection_name=COLLECTION_NAME,
        vectors_config=VectorParams(
            size=EMBEDDING_DIMS,
            distance=Distance.COSINE,
        ),
    )


# Runs at import time, so the collection exists before any request is served.
init_collection()
|
||||
|
||||
|
||||
class AddMemoryRequest(BaseModel):
    """Request body for ``POST /add``."""

    # Text that is embedded and stored.
    message: str
    # Owner of the memory; "default" when the client omits it.
    user_id: str | None = "default"
    # Optional free-form data stored alongside the text.
    metadata: dict | None = None
|
||||
|
||||
|
||||
class AddMemoryResponse(BaseModel):
    """Response body for ``POST /add``."""

    # Whether the memory was stored.
    success: bool
    # ID of the stored point. The explicit default makes the field truly
    # optional; under pydantic v2 an Optional field without a default is
    # still required.
    memory_id: Optional[str] = None
    # Human-readable outcome.
    message: str
|
||||
|
||||
|
||||
class SearchMemoryRequest(BaseModel):
    """Request body for ``POST /search``."""

    # Text whose embedding is compared against stored memories.
    query: str
    # Restrict results to this user; "default" when omitted.
    user_id: str | None = "default"
    # Maximum number of hits to return.
    limit: int | None = 5
|
||||
|
||||
|
||||
class SearchResult(BaseModel):
    """One hit returned by ``POST /search``."""

    # Point ID, rendered as a string.
    id: str
    # Stored memory text.
    text: str
    # Owner recorded in the point payload.
    user_id: str
    # Similarity score reported by Qdrant.
    score: float
    # Stored metadata, if any. The explicit default keeps the field optional
    # under pydantic v2, where Optional alone still means "required".
    metadata: Optional[dict] = None
|
||||
|
||||
|
||||
class SearchMemoryResponse(BaseModel):
    """Response body for ``POST /search``."""

    # Hits in the order Qdrant returned them.
    results: list[SearchResult]
|
||||
|
||||
|
||||
class MemoryItem(BaseModel):
    """One stored memory as returned by ``GET /memories``."""

    # Point ID, rendered as a string.
    id: str
    # Stored memory text.
    text: str
    # Owner recorded in the point payload.
    user_id: str
    # Stored metadata, if any. The explicit default keeps the field optional
    # under pydantic v2, where Optional alone still means "required".
    metadata: Optional[dict] = None
|
||||
|
||||
|
||||
class GetMemoriesResponse(BaseModel):
    """Response body for ``GET /memories``."""

    # Every memory that matched the user filter.
    memories: list[MemoryItem]
|
||||
|
||||
|
||||
class DeleteMemoryResponse(BaseModel):
    """Response body for ``DELETE /delete/{memory_id}``."""

    # Whether the delete call completed without error.
    success: bool
    # ID that was requested for deletion.
    memory_id: str
    # Human-readable outcome.
    message: str
|
||||
|
||||
|
||||
@app.get("/health")
def health_check():
    """Report whether the embedding server and Qdrant answer over HTTP.

    Declared as a plain ``def`` so FastAPI runs the blocking ``requests``
    calls in its thread pool instead of stalling the event loop.

    Returns:
        A dict with "status" ("healthy", "degraded" or "unhealthy"),
        "service", and either the per-dependency booleans or an "error"
        string when a probe raised.
    """
    try:
        # Probe the server root. Only a trailing "/embedding" is trimmed:
        # a blanket str.replace would also rewrite a hostname that begins
        # with "embedding" (e.g. "http://embedding-server:4700/embedding").
        suffix = "/embedding"
        if EMBEDDING_URL.endswith(suffix):
            embedding_root = EMBEDDING_URL[: -len(suffix)] + "/"
        else:
            embedding_root = EMBEDDING_URL
        test_response = requests.get(embedding_root, timeout=5)
        embedding_healthy = (
            test_response.status_code == 200
            or "gzip" in test_response.text.lower()
        )

        # Probe Qdrant's HTTP root.
        qdrant_url = f"http://{QDRANT_HOST}:{QDRANT_PORT}/"
        qdrant_response = requests.get(qdrant_url, timeout=5)
        qdrant_healthy = qdrant_response.status_code == 200

        return {
            "status": "healthy" if (embedding_healthy and qdrant_healthy) else "degraded",
            "service": "mem0-api",
            "embedding_endpoint": embedding_healthy,
            "qdrant": qdrant_healthy,
        }
    except Exception as e:
        # Any probe failure (connection refused, timeout, ...) is reported in
        # the body rather than as an HTTP error.
        return {"status": "unhealthy", "service": "mem0-api", "error": str(e)}
|
||||
|
||||
|
||||
@app.post("/add", response_model=AddMemoryResponse)
def add_memory(request: AddMemoryRequest):
    """Embed ``request.message`` and store it as a new point in Qdrant.

    Declared as a plain ``def`` so FastAPI runs the blocking embedding and
    Qdrant calls in its thread pool instead of stalling the event loop.

    Returns:
        AddMemoryResponse carrying the generated memory ID.

    Raises:
        HTTPException: 500 with the error text when embedding or storage fails.
    """
    import uuid

    try:
        memory_id = str(uuid.uuid4())

        # Get embedding
        embedding = embedder.get_embedding(request.message)

        # Create point
        point = PointStruct(
            id=memory_id,
            vector=embedding,
            payload={
                "text": request.message,
                "user_id": request.user_id,
                "metadata": request.metadata or {},
            },
        )

        # Upsert to Qdrant
        qdrant_client.upsert(collection_name=COLLECTION_NAME, points=[point])

        return AddMemoryResponse(
            success=True,
            memory_id=memory_id,
            message="Memory added successfully",
        )
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e)) from e
|
||||
|
||||
|
||||
@app.post("/search", response_model=SearchMemoryResponse)
def search_memory(request: SearchMemoryRequest):
    """Return the stored memories most similar to ``request.query``.

    Declared as a plain ``def`` so FastAPI runs the blocking embedding and
    Qdrant calls in its thread pool instead of stalling the event loop.

    Raises:
        HTTPException: 500 with the error text when embedding or search fails.
    """
    try:
        # Get query embedding
        query_embedding = embedder.get_embedding(request.query)

        # Restrict to one user when a user_id was supplied.
        query_filter = None
        if request.user_id:
            query_filter = Filter(
                must=[FieldCondition(key="user_id", match=MatchValue(value=request.user_id))]
            )

        # Search in Qdrant using query_points, passing the vector directly.
        results = qdrant_client.query_points(
            collection_name=COLLECTION_NAME,
            query=query_embedding,
            limit=request.limit,
            query_filter=query_filter,
            with_payload=True,
            with_vectors=False,
        )

        # Format results
        formatted_results = [
            SearchResult(
                id=str(hit.id),
                text=hit.payload.get("text", ""),
                user_id=hit.payload.get("user_id", ""),
                score=hit.score,
                metadata=hit.payload.get("metadata"),
            )
            for hit in results.points
        ]

        return SearchMemoryResponse(results=formatted_results)
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e)) from e
|
||||
|
||||
|
||||
@app.get("/memories", response_model=GetMemoriesResponse)
def get_memories(user_id: Optional[str] = "default"):
    """Return every stored memory, filtered by ``user_id`` when it is non-empty.

    Declared as a plain ``def`` so FastAPI runs the blocking Qdrant calls in
    its thread pool instead of stalling the event loop.

    Raises:
        HTTPException: 500 with the error text when the Qdrant scroll fails.
    """
    try:
        # Build filter for user_id
        scroll_filter = None
        if user_id:
            scroll_filter = Filter(
                must=[FieldCondition(key="user_id", match=MatchValue(value=user_id))]
            )

        # Page through the collection 100 points at a time.
        memories = []
        offset = None
        while True:
            result, next_offset = qdrant_client.scroll(
                collection_name=COLLECTION_NAME,
                limit=100,
                offset=offset,
                scroll_filter=scroll_filter,
                with_payload=True,
                with_vectors=False,
            )

            memories.extend(
                MemoryItem(
                    id=str(point.id),
                    text=point.payload.get("text", ""),
                    user_id=point.payload.get("user_id", ""),
                    metadata=point.payload.get("metadata"),
                )
                for point in result
            )

            # Stop only on None: a falsy offset such as integer ID 0 is still
            # a valid page cursor.
            if next_offset is None:
                break
            offset = next_offset

        return GetMemoriesResponse(memories=memories)
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e)) from e
|
||||
|
||||
|
||||
@app.delete("/delete/{memory_id}", response_model=DeleteMemoryResponse)
def delete_memory(memory_id: str):
    """Delete the point with ID ``memory_id`` from the collection.

    Declared as a plain ``def`` so FastAPI runs the blocking Qdrant call in
    its thread pool instead of stalling the event loop.

    Raises:
        HTTPException: 500 with the error text when the Qdrant call fails.
    """
    try:
        qdrant_client.delete(
            collection_name=COLLECTION_NAME,
            points_selector=[memory_id],
        )
        return DeleteMemoryResponse(
            success=True,
            memory_id=memory_id,
            message="Memory deleted successfully",
        )
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e)) from e
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Allows `python main.py` as an alternative to the uvicorn CLI.
    import uvicorn

    server_options = {"host": "0.0.0.0", "port": 8000}
    uvicorn.run(app, **server_options)
|
||||
Executable
+162
@@ -0,0 +1,162 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
mem0 Memory Server - Persistent Semantic Memory for Hermes Agent
|
||||
Direct integration with llama-embed on port 4700
|
||||
"""
|
||||
|
||||
import json
import os
import uuid
from datetime import datetime
from http.server import HTTPServer, BaseHTTPRequestHandler
from urllib.parse import unquote

import requests
from qdrant_client import QdrantClient, models
|
||||
|
||||
# Configuration (each value can be overridden through the environment)
QDRANT_URL = os.environ.get("QDRANT_URL", "http://localhost:6333")
EMBEDDING_URL = os.environ.get("EMBEDDING_URL", "http://localhost:4700")
PORT = int(os.environ.get("MEM0_PORT", 8080))
# Owner recorded on every stored memory. The previous hard-coded value is kept
# as the default so existing deployments behave the same.
USER_ID = os.environ.get("MEM0_USER_ID", "henry_hofmann")
|
||||
|
||||
# Initialize Qdrant client
qdrant_client = QdrantClient(url=QDRANT_URL)

# Create the collection only when Qdrant reports that it is missing. The
# earlier bare `except:` treated every failure (including an unreachable
# server) as "collection missing" and hid the real error.
if not qdrant_client.collection_exists("hermes_memory"):
    qdrant_client.create_collection(
        collection_name="hermes_memory",
        vectors_config=models.VectorParams(size=1024, distance=models.Distance.COSINE),
    )
|
||||
|
||||
def get_embedding(text):
    """Return the embedding vector for ``text`` from the llama-embed server.

    POSTs to ``{EMBEDDING_URL}/v1/embeddings`` with a 30 second timeout and
    raises ``requests.HTTPError`` on an error status.
    """
    endpoint = f"{EMBEDDING_URL}/v1/embeddings"
    payload = {"input": text, "model": "BAAI/bge-m3"}
    reply = requests.post(endpoint, json=payload, timeout=30)
    reply.raise_for_status()
    # The vector is read from the first entry of the reply's "data" list.
    return reply.json()["data"][0]["embedding"]
|
||||
|
||||
class MemoryHandler(BaseHTTPRequestHandler):
    """HTTP handler exposing the memory store.

    Routes:
        GET  /health                 service status
        GET  /memory                 up to 10 stored memories for USER_ID
        GET  /memory/<query>/search  top 5 semantic matches for USER_ID
        POST /memory                 store the JSON body's "text" field

    Anything else gets an empty 404.
    """

    COLLECTION = "hermes_memory"

    def log_message(self, format, *args):
        pass  # Suppress logging

    def _send_json(self, status, body):
        """Send ``body`` as a JSON response with the given status code."""
        data = json.dumps(body, default=str).encode()
        self.send_response(status)
        self.send_header("Content-Type", "application/json")
        self.send_header("Content-Length", str(len(data)))
        self.end_headers()
        self.wfile.write(data)

    def _send_empty(self, status):
        """Send a response that has a status line and headers but no body."""
        self.send_response(status)
        self.end_headers()

    @staticmethod
    def _user_filter():
        """Build the Qdrant filter that restricts results to USER_ID."""
        return models.Filter(
            must=[
                models.FieldCondition(
                    key="user_id",
                    match=models.MatchValue(value=USER_ID),
                )
            ]
        )

    def do_GET(self):
        """Dispatch GET requests to the matching route."""
        if self.path == "/health":
            self._send_json(200, {"status": "ok", "service": "mem0", "user": USER_ID})
        elif self.path == "/memory":
            self._list_memories()
        elif self.path.startswith("/memory/") and self.path.endswith("/search"):
            self._search_memories()
        else:
            self._send_empty(404)

    def _list_memories(self):
        """Answer GET /memory with up to 10 memories owned by USER_ID."""
        try:
            # Filter by user so the listing agrees with the search route.
            records, _next_offset = qdrant_client.scroll(
                collection_name=self.COLLECTION,
                scroll_filter=self._user_filter(),
                limit=10,
                with_payload=True,
                with_vectors=False,
            )
            memories = [
                {
                    "id": record.id,
                    "text": (record.payload or {}).get("text", ""),
                    "timestamp": (record.payload or {}).get("timestamp", ""),
                }
                for record in records
            ]
        except Exception as e:
            self._send_json(500, {"error": str(e)})
            return
        self._send_json(200, memories)

    def _search_memories(self):
        """Answer GET /memory/<query>/search with the 5 closest memories."""
        # The path segment arrives percent-encoded ("hello%20world"); decode
        # it so the text that gets embedded is what the client meant.
        query = unquote(self.path.split("/")[2])
        try:
            query_vector = get_embedding(query)
            results = qdrant_client.query_points(
                collection_name=self.COLLECTION,
                query=query_vector,
                query_filter=self._user_filter(),
                limit=5,
                with_payload=True,
            )
            memories = [
                {
                    "id": hit.id,
                    "text": (hit.payload or {}).get("text", ""),
                    "score": hit.score,
                    "timestamp": (hit.payload or {}).get("timestamp", ""),
                }
                for hit in results.points
            ]
        except Exception as e:
            self._send_json(500, {"error": str(e)})
            return
        self._send_json(200, memories)

    def do_POST(self):
        """Handle POST /memory: embed the posted text and store it."""
        if self.path != "/memory":
            self._send_empty(404)
            return

        # A missing or invalid Content-Length, or a body that is not valid
        # JSON, is a client error: answer 400 instead of crashing the handler.
        try:
            content_length = int(self.headers.get("Content-Length") or 0)
            if content_length < 0:
                raise ValueError("negative Content-Length")
            post_data = json.loads(self.rfile.read(content_length).decode())
        except ValueError:
            # JSONDecodeError and UnicodeDecodeError both subclass ValueError.
            self._send_empty(400)
            return

        text = post_data.get("text", "") if isinstance(post_data, dict) else ""
        if not isinstance(text, str) or not text:
            self._send_empty(400)
            return

        try:
            embedding = get_embedding(text)
            # Derive the point ID from user and text with uuid5. The earlier
            # `hash(text) % 1000000` changed between process restarts (str
            # hashes are salted) and could collide, silently overwriting
            # unrelated memories.
            point_id = str(uuid.uuid5(uuid.NAMESPACE_OID, f"{USER_ID}\n{text}"))
            # Local time with UTC offset, second precision; replaces the
            # `date -Iseconds` subprocess.
            timestamp = datetime.now().astimezone().isoformat(timespec="seconds")
            qdrant_client.upsert(
                collection_name=self.COLLECTION,
                points=[
                    models.PointStruct(
                        id=point_id,
                        vector=embedding,
                        payload={
                            "text": text,
                            "user_id": USER_ID,
                            "timestamp": timestamp,
                        },
                    )
                ],
            )
        except Exception as e:
            self._send_json(500, {"error": str(e)})
            return
        self._send_json(200, {"status": "ok", "text": text})
|
||||
|
||||
if __name__ == "__main__":
    # Listen on all interfaces and serve until the process is stopped.
    server = HTTPServer(("0.0.0.0", PORT), MemoryHandler)
    startup_banner = (
        f"mem0 server running on port {PORT}",
        f"Qdrant: {QDRANT_URL}",
        f"Embedding: {EMBEDDING_URL}",
    )
    for banner_line in startup_banner:
        print(banner_line)
    server.serve_forever()
|
||||
@@ -0,0 +1,5 @@
|
||||
# Python dependencies installed into the image by the Dockerfile.
mem0ai>=1.0.0
qdrant-client>=1.12.0
fastapi>=0.109.0
uvicorn>=0.27.0
pydantic>=2.0.0
# main.py imports requests directly, so declare it rather than relying on a
# transitive install.
requests>=2.31.0
|
||||
Reference in New Issue
Block a user