From 3a141d91804b9b3708eb134eabe21f19b38e9cbb Mon Sep 17 00:00:00 2001
From: ARIA <no-reply@zephyre.one>
Date: Fri, 10 Apr 2026 12:53:15 +0200
Subject: [PATCH] Initial release: Mem0 local server memory provider for
 Hermes-Agent

- Self-hosted Mem0 integration (no cloud dependency)
- Async prefetch with ~40ms latency
- Automatic context injection via pre_llm_call hook
- Circuit breaker for server resilience
- Full tool support: profile, search, conclude
---
 .gitignore       |  36 +++++
 LICENSE          |  21 +++
 README.md        | 239 +++++++++++++++++++++++++++
 __init__.py      | 408 +++++++++++++++++++++++++++++++++++++++++++++++
 after-install.md | 194 ++++++++++++++++++++++
 client.py        | 139 ++++++++++++++++
 plugin.yaml      |  20 +++
 7 files changed, 1057 insertions(+)
 create mode 100644 .gitignore
 create mode 100644 LICENSE
 create mode 100644 README.md
 create mode 100644 __init__.py
 create mode 100644 after-install.md
 create mode 100644 client.py
 create mode 100644 plugin.yaml

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..acfdc7b
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,36 @@
+__pycache__/
+*.py[cod]
+*$py.class
+*.so
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+
+# IDE
+.idea/
+.vscode/
+*.swp
+*.swo
+*~
+
+# Environment
+.env
+.env.local
+.env.*.local
+
+# OS
+.DS_Store
+Thumbs.db
diff --git a/LICENSE b/LICENSE
new file mode 100644
index 0000000..4d7e990
--- /dev/null
+++ b/LICENSE
@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2024 Henry Hofmann
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..9777087
--- /dev/null
+++ b/README.md
@@ -0,0 +1,239 @@
+# Mem0 Local Hermes Plugin
+
+Self-hosted Mem0 memory provider for Hermes-Agent. Provides semantic memory search, automatic fact extraction, and context injection without tool calls.
+
+## Features
+
+- **Local Mem0 server** — No cloud dependency, full data privacy
+- **Async prefetch** — Memory retrieval happens in background (~40ms)
+- **Context injection** — Relevant memories injected directly into LLM prompt
+- **Automatic fact extraction** — Server-side LLM extracts facts from conversations
+- **Semantic search** — Find memories by meaning, not keywords
+- **Circuit breaker** — Automatic failover on server unavailability
+
+## Prerequisites
+
+1. **Mem0 server running locally**:
+
+   Using Docker:
+   ```bash
+   docker run -d -p 8000:8000 mem0ai/mem0:latest
+   ```
+
+   Or via Docker Compose with custom config:
+   ```yaml
+   version: "3.8"
+   services:
+     mem0:
+       image: mem0ai/mem0:latest
+       ports:
+         - "8000:8000"
+       environment:
+         - MEM0_CONFIG_PATH=/app/config.yaml
+       volumes:
+         - ./mem0-config.yaml:/app/config.yaml
+         - mem0_data:/app/data
+   volumes:
+     mem0_data:
+   ```
+
+2. **Verify server is reachable**:
+   ```bash
+   curl http://localhost:8000/health
+   ```
+
+   If your server runs on a different host or port (for example `10.0.0.150:8889`), adjust the URL:
+   ```bash
+   curl http://10.0.0.150:8889/health
+   ```
+
+## Installation
+
+From GitHub repository:
+```bash
+hermes plugins install https://github.com/yourusername/mem0-local-hermes-plugin.git
+```
+
+Or with shorthand:
+```bash
+hermes plugins install yourusername/mem0-local-hermes-plugin
+```
+
+From local directory (during development):
+```bash
+hermes plugins install /path/to/mem0-local-hermes-plugin
+```
+
+The installer will prompt for:
+- `MEM0_BASE_URL` — Your local Mem0 server URL (default: `http://localhost:8000`)
+- `MEM0_USER_ID` — User identifier for memory scoping (default: `hermes-user`)
+
+## Configuration
+
+The plugin supports two configuration methods that work together:
+
+1. **Environment variables** (`~/.hermes/.env`) - Primary configuration
+2. **Config file** (`~/.hermes/mem0-local.json`) - Optional overrides
+
+**Precedence**: Config file values override environment variables. This allows you to set defaults in `.env` and override specific values in the JSON file.
+
+### Method 1: Environment Variables (Recommended)
+
+Set in `~/.hermes/.env`:
+
+| Variable | Description | Default |
+|----------|-------------|---------|
+| `MEM0_BASE_URL` | Local Mem0 server URL | `http://localhost:8000` |
+| `MEM0_USER_ID` | User identifier | `hermes-user` |
+| `MEM0_AGENT_ID` | Agent identifier | `hermes` |
+
+Example:
+```env
+MEM0_BASE_URL=http://10.0.0.150:8889
+MEM0_USER_ID=henry_hofmann
+MEM0_AGENT_ID=hermes
+```
+
+### Method 2: Config File (Optional Overrides)
+
+Create `~/.hermes/mem0-local.json` to override specific settings:
+```json
+{
+  "base_url": "http://10.0.0.150:8889",
+  "user_id": "henry_hofmann",
+  "agent_id": "hermes",
+  "rerank": true,
+  "timeout": 10.0
+}
+```
+
+Example for a Mem0 server running on another machine (`10.0.0.150:8889`):
+```json
+{
+  "base_url": "http://10.0.0.150:8889",
+  "user_id": "henry_hofmann",
+  "agent_id": "hermes",
+  "rerank": true,
+  "timeout": 10.0
+}
+```
+
+## Usage
+
+### Activate the Memory Provider
+
+```bash
+hermes memory mem0-local
+```
+
+### Restart Gateway
+
+```bash
+hermes gateway restart
+```
+
+### How It Works
+
+1. **User message received** → `queue_prefetch()` spawns background thread
+2. **Mem0 search** → Semantic search for relevant memories (~40ms)
+3. **Context injection** → Results injected via `pre_llm_call` hook
+4. **LLM receives** → User message + memory context (no tool call needed!)
+
+**Example**:
+```
+User: "Hey, is there a new episode of my favorite anime out?"
+
+↓ [Background: mem0.prefetch() searches for "favorite anime"]
+
+LLM receives:
+"""
+Hey, is there a new episode of my favorite anime out?
+
+## Mem0 Memory
+- My favorite anime are Naruto, One Piece, and Demon Slayer
+"""
+
+Assistant: "Let me check for new episodes of Naruto, One Piece, and Demon Slayer..."
+```
+
+### Available Tools
+
+The plugin also provides explicit memory tools:
+
+| Tool | Description |
+|------|-------------|
+| `mem0_profile` | Retrieve all stored memories about the user |
+| `mem0_search` | Search memories by semantic similarity |
+| `mem0_conclude` | Store a fact verbatim (no LLM extraction) |
+
+**Tool usage examples**:
+
+```
+# Get all memories
+mem0_profile()
+
+# Search with reranking
+mem0_search(query="project deadlines", rerank=true, top_k=5)
+
+# Store a fact explicitly
+mem0_conclude(conclusion="I prefer Python over JavaScript for backend development")
+```
+
+## Circuit Breaker
+
+After 5 consecutive API failures, the plugin pauses requests for 120 seconds to avoid hammering a down server. The breaker resets automatically.
+
+## Troubleshooting
+
+### "Mem0 server temporarily unavailable"
+
+- Check server is running: `curl http://your-server:port/health`
+- Verify `MEM0_BASE_URL` is correct
+- Wait 2 minutes for circuit breaker to reset
+
+### "No memories stored yet"
+
+- Mem0 extracts facts automatically from conversations
+- Or use `mem0_conclude` to store facts explicitly
+
+### Memory not injected
+
+- Check `is_available()` returns `True` in logs
+- Verify `prefetch()` is being called (debug logs)
+- Ensure Mem0 server has indexed memories
+- Check network connectivity to your local server
+
+### Connection issues
+
+If your Mem0 server runs on a different machine (for example `10.0.0.150`):
+- Ensure firewall allows connections on port 8889
+- Verify the server binds to 0.0.0.0, not just localhost
+- Check network routing between Hermes-Agent and Mem0 server
+
+## Differences from Cloud Version
+
+| Aspect | Cloud Version | Local Version |
+|--------|--------------|---------------|
+| **Client** | `mem0.MemoryClient(api_key=...)` | HTTP requests to local server |
+| **Auth** | API key | None (local network) |
+| **Config** | `MEM0_API_KEY` | `MEM0_BASE_URL` |
+| **Latency** | Network-dependent | ~40ms (local) |
+| **Privacy** | Cloud processing | Full local control |
+| **Cost** | Pay-per-use | Free (self-hosted) |
+
+## Development
+
+For development, install from local path:
+```bash
+hermes plugins install /path/to/mem0-local-hermes-plugin
+```
+
+Watch for changes:
+```bash
+# In plugin directory
+hermes gateway restart  # After each change
+```
+
+## License
+
+MIT
diff --git a/__init__.py b/__init__.py
new file mode 100644
index 0000000..4e2f2bf
--- /dev/null
+++ b/__init__.py
@@ -0,0 +1,408 @@
+"""Mem0 local server memory plugin — MemoryProvider interface.
+
+Self-hosted Mem0 server with semantic search and automatic fact extraction.
+
+Config via environment variables:
+  MEM0_BASE_URL  — Local Mem0 server URL (default: http://localhost:8000)
+  MEM0_USER_ID   — User identifier for memory scoping (default: hermes-user)
+  MEM0_AGENT_ID  — Agent identifier (default: hermes)
+
+Non-empty values in $HERMES_HOME/mem0-local.json override the environment.
+"""
+
+from __future__ import annotations
+
+import json
+import logging
+import os
+import threading
+import time
+from typing import Any, Dict, List, Optional
+
+from agent.memory_provider import MemoryProvider
+from tools.registry import tool_error
+
+from .client import LocalMem0Client
+
+logger = logging.getLogger(__name__)
+
+# Circuit breaker: after this many consecutive failures, pause API calls
+_BREAKER_THRESHOLD = 5
+_BREAKER_COOLDOWN_SECS = 120
+
+
+# ---------------------------------------------------------------------------
+# Config
+# ---------------------------------------------------------------------------
+
+
+def _load_config() -> dict:
+    """Assemble the plugin settings.
+
+    Starts from the MEM0_* environment variables and the built-in defaults,
+    then applies every non-empty value found in
+    $HERMES_HOME/mem0-local.json. A file that cannot be read or parsed is
+    logged as a warning and otherwise ignored.
+
+    Returns:
+        Dict containing at least base_url, user_id, agent_id, rerank and
+        timeout.
+    """
+    from hermes_constants import get_hermes_home
+
+    settings = {
+        "base_url": os.environ.get("MEM0_BASE_URL", "http://localhost:8000"),
+        "user_id": os.environ.get("MEM0_USER_ID", "hermes-user"),
+        "agent_id": os.environ.get("MEM0_AGENT_ID", "hermes"),
+        "rerank": True,
+        "timeout": 10.0,
+    }
+
+    override_file = get_hermes_home() / "mem0-local.json"
+    if not override_file.exists():
+        return settings
+
+    try:
+        overrides = json.loads(override_file.read_text(encoding="utf-8"))
+        # None and "" mean "not set": keep the env/default value for those keys.
+        kept = {k: v for k, v in overrides.items() if v is not None and v != ""}
+        settings.update(kept)
+    except Exception as e:
+        logger.warning("Failed to load mem0-local.json: %s", e)
+    return settings
+
+
+# ---------------------------------------------------------------------------
+# Tool schemas
+# ---------------------------------------------------------------------------
+
+# Schema for the mem0_profile tool. It takes no arguments; handle_tool_call
+# answers it with client.get_all() scoped to the current user.
+PROFILE_SCHEMA = {
+    "name": "mem0_profile",
+    "description": (
+        "Retrieve all stored memories about the user — preferences, facts, "
+        "project context. Fast, no reranking. Use at conversation start."
+    ),
+    "parameters": {"type": "object", "properties": {}, "required": []},
+}
+
+# Schema for the mem0_search tool. Only `query` is required; handle_tool_call
+# falls back to rerank=False and top_k=10 and caps top_k at 50, which is what
+# the parameter descriptions below advertise.
+SEARCH_SCHEMA = {
+    "name": "mem0_search",
+    "description": (
+        "Search memories by meaning. Returns relevant facts ranked by similarity. "
+        "Set rerank=true for higher accuracy on important queries."
+    ),
+    "parameters": {
+        "type": "object",
+        "properties": {
+            "query": {"type": "string", "description": "What to search for."},
+            "rerank": {
+                "type": "boolean",
+                "description": "Enable reranking for precision (default: false).",
+            },
+            "top_k": {
+                "type": "integer",
+                "description": "Max results (default: 10, max: 50).",
+            },
+        },
+        "required": ["query"],
+    },
+}
+
+# Schema for the mem0_conclude tool. handle_tool_call passes the text to
+# client.add() as a single user message with infer=False.
+CONCLUDE_SCHEMA = {
+    "name": "mem0_conclude",
+    "description": (
+        "Store a durable fact about the user. Stored verbatim (no LLM extraction). "
+        "Use for explicit preferences, corrections, or decisions."
+    ),
+    "parameters": {
+        "type": "object",
+        "properties": {
+            "conclusion": {"type": "string", "description": "The fact to store."},
+        },
+        "required": ["conclusion"],
+    },
+}
+
+
+# ---------------------------------------------------------------------------
+# MemoryProvider implementation
+# ---------------------------------------------------------------------------
+
+
+class Mem0LocalMemoryProvider(MemoryProvider):
+    """Memory provider backed by a self-hosted Mem0 server.
+
+    Searches run on a background thread and are handed out by ``prefetch()``,
+    each turn is posted to the server for fact extraction, and three explicit
+    tools (``mem0_profile``, ``mem0_search``, ``mem0_conclude``) are served by
+    ``handle_tool_call()``. After ``_BREAKER_THRESHOLD`` consecutive failures
+    server calls are paused for ``_BREAKER_COOLDOWN_SECS`` seconds.
+    """
+
+    def __init__(self):
+        self._config = None  # set by initialize(), or lazily by _get_client()
+        self._client: Optional[LocalMem0Client] = None
+        self._client_lock = threading.Lock()  # guards creation/reset of _client
+        self._user_id = "hermes-user"
+        self._agent_id = "hermes"
+        self._rerank = True
+        self._prefetch_result = ""  # bullet list written by the prefetch thread
+        self._prefetch_lock = threading.Lock()  # guards _prefetch_result
+        self._prefetch_thread = None
+        self._sync_thread = None
+        # Circuit breaker state
+        self._consecutive_failures = 0
+        self._breaker_open_until = 0.0  # time.monotonic() deadline
+
+    @property
+    def name(self) -> str:
+        """Return the provider name, ``mem0-local``."""
+        return "mem0-local"
+
+    def is_available(self) -> bool:
+        """Return True when a base URL is configured and the server is healthy.
+
+        Performs a live health probe, so calling this does network I/O.
+        """
+        base_url = _load_config().get("base_url", "")
+        if not base_url:
+            return False
+        try:
+            return LocalMem0Client(base_url).health()
+        except Exception:
+            return False
+
+    def save_config(self, values: dict, hermes_home):
+        """Merge ``values`` into $HERMES_HOME/mem0-local.json and rewrite it.
+
+        Args:
+            values: Settings to store; they overwrite keys already in the file.
+            hermes_home: Directory that contains mem0-local.json.
+        """
+        from pathlib import Path
+
+        config_path = Path(hermes_home) / "mem0-local.json"
+        existing = {}
+        if config_path.exists():
+            try:
+                # Use the encoding _load_config() reads with, not the locale's.
+                existing = json.loads(config_path.read_text(encoding="utf-8"))
+            except Exception as e:
+                # Best effort: start from an empty config, but do not hide it.
+                logger.warning("Ignoring unreadable mem0-local.json: %s", e)
+        if not isinstance(existing, dict):
+            existing = {}  # e.g. a JSON list: nothing to merge into
+        existing.update(values)
+        config_path.write_text(json.dumps(existing, indent=2), encoding="utf-8")
+
+    def get_config_schema(self):
+        """Return the configurable settings with descriptions and defaults."""
+        return [
+            {
+                "key": "base_url",
+                "description": "Local Mem0 server URL",
+                "required": True,
+                "env_var": "MEM0_BASE_URL",
+                "url": "https://github.com/mem0ai/mem0",
+            },
+            {
+                "key": "user_id",
+                "description": "User identifier",
+                "default": "hermes-user",
+            },
+            {"key": "agent_id", "description": "Agent identifier", "default": "hermes"},
+            {
+                "key": "rerank",
+                "description": "Enable reranking for recall",
+                "default": "true",
+                "choices": ["true", "false"],
+            },
+            {
+                "key": "timeout",
+                "description": "Request timeout in seconds",
+                "default": "10.0",
+            },
+        ]
+
+    @staticmethod
+    def _as_bool(value: Any, default: bool) -> bool:
+        """Interpret a config or tool value as a boolean.
+
+        Strings are matched case-insensitively against common spellings so that
+        "false" (a choice offered by get_config_schema) is not treated as
+        truthy. ``None`` and unrecognised strings yield ``default``.
+        """
+        if value is None:
+            return default
+        if isinstance(value, str):
+            text = value.strip().lower()
+            if text in ("1", "true", "yes", "on"):
+                return True
+            if text in ("0", "false", "no", "off"):
+                return False
+            return default
+        return bool(value)
+
+    @staticmethod
+    def _clamp_top_k(value: Any, default: int = 10, maximum: int = 50) -> int:
+        """Coerce ``value`` to an int in [1, maximum]; bad input gives ``default``."""
+        try:
+            top_k = int(value)
+        except (TypeError, ValueError):
+            return default
+        return max(1, min(top_k, maximum))
+
+    def _get_client(self) -> LocalMem0Client:
+        """Return the shared client, creating it on first use under a lock."""
+        with self._client_lock:
+            if self._client is None:
+                if self._config is None:
+                    # Reached before initialize(): load the config here rather
+                    # than fail with AttributeError on None.
+                    self._config = _load_config()
+                self._client = LocalMem0Client(
+                    self._config.get("base_url", "http://localhost:8000"),
+                    timeout=float(self._config.get("timeout", 10.0)),
+                )
+            return self._client
+
+    def _is_breaker_open(self) -> bool:
+        """Return True while calls are paused; re-arm once the cooldown ends."""
+        if self._consecutive_failures < _BREAKER_THRESHOLD:
+            return False
+        if time.monotonic() >= self._breaker_open_until:
+            self._consecutive_failures = 0  # cooldown over: allow calls again
+            return False
+        return True
+
+    def _record_success(self):
+        """Reset the consecutive-failure counter."""
+        self._consecutive_failures = 0
+
+    def _record_failure(self):
+        """Count a failure; open the breaker once the threshold is reached."""
+        self._consecutive_failures += 1
+        if self._consecutive_failures >= _BREAKER_THRESHOLD:
+            self._breaker_open_until = time.monotonic() + _BREAKER_COOLDOWN_SECS
+            logger.warning(
+                "Mem0 circuit breaker tripped after %d consecutive failures. "
+                "Pausing API calls for %ds.",
+                self._consecutive_failures,
+                _BREAKER_COOLDOWN_SECS,
+            )
+
+    def initialize(self, session_id: str, **kwargs) -> None:
+        """Load the configuration and resolve user/agent scoping.
+
+        Args:
+            session_id: Not used by this provider.
+            **kwargs: A non-empty ``user_id`` overrides the configured one.
+        """
+        self._config = _load_config()
+        # Prefer gateway-provided user_id for per-user memory scoping
+        configured_user = self._config.get("user_id", "hermes-user")
+        self._user_id = kwargs.get("user_id") or configured_user
+        self._agent_id = self._config.get("agent_id", "hermes")
+        # The config schema offers the strings "true"/"false"; normalise so
+        # that a stored "false" really disables reranking.
+        self._rerank = self._as_bool(self._config.get("rerank", True), True)
+
+    def _read_filters(self) -> Dict[str, Any]:
+        """Filters for search/get_all — scoped to user only."""
+        return {"user_id": self._user_id}
+
+    def _write_filters(self) -> Dict[str, Any]:
+        """Filters for add — scoped to user + agent."""
+        return {"user_id": self._user_id, "agent_id": self._agent_id}
+
+    def system_prompt_block(self) -> str:
+        """Return the system-prompt text that advertises the memory tools."""
+        return (
+            "# Mem0 Memory (Local)\n"
+            f"Active. User: {self._user_id}.\n"
+            "Use mem0_search to find memories, mem0_conclude to store facts, "
+            "mem0_profile for a full overview."
+        )
+
+    def prefetch(self, query: str, *, session_id: str = "") -> str:
+        """Return and clear the result of the most recent queued prefetch.
+
+        Waits up to 3 s for a prefetch thread that is still running. ``query``
+        and ``session_id`` are not used here.
+
+        Returns:
+            The cached memories under a "## Mem0 Memory" heading, or "" when
+            nothing is cached.
+        """
+        worker = self._prefetch_thread
+        if worker and worker.is_alive():
+            worker.join(timeout=3.0)
+        with self._prefetch_lock:
+            result, self._prefetch_result = self._prefetch_result, ""
+        if not result:
+            return ""
+        return f"## Mem0 Memory\n{result}"
+
+    def queue_prefetch(self, query: str, *, session_id: str = "") -> None:
+        """Start a background search whose result a later prefetch() returns.
+
+        Does nothing while the circuit breaker is open. ``session_id`` is not
+        used.
+        """
+        if self._is_breaker_open():
+            return
+
+        def _run():
+            try:
+                results = self._get_client().search(
+                    query=query,
+                    filters=self._read_filters(),
+                    rerank=self._rerank,
+                    top_k=5,
+                )
+                if results:
+                    # Servers may name the text field "text" or "memory".
+                    lines = [
+                        r.get("text") or r.get("memory", "")
+                        for r in results
+                        if r.get("text") or r.get("memory")
+                    ]
+                    with self._prefetch_lock:
+                        self._prefetch_result = "\n".join(
+                            f"- {line}" for line in lines
+                        )
+                self._record_success()
+            except Exception as e:
+                self._record_failure()
+                logger.debug("Mem0 prefetch failed: %s", e)
+
+        self._prefetch_thread = threading.Thread(
+            target=_run, daemon=True, name="mem0-local-prefetch"
+        )
+        self._prefetch_thread.start()
+
+    def sync_turn(
+        self, user_content: str, assistant_content: str, *, session_id: str = ""
+    ) -> None:
+        """Send the turn to Mem0 for server-side fact extraction.
+
+        The upload runs on a background thread. Skipped while the breaker is
+        open. Waits up to 5 s for a previous sync thread before starting the
+        next one. ``session_id`` is not used.
+        """
+        if self._is_breaker_open():
+            return
+
+        def _sync():
+            try:
+                messages = [
+                    {"role": "user", "content": user_content},
+                    {"role": "assistant", "content": assistant_content},
+                ]
+                self._get_client().add(
+                    messages, filters=self._write_filters(), infer=True
+                )
+                self._record_success()
+            except Exception as e:
+                self._record_failure()
+                logger.warning("Mem0 sync failed: %s", e)
+
+        if self._sync_thread and self._sync_thread.is_alive():
+            self._sync_thread.join(timeout=5.0)
+
+        self._sync_thread = threading.Thread(
+            target=_sync, daemon=True, name="mem0-local-sync"
+        )
+        self._sync_thread.start()
+
+    def get_tool_schemas(self) -> List[Dict[str, Any]]:
+        """Return the schemas of the three tools this provider handles."""
+        return [PROFILE_SCHEMA, SEARCH_SCHEMA, CONCLUDE_SCHEMA]
+
+    def handle_tool_call(self, tool_name: str, args: dict, **kwargs) -> str:
+        """Execute one of the provider's tools and return its result string.
+
+        Args:
+            tool_name: ``mem0_profile``, ``mem0_search`` or ``mem0_conclude``.
+            args: Arguments supplied for the tool call.
+            **kwargs: Accepted for interface compatibility; not used here.
+
+        Returns:
+            A JSON string with the result on success. When the breaker is
+            open a JSON ``{"error": ...}`` string is returned; for a missing
+            argument, an unknown tool or a failed server call the value of
+            ``tool_error(...)`` is returned.
+        """
+        if self._is_breaker_open():
+            return json.dumps(
+                {
+                    "error": "Mem0 server temporarily unavailable (multiple consecutive failures). Will retry automatically."
+                }
+            )
+
+        try:
+            client = self._get_client()
+        except Exception as e:
+            return tool_error(str(e))
+
+        handlers = {
+            "mem0_profile": self._tool_profile,
+            "mem0_search": self._tool_search,
+            "mem0_conclude": self._tool_conclude,
+        }
+        handler = handlers.get(tool_name)
+        if handler is None:
+            return tool_error(f"Unknown tool: {tool_name}")
+        return handler(client, args or {})
+
+    def _tool_profile(self, client: LocalMem0Client, args: dict) -> str:
+        """Handle ``mem0_profile``: list every memory stored for the user."""
+        try:
+            memories = client.get_all(filters=self._read_filters())
+            self._record_success()
+            if not memories:
+                return json.dumps({"result": "No memories stored yet."})
+            lines = [
+                m.get("text") or m.get("memory", "")
+                for m in memories
+                if m.get("text") or m.get("memory")
+            ]
+            return json.dumps({"result": "\n".join(lines), "count": len(lines)})
+        except Exception as e:
+            self._record_failure()
+            return tool_error(f"Failed to fetch profile: {e}")
+
+    def _tool_search(self, client: LocalMem0Client, args: dict) -> str:
+        """Handle ``mem0_search``: semantic search scoped to the current user."""
+        query = args.get("query", "")
+        if not query:
+            return tool_error("Missing required parameter: query")
+        rerank = self._as_bool(args.get("rerank"), False)
+        # Malformed or out-of-range top_k must not raise before the try block.
+        top_k = self._clamp_top_k(args.get("top_k", 10))
+        try:
+            results = client.search(
+                query=query,
+                filters=self._read_filters(),
+                rerank=rerank,
+                top_k=top_k,
+            )
+            self._record_success()
+            if not results:
+                return json.dumps({"result": "No relevant memories found."})
+            items = [
+                {
+                    "memory": r.get("text") or r.get("memory", ""),
+                    "score": r.get("score", 0),
+                }
+                for r in results
+            ]
+            return json.dumps({"results": items, "count": len(items)})
+        except Exception as e:
+            self._record_failure()
+            return tool_error(f"Search failed: {e}")
+
+    def _tool_conclude(self, client: LocalMem0Client, args: dict) -> str:
+        """Handle ``mem0_conclude``: store one fact without LLM extraction."""
+        conclusion = args.get("conclusion", "")
+        if not conclusion:
+            return tool_error("Missing required parameter: conclusion")
+        try:
+            client.add(
+                [{"role": "user", "content": conclusion}],
+                filters=self._write_filters(),
+                infer=False,  # Store verbatim
+            )
+            self._record_success()
+            return json.dumps({"result": "Fact stored."})
+        except Exception as e:
+            self._record_failure()
+            return tool_error(f"Failed to store: {e}")
+
+    def shutdown(self) -> None:
+        """Wait up to 5 s for each background thread, then drop the client."""
+        for t in (self._prefetch_thread, self._sync_thread):
+            if t and t.is_alive():
+                t.join(timeout=5.0)
+        with self._client_lock:
+            self._client = None
+
+
+def register(ctx) -> None:
+    """Plugin entry point: hand a Mem0LocalMemoryProvider instance to ``ctx``."""
+    provider = Mem0LocalMemoryProvider()
+    ctx.register_memory_provider(provider)
diff --git a/after-install.md b/after-install.md
new file mode 100644
index 0000000..7b02c39
--- /dev/null
+++ b/after-install.md
@@ -0,0 +1,194 @@
+# Mem0 Local Plugin Installed ✓
+
+## Quick Start
+
+### 1. Verify Mem0 Server is Running
+
+```bash
+curl http://localhost:8000/health
+```
+
+If not running:
+```bash
+docker run -d -p 8000:8000 mem0ai/mem0:latest
+```
+
+If your server runs elsewhere, for example on `10.0.0.150:8889`:
+```bash
+curl http://10.0.0.150:8889/health
+```
+
+### 2. Configure the Plugin
+
+The installer should have prompted you for configuration. To verify or modify:
+
+```bash
+nano ~/.hermes/.env
+```
+
+Add or update:
+```env
+MEM0_BASE_URL=http://10.0.0.150:8889
+MEM0_USER_ID=henry_hofmann
+MEM0_AGENT_ID=hermes
+```
+
+Or create a config file:
+```bash
+cat > ~/.hermes/mem0-local.json << 'EOF'
+{
+  "base_url": "http://10.0.0.150:8889",
+  "user_id": "henry_hofmann",
+  "agent_id": "hermes",
+  "rerank": true,
+  "timeout": 10.0
+}
+EOF
+```
+
+### 3. Restart Hermes Gateway
+
+```bash
+hermes gateway restart
+```
+
+### 4. Activate Memory Provider
+
+```bash
+hermes memory mem0-local
+```
+
+Verify it's active:
+```bash
+hermes memory status
+```
+
+## Configuration
+
+The plugin supports two configuration methods that work together:
+
+### Method 1: Environment Variables (Primary)
+
+Edit `~/.hermes/.env`:
+```env
+MEM0_BASE_URL=http://10.0.0.150:8889
+MEM0_USER_ID=henry_hofmann
+MEM0_AGENT_ID=hermes
+```
+
+### Method 2: Config File (Overrides)
+
+Create `~/.hermes/mem0-local.json` to override specific settings:
+```json
+{
+  "base_url": "http://10.0.0.150:8889",
+  "user_id": "henry_hofmann",
+  "agent_id": "hermes",
+  "rerank": true,
+  "timeout": 10.0
+}
+```
+
+**Note**: Config file values override environment variables. Use `.env` for defaults and JSON for overrides.
+
+Key settings:
+- `MEM0_BASE_URL` / `base_url` — Local server URL (example: `http://10.0.0.150:8889`)
+- `MEM0_USER_ID` / `user_id` — User identifier for memory scoping (example: `henry_hofmann`)
+- `MEM0_AGENT_ID` / `agent_id` — Agent identifier (default: `hermes`)
+- `rerank` — JSON file only: enable reranking for higher precision (default: `true`)
+- `timeout` — JSON file only: request timeout in seconds (default: `10.0`)
+
+## How It Works
+
+Memory injection happens **automatically** without tool calls:
+
+1. You send a message
+2. Plugin searches Mem0 in background (~40ms)
+3. Relevant memories injected into LLM prompt
+4. LLM responds with full context
+
+**Example**: If you stored "My favorite anime is Naruto", then ask "Is there a new episode of my favorite anime?", the LLM will receive:
+
+```
+Is there a new episode of my favorite anime?
+
+## Mem0 Memory
+- My favorite anime is Naruto
+```
+
+No tool call needed — instant context!
+
+## Migration from Hardcoded Config
+
+Your previous hardcoded configuration:
+```yaml
+mem0:
+  enabled: true
+  api_url: http://localhost:8889
+  user_id: henry_hofmann
+  collection_name: hermes_memory
+  mode: local
+  transparent:
+    enabled: true
+    search_threshold: 0.6
+    max_results: 3
+    include_match_score: true
+    injection_format: system_context
+```
+
+Is now replaced by the plugin with:
+- Same functionality via `MEM0_BASE_URL`, `MEM0_USER_ID`
+- Transparent memory injection via `queue_prefetch()` + `prefetch()`
+- Configurable via `~/.hermes/mem0-local.json`
+
+**Note**: The plugin uses a slightly different approach - memories are injected into the user message (not system prompt) to preserve prompt caching efficiency.
+
+## Next Steps
+
+- Start a conversation and mention personal facts
+- Mem0 will automatically extract and store them
+- Try asking questions that require remembering past conversations
+- Use `mem0_profile` tool to see all stored memories
+
+## Troubleshooting
+
+If memory doesn't work:
+
+1. **Check server connectivity**:
+   ```bash
+   curl http://10.0.0.150:8889/health
+   ```
+
+2. **Check gateway logs**:
+   ```bash
+   hermes gateway logs
+   ```
+
+3. **Verify provider is active**:
+   ```bash
+   hermes memory status
+   ```
+
+4. **Check plugin is loaded**:
+   ```bash
+   hermes plugins list
+   ```
+
+5. **Test manual memory operations**:
+   ```bash
+   # In a conversation, ask Hermes to:
+   - "Store that my favorite color is blue"
+   - "What's my favorite color?"
+   ```
+
+## Available Tools
+
+| Tool | Description |
+|------|-------------|
+| `mem0_profile` | Retrieve all stored memories |
+| `mem0_search` | Search memories semantically |
+| `mem0_conclude` | Store a fact explicitly |
+
+---
+
+**Enjoy your private, self-hosted memory!** 🚀
diff --git a/client.py b/client.py
new file mode 100644
index 0000000..ecce8bc
--- /dev/null
+++ b/client.py
@@ -0,0 +1,139 @@
+"""Local Mem0 server HTTP client."""
+
+from __future__ import annotations
+
+import logging
+from typing import Any, Dict, List, Optional
+
+import requests
+
+logger = logging.getLogger(__name__)
+
+
+class LocalMem0Client:
+    """HTTP client for a self-hosted Mem0 server.
+
+    Expects the server at ``base_url`` to expose:
+    - POST /search
+    - GET /memories
+    - POST /memories
+    - GET /health (used by ``health()``)
+
+    All requests go through one shared ``requests.Session``.
+    """
+
+    def __init__(self, base_url: str, timeout: float = 10.0):
+        """Create a client.
+
+        Args:
+            base_url: Server root URL; trailing slashes are stripped.
+            timeout: Per-request timeout in seconds used by ``_request``.
+        """
+        self.base_url = base_url.rstrip("/")
+        self.timeout = timeout
+        self.session = requests.Session()
+        self.session.headers.update(
+            {
+                "Content-Type": "application/json",
+                "User-Agent": "hermes-agent-mem0-local-plugin/1.0.0",
+            }
+        )
+
+    def _request(
+        self,
+        method: str,
+        endpoint: str,
+        json: Optional[Dict] = None,
+        params: Optional[Dict] = None,
+    ) -> Dict:
+        """Send one request and return the decoded JSON body.
+
+        Timeouts, connection errors and HTTP error statuses are logged and then
+        re-raised unchanged so callers can apply their own failure handling.
+
+        Args:
+            method: HTTP method, e.g. "GET" or "POST".
+            endpoint: Path appended to ``base_url`` (starts with "/").
+            json: Optional JSON request body.
+            params: Optional query-string parameters.
+
+        Raises:
+            requests.exceptions.Timeout: The request exceeded ``self.timeout``.
+            requests.exceptions.ConnectionError: The server was unreachable.
+            requests.exceptions.HTTPError: ``raise_for_status()`` rejected the
+                response status.
+        """
+        url = f"{self.base_url}{endpoint}"
+        try:
+            resp = self.session.request(
+                method, url, json=json, params=params, timeout=self.timeout
+            )
+            resp.raise_for_status()
+            return resp.json()
+        except requests.exceptions.Timeout:
+            logger.error("Mem0 request timed out after %ss", self.timeout)
+            raise
+        except requests.exceptions.ConnectionError as e:
+            logger.error("Failed to connect to Mem0 server at %s: %s", self.base_url, e)
+            raise
+        except requests.exceptions.HTTPError as e:
+            logger.error(
+                "Mem0 API error: %s - %s", e.response.status_code, e.response.text
+            )
+            raise
+
+    def search(
+        self,
+        query: str,
+        filters: Dict[str, Any],
+        rerank: bool = False,
+        top_k: int = 10,
+    ) -> List[Dict]:
+        """Search memories by semantic similarity.
+
+        Args:
+            query: Search query string
+            filters: Filters dict (e.g., {"user_id": "hermes-user"})
+            rerank: Enable reranking for higher precision; pass None to leave
+                the field out of the request
+            top_k: Maximum results to return
+
+        Returns:
+            List of memory dicts as returned by the server.
+        """
+        payload = {
+            "query": query,
+            "user_id": filters.get("user_id"),
+            "agent_id": filters.get("agent_id"),
+            "top_k": top_k,
+        }
+        if rerank is not None:
+            payload["rerank"] = rerank
+        result = self._request("POST", "/search", json=payload)
+        return self._unwrap_results(result)
+
+    def get_all(self, filters: Dict[str, Any]) -> List[Dict]:
+        """Get all memories matching filters.
+
+        Args:
+            filters: Filters dict (e.g., {"user_id": "hermes-user"}); sent as
+                query-string parameters.
+
+        Returns:
+            List of all matching memory dicts.
+        """
+        result = self._request("GET", "/memories", params=filters)
+        return self._unwrap_results(result)
+
+    def add(
+        self,
+        messages: List[Dict[str, str]],
+        filters: Dict[str, Any],
+        infer: bool = True,
+    ) -> Dict:
+        """Add conversation messages, or store one fact verbatim.
+
+        Args:
+            messages: List of {"role": "user|assistant", "content": "..."}
+            filters: Filters dict for scoping (user_id, agent_id)
+            infer: Whether to extract facts via LLM (True) or store verbatim
+                (False). With False only the first message is sent, as a
+                single user message.
+
+        Returns:
+            The decoded JSON response of POST /memories.
+        """
+        payload = {
+            "messages": messages,
+            "user_id": filters.get("user_id"),
+            "agent_id": filters.get("agent_id"),
+        }
+        if not infer:
+            first = messages[0]
+            content = first.get("content", "") if isinstance(first, dict) else first
+            payload["messages"] = [{"role": "user", "content": content}]
+            # Without this field the server cannot tell a verbatim store from a
+            # normal add. Only sent when False, so the default path is unchanged.
+            # NOTE(review): confirm the deployed server accepts an `infer` field.
+            payload["infer"] = False
+        return self._request("POST", "/memories", json=payload)
+
+    @staticmethod
+    def _unwrap_results(response: Any) -> List[Dict]:
+        """Normalize a Mem0 API response to a list of memory dicts.
+
+        A dict response is searched for a list under "memories" first, then
+        under "results". A bare list is returned as is. Anything else,
+        including a key holding null, yields an empty list, so callers always
+        receive a list.
+        """
+        if isinstance(response, list):
+            return response
+        if isinstance(response, dict):
+            for key in ("memories", "results"):
+                value = response.get(key)
+                if isinstance(value, list):
+                    return value
+        return []
+
+    def health(self) -> bool:
+        """Return True when GET /health answers with status 200.
+
+        Uses a fixed 5 s timeout; any ``requests`` error yields False.
+        """
+        try:
+            resp = self.session.get(f"{self.base_url}/health", timeout=5.0)
+            return resp.status_code == 200
+        except requests.exceptions.RequestException:
+            return False
diff --git a/plugin.yaml b/plugin.yaml
new file mode 100644
index 0000000..03b2b58
--- /dev/null
+++ b/plugin.yaml
@@ -0,0 +1,20 @@
+name: mem0-local
+version: "1.0.0"
+description: "Mem0 local server memory provider (self-hosted)"
+author: "Henry Hofmann"
+manifest_version: 1
+
+requires_env:
+  - name: MEM0_BASE_URL
+    description: "Local Mem0 server URL (e.g., http://localhost:8000)"
+    url: "https://github.com/mem0ai/mem0"
+  - name: MEM0_USER_ID
+    description: "User identifier for memory scoping (default: hermes-user)"
+
+provides_tools:
+  - mem0_profile
+  - mem0_search
+  - mem0_conclude
+
+pip_dependencies:
+  - requests