Implement streaming responses from Open-WebUI and update configuration for streaming support

2025-08-19 12:22:25 +02:00
parent e9cfea1424
commit c8fba0594c
2 changed files with 105 additions and 7 deletions
--- a/discord_connector/example.config.yml
+++ b/discord_connector/example.config.yml
@@ -18,5 +18,7 @@ tools:
  - Tool_ID_1
  - Tool_ID_2

+use_streaming: true # Allows to stream the answer to feel more interactive.
+
 # optional system prompt (you can leave it empty to use the default one or the systemprompt given in open-webui for the specific model)
 system_prompt:     ""
--- a/discord_connector/open-webui_to_discord.py
+++ b/discord_connector/open-webui_to_discord.py
@@ -29,6 +29,7 @@ MODEL_NAME        = config["model_name"] # Model name to use, e.g., "gpt-3.5-tur
 KNOW_BASE         = config["knowledge_base"] # Knowledge base to use, e.g., "knowledge_base_v1"

 TOOLS             = config.get("tools", [])                # list of tool-ids
+USE_STREAMING     = config.get("use_streaming", False) # Enable/disable streaming responses

 SYSTEM_PROMPT     = config.get("system_prompt", None) # Optional system prompt to prepend to user messages
 ALLOW_DMS         = config.get("allow_dms", False) # Allow DMs to the bot (default: False)
@@ -76,6 +77,91 @@ async def _query_openwebui(user_text: str, channel_id: int, tools_list: list):
            logging.debug(f"Unparsed response from Open-WebUI: {response_data}")
            return response_data['choices'][0]['message']['content']

+async def _query_openwebui_streaming(user_text: str, channel_id: int, tools_list: list, message_to_edit):
+    """
+    Stream response from Open-WebUI and edit Discord message progressively.
+
+    Args:
+        user_text (str): The user's message to send to the Open-WebUI.
+        channel_id (int): The Discord channel ID where the message was sent.
+        tools_list (list): List of tool IDs to use, if any.
+        message_to_edit: The Discord message object to edit with streaming content.
+    """
+    async with aiohttp.ClientSession() as session:
+        payload = {
+            "model": MODEL_NAME,
+            "stream": True,  # Enable streaming
+            "messages": [
+                {
+                    "role": "user",
+                    "content": user_text
+                }
+            ]
+        }
+
+        if tools_list:
+            payload["tool_ids"] = tools_list
+            logging.debug(f"🔧 Using tools: {payload['tool_ids']}")
+
+        logging.debug(f"Request payload to Open-WebUI: {payload}")
+
+        async with session.post(f"{OPENWEBUI_URL}/api/chat/completions",
+                               json=payload,
+                               headers={"Authorization": f"Bearer {OPENWEBUI_API_KEY}"}) as resp:
+
+            if resp.status != 200:
+                data = await resp.text()
+                raise RuntimeError(f"Open-WebUI responded {resp.status}: {data}")
+
+            accumulated_content = ""
+            last_edit_time = 0
+            edit_interval = 1.0  # Edit every 1 second to avoid rate limits
+
+            async for line in resp.content:
+                line = line.decode('utf-8').strip()
+
+                if line.startswith('data: '):
+                    data_str = line[6:]  # Remove 'data: ' prefix
+
+                    if data_str == '[DONE]':
+                        break
+
+                    try:
+                        import json
+                        chunk_data = json.loads(data_str)
+
+                        if 'choices' in chunk_data and len(chunk_data['choices']) > 0:
+                            delta = chunk_data['choices'][0].get('delta', {})
+                            if 'content' in delta:
+                                accumulated_content += delta['content']
+
+                                # Edit message periodically to avoid rate limits
+                                current_time = asyncio.get_event_loop().time()
+                                if current_time - last_edit_time >= edit_interval:
+                                    try:
+                                        # Limit message length to Discord's 2000 character limit
+                                        content_to_show = accumulated_content[:1900]
+                                        if len(accumulated_content) > 1900:
+                                            content_to_show += "..."
+
+                                        await message_to_edit.edit(content=content_to_show)
+                                        last_edit_time = current_time
+                                    except discord.HTTPException:
+                                        # Handle rate limits gracefully
+                                        pass
+
+                    except json.JSONDecodeError:
+                        continue
+
+            # Final edit with complete content
+            try:
+                final_content = accumulated_content[:2000]  # Respect Discord's limit
+                await message_to_edit.edit(content=final_content)
+            except discord.HTTPException:
+                pass
+
+            return accumulated_content
+
 # --------------------------------------------------------------------------- #
 #  Discord bot logic – discord.py
 # --------------------------------------------------------------------------- #
@@ -148,13 +234,23 @@ async def on_message(message):
    #   B.  Query Open-WebUI and show typing indicator
    # ----------------------------------------------------------------------- #
    try:
-        async with message.channel.typing():
-            # Query the Open-WebUI API while showing "Bot is typing..."
-            reply = await _query_openwebui(prompt, message.channel.id, TOOLS)
-            # Send the reply
-            await message.reply(reply)
+        if USE_STREAMING:
+            # Send initial "collecting information" message
+            initial_message = await message.reply("Bitte warte kurz, die Informationen werden gesammelt...")
+
+            # Start streaming response and edit the message
+            await _query_openwebui_streaming(prompt, message.channel.id, TOOLS, initial_message)
+        else:
+            # Use the original non-streaming approach
+            async with message.channel.typing():
+                reply = await _query_openwebui(prompt, message.channel.id, TOOLS)
+                await message.reply(reply)
    except Exception as e:
-        await message.reply(f"⚠ Error contacting the Open-WebUI API: {e}")
+        # If we're in streaming mode and have an initial message, edit it with error
+        if USE_STREAMING and 'initial_message' in locals():
+            await initial_message.edit(content=f"⚠ Error contacting the Open-WebUI API: {e}")
+        else:
+            await message.reply(f"⚠ Error contacting the Open-WebUI API: {e}")
        # No need to return here as the function ends after this block.

 # --------------------------------------------------------------------------- #