Implement streaming responses from Open-WebUI and update configuration for streaming support

This commit is contained in:
Pakobbix 2025-08-19 12:22:25 +02:00
parent e9cfea1424
commit c8fba0594c
2 changed files with 105 additions and 7 deletions

@@ -18,5 +18,7 @@ tools:
   - Tool_ID_1
   - Tool_ID_2
+use_streaming: true  # Stream the answer as it is generated, so replies feel more interactive.
 # Optional system prompt (leave it empty to use the default one, or the system prompt set in Open-WebUI for the specific model)
 system_prompt: ""

@@ -29,6 +29,7 @@ MODEL_NAME = config["model_name"]  # Model name to use, e.g., "gpt-3.5-tur
 KNOW_BASE = config["knowledge_base"]  # Knowledge base to use, e.g., "knowledge_base_v1"
 TOOLS = config.get("tools", [])  # List of tool IDs
+USE_STREAMING = config.get("use_streaming", False)  # Enable/disable streaming responses
 SYSTEM_PROMPT = config.get("system_prompt", None)  # Optional system prompt to prepend to user messages
 ALLOW_DMS = config.get("allow_dms", False)  # Allow DMs to the bot (default: False)
@@ -76,6 +77,91 @@ async def _query_openwebui(user_text: str, channel_id: int, tools_list: list):
             logging.debug(f"Unparsed response from Open-WebUI: {response_data}")
             return response_data['choices'][0]['message']['content']
+async def _query_openwebui_streaming(user_text: str, channel_id: int, tools_list: list, message_to_edit):
+    """
+    Stream a response from Open-WebUI and progressively edit a Discord message.
+
+    Args:
+        user_text (str): The user's message to send to Open-WebUI.
+        channel_id (int): The Discord channel ID where the message was sent.
+        tools_list (list): List of tool IDs to use, if any.
+        message_to_edit: The Discord message object to edit with the streamed content.
+    """
+    import json  # stdlib; imported once here rather than inside the parse loop
+    async with aiohttp.ClientSession() as session:
+        payload = {
+            "model": MODEL_NAME,
+            "stream": True,  # Enable streaming
+            "messages": [
+                {
+                    "role": "user",
+                    "content": user_text
+                }
+            ]
+        }
+        if tools_list:
+            payload["tool_ids"] = tools_list
+            logging.debug(f"🔧 Using tools: {payload['tool_ids']}")
+        logging.debug(f"Request payload to Open-WebUI: {payload}")
+        async with session.post(f"{OPENWEBUI_URL}/api/chat/completions",
+                                json=payload,
+                                headers={"Authorization": f"Bearer {OPENWEBUI_API_KEY}"}) as resp:
+            if resp.status != 200:
+                data = await resp.text()
+                raise RuntimeError(f"Open-WebUI responded {resp.status}: {data}")
+            accumulated_content = ""
+            last_edit_time = 0
+            edit_interval = 1.0  # Edit at most once per second to avoid Discord rate limits
+            async for line in resp.content:
+                line = line.decode('utf-8').strip()
+                if line.startswith('data: '):
+                    data_str = line[6:]  # Strip the 'data: ' SSE prefix
+                    if data_str == '[DONE]':
+                        break
+                    try:
+                        chunk_data = json.loads(data_str)
+                        if 'choices' in chunk_data and len(chunk_data['choices']) > 0:
+                            delta = chunk_data['choices'][0].get('delta', {})
+                            if 'content' in delta:
+                                accumulated_content += delta['content']
+                                # Edit the message periodically to stay under rate limits
+                                current_time = asyncio.get_running_loop().time()
+                                if current_time - last_edit_time >= edit_interval:
+                                    try:
+                                        # Keep interim edits under Discord's 2000-character limit
+                                        content_to_show = accumulated_content[:1900]
+                                        if len(accumulated_content) > 1900:
+                                            content_to_show += "..."
+                                        await message_to_edit.edit(content=content_to_show)
+                                        last_edit_time = current_time
+                                    except discord.HTTPException:
+                                        # Rate limited; skip this edit and retry on a later chunk
+                                        pass
+                    except json.JSONDecodeError:
+                        continue
+            # Final edit with the complete content
+            try:
+                final_content = accumulated_content[:2000]  # Respect Discord's limit
+                await message_to_edit.edit(content=final_content)
+            except discord.HTTPException:
+                pass
+            return accumulated_content
 # --------------------------------------------------------------------------- #
 # Discord bot logic discord.py
 # --------------------------------------------------------------------------- #
@@ -148,12 +234,22 @@ async def on_message(message):
     # ----------------------------------------------------------------------- #
     # B. Query Open-WebUI and show typing indicator
     # ----------------------------------------------------------------------- #
+    initial_message = None  # Set in streaming mode so the error handler can edit it
     try:
+        if USE_STREAMING:
+            # Send an initial placeholder message while the answer is gathered
+            initial_message = await message.reply("Please wait a moment, the information is being gathered...")
+            # Stream the response and edit the placeholder as chunks arrive
+            await _query_openwebui_streaming(prompt, message.channel.id, TOOLS, initial_message)
+        else:
+            # Use the original non-streaming approach
             async with message.channel.typing():
+                # Query the Open-WebUI API while showing "Bot is typing..."
                 reply = await _query_openwebui(prompt, message.channel.id, TOOLS)
+            # Send the reply
             await message.reply(reply)
     except Exception as e:
+        # In streaming mode, surface the error in the placeholder message
+        if USE_STREAMING and initial_message is not None:
+            await initial_message.edit(content=f"⚠ Error contacting the Open-WebUI API: {e}")
+        else:
             await message.reply(f"⚠ Error contacting the Open-WebUI API: {e}")
     # No need to return here as the function ends after this block.