Implement streaming responses from Open-WebUI and update configuration for streaming support

This commit is contained in:
Pakobbix 2025-08-19 12:22:25 +02:00
parent e9cfea1424
commit c8fba0594c
2 changed files with 105 additions and 7 deletions

@@ -18,5 +18,7 @@ tools:
   - Tool_ID_1
   - Tool_ID_2
+use_streaming: true  # Stream the answer as it is generated, so replies feel more interactive.
 # Optional system prompt (leave it empty to use the default one, or the system prompt set in Open-WebUI for the specific model)
 system_prompt: ""

@@ -29,6 +29,7 @@ MODEL_NAME = config["model_name"]  # Model name to use, e.g., "gpt-3.5-tur
 KNOW_BASE = config["knowledge_base"]  # Knowledge base to use, e.g., "knowledge_base_v1"
 TOOLS = config.get("tools", [])  # List of tool IDs
+USE_STREAMING = config.get("use_streaming", False)  # Enable/disable streaming responses
 SYSTEM_PROMPT = config.get("system_prompt", None)  # Optional system prompt to prepend to user messages
 ALLOW_DMS = config.get("allow_dms", False)  # Allow DMs to the bot (default: False)
@@ -76,6 +77,91 @@ async def _query_openwebui(user_text: str, channel_id: int, tools_list: list):
             logging.debug(f"Unparsed response from Open-WebUI: {response_data}")
             return response_data['choices'][0]['message']['content']
+async def _query_openwebui_streaming(user_text: str, channel_id: int, tools_list: list, message_to_edit):
+    """
+    Stream a response from Open-WebUI and progressively edit a Discord message.
+
+    Args:
+        user_text (str): The user's message to send to Open-WebUI.
+        channel_id (int): The Discord channel ID where the message was sent.
+        tools_list (list): List of tool IDs to use, if any.
+        message_to_edit: The Discord message object to edit with the streamed content.
+    """
+    import json  # stdlib; imported once here rather than inside the parse loop
+    async with aiohttp.ClientSession() as session:
+        payload = {
+            "model": MODEL_NAME,
+            "stream": True,  # Enable streaming
+            "messages": [
+                {
+                    "role": "user",
+                    "content": user_text
+                }
+            ]
+        }
+        if tools_list:
+            payload["tool_ids"] = tools_list
+            logging.debug(f"🔧 Using tools: {payload['tool_ids']}")
+        logging.debug(f"Request payload to Open-WebUI: {payload}")
+        async with session.post(f"{OPENWEBUI_URL}/api/chat/completions",
+                                json=payload,
+                                headers={"Authorization": f"Bearer {OPENWEBUI_API_KEY}"}) as resp:
+            if resp.status != 200:
+                data = await resp.text()
+                raise RuntimeError(f"Open-WebUI responded {resp.status}: {data}")
+            accumulated_content = ""
+            last_edit_time = 0
+            edit_interval = 1.0  # Edit at most once per second to avoid Discord rate limits
+            async for line in resp.content:
+                line = line.decode('utf-8').strip()
+                if line.startswith('data: '):
+                    data_str = line[6:]  # Strip the 'data: ' SSE prefix
+                    if data_str == '[DONE]':
+                        break
+                    try:
+                        chunk_data = json.loads(data_str)
+                        if 'choices' in chunk_data and len(chunk_data['choices']) > 0:
+                            delta = chunk_data['choices'][0].get('delta', {})
+                            if 'content' in delta:
+                                accumulated_content += delta['content']
+                                # Edit the message periodically to stay under rate limits
+                                current_time = asyncio.get_running_loop().time()
+                                if current_time - last_edit_time >= edit_interval:
+                                    try:
+                                        # Keep interim edits under Discord's 2000-character limit
+                                        content_to_show = accumulated_content[:1900]
+                                        if len(accumulated_content) > 1900:
+                                            content_to_show += "..."
+                                        await message_to_edit.edit(content=content_to_show)
+                                        last_edit_time = current_time
+                                    except discord.HTTPException:
+                                        # Rate limited; skip this edit and retry on a later chunk
+                                        pass
+                    except json.JSONDecodeError:
+                        continue
+            # Final edit with the complete content
+            try:
+                final_content = accumulated_content[:2000]  # Respect Discord's limit
+                await message_to_edit.edit(content=final_content)
+            except discord.HTTPException:
+                pass
+            return accumulated_content
 # --------------------------------------------------------------------------- #
 # Discord bot logic discord.py
 # --------------------------------------------------------------------------- #
@@ -148,12 +234,22 @@ async def on_message(message):
     # ----------------------------------------------------------------------- #
     # B. Query Open-WebUI and show typing indicator
     # ----------------------------------------------------------------------- #
+    initial_message = None  # Set in streaming mode so the error handler can edit it
     try:
+        if USE_STREAMING:
+            # Send an initial placeholder message while the answer is gathered
+            initial_message = await message.reply("Please wait a moment, the information is being gathered...")
+            # Stream the response and edit the placeholder as chunks arrive
+            await _query_openwebui_streaming(prompt, message.channel.id, TOOLS, initial_message)
+        else:
+            # Use the original non-streaming approach
             async with message.channel.typing():
+                # Query the Open-WebUI API while showing "Bot is typing..."
                 reply = await _query_openwebui(prompt, message.channel.id, TOOLS)
+            # Send the reply
             await message.reply(reply)
     except Exception as e:
+        # In streaming mode, surface the error in the placeholder message
+        if USE_STREAMING and initial_message is not None:
+            await initial_message.edit(content=f"⚠ Error contacting the Open-WebUI API: {e}")
+        else:
             await message.reply(f"⚠ Error contacting the Open-WebUI API: {e}")
     # No need to return here as the function ends after this block.