Implement streaming responses from Open-WebUI and update configuration for streaming support
This commit is contained in:
parent
e9cfea1424
commit
c8fba0594c
@ -18,5 +18,7 @@ tools:
|
|||||||
- Tool_ID_1
|
- Tool_ID_1
|
||||||
- Tool_ID_2
|
- Tool_ID_2
|
||||||
|
|
||||||
|
use_streaming: true # Allows streaming the answer so replies feel more interactive.
|
||||||
|
|
||||||
# optional system prompt (you can leave it empty to use the default one or the system prompt given in open-webui for the specific model)
|
# optional system prompt (you can leave it empty to use the default one or the system prompt given in open-webui for the specific model)
|
||||||
system_prompt: ""
|
system_prompt: ""
|
||||||
|
|||||||
@ -29,6 +29,7 @@ MODEL_NAME = config["model_name"] # Model name to use, e.g., "gpt-3.5-tur
|
|||||||
KNOW_BASE = config["knowledge_base"] # Knowledge base to use, e.g., "knowledge_base_v1"
|
KNOW_BASE = config["knowledge_base"] # Knowledge base to use, e.g., "knowledge_base_v1"
|
||||||
|
|
||||||
TOOLS = config.get("tools", []) # list of tool-ids
|
TOOLS = config.get("tools", []) # list of tool-ids
|
||||||
|
USE_STREAMING = config.get("use_streaming", False) # Enable/disable streaming responses
|
||||||
|
|
||||||
SYSTEM_PROMPT = config.get("system_prompt", None) # Optional system prompt to prepend to user messages
|
SYSTEM_PROMPT = config.get("system_prompt", None) # Optional system prompt to prepend to user messages
|
||||||
ALLOW_DMS = config.get("allow_dms", False) # Allow DMs to the bot (default: False)
|
ALLOW_DMS = config.get("allow_dms", False) # Allow DMs to the bot (default: False)
|
||||||
@ -76,6 +77,91 @@ async def _query_openwebui(user_text: str, channel_id: int, tools_list: list):
|
|||||||
logging.debug(f"Unparsed response from Open-WebUI: {response_data}")
|
logging.debug(f"Unparsed response from Open-WebUI: {response_data}")
|
||||||
return response_data['choices'][0]['message']['content']
|
return response_data['choices'][0]['message']['content']
|
||||||
|
|
||||||
|
async def _query_openwebui_streaming(user_text: str, channel_id: int, tools_list: list, message_to_edit):
    """
    Stream a chat completion from Open-WebUI and progressively edit a Discord message.

    Args:
        user_text (str): The user's message to send to the Open-WebUI.
        channel_id (int): The Discord channel ID where the message was sent.
            NOTE(review): currently unused in this function — kept for signature
            parity with `_query_openwebui`.
        tools_list (list): List of tool IDs to use, if any.
        message_to_edit: The Discord message object to edit with streaming content.

    Returns:
        str: The full accumulated response text.

    Raises:
        RuntimeError: If Open-WebUI responds with a non-200 status.
    """
    # Hoisted out of the per-chunk loop: re-importing on every SSE chunk is
    # wasteful (the original had `import json` inside the `async for` body).
    import json

    async with aiohttp.ClientSession() as session:
        payload = {
            "model": MODEL_NAME,
            "stream": True,  # Enable streaming (server sends SSE chunks)
            "messages": [
                {
                    "role": "user",
                    "content": user_text
                }
            ]
        }

        if tools_list:
            payload["tool_ids"] = tools_list
            logging.debug(f"🔧 Using tools: {payload['tool_ids']}")

        logging.debug(f"Request payload to Open-WebUI: {payload}")

        async with session.post(f"{OPENWEBUI_URL}/api/chat/completions",
                                json=payload,
                                headers={"Authorization": f"Bearer {OPENWEBUI_API_KEY}"}) as resp:

            if resp.status != 200:
                data = await resp.text()
                raise RuntimeError(f"Open-WebUI responded {resp.status}: {data}")

            accumulated_content = ""
            last_edit_time = 0.0
            edit_interval = 1.0  # Edit every 1 second to avoid Discord rate limits

            # `asyncio.get_event_loop()` is deprecated inside a coroutine since
            # Python 3.10; `get_running_loop()` is the correct, equivalent call.
            loop = asyncio.get_running_loop()

            async for line in resp.content:
                line = line.decode('utf-8').strip()

                # Server-Sent Events: payload lines are prefixed with "data: "
                if line.startswith('data: '):
                    data_str = line[6:]  # Remove 'data: ' prefix

                    if data_str == '[DONE]':
                        break

                    try:
                        chunk_data = json.loads(data_str)

                        if 'choices' in chunk_data and len(chunk_data['choices']) > 0:
                            delta = chunk_data['choices'][0].get('delta', {})
                            if 'content' in delta:
                                accumulated_content += delta['content']

                                # Edit the message only periodically to avoid rate limits
                                current_time = loop.time()
                                if current_time - last_edit_time >= edit_interval:
                                    try:
                                        # Stay under Discord's 2000-character limit,
                                        # leaving headroom for the "..." suffix.
                                        content_to_show = accumulated_content[:1900]
                                        if len(accumulated_content) > 1900:
                                            content_to_show += "..."

                                        await message_to_edit.edit(content=content_to_show)
                                        last_edit_time = current_time
                                    except discord.HTTPException:
                                        # Best-effort: skip this edit if rate limited;
                                        # the next interval (or final edit) catches up.
                                        pass
                    except json.JSONDecodeError:
                        # Malformed/partial chunk — skip it and keep streaming.
                        continue

            # Final edit with the complete content
            try:
                final_content = accumulated_content[:2000]  # Respect Discord's hard limit
                await message_to_edit.edit(content=final_content)
            except discord.HTTPException:
                # Best-effort: the accumulated text is still returned to the caller.
                pass

            return accumulated_content
|
||||||
|
|
||||||
# --------------------------------------------------------------------------- #
|
# --------------------------------------------------------------------------- #
|
||||||
# Discord bot logic – discord.py
|
# Discord bot logic – discord.py
|
||||||
# --------------------------------------------------------------------------- #
|
# --------------------------------------------------------------------------- #
|
||||||
@ -148,13 +234,23 @@ async def on_message(message):
|
|||||||
# B. Query Open-WebUI and show typing indicator
|
# B. Query Open-WebUI and show typing indicator
|
||||||
# ----------------------------------------------------------------------- #
|
# ----------------------------------------------------------------------- #
|
||||||
try:
|
try:
|
||||||
async with message.channel.typing():
|
if USE_STREAMING:
|
||||||
# Query the Open-WebUI API while showing "Bot is typing..."
|
# Send initial "collecting information" message
|
||||||
reply = await _query_openwebui(prompt, message.channel.id, TOOLS)
|
initial_message = await message.reply("Bitte warte kurz, die Informationen werden gesammelt...")
|
||||||
# Send the reply
|
|
||||||
await message.reply(reply)
|
# Start streaming response and edit the message
|
||||||
|
await _query_openwebui_streaming(prompt, message.channel.id, TOOLS, initial_message)
|
||||||
|
else:
|
||||||
|
# Use the original non-streaming approach
|
||||||
|
async with message.channel.typing():
|
||||||
|
reply = await _query_openwebui(prompt, message.channel.id, TOOLS)
|
||||||
|
await message.reply(reply)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
await message.reply(f"⚠ Error contacting the Open-WebUI API: {e}")
|
# If we're in streaming mode and have an initial message, edit it with error
|
||||||
|
if USE_STREAMING and 'initial_message' in locals():
|
||||||
|
await initial_message.edit(content=f"⚠ Error contacting the Open-WebUI API: {e}")
|
||||||
|
else:
|
||||||
|
await message.reply(f"⚠ Error contacting the Open-WebUI API: {e}")
|
||||||
# No need to return here as the function ends after this block.
|
# No need to return here as the function ends after this block.
|
||||||
|
|
||||||
# --------------------------------------------------------------------------- #
|
# --------------------------------------------------------------------------- #
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user