Implement streaming responses from Open-WebUI and update configuration for streaming support
This commit is contained in:
parent
e9cfea1424
commit
c8fba0594c
@ -18,5 +18,7 @@ tools:
|
||||
- Tool_ID_1
|
||||
- Tool_ID_2
|
||||
|
||||
use_streaming: true # Allows to stream the answer to feel more interactive.
|
||||
|
||||
# optional system prompt (you can leave it empty to use the default one or the systemprompt given in open-webui for the specific model)
|
||||
system_prompt: ""
|
||||
@ -29,6 +29,7 @@ MODEL_NAME = config["model_name"] # Model name to use, e.g., "gpt-3.5-tur
|
||||
KNOW_BASE = config["knowledge_base"] # Knowledge base to use, e.g., "knowledge_base_v1"
|
||||
|
||||
TOOLS = config.get("tools", []) # list of tool-ids
|
||||
USE_STREAMING = config.get("use_streaming", False) # Enable/disable streaming responses
|
||||
|
||||
SYSTEM_PROMPT = config.get("system_prompt", None) # Optional system prompt to prepend to user messages
|
||||
ALLOW_DMS = config.get("allow_dms", False) # Allow DMs to the bot (default: False)
|
||||
@ -76,6 +77,91 @@ async def _query_openwebui(user_text: str, channel_id: int, tools_list: list):
|
||||
logging.debug(f"Unparsed response from Open-WebUI: {response_data}")
|
||||
return response_data['choices'][0]['message']['content']
|
||||
|
||||
async def _query_openwebui_streaming(user_text: str, channel_id: int, tools_list: list, message_to_edit):
|
||||
"""
|
||||
Stream response from Open-WebUI and edit Discord message progressively.
|
||||
|
||||
Args:
|
||||
user_text (str): The user's message to send to the Open-WebUI.
|
||||
channel_id (int): The Discord channel ID where the message was sent.
|
||||
tools_list (list): List of tool IDs to use, if any.
|
||||
message_to_edit: The Discord message object to edit with streaming content.
|
||||
"""
|
||||
async with aiohttp.ClientSession() as session:
|
||||
payload = {
|
||||
"model": MODEL_NAME,
|
||||
"stream": True, # Enable streaming
|
||||
"messages": [
|
||||
{
|
||||
"role": "user",
|
||||
"content": user_text
|
||||
}
|
||||
]
|
||||
}
|
||||
|
||||
if tools_list:
|
||||
payload["tool_ids"] = tools_list
|
||||
logging.debug(f"🔧 Using tools: {payload['tool_ids']}")
|
||||
|
||||
logging.debug(f"Request payload to Open-WebUI: {payload}")
|
||||
|
||||
async with session.post(f"{OPENWEBUI_URL}/api/chat/completions",
|
||||
json=payload,
|
||||
headers={"Authorization": f"Bearer {OPENWEBUI_API_KEY}"}) as resp:
|
||||
|
||||
if resp.status != 200:
|
||||
data = await resp.text()
|
||||
raise RuntimeError(f"Open-WebUI responded {resp.status}: {data}")
|
||||
|
||||
accumulated_content = ""
|
||||
last_edit_time = 0
|
||||
edit_interval = 1.0 # Edit every 1 second to avoid rate limits
|
||||
|
||||
async for line in resp.content:
|
||||
line = line.decode('utf-8').strip()
|
||||
|
||||
if line.startswith('data: '):
|
||||
data_str = line[6:] # Remove 'data: ' prefix
|
||||
|
||||
if data_str == '[DONE]':
|
||||
break
|
||||
|
||||
try:
|
||||
import json
|
||||
chunk_data = json.loads(data_str)
|
||||
|
||||
if 'choices' in chunk_data and len(chunk_data['choices']) > 0:
|
||||
delta = chunk_data['choices'][0].get('delta', {})
|
||||
if 'content' in delta:
|
||||
accumulated_content += delta['content']
|
||||
|
||||
# Edit message periodically to avoid rate limits
|
||||
current_time = asyncio.get_event_loop().time()
|
||||
if current_time - last_edit_time >= edit_interval:
|
||||
try:
|
||||
# Limit message length to Discord's 2000 character limit
|
||||
content_to_show = accumulated_content[:1900]
|
||||
if len(accumulated_content) > 1900:
|
||||
content_to_show += "..."
|
||||
|
||||
await message_to_edit.edit(content=content_to_show)
|
||||
last_edit_time = current_time
|
||||
except discord.HTTPException:
|
||||
# Handle rate limits gracefully
|
||||
pass
|
||||
|
||||
except json.JSONDecodeError:
|
||||
continue
|
||||
|
||||
# Final edit with complete content
|
||||
try:
|
||||
final_content = accumulated_content[:2000] # Respect Discord's limit
|
||||
await message_to_edit.edit(content=final_content)
|
||||
except discord.HTTPException:
|
||||
pass
|
||||
|
||||
return accumulated_content
|
||||
|
||||
# --------------------------------------------------------------------------- #
|
||||
# Discord bot logic – discord.py
|
||||
# --------------------------------------------------------------------------- #
|
||||
@ -148,13 +234,23 @@ async def on_message(message):
|
||||
# B. Query Open-WebUI and show typing indicator
|
||||
# ----------------------------------------------------------------------- #
|
||||
try:
|
||||
async with message.channel.typing():
|
||||
# Query the Open-WebUI API while showing "Bot is typing..."
|
||||
reply = await _query_openwebui(prompt, message.channel.id, TOOLS)
|
||||
# Send the reply
|
||||
await message.reply(reply)
|
||||
if USE_STREAMING:
|
||||
# Send initial "collecting information" message
|
||||
initial_message = await message.reply("Bitte warte kurz, die Informationen werden gesammelt...")
|
||||
|
||||
# Start streaming response and edit the message
|
||||
await _query_openwebui_streaming(prompt, message.channel.id, TOOLS, initial_message)
|
||||
else:
|
||||
# Use the original non-streaming approach
|
||||
async with message.channel.typing():
|
||||
reply = await _query_openwebui(prompt, message.channel.id, TOOLS)
|
||||
await message.reply(reply)
|
||||
except Exception as e:
|
||||
await message.reply(f"⚠ Error contacting the Open-WebUI API: {e}")
|
||||
# If we're in streaming mode and have an initial message, edit it with error
|
||||
if USE_STREAMING and 'initial_message' in locals():
|
||||
await initial_message.edit(content=f"⚠ Error contacting the Open-WebUI API: {e}")
|
||||
else:
|
||||
await message.reply(f"⚠ Error contacting the Open-WebUI API: {e}")
|
||||
# No need to return here as the function ends after this block.
|
||||
|
||||
# --------------------------------------------------------------------------- #
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user