Compare commits

...

36 Commits
1.5 ... main

Author SHA1 Message Date
adbb97ecc2 added desc_act option 2023-06-03 16:27:13 +02:00
b10c023810 next try adding patchelf installation
Some checks reported warnings
Package Release / package_on_ubuntu (push) Has been cancelled
Package Release / package_on_windows (push) Has been cancelled
2023-05-27 15:27:17 +02:00
757debce3a trying nuitka with linux 2023-05-27 15:23:35 +02:00
a8f033ac5b
switching to nuitka, fix removing build folders 2023-05-27 15:19:49 +02:00
a32aff730e
fixes indentations 2023-05-27 15:10:37 +02:00
0cd44016f5 trying out nuitka on github actions 2023-05-27 15:09:44 +02:00
21930ba8e9 fixed ubuntu & Windows package creation 2023-05-27 13:38:52 +02:00
b96c0914ed
Update package.yml 2023-05-27 13:36:45 +02:00
a07146e879
deleted venv
Some checks reported warnings
Package Release / package (ubuntu-latest) (push) Has been cancelled
Package Release / package (windows-latest) (push) Has been cancelled
2023-05-27 13:21:57 +02:00
807dad51dd too tired to get it in one go 2023-05-27 11:12:04 +02:00
395c347020 mistake fixing 2023-05-27 11:11:16 +02:00
a75a8f42bc still trying to use venv for packaging p2 2023-05-27 11:10:35 +02:00
24e5876eff removed venv deletion for now. 2023-05-27 09:26:37 +02:00
c87e12d75a still trying to use venv for packaging p1 2023-05-27 09:08:07 +02:00
1ffd11eb5b copy & paste error
Some checks reported warnings
Package Release / package (ubuntu-latest) (push) Has been cancelled
Package Release / package (windows-latest) (push) Has been cancelled
2023-05-27 08:21:56 +02:00
311cbfed75 fixing the leave venv 2023-05-27 08:20:29 +02:00
646b7419a5 trying venv to hopefully decrease the binary size 2023-05-27 08:12:41 +02:00
0fb9a42cc6
Update package.yml 2023-05-27 09:45:12 +02:00
b2f2a894c9 removed git commit compare, fixed startup if no nvidia_gpu was found. 2023-05-26 23:23:53 +02:00
ce9ef8f419 last try to fix this
Some checks reported warnings
Package Release / package (ubuntu-latest) (push) Has been cancelled
Package Release / package (windows-latest) (push) Has been cancelled
2023-05-26 18:46:42 +02:00
db6ed71594 last try to fix this 2023-05-26 18:44:36 +02:00
86171f84d7 can't get this ***** fixed -.-
Some checks reported warnings
Package Release / package (ubuntu-latest) (push) Has been cancelled
Package Release / package (windows-latest) (push) Has been cancelled
2023-05-26 15:45:34 +02:00
077669ea2e try fix github actions 2023-05-26 15:24:40 +02:00
25087b1e2d try fix github actions 2023-05-26 15:21:02 +02:00
5d755a302c try fix github actions 2023-05-26 15:20:27 +02:00
cc485f5342 try fix github actions 2023-05-26 15:15:05 +02:00
20c847a532 try fix github actions 2023-05-26 15:11:42 +02:00
ab514acc40 fix for the pyinstaller cmd? 2023-05-26 15:10:31 +02:00
f28f1f21f9 Raised Version to 1.6 2023-05-26 15:06:56 +02:00
c3696284fa made the update text-gen-webui update button to disappear if already at the newest commit 2023-05-26 15:04:39 +02:00
5e40ae8456 added llamap n_ctx and seed options, removed cai_chat (despite docs not working) 2023-05-26 08:32:36 +02:00
d1de11ecf4 removed the webuiGUI.py necessity, disable accelerate for windows (incompatible), remove some useless code (still learning) 2023-05-25 19:55:33 +02:00
b063f23e71 added save and load for accelerate. 2023-05-25 18:44:08 +02:00
58772e86f6 Added Accelerate 4bit options, made DeepSpeed disable for windows systems, added pip install accelerate if 8bit is used. 2023-05-25 18:36:34 +02:00
c8ed510030 raised version to 1.5.1 due to fixes in pre_loading
Some checks reported warnings
Package Release / package (ubuntu-latest) (push) Has been cancelled
Package Release / package (windows-latest) (push) Has been cancelled
2023-05-23 18:33:34 +02:00
39b2f0c54d fixed pre_slayer slider crashing on changes 2023-05-22 07:45:06 +02:00
2 changed files with 363 additions and 117 deletions

View File

@ -6,11 +6,8 @@ on:
workflow_dispatch:
jobs:
package:
runs-on: self-hosted
strategy:
matrix:
os: [windows-latest, ubuntu-latest]
package_on_ubuntu:
runs-on: ubuntu-selfhosted
steps:
- name: Set up Python
uses: actions/setup-python@v3
@ -20,19 +17,67 @@ jobs:
- name: Checkout repository
uses: actions/checkout@v3
- name: Upgrade Pip
run: python -m pip install --upgrade pip
- name: Install dependencies
run: |
python -m pip install --upgrade pip
sudo apt-get update
sudo apt-get upgrade -y
sudo apt-get install patchelf -y
pip install -r requirements.txt
pip install pyinstaller
pip install nuitka
sudo apt install patchelf
- name: Build and package
run: python3 -m nuitka StartUI.py --onefile --enable-plugin=pyqt5 --product-version=1.6.0 --disable-console --include-data-files=webuiGUI.py=webuiGUI.py --output-dir=./dist --noinclude-pytest-mode=nofollow --noinclude-setuptools-mode=nofollow
- name: remove build folders
run: |
pyinstaller --noconfirm --onefile --windowed StartUI.py
cp webuiGUI.py dist/ # Copy webuiGUI.py to the dist directory
rm -rf dist/StartUI.build
rm -rf dist/StartUI.dist
rm -rf dist/StartUI.onefile-build
- name: Executive permission
run: |
chmod +x dist/StartUI
- name: Upload artifacts
uses: actions/upload-artifact@v3
with:
name: ${{ matrix.os }}-binary-v1.5
name: ubuntu-binary-v1.6
path: dist
package_on_windows:
runs-on: windows-selfhosted
steps:
- name: Set up Python
uses: actions/setup-python@v3
with:
python-version: 3.x
- name: Checkout repository
uses: actions/checkout@v3
- name: Upgrade Pip
run: python -m pip install --upgrade pip
- name: Install dependencies
run: |
pip install -r requirements.txt
pip install nuitka
- name: Build and package
run: nuitka StartUI.py --onefile --enable-plugin=pyqt5 --product-version=1.6.0 --disable-console --include-data-files=webuiGUI.py=webuiGUI.py --output-dir=./dist --noinclude-pytest-mode=nofollow --noinclude-setuptools-mode=nofollow
- name: remove build folders
run: |
Remove-Item -Recurse -Force ./dist/StartUI.build
Remove-Item -Recurse -Force ./dist/StartUI.dist
Remove-Item -Recurse -Force ./dist/StartUI.onefile-build
- name: Upload artifacts
uses: actions/upload-artifact@v3
with:
name: windows-binary-v1.6
path: dist

View File

@ -1,20 +1,28 @@
import sys, os, gpustat, json, subprocess, platform, psutil, re, requests, darkdetect, qdarkstyle
from PyQt5.QtWidgets import QApplication, QHBoxLayout, QToolBar, QMessageBox, QAction, QMainWindow, QSpinBox, QLabel, QVBoxLayout, QComboBox, QSlider, QCheckBox, QLineEdit, QFileDialog, QPushButton, QWidget, QListWidget, QListWidgetItem, QGridLayout, QRadioButton, QFrame
import sys, os, gpustat, json, subprocess, platform, psutil, re, requests, darkdetect, qdarkstyle, time
from PyQt5.QtCore import Qt
from PyQt5.QtGui import QDoubleValidator, QIntValidator
from PyQt5.QtWidgets import QAction, QApplication, QCheckBox, QComboBox, QFileDialog, QFrame, QGridLayout, QHBoxLayout, QLabel, QLineEdit, QListWidget, QListWidgetItem, QMainWindow, QMessageBox, QPushButton, QRadioButton, QSlider, QSpinBox, QToolBar, QVBoxLayout, QWidget
# For showing the current version and checking for updates
version = "1.5"
version = "1.6"
# Profile folder for loading and saving profiles.
profiles_folder = "./profiles"
# Create the profile folder if it doesn't exist
os.makedirs(profiles_folder, exist_ok=True)
repo_path = "./text-generation-webui"
model_folder = "./text-generation-webui/models"
extensions_folder = "./text-generation-webui/extensions"
loras_folder = "./text-generation-webui/loras"
characters_folder = "./text-generation-webui/characters"
if getattr(sys, 'frozen', False):
webui_file = sys._MEIPASS + '/webuiGUI.py'
else:
webui_file = 'webuiGUI.py'
# Get the current Max CPU threads to use, so the user can't exceed his thread count.
max_threads = psutil.cpu_count(logical=True)
@ -50,6 +58,7 @@ def run_cmd_with_conda(cmd, env=None):
# Open a separate terminal window and execute the command
subprocess.Popen(['start', 'cmd', '/k', full_cmd], shell=True, env=env)
elif platform.system() == 'Linux':
# Define the necessary variables from the bash script
install_dir = os.path.dirname(os.path.abspath(__file__))
@ -90,7 +99,14 @@ class MainWindow(QMainWindow):
def init_ui(self):
self.setWindowTitle(f'StartUI for oobabooga webui v{version}')
# ToolBar
##########################################
# _____ _ ____ #
# |_ _|__ ___ | | | __ ) __ _ _ __ #
# | |/ _ \ / _ \| | | _ \ / _` | '__| #
# | | (_) | (_) | | | |_) | (_| | | #
# |_|\___/ \___/|_| |____/ \__,_|_| #
# #
##########################################
toolbar = QToolBar()
toolbar.setMovable(False)
self.addToolBar(toolbar)
@ -100,28 +116,31 @@ class MainWindow(QMainWindow):
toolbar.addWidget(toolbar_label)
# Deepspeed checkbox
self.deepspeed_settings_checkbox = QCheckBox(" DeepSpeed ")
self.deepspeed_settings_checkbox = QCheckBox("\tDeepSpeed\t")
self.deepspeed_settings_checkbox.setToolTip("Enables specific DeepSpeed Settings.")
self.deepspeed_settings_checkbox.setChecked(False)
self.deepspeed_settings_checkbox.setToolTip("Enables Deepspeed Settings")
self.deepspeed_settings_checkbox.stateChanged.connect(self.on_deepspeed_settings_checkbox_stateChanged)
toolbar.addWidget(self.deepspeed_settings_checkbox)
if platform.system() == 'Windows':
self.deepspeed_settings_checkbox.setEnabled(False)
self.deepspeed_settings_checkbox.setToolTip("DeepSpeed is not Supported in Windows.")
# llama.cpp checkbox
self.llama_settings_checkbox = QCheckBox(" llama.cpp ")
self.llama_settings_checkbox = QCheckBox("\tllama.cpp\t")
self.llama_settings_checkbox.setChecked(False)
self.llama_settings_checkbox.setToolTip("Enables llama.cpp Settings")
self.llama_settings_checkbox.stateChanged.connect(self.on_llama_settings_checkbox_stateChanged)
toolbar.addWidget(self.llama_settings_checkbox)
# FlexGen Checkbox
self.flexgen_settings_checkbox = QCheckBox(" FlexGen ")
self.flexgen_settings_checkbox = QCheckBox("\tFlexGen\t")
self.flexgen_settings_checkbox.setChecked(False)
self.flexgen_settings_checkbox.setToolTip("Enables FlexGen Settings")
self.flexgen_settings_checkbox.stateChanged.connect(self.on_flexgen_settings_checkbox_stateChanged)
toolbar.addWidget(self.flexgen_settings_checkbox)
# RWKV Checkbox
self.rwkv_settings_checkbox = QCheckBox(" RWKV ")
self.rwkv_settings_checkbox = QCheckBox("\tRWKV\t")
self.rwkv_settings_checkbox.setChecked(False)
self.rwkv_settings_checkbox.setVisible(False)
self.rwkv_settings_checkbox.setToolTip("Enables RWKV Settings")
@ -129,13 +148,30 @@ class MainWindow(QMainWindow):
toolbar.addWidget(self.rwkv_settings_checkbox)
# API Checkbox
self.api_settings_checkbox = QCheckBox(" API ")
self.api_settings_checkbox = QCheckBox("\tAPI\t")
self.api_settings_checkbox.setChecked(False)
self.api_settings_checkbox.setToolTip("Enables API Settings")
self.api_settings_checkbox.stateChanged.connect(self.on_api_settings_checkbox_stateChanged)
toolbar.addWidget(self.api_settings_checkbox)
# Menu Bar
# Accelerate Checkbox
self.Accelerate_settings_checkbox = QCheckBox("\tAccelerate\t")
self.Accelerate_settings_checkbox.setChecked(False)
self.Accelerate_settings_checkbox.setToolTip("Enables API Settings")
self.Accelerate_settings_checkbox.stateChanged.connect(self.on_Accelerate_settings_checkbox_stateChanged)
toolbar.addWidget(self.Accelerate_settings_checkbox)
if platform.system() == 'Windows':
self.Accelerate_settings_checkbox.setEnabled(False)
self.Accelerate_settings_checkbox.setToolTip("Accelerate is not Supported in Windows.")
################################################
# __ __ ____ #
# | \/ | ___ _ __ _ _ | __ ) __ _ _ __ #
# | |\/| |/ _ \ '_ \| | | | | _ \ / _` | '__| #
# | | | | __/ | | | |_| | | |_) | (_| | | #
# |_| |_|\___|_| |_|\__,_| |____/ \__,_|_| #
# #
################################################
menu = self.menuBar()
# Main menu
@ -179,7 +215,15 @@ class MainWindow(QMainWindow):
report_bug_action.triggered.connect(self.on_report_bug_clicked)
help_menu.addAction(report_bug_action)
# Main Window Layout, column width
###################################################################
# __ __ _ __ ___ _ #
# | \/ | __ _(_)_ __ \ \ / (_)_ __ __| | _____ __ #
# | |\/| |/ _` | | '_ \ \ \ /\ / /| | '_ \ / _` |/ _ \ \ /\ / / #
# | | | | (_| | | | | | \ V V / | | | | | (_| | (_) \ V V / #
# |_| |_|\__,_|_|_| |_| \_/\_/ |_|_| |_|\__,_|\___/ \_/\_/ #
# #
###################################################################
layout = QGridLayout()
layout.setColumnMinimumWidth(0, 350)
layout.setColumnMinimumWidth(3, 30)
@ -216,7 +260,6 @@ class MainWindow(QMainWindow):
model_type_box.addWidget(self.model_type)
layout.addLayout(model_type_box, 1, 0)
# Character
character_box = QHBoxLayout()
@ -272,7 +315,7 @@ class MainWindow(QMainWindow):
# Interface Mode Dropdown
self.mode_dropdown = QComboBox()
self.mode_dropdown.addItems(["chat", "cai_chat", "notebook"])
self.mode_dropdown.addItems(["chat", "notebook"])
self.mode_dropdown.setToolTip("Choose what kind of Interface you want to load.")
interface_mode_box.addWidget(self.mode_dropdown)
layout.addLayout(interface_mode_box, 3, 0)
@ -373,13 +416,12 @@ class MainWindow(QMainWindow):
self.ram_slider.valueChanged.connect(self.on_ram_slider_changed)
# Pre-layer Slider
# Check if Nvidia_gpu is enabled, if not, we don't need multiple pre_layer slider.
if nvidia_gpu:
self.pre_layer_labels = []
self.pre_layer_slider = []
self.pre_layer_slider_value = []
self.pre_layer_amount_max = 100
# Don't get confused. With the latest changes, each GPU can have its own pre_layer value. So we check gpu_stats again for the amount.
if nvidia_gpu:
for i, gpu in enumerate(gpu_stats):
pre_layer_labels = QLabel(f"{gpu.name} Pre_Layer:")
pre_layer_labels.setToolTip(f"The number of layers to allocate to the GPU.\nSetting this parameter enables CPU offloading for 4-bit models.\nFor multi-gpu, write the numbers separated by spaces, eg --pre_layer 30 60.")
@ -395,19 +437,6 @@ class MainWindow(QMainWindow):
pre_layer_sliders_value = QLabel("0")
layout.addWidget(pre_layer_sliders_value, 11 + (len(gpu_stats) * 2) + i, 2)
self.pre_layer_slider_value.append(pre_layer_sliders_value)
else:
self.pre_layer_slider = QSlider(Qt.Horizontal)
self.pre_layer_slider.setMinimum(0)
self.pre_layer_slider.setMaximum(100)
self.pre_layer_slider.setTickInterval(1)
self.pre_layer_slider.setSingleStep(1)
layout.addWidget(QLabel("Pre-layer:"), 11 + len(gpu_stats), 0)
self.pre_layer_slider.setToolTip("The number of layers to allocate to the GPU. Setting this parameter enables CPU offloading for 4-bit models.")
layout.addWidget(self.pre_layer_slider, 11 + len(gpu_stats), 1)
self.pre_layer_slider.valueChanged.connect(self.on_pre_layer_slider_changed)
self.pre_layer_value_label = QLabel("0")
layout.addWidget(self.pre_layer_value_label, 11 + len(gpu_stats), 2)
# Add horizontal line to separate the Checkboxes
line = QFrame()
@ -423,7 +452,7 @@ class MainWindow(QMainWindow):
# Deactivate Streaming Output
self.use_nostream_checkbox = QCheckBox("No Stream")
self.use_nostream_checkbox.setToolTip("Don't stream the text output in real time. Increases Token/s by ~ 50%")
layout.addWidget(self.use_nostream_checkbox, 15 + (len(gpu_stats) * 2), 1)
layout.addWidget(self.use_nostream_checkbox, 14 + (len(gpu_stats) * 2), 1)
# Load in full 16bit precision
self.use_16bit_checkbox = QCheckBox("Load in 16bit")
@ -514,15 +543,27 @@ class MainWindow(QMainWindow):
self.use_triton_checkbox.setToolTip("Use Triton for inference.")
layout.addWidget(self.use_triton_checkbox, 22 + (len(gpu_stats) * 2), 1)
# Add desc_act option Checkbox
self.use_desc_act_checkbox = QCheckBox("Use desc_act")
self.use_desc_act_checkbox.setToolTip("For models that don\'t have a quantize_config.json, this parameter is used to define whether to set desc_act or not in BaseQuantizeConfig.")
layout.addWidget(self.use_desc_act_checkbox, 23 + (len(gpu_stats) * 2), 0)
# Add horizontal line to separate the Checkboxes
line = QFrame()
line.setFrameShape(QFrame.HLine)
line.setFrameShadow(QFrame.Sunken)
layout.addWidget(line, 23 + (len(gpu_stats) * 2), 0, 1, 3)
layout.addWidget(line, 29 + (len(gpu_stats) * 2), 0, 1, 3)
# New GUI Options based on Toolbox Checkboxes.
# Deepspeed
######################################################
# ____ ____ _ #
# | _ \ ___ ___ _ __/ ___| _ __ ___ ___ __| | #
# | | | |/ _ \/ _ \ '_ \___ \| '_ \ / _ \/ _ \/ _` | #
# | |_| | __/ __/ |_) |__) | |_) | __/ __/ (_| | #
# |____/ \___|\___| .__/____/| .__/ \___|\___|\__,_| #
# |_| |_| #
######################################################
# Deepspeed Header
self.deepspeed_label_header = QLabel("Deepspeed Options:")
@ -601,7 +642,14 @@ class MainWindow(QMainWindow):
self.deepspeed_line.setVisible(False)
layout.addWidget(self.deepspeed_line, 36 + (len(gpu_stats) * 2), 0, 1, 3)
# llama.cpp
#################################################
# _ _ #
# | | | __ _ _ __ ___ __ _ ___ _ __ _ __ #
# | | |/ _` | '_ ` _ \ / _` | / __| '_ \| '_ \ #
# | | | (_| | | | | | | (_| || (__| |_) | |_) | #
# |_|_|\__,_|_| |_| |_|\__,_(_)___| .__/| .__/ #
# |_| |_| #
#################################################
# llama.cpp Header
self.llama_label_header = QLabel("llama.cpp Options:")
@ -703,14 +751,58 @@ class MainWindow(QMainWindow):
self.llama_gpu_layer_box.addWidget(self.llama_gpu_layer_spinbox)
layout.addLayout(self.llama_gpu_layer_box, 45 + (len(gpu_stats) * 2), 1, 1, 2)
# llama.cpp n_ctx inner layout
llama_n_ctx_inner_layout = QHBoxLayout()
# llama.cpp n_ctx label
self.llama_n_ctx_label = QLabel("n_ctx:")
self.llama_n_ctx_label.setVisible(False)
self.llama_n_ctx_label.setToolTip("Size of the prompt context.")
llama_n_ctx_inner_layout.addWidget(self.llama_n_ctx_label)
# llama.cpp n_ctx size dropdown
self.llama_n_ctx_dropdown = QComboBox()
self.llama_n_ctx_dropdown.setToolTip("Size of the prompt context.")
self.llama_n_ctx_dropdown.addItems(["128", "256", "512", "1024", "2048", "4096", "8192"])
self.llama_n_ctx_dropdown.setCurrentIndex(4)
self.llama_n_ctx_dropdown.setVisible(False)
llama_n_ctx_inner_layout.addWidget(self.llama_n_ctx_dropdown)
layout.addLayout(llama_n_ctx_inner_layout, 46 + (len(gpu_stats) * 2), 0)
# llama.cpp seed layout
llama_seed_inner_layout = QHBoxLayout()
# llama.cpp seed label
self.llama_seed_label = QLabel("Seed:")
self.llama_seed_label.setVisible(False)
self.llama_seed_label.setToolTip("Seed for llama-cpp models. Default 0 (random).")
llama_seed_inner_layout.addWidget(self.llama_seed_label)
# llama.cpp seed spinbox
self.llama_seed_spinbox = QSpinBox()
self.llama_seed_spinbox.setToolTip("Seed for llama-cpp models. Default 0 (random).")
self.llama_seed_spinbox.setRange(0, 2147483647)
self.llama_seed_spinbox.setValue(0)
self.llama_seed_spinbox.setSingleStep(1)
self.llama_seed_spinbox.setVisible(False)
llama_seed_inner_layout.addWidget(self.llama_seed_spinbox)
layout.addLayout(llama_seed_inner_layout, 46 + (len(gpu_stats) * 2), 1, 1, 2)
# Separator for the Toolbox Options
self.llama_line = QFrame()
self.llama_line.setFrameShape(QFrame.HLine)
self.llama_line.setFrameShadow(QFrame.Sunken)
self.llama_line.setVisible(False)
layout.addWidget(self.llama_line, 46 + (len(gpu_stats) * 2), 0, 1, 3)
layout.addWidget(self.llama_line, 49 + (len(gpu_stats) * 2), 0, 1, 3)
# FlexGen Options
########################################
# _____ _ ____ #
# | ___| | _____ __/ ___| ___ _ __ #
# | |_ | |/ _ \ \/ / | _ / _ \ '_ \ #
# | _| | | __/> <| |_| | __/ | | | #
# |_| |_|\___/_/\_\\____|\___|_| |_| #
# #
########################################
# FlexGen Header Label
self.flexgen_header_label = QLabel("FlexGen Options")
@ -810,7 +902,14 @@ class MainWindow(QMainWindow):
self.flexline.setVisible(False)
layout.addWidget(self.flexline, 54 + (len(gpu_stats) * 2), 0, 1, 3)
# RWKV Options
###################################
# ______ ___ ____ __ #
# | _ \ \ / / |/ /\ \ / / #
# | |_) \ \ /\ / /| ' / \ \ / / #
# | _ < \ V V / | . \ \ V / #
# |_| \_\ \_/\_/ |_|\_\ \_/ #
# #
###################################
# RWKV Header
self.rwkv_header = QLabel("RWKV:")
@ -865,7 +964,14 @@ class MainWindow(QMainWindow):
self.rwkv_line.setVisible(False)
layout.addWidget(self.rwkv_line, 65 + (len(gpu_stats) * 2), 0, 1, 3)
# API Options
######################
# _ ____ ___ #
# / \ | _ \_ _| #
# / _ \ | |_) | | #
# / ___ \| __/| | #
# /_/ \_\_| |___| #
# #
######################
# API Header Label
self.api_header = QLabel("API:")
@ -878,7 +984,6 @@ class MainWindow(QMainWindow):
self.api_checkbox.setToolTip("Enable the API extension.")
self.api_checkbox.setVisible(False)
layout.addWidget(self.api_checkbox, 71 + (len(gpu_stats) * 2), 0)
#self.api_checkbox.stateChanged.connect(self.on_api_checkbox_changed)
# API blocking Port Checkbox
self.api_blocking_port_checkbox = QCheckBox("Change API Blocking Port")
@ -919,12 +1024,80 @@ class MainWindow(QMainWindow):
layout.addWidget(self.api_public_checkbox, 74 + (len(gpu_stats) * 2), 0)
self.api_public_checkbox.stateChanged.connect(self.on_api_public_checkbox_changed)
# Separator for the Toolbox Options
self.toolboxapiline = QFrame()
self.toolboxapiline.setFrameShape(QFrame.HLine)
self.toolboxapiline.setFrameShadow(QFrame.Sunken)
self.toolboxapiline.setVisible(False)
layout.addWidget(self.toolboxapiline, 75 + (len(gpu_stats) * 2), 0, 1, 3)
#############################################################################
# _ _ _ _ _ _ _ _ #
# / \ ___ ___ ___| | ___ _ __ __ _| |_ ___ | || | | |__ (_) |_ #
# / _ \ / __/ __/ _ \ |/ _ \ '__/ _` | __/ _ \ | || |_ _____| '_ \| | __| #
# / ___ \ (_| (_| __/ | __/ | | (_| | || __/ |__ _|_____| |_) | | |_ #
# /_/ \_\___\___\___|_|\___|_| \__,_|\__\___| |_| |_.__/|_|\__| #
# #
#############################################################################
# Accelerate 4-bit Header
self.accelerate4bit_header = QLabel("Accelerate 4-bit:")
self.accelerate4bit_header.setVisible(False)
self.accelerate4bit_header.setToolTip("Accelerate 4-bit: Choose the settings to use for accelerating 4-bit models.")
layout.addWidget(self.accelerate4bit_header, 80 + (len(gpu_stats) * 2), 0)
# Accelerate 4-bit Checkbox
self.accelerate4bit_checkbox = QCheckBox("Load in 4-bit")
self.accelerate4bit_checkbox.setToolTip("Load the model with 4-bit precision (using bitsandbytes).")
self.accelerate4bit_checkbox.setVisible(False)
layout.addWidget(self.accelerate4bit_checkbox, 81 + (len(gpu_stats) * 2), 0)
# Compute type horizontal layout
compute_type_layout = QHBoxLayout()
# Compute type label
self.accelerate4bit_compute_type_label = QLabel("Compute Type:")
self.accelerate4bit_compute_type_label.setToolTip("The compute type to use for 4-bit acceleration.")
self.accelerate4bit_compute_type_label.setVisible(False)
compute_type_layout.addWidget(self.accelerate4bit_compute_type_label)
# Compute type dropdown
self.accelerate4bit_compute_type_dropdown = QComboBox()
self.accelerate4bit_compute_type_dropdown.setToolTip("The compute type to use for 4-bit acceleration.")
self.accelerate4bit_compute_type_dropdown.setVisible(False)
self.accelerate4bit_compute_type_dropdown.addItems([ "none", "bfloat16", "float16", "float32"])
compute_type_layout.addWidget(self.accelerate4bit_compute_type_dropdown)
layout.addLayout(compute_type_layout, 81 + (len(gpu_stats) * 2), 1)
# Quant Type Horizontal Box
quant_type_layout = QHBoxLayout()
# Quant type label
self.accelerate4bit_quant_type_label = QLabel("Quant Type:")
self.accelerate4bit_quant_type_label.setToolTip("The quantization type to use for 4-bit acceleration.")
self.accelerate4bit_quant_type_label.setVisible(False)
quant_type_layout.addWidget(self.accelerate4bit_quant_type_label)
# Quant type Dropdown
self.accelerate4bit_quant_type_dropdown = QComboBox()
self.accelerate4bit_quant_type_dropdown.setToolTip("The quantization type to use for 4-bit acceleration.")
self.accelerate4bit_quant_type_dropdown.setVisible(False)
self.accelerate4bit_quant_type_dropdown.addItems([ "none", "nf4", "fp4"])
quant_type_layout.addWidget(self.accelerate4bit_quant_type_dropdown)
layout.addLayout(quant_type_layout, 82 + (len(gpu_stats) * 2), 1)
# Use double quant checkbox
self.accelerate4bit_double_quant_checkbox = QCheckBox("Use Double Quant")
self.accelerate4bit_double_quant_checkbox.setToolTip("Use double quantization for 4-bit acceleration.")
self.accelerate4bit_double_quant_checkbox.setVisible(False)
layout.addWidget(self.accelerate4bit_double_quant_checkbox, 82 + (len(gpu_stats) * 2), 0)
# Separator for the Toolbox Options
self.toolboxendline = QFrame()
self.toolboxendline.setFrameShape(QFrame.HLine)
self.toolboxendline.setFrameShadow(QFrame.Sunken)
self.toolboxendline.setVisible(False)
layout.addWidget(self.toolboxendline, 75 + (len(gpu_stats) * 2), 0, 1, 3)
layout.addWidget(self.toolboxendline, 84 + (len(gpu_stats) * 2), 0, 1, 3)
# Authentication Box
authentication_box = QHBoxLayout()
@ -946,7 +1119,7 @@ class MainWindow(QMainWindow):
self.choose_file_button.setToolTip("Choose a file to use for the authentication credentials. Credentials should be saved like:\nUSERNAME1:PASSWORD1\nUSERNAME2:PASSWORD2")
self.choose_file_button.clicked.connect(self.on_choose_file_button_clicked)
authentication_box.addWidget(self.choose_file_button)
layout.addLayout(authentication_box, 80 + (len(gpu_stats) * 2), 0, 1, 3)
layout.addLayout(authentication_box, 85 + (len(gpu_stats) * 2), 0, 1, 3)
# Extensions Selection Menu
self.use_extensions_checkbox = QCheckBox("Use Extensions")
@ -1054,6 +1227,16 @@ class MainWindow(QMainWindow):
central_widget.setLayout(layout)
self.setCentralWidget(central_widget)
def on_Accelerate_settings_checkbox_stateChanged(self, state):
self.accelerate4bit_header.setVisible(state == Qt.Checked)
self.accelerate4bit_checkbox.setVisible(state == Qt.Checked)
self.accelerate4bit_compute_type_label.setVisible(state == Qt.Checked)
self.accelerate4bit_compute_type_dropdown.setVisible(state == Qt.Checked)
self.accelerate4bit_quant_type_label.setVisible(state == Qt.Checked)
self.accelerate4bit_quant_type_dropdown.setVisible(state == Qt.Checked)
self.accelerate4bit_double_quant_checkbox.setVisible(state == Qt.Checked)
self.toolboxendline.setVisible(state == Qt.Checked)
def on_api_public_checkbox_changed(self, state):
self.api_streaming_port_SpinBox.setEnabled(False)
self.api_blocking_port_SpinBox.setEnabled(False)
@ -1074,7 +1257,7 @@ class MainWindow(QMainWindow):
self.api_streaming_port_checkbox.setVisible(state == Qt.Checked)
self.api_streaming_port_SpinBox.setVisible(state == Qt.Checked)
self.api_public_checkbox.setVisible(state == Qt.Checked)
self.toolboxendline.setVisible(state == Qt.Checked)
self.toolboxapiline.setVisible(state == Qt.Checked)
def on_rwkv_settings_checkbox_stateChanged(self, state):
self.rwkv_header.setVisible(state == Qt.Checked)
@ -1099,8 +1282,6 @@ class MainWindow(QMainWindow):
self.flexgen_pin_weight_label.setVisible(state == Qt.Checked)
self.flexgen_pin_weight_dropdown.setVisible(state == Qt.Checked)
self.flexline.setVisible(state == Qt.Checked)
#self.flexgen_line.setVisible(state == Qt.Checked)
#self.flexgen_line.setVisible(state == Qt.Checked)
def on_llama_settings_checkbox_stateChanged(self, state):
self.llama_label_header.setVisible(state == Qt.Checked)
@ -1116,6 +1297,10 @@ class MainWindow(QMainWindow):
self.llama_gpu_layer_label.setVisible(state == Qt.Checked)
self.llama_gpu_layer_spinbox.setVisible(state == Qt.Checked)
self.llama_cache_capacity_units.setVisible(state == Qt.Checked)
self.llama_n_ctx_label.setVisible(state == Qt.Checked)
self.llama_n_ctx_dropdown.setVisible(state == Qt.Checked)
self.llama_seed_label.setVisible(state == Qt.Checked)
self.llama_seed_spinbox.setVisible(state == Qt.Checked)
def on_deepspeed_nvme_button_clicked(self):
folder = QFileDialog.getExistingDirectory(self, "Offload Directory")
@ -1285,7 +1470,6 @@ class MainWindow(QMainWindow):
self.ram_value_label.setText(f"{value} GiB")
def on_pre_layer_slider_changed(self, value, idx):
if nvidia_gpu:
# Calculate the current total value of all sliders
total_value = sum(slider.value() for slider in self.pre_layer_slider)
@ -1298,13 +1482,8 @@ class MainWindow(QMainWindow):
if value > max_allowed_value:
self.pre_layer_slider[idx].setValue(max_allowed_value)
value = max_allowed_value
else:
# Update the value label with the current value of the pre-layer slider
self.pre_layer_value_label.setText(str(value))
def on_pre_layer_slider_changed(self, value):
# Update the value label with the current value of the pre-layer slider
self.pre_layer_value_label.setText(str(value))
self.pre_layer_slider_value[idx].setText(str(value))
def on_vram_slider_changed(self, value, gpu_idx):
self.gpu_vram_labels[gpu_idx].setText(f"{value} GiB")
@ -1409,6 +1588,11 @@ class MainWindow(QMainWindow):
"sdp_attention": self.use_sdp_attention_checkbox.isChecked(), # Saves the state of the sdp_attention checkbox
"autogptq": self.use_autogptq_checkbox.isChecked(), # Saves the state of the autogptq checkbox
"triton": self.use_triton_checkbox.isChecked(), # Saves the state of the triton checkbox
"acceleration": self.Accelerate_settings_checkbox.isChecked(), # Saves the state of the Accelerate checkbox
"use_4bit": self.accelerate4bit_checkbox.isChecked(), # Saves the state of the accelerate4bit checkbox
"compute_dtype": self.accelerate4bit_compute_type_dropdown.currentText(), # Saves the state of the accelerate4bit_compute_type_dropdown
"quant_type": self.accelerate4bit_quant_type_dropdown.currentText(), # Saves the state of the accelerate4bit_quant_type_dropdown
"use_x2_quant": self.accelerate4bit_double_quant_checkbox.isChecked(), # Saves the state of the accelerate4bit_double_quant_checkbox
"deepspeed": self.deepspeed_settings_checkbox.isChecked(), # Saves the state of the deepspeed checkbox
"deepspeed_enabled": self.deepspeed_checkbox.isChecked(), # Saves the state of the deepspeed checkbox
"deepspeed_gpu_num": self.deepspeed_gpu_num_spinbox.value(), # Saves the state of the deepspeed_gpu_num_spinbox
@ -1423,6 +1607,8 @@ class MainWindow(QMainWindow):
"llama_cache_capacity": self.llama_cache_capacity_spinbox.value(), # Saves the state of the llama_cache_capacity_spinbox
"llama_cache_units": self.llama_cache_capacity_units.currentText(), # Saves the state of the llama_cache_capacity_units
"llama_gpu_layer": self.llama_gpu_layer_spinbox.value(), # Saves the state of the llama_gpu_layer_spinbox
"llama_n_ctx": self.llama_n_ctx_dropdown.currentText(), # Saves the state of the llama_n_ctx_dropdown
"llama_seed": self.llama_seed_spinbox.value(), # Saves the state of the llama_seed_spinbox
"flexgen_settings": self.flexgen_settings_checkbox.isChecked(), # Saves the state of the flexgen_settings_checkbox
"use_flexgen": self.flexgen_checkbox.isChecked(), # Saves the state of the flexgen_checkbox
"flexgen_precentage_1": self.flexgen_percentage_spinbox1.value(), # Saves the state of the flexgen_percentage_spinbox1
@ -1462,12 +1648,11 @@ class MainWindow(QMainWindow):
"loras": [self.lora_list.item(i).text() for i in range(self.lora_list.count()) if self.lora_list.item(i).checkState() == Qt.Checked] # Saves the chosen loras
}
if nvidia_gpu:
settings["gpu_vram"] = [slider.value() for slider in self.gpu_vram_sliders]
pre_layer_values = [slider.value() for slider in self.pre_layer_slider]
settings["prelayer"] = pre_layer_values
else:
settings["prelayer"] = self.pre_layer_value_label.text()
if nvidia_gpu:
settings["gpu_vram"] = [slider.value() for slider in self.gpu_vram_sliders]
# Get the text entered in the text field
profile_name = self.profile_name_textfield.text()
@ -1479,7 +1664,6 @@ class MainWindow(QMainWindow):
def expression_check(self, command):
selected_model = self.model_dropdown.currentText()
#print(f"Selected model: {selected_model}")
# Use a regular expression to check if the selected model matches the pattern
if re.search(r".*mpt.*7b", selected_model, re.IGNORECASE):
@ -1499,6 +1683,8 @@ class MainWindow(QMainWindow):
command += f" --threads {self.llama_threads_spinbox.value()}"
command += f" --n_batch {self.llama_batch_size_spinbox.value()}"
command += f" --cache-capacity {self.llama_cache_capacity_spinbox.value()}{self.llama_cache_capacity_units.currentText()}"
command += f" --n_ctx {self.llama_n_ctx_dropdown.currentText()}"
command += f" --llama_cpp_seed {self.llama_seed_spinbox.value()}"
if self.llama_gpu_layer_spinbox.value() != 0:
command += f" --n-gpu-layers {self.llama_gpu_layer_spinbox.value()}"
@ -1525,9 +1711,8 @@ class MainWindow(QMainWindow):
command += f" --model {chosen_model}"
# Add the chosen model type to the command
chosen_model_type = self.model_type.currentText()
if self.model_type.currentText() != "none" and self.model_dropdown.currentText() != "none":
command += f" --model_type {chosen_model_type}"
command += f" --model_type {self.model_type.currentText()}"
# Add loras to the command
loras = [self.lora_list.item(i).text() for i in range(self.lora_list.count()) if self.lora_list.item(i).checkState() == Qt.Checked]
@ -1536,22 +1721,18 @@ class MainWindow(QMainWindow):
command += f" --lora {' '.join(loras)}"
# Add Characters to the command
chosen_characters = self.character_to_load.currentText()
if self.character_to_load.currentText() != "none":
command += f" --character {chosen_characters}"
print(chosen_characters)
command += f" --character {self.character_to_load.currentText()}"
# Adds wbits to the command, if not "none"
chosen_wbits = self.wbit_dropdown.currentText()
if self.wbit_dropdown.currentText() != "none":
if not self.cpu_radio_button.isChecked() and self.model_dropdown.currentText() != "none":
command += f" --wbits {chosen_wbits}"
command += f" --wbits {self.wbit_dropdown.currentText()}"
# Adds Groupsize to the command, if not "none"
chosen_gsize = self.gsize_dropdown.currentText()
if self.gsize_dropdown.currentText() != "none":
if not self.cpu_radio_button.isChecked() and self.model_dropdown.currentText() != "none":
command += f" --groupsize {chosen_gsize}"
command += f" --groupsize {self.gsize_dropdown.currentText()}"
# Add the chosen mode to the command (Chat, cai-chat, notebook)
chosen_mode = self.mode_dropdown.currentText()
@ -1620,6 +1801,21 @@ class MainWindow(QMainWindow):
if self.use_quant_checkbox.isChecked():
command += " --quant_attn"
# Accelerate 4-bit
# 4-bit usage
if self.accelerate4bit_checkbox.isChecked():
command += " --load-in-4bit"
if self.accelerate4bit_compute_type_dropdown.currentText() != "none":
command += f" --compute_dtype {self.accelerate4bit_compute_type_dropdown.currentText()}"
if self.accelerate4bit_quant_type_dropdown.currentText() != "none":
command += f" --quant_type {self.accelerate4bit_quant_type_dropdown.currentText()}"
if self.accelerate4bit_double_quant_checkbox.isChecked():
command += " --use_double_quant"
# Disable Cache
if self.use_nocache_checkbox.isChecked():
command += " --no-cache"
@ -1655,13 +1851,9 @@ class MainWindow(QMainWindow):
command += f" --gradio-auth-path {self.choose_file_label.text()}"
## Adds the Prelayer selection
if nvidia_gpu:
slider_values = [slider.value() for slider in self.pre_layer_slider]
if any(value > 0 for value in slider_values):
command += f" --pre_layer {' '.join(str(value) for value in slider_values if value > 0)}"
else:
if int(self.pre_layer_value_label.text()) > 0:
command += f" --pre_layer {self.pre_layer_value_label.text()}"
# IF sdp_attention is checked
if self.use_sdp_attention_checkbox.isChecked():
@ -1675,6 +1867,10 @@ class MainWindow(QMainWindow):
if self.use_triton_checkbox.isChecked():
command += " --triton"
# if desc_act is checked
if self.use_desc_act_checkbox.isChecked():
command += " --desc_act"
# Adds the chosen extensions to the list of the command.
extensions = [self.extensions_list.item(i).text() for i in range(self.extensions_list.count()) if self.extensions_list.item(i).checkState() == Qt.Checked]
if self.use_extensions_checkbox.isChecked():
@ -1692,7 +1888,7 @@ class MainWindow(QMainWindow):
command += f" --api-streaming-port {self.api_streaming_port_SpinBox.text()}"
# Just for debugging.
#print(f"Command generated: python webuiGUI.py {command}")
print(f"Command generated: python {webui_file} {command}")
# Based on the Model that's chosen, we will take care of some necessary stuff.
# Starts the webui in the conda env with the user given Options
@ -1711,22 +1907,16 @@ class MainWindow(QMainWindow):
QMessageBox.critical(self, "Error", message)
if not self.deepspeed_checkbox.isChecked():
run_cmd_with_conda(f"python webuiGUI.py {command}")
if self.use_8bit_checkbox.isChecked():
run_cmd_with_conda(f"pip install accelerate && python {webui_file} {command}")
else:
run_cmd_with_conda(f"python {webui_file} {command}")
if self.use_autoclose_checkbox.isChecked():
sys.exit()
def on_update_button_clicked(self):
run_cmd_with_conda("python webuiGUI.py --update && exit")
def load_profile(self, profile_file):
with open(profile_file, "r") as file:
try:
settings = json.load(file)
# Set the GUI elements based on the loaded settings...
except json.JSONDecodeError:
# Handle the case when the file is empty or not in valid JSON format
pass
run_cmd_with_conda(f"python {webui_file} --update && exit")
def populate_profiles_dropdown(self):
self.profiles_dropdown.clear()
@ -1770,6 +1960,13 @@ class MainWindow(QMainWindow):
self.use_sdp_attention_checkbox.setChecked(settings.get("sdp_attention", False))
self.use_autogptq_checkbox.setChecked(settings.get("autogptq", False))
self.use_triton_checkbox.setChecked(settings.get("triton", False))
# Acceleration 4bit
self.Accelerate_settings_checkbox.setChecked(settings.get("acceleration", False))
self.accelerate4bit_checkbox.setChecked(settings.get("use_4bit", False))
self.accelerate4bit_compute_type_dropdown.setCurrentText(settings.get("compute_dtype", ""))
self.accelerate4bit_quant_type_dropdown.setCurrentText(settings.get("quant_type", ""))
self.accelerate4bit_double_quant_checkbox.setChecked(settings.get("use_x2_quant", False))
# Deepspeed
self.deepspeed_settings_checkbox.setChecked(settings.get("deepspeed", False))
self.deepspeed_checkbox.setChecked(settings.get("deepspeed_enabled", False))
self.deepspeed_gpu_num_spinbox.setValue(int(settings.get("deepspeed_gpu_num", 0)))
@ -1777,6 +1974,7 @@ class MainWindow(QMainWindow):
self.deepspeed_nvme_current_label.setText(f"Current Directory Folder: {self.selected_offload_directory}")
self.deepspeed_nvme_checkbox.setChecked(settings.get("deepspeed_nvme_enabled", False))
self.deepspeed_local_rank_spinbox.setValue(int(settings.get("deepspeed_local_rank", 0)))
# llama
self.llama_settings_checkbox.setChecked(settings.get("llama_settings", False))
self.llama_threads_spinbox.setValue(int(settings.get("llama_threads", 0)))
self.llama_batch_size_spinbox.setValue(int(settings.get("llama_batch_size", 0)))
@ -1785,6 +1983,9 @@ class MainWindow(QMainWindow):
self.llama_cache_capacity_spinbox.setValue(int(settings.get("llama_cache_capacity", 0)))
self.llama_cache_capacity_units.setCurrentText(settings.get("llama_cache_units", ""))
self.llama_gpu_layer_spinbox.setValue(int(settings.get("llama_gpu_layer", 0)))
self.llama_n_ctx_dropdown.setCurrentText(settings.get("llama_n_ctx", ""))
self.llama_seed_spinbox.setValue(int(settings.get("llama_seed", 0)))
# flexgen
self.flexgen_settings_checkbox.setChecked(settings.get("flexgen_settings", False))
self.flexgen_checkbox.setChecked(settings.get("use_flexgen", False))
self.flexgen_percentage_spinbox1.setValue(int(settings.get("flexgen_precentage_1", 0)))
@ -1795,12 +1996,14 @@ class MainWindow(QMainWindow):
self.flexgen_percentage_spinbox6.setValue(int(settings.get("flexgen_precentage_6", 0)))
self.flexgen_compression_checkbox.setChecked(settings.get("flexgen_compression", False))
self.flexgen_pin_weight_dropdown.setCurrentText(settings.get("flexgen_pin_weight", ""))
# RWKV
self.rwkv_settings_checkbox.setChecked(settings.get("rwkv_settings", False))
self.rwkv_checkbox.setChecked(settings.get("use_rwkv", False))
self.rwkv_strategy_checkbox.setChecked(settings.get("rwkv_strategy", False))
self.rwkv_strategy_dropdown.setCurrentText(settings.get("rwkv_strategy_dropdown", ""))
self.rwkv_allocation_spinbox.setValue(int(settings.get("rwkv_allocation", 0)))
self.rwkv_cuda_checkbox.setChecked(settings.get("rwkv_cuda", False))
# API
self.api_settings_checkbox.setChecked(settings.get("api_settings", False))
self.api_checkbox.setChecked(settings.get("use_api", False))
self.api_blocking_port_checkbox.setChecked(settings.get("api_blocking_port_enabled", False))
@ -1815,16 +2018,14 @@ class MainWindow(QMainWindow):
self.authentication_checkbox.setChecked(settings.get("authentication", False))
self.choose_file_label.setText(settings.get("authentication_file", ""))
self.character_to_load.setCurrentText(settings.get("character", ""))
#self.pre_layer_slider.setValue(int(settings.get("prelayer", 0)))
self.use_autolaunch_checkbox.setChecked(settings.get("autolaunch", False))
self.use_network_checkbox.setChecked(settings.get("listen", False))
if nvidia_gpu:
if "prelayer" in settings:
pre_layer_values = settings["prelayer"]
for i, value in enumerate(pre_layer_values):
self.pre_layer_slider[i].setValue(value)
else:
self.pre_layer_slider.setValue(int(settings.get("prelayer", 0)))
if nvidia_gpu:
gpu_vram_settings = settings.get("gpu_vram", [])