15 Commits

Author SHA1 Message Date
b2f2a894c9 removed git commit compare, fixed startup if no nvidia_gpu was found. 2023-05-26 23:23:53 +02:00
ce9ef8f419 last try to fix this
All checks were successful
Package Release / package (ubuntu-latest) (push) Has been cancelled
Package Release / package (windows-latest) (push) Has been cancelled
2023-05-26 18:46:42 +02:00
db6ed71594 last try to fix this 2023-05-26 18:44:36 +02:00
86171f84d7 can't get this ***** fixed -.-
All checks were successful
Package Release / package (ubuntu-latest) (push) Has been cancelled
Package Release / package (windows-latest) (push) Has been cancelled
2023-05-26 15:45:34 +02:00
077669ea2e try fix github actions 2023-05-26 15:24:40 +02:00
25087b1e2d try fix github actions 2023-05-26 15:21:02 +02:00
5d755a302c try fix github actions 2023-05-26 15:20:27 +02:00
cc485f5342 try fix github actions 2023-05-26 15:15:05 +02:00
20c847a532 try fix github actions 2023-05-26 15:11:42 +02:00
ab514acc40 fix for the pyinstaller cmd? 2023-05-26 15:10:31 +02:00
f28f1f21f9 Raised Version to 1.6 2023-05-26 15:06:56 +02:00
c3696284fa made the update text-gen-webui update button to disappear if already at the newest commit 2023-05-26 15:04:39 +02:00
5e40ae8456 added llamap n_ctx and seed options, removed cai_chat (despite docs not working) 2023-05-26 08:32:36 +02:00
d1de11ecf4 removed the webuiGUI.py necessity, disable accelerate for windows (incompatible), remove some useless code (still learning) 2023-05-25 19:55:33 +02:00
b063f23e71 added save and load for accelerate. 2023-05-25 18:44:08 +02:00
2 changed files with 101 additions and 48 deletions

View File

@@ -28,11 +28,10 @@ jobs:
- name: Build and package
run: |
pyinstaller --noconfirm --onefile --windowed StartUI.py
cp webuiGUI.py dist/ # Copy webuiGUI.py to the dist directory
pyinstaller --noconfirm --onefile --windowed StartUI.py --add-data "webuiGUI.py:."
- name: Upload artifacts
uses: actions/upload-artifact@v3
with:
name: ${{ matrix.os }}-binary-v1.5.1
name: ${{ matrix.os }}-binary-v1.6
path: dist

View File

@@ -4,18 +4,24 @@ from PyQt5.QtCore import Qt
from PyQt5.QtGui import QDoubleValidator, QIntValidator
# For showing the current version and checking for updates
version = "1.5.1"
version = "1.6"
# Profile folder for loading and saving profiles.
profiles_folder = "./profiles"
# Create the profile folder if it doesn't exist
os.makedirs(profiles_folder, exist_ok=True)
repo_path = "./text-generation-webui"
model_folder = "./text-generation-webui/models"
extensions_folder = "./text-generation-webui/extensions"
loras_folder = "./text-generation-webui/loras"
characters_folder = "./text-generation-webui/characters"
if getattr(sys, 'frozen', False):
webui_file = sys._MEIPASS + '/webuiGUI.py'
else:
webui_file = 'webuiGUI.py'
# Get the number of logical CPU threads, so the user can't exceed the available thread count.
max_threads = psutil.cpu_count(logical=True)
@@ -154,7 +160,7 @@ class MainWindow(QMainWindow):
self.Accelerate_settings_checkbox.stateChanged.connect(self.on_Accelerate_settings_checkbox_stateChanged)
toolbar.addWidget(self.Accelerate_settings_checkbox)
if platform.system() == 'Windows':
#self.Accelerate_settings_checkbox.setEnabled(False)
self.Accelerate_settings_checkbox.setEnabled(False)
self.Accelerate_settings_checkbox.setToolTip("Accelerate is not Supported in Windows.")
################################################
@@ -216,6 +222,7 @@ class MainWindow(QMainWindow):
# |_| |_|\__,_|_|_| |_| \_/\_/ |_|_| |_|\__,_|\___/ \_/\_/ #
# #
###################################################################
layout = QGridLayout()
layout.setColumnMinimumWidth(0, 350)
layout.setColumnMinimumWidth(3, 30)
@@ -307,7 +314,7 @@ class MainWindow(QMainWindow):
# Interface Mode Dropdown
self.mode_dropdown = QComboBox()
self.mode_dropdown.addItems(["chat", "cai_chat", "notebook"])
self.mode_dropdown.addItems(["chat", "notebook"])
self.mode_dropdown.setToolTip("Choose what kind of Interface you want to load.")
interface_mode_box.addWidget(self.mode_dropdown)
layout.addLayout(interface_mode_box, 3, 0)
@@ -413,21 +420,22 @@ class MainWindow(QMainWindow):
self.pre_layer_slider_value = []
self.pre_layer_amount_max = 100
# Don't get confused. With the latest changes, each GPU can have its own pre_layer value, so we loop over gpu_stats again to get the amount.
for i, gpu in enumerate(gpu_stats):
pre_layer_labels = QLabel(f"{gpu.name} Pre_Layer:")
pre_layer_labels.setToolTip(f"The number of layers to allocate to the GPU.\nSetting this parameter enables CPU offloading for 4-bit models.\nFor multi-gpu, write the numbers separated by spaces, eg --pre_layer 30 60.")
layout.addWidget(pre_layer_labels, 11 + (len(gpu_stats) * 2) + i, 0)
self.pre_layer_labels.append(pre_layer_labels)
if nvidia_gpu:
for i, gpu in enumerate(gpu_stats):
pre_layer_labels = QLabel(f"{gpu.name} Pre_Layer:")
pre_layer_labels.setToolTip(f"The number of layers to allocate to the GPU.\nSetting this parameter enables CPU offloading for 4-bit models.\nFor multi-gpu, write the numbers separated by spaces, eg --pre_layer 30 60.")
layout.addWidget(pre_layer_labels, 11 + (len(gpu_stats) * 2) + i, 0)
self.pre_layer_labels.append(pre_layer_labels)
pre_layer_sliders = QSlider(Qt.Horizontal)
pre_layer_sliders.setMaximum(100)
pre_layer_sliders.valueChanged.connect(lambda value, idx=i: self.on_pre_layer_slider_changed(value, idx))
layout.addWidget(pre_layer_sliders, 11 + (len(gpu_stats) * 2) + i, 1)
self.pre_layer_slider.append(pre_layer_sliders)
pre_layer_sliders = QSlider(Qt.Horizontal)
pre_layer_sliders.setMaximum(100)
pre_layer_sliders.valueChanged.connect(lambda value, idx=i: self.on_pre_layer_slider_changed(value, idx))
layout.addWidget(pre_layer_sliders, 11 + (len(gpu_stats) * 2) + i, 1)
self.pre_layer_slider.append(pre_layer_sliders)
pre_layer_sliders_value = QLabel("0")
layout.addWidget(pre_layer_sliders_value, 11 + (len(gpu_stats) * 2) + i, 2)
self.pre_layer_slider_value.append(pre_layer_sliders_value)
pre_layer_sliders_value = QLabel("0")
layout.addWidget(pre_layer_sliders_value, 11 + (len(gpu_stats) * 2) + i, 2)
self.pre_layer_slider_value.append(pre_layer_sliders_value)
# Add horizontal line to separate the Checkboxes
line = QFrame()
@@ -443,7 +451,7 @@ class MainWindow(QMainWindow):
# Deactivate Streaming Output
self.use_nostream_checkbox = QCheckBox("No Stream")
self.use_nostream_checkbox.setToolTip("Don't stream the text output in real time. Increases Token/s by ~ 50%")
layout.addWidget(self.use_nostream_checkbox, 15 + (len(gpu_stats) * 2), 1)
layout.addWidget(self.use_nostream_checkbox, 14 + (len(gpu_stats) * 2), 1)
# Load in full 16bit precision
self.use_16bit_checkbox = QCheckBox("Load in 16bit")
@@ -737,12 +745,49 @@ class MainWindow(QMainWindow):
self.llama_gpu_layer_box.addWidget(self.llama_gpu_layer_spinbox)
layout.addLayout(self.llama_gpu_layer_box, 45 + (len(gpu_stats) * 2), 1, 1, 2)
# llama.cpp n_ctx inner layout
llama_n_ctx_inner_layout = QHBoxLayout()
# llama.cpp n_ctx label
self.llama_n_ctx_label = QLabel("n_ctx:")
self.llama_n_ctx_label.setVisible(False)
self.llama_n_ctx_label.setToolTip("Size of the prompt context.")
llama_n_ctx_inner_layout.addWidget(self.llama_n_ctx_label)
# llama.cpp n_ctx size dropdown
self.llama_n_ctx_dropdown = QComboBox()
self.llama_n_ctx_dropdown.setToolTip("Size of the prompt context.")
self.llama_n_ctx_dropdown.addItems(["128", "256", "512", "1024", "2048", "4096", "8192"])
self.llama_n_ctx_dropdown.setCurrentIndex(4)
self.llama_n_ctx_dropdown.setVisible(False)
llama_n_ctx_inner_layout.addWidget(self.llama_n_ctx_dropdown)
layout.addLayout(llama_n_ctx_inner_layout, 46 + (len(gpu_stats) * 2), 0)
# llama.cpp seed layout
llama_seed_inner_layout = QHBoxLayout()
# llama.cpp seed label
self.llama_seed_label = QLabel("Seed:")
self.llama_seed_label.setVisible(False)
self.llama_seed_label.setToolTip("Seed for llama-cpp models. Default 0 (random).")
llama_seed_inner_layout.addWidget(self.llama_seed_label)
# llama.cpp seed spinbox
self.llama_seed_spinbox = QSpinBox()
self.llama_seed_spinbox.setToolTip("Seed for llama-cpp models. Default 0 (random).")
self.llama_seed_spinbox.setRange(0, 2147483647)
self.llama_seed_spinbox.setValue(0)
self.llama_seed_spinbox.setSingleStep(1)
self.llama_seed_spinbox.setVisible(False)
llama_seed_inner_layout.addWidget(self.llama_seed_spinbox)
layout.addLayout(llama_seed_inner_layout, 46 + (len(gpu_stats) * 2), 1, 1, 2)
# Separator for the Toolbox Options
self.llama_line = QFrame()
self.llama_line.setFrameShape(QFrame.HLine)
self.llama_line.setFrameShadow(QFrame.Sunken)
self.llama_line.setVisible(False)
layout.addWidget(self.llama_line, 46 + (len(gpu_stats) * 2), 0, 1, 3)
layout.addWidget(self.llama_line, 49 + (len(gpu_stats) * 2), 0, 1, 3)
########################################
# _____ _ ____ #
@@ -1246,6 +1291,10 @@ class MainWindow(QMainWindow):
self.llama_gpu_layer_label.setVisible(state == Qt.Checked)
self.llama_gpu_layer_spinbox.setVisible(state == Qt.Checked)
self.llama_cache_capacity_units.setVisible(state == Qt.Checked)
self.llama_n_ctx_label.setVisible(state == Qt.Checked)
self.llama_n_ctx_dropdown.setVisible(state == Qt.Checked)
self.llama_seed_label.setVisible(state == Qt.Checked)
self.llama_seed_spinbox.setVisible(state == Qt.Checked)
def on_deepspeed_nvme_button_clicked(self):
folder = QFileDialog.getExistingDirectory(self, "Offload Directory")
@@ -1520,7 +1569,6 @@ class MainWindow(QMainWindow):
"use_cpu": self.cpu_radio_button.isChecked(), # Save the state of the CPU radio button
"use_auto": self.auto_radio_button.isChecked(), # Save the state of the auto device radio button
"built_in_ram": self.ram_slider.value(), # Save the value of the built-in RAM slider
#"prelayer": self.pre_layer_value_label.text(), # Saves the Prelayer value
"use_8bit": self.use_8bit_checkbox.isChecked(), # Saves the state of the 8bit checkbox
"no_stream": self.use_nostream_checkbox.isChecked(), # Saves the state of the no_stream checkbox
"use_16bit": self.use_16bit_checkbox.isChecked(), # Saves the state of the use_16bit checkbox
@@ -1534,6 +1582,11 @@ class MainWindow(QMainWindow):
"sdp_attention": self.use_sdp_attention_checkbox.isChecked(), # Saves the state of the sdp_attention checkbox
"autogptq": self.use_autogptq_checkbox.isChecked(), # Saves the state of the autogptq checkbox
"triton": self.use_triton_checkbox.isChecked(), # Saves the state of the triton checkbox
"acceleration": self.Accelerate_settings_checkbox.isChecked(), # Saves the state of the Accelerate checkbox
"use_4bit": self.accelerate4bit_checkbox.isChecked(), # Saves the state of the accelerate4bit checkbox
"compute_dtype": self.accelerate4bit_compute_type_dropdown.currentText(), # Saves the state of the accelerate4bit_compute_type_dropdown
"quant_type": self.accelerate4bit_quant_type_dropdown.currentText(), # Saves the state of the accelerate4bit_quant_type_dropdown
"use_x2_quant": self.accelerate4bit_double_quant_checkbox.isChecked(), # Saves the state of the accelerate4bit_double_quant_checkbox
"deepspeed": self.deepspeed_settings_checkbox.isChecked(), # Saves the state of the deepspeed checkbox
"deepspeed_enabled": self.deepspeed_checkbox.isChecked(), # Saves the state of the deepspeed checkbox
"deepspeed_gpu_num": self.deepspeed_gpu_num_spinbox.value(), # Saves the state of the deepspeed_gpu_num_spinbox
@@ -1548,6 +1601,8 @@ class MainWindow(QMainWindow):
"llama_cache_capacity": self.llama_cache_capacity_spinbox.value(), # Saves the state of the llama_cache_capacity_spinbox
"llama_cache_units": self.llama_cache_capacity_units.currentText(), # Saves the state of the llama_cache_capacity_units
"llama_gpu_layer": self.llama_gpu_layer_spinbox.value(), # Saves the state of the llama_gpu_layer_spinbox
"llama_n_ctx": self.llama_n_ctx_dropdown.currentText(), # Saves the state of the llama_n_ctx_dropdown
"llama_seed": self.llama_seed_spinbox.value(), # Saves the state of the llama_seed_spinbox
"flexgen_settings": self.flexgen_settings_checkbox.isChecked(), # Saves the state of the flexgen_settings_checkbox
"use_flexgen": self.flexgen_checkbox.isChecked(), # Saves the state of the flexgen_checkbox
"flexgen_precentage_1": self.flexgen_percentage_spinbox1.value(), # Saves the state of the flexgen_percentage_spinbox1
@@ -1603,7 +1658,6 @@ class MainWindow(QMainWindow):
def expression_check(self, command):
selected_model = self.model_dropdown.currentText()
#print(f"Selected model: {selected_model}")
# Use a regular expression to check if the selected model matches the pattern
if re.search(r".*mpt.*7b", selected_model, re.IGNORECASE):
@@ -1623,6 +1677,8 @@ class MainWindow(QMainWindow):
command += f" --threads {self.llama_threads_spinbox.value()}"
command += f" --n_batch {self.llama_batch_size_spinbox.value()}"
command += f" --cache-capacity {self.llama_cache_capacity_spinbox.value()}{self.llama_cache_capacity_units.currentText()}"
command += f" --n_ctx {self.llama_n_ctx_dropdown.currentText()}"
command += f" --llama_cpp_seed {self.llama_seed_spinbox.value()}"
if self.llama_gpu_layer_spinbox.value() != 0:
command += f" --n-gpu-layers {self.llama_gpu_layer_spinbox.value()}"
@@ -1649,9 +1705,8 @@ class MainWindow(QMainWindow):
command += f" --model {chosen_model}"
# Add the chosen model type to the command
chosen_model_type = self.model_type.currentText()
if self.model_type.currentText() != "none" and self.model_dropdown.currentText() != "none":
command += f" --model_type {chosen_model_type}"
command += f" --model_type {self.model_type.currentText()}"
# Add loras to the command
loras = [self.lora_list.item(i).text() for i in range(self.lora_list.count()) if self.lora_list.item(i).checkState() == Qt.Checked]
@@ -1660,22 +1715,18 @@ class MainWindow(QMainWindow):
command += f" --lora {' '.join(loras)}"
# Add Characters to the command
chosen_characters = self.character_to_load.currentText()
if self.character_to_load.currentText() != "none":
command += f" --character {chosen_characters}"
print(chosen_characters)
command += f" --character {self.character_to_load.currentText()}"
# Adds wbits to the command, if not "none"
chosen_wbits = self.wbit_dropdown.currentText()
if self.wbit_dropdown.currentText() != "none":
if not self.cpu_radio_button.isChecked() and self.model_dropdown.currentText() != "none":
command += f" --wbits {chosen_wbits}"
command += f" --wbits {self.wbit_dropdown.currentText()}"
# Adds Groupsize to the command, if not "none"
chosen_gsize = self.gsize_dropdown.currentText()
if self.gsize_dropdown.currentText() != "none":
if not self.cpu_radio_button.isChecked() and self.model_dropdown.currentText() != "none":
command += f" --groupsize {chosen_gsize}"
command += f" --groupsize {self.gsize_dropdown.currentText()}"
# Add the chosen mode to the command (chat, notebook)
chosen_mode = self.mode_dropdown.currentText()
@@ -1750,10 +1801,10 @@ class MainWindow(QMainWindow):
if self.accelerate4bit_checkbox.isChecked():
command += " --load-in-4bit"
if self.accelerate4bit_compute_type_dropdown != "none":
if self.accelerate4bit_compute_type_dropdown.currentText() != "none":
command += f" --compute_dtype {self.accelerate4bit_compute_type_dropdown.currentText()}"
if self.accelerate4bit_quant_type_dropdown != "none":
if self.accelerate4bit_quant_type_dropdown.currentText() != "none":
command += f" --quant_type {self.accelerate4bit_quant_type_dropdown.currentText()}"
if self.accelerate4bit_double_quant_checkbox.isChecked():
@@ -1805,7 +1856,6 @@ class MainWindow(QMainWindow):
# If AutoGPTQ is checked
if self.use_autogptq_checkbox.isChecked():
command += " --autogptq"
run_cmd_with_conda("pip install auto_gptq && exit")
# If triton is checked
if self.use_triton_checkbox.isChecked():
@@ -1828,7 +1878,7 @@ class MainWindow(QMainWindow):
command += f" --api-streaming-port {self.api_streaming_port_SpinBox.text()}"
# Just for debugging.
print(f"Command generated: python webuiGUI.py {command}")
print(f"Command generated: python {webui_file} {command}")
# Based on the Model that's chosen, we will take care of some necessary stuff.
# Starts the webui in the conda env with the user given Options
@@ -1848,24 +1898,15 @@ class MainWindow(QMainWindow):
if not self.deepspeed_checkbox.isChecked():
if self.use_8bit_checkbox.isChecked():
run_cmd_with_conda(f"pip install accelerate && python webuiGUI.py {command}")
run_cmd_with_conda(f"pip install accelerate && python {webui_file} {command}")
else:
run_cmd_with_conda(f"python webuiGUI.py {command}")
run_cmd_with_conda(f"python {webui_file} {command}")
if self.use_autoclose_checkbox.isChecked():
sys.exit()
def on_update_button_clicked(self):
run_cmd_with_conda("python webuiGUI.py --update && exit")
def load_profile(self, profile_file):
with open(profile_file, "r") as file:
try:
settings = json.load(file)
# Set the GUI elements based on the loaded settings...
except json.JSONDecodeError:
# Handle the case when the file is empty or not in valid JSON format
pass
run_cmd_with_conda(f"python {webui_file} --update && exit")
def populate_profiles_dropdown(self):
self.profiles_dropdown.clear()
@@ -1909,6 +1950,13 @@ class MainWindow(QMainWindow):
self.use_sdp_attention_checkbox.setChecked(settings.get("sdp_attention", False))
self.use_autogptq_checkbox.setChecked(settings.get("autogptq", False))
self.use_triton_checkbox.setChecked(settings.get("triton", False))
# Acceleration 4bit
self.Accelerate_settings_checkbox.setChecked(settings.get("acceleration", False))
self.accelerate4bit_checkbox.setChecked(settings.get("use_4bit", False))
self.accelerate4bit_compute_type_dropdown.setCurrentText(settings.get("compute_dtype", ""))
self.accelerate4bit_quant_type_dropdown.setCurrentText(settings.get("quant_type", ""))
self.accelerate4bit_double_quant_checkbox.setChecked(settings.get("use_x2_quant", False))
# Deepspeed
self.deepspeed_settings_checkbox.setChecked(settings.get("deepspeed", False))
self.deepspeed_checkbox.setChecked(settings.get("deepspeed_enabled", False))
self.deepspeed_gpu_num_spinbox.setValue(int(settings.get("deepspeed_gpu_num", 0)))
@@ -1916,6 +1964,7 @@ class MainWindow(QMainWindow):
self.deepspeed_nvme_current_label.setText(f"Current Directory Folder: {self.selected_offload_directory}")
self.deepspeed_nvme_checkbox.setChecked(settings.get("deepspeed_nvme_enabled", False))
self.deepspeed_local_rank_spinbox.setValue(int(settings.get("deepspeed_local_rank", 0)))
# llama
self.llama_settings_checkbox.setChecked(settings.get("llama_settings", False))
self.llama_threads_spinbox.setValue(int(settings.get("llama_threads", 0)))
self.llama_batch_size_spinbox.setValue(int(settings.get("llama_batch_size", 0)))
@@ -1924,6 +1973,9 @@ class MainWindow(QMainWindow):
self.llama_cache_capacity_spinbox.setValue(int(settings.get("llama_cache_capacity", 0)))
self.llama_cache_capacity_units.setCurrentText(settings.get("llama_cache_units", ""))
self.llama_gpu_layer_spinbox.setValue(int(settings.get("llama_gpu_layer", 0)))
self.llama_n_ctx_dropdown.setCurrentText(settings.get("llama_n_ctx", ""))
self.llama_seed_spinbox.setValue(int(settings.get("llama_seed", 0)))
# flexgen
self.flexgen_settings_checkbox.setChecked(settings.get("flexgen_settings", False))
self.flexgen_checkbox.setChecked(settings.get("use_flexgen", False))
self.flexgen_percentage_spinbox1.setValue(int(settings.get("flexgen_precentage_1", 0)))
@@ -1934,12 +1986,14 @@ class MainWindow(QMainWindow):
self.flexgen_percentage_spinbox6.setValue(int(settings.get("flexgen_precentage_6", 0)))
self.flexgen_compression_checkbox.setChecked(settings.get("flexgen_compression", False))
self.flexgen_pin_weight_dropdown.setCurrentText(settings.get("flexgen_pin_weight", ""))
# RWKV
self.rwkv_settings_checkbox.setChecked(settings.get("rwkv_settings", False))
self.rwkv_checkbox.setChecked(settings.get("use_rwkv", False))
self.rwkv_strategy_checkbox.setChecked(settings.get("rwkv_strategy", False))
self.rwkv_strategy_dropdown.setCurrentText(settings.get("rwkv_strategy_dropdown", ""))
self.rwkv_allocation_spinbox.setValue(int(settings.get("rwkv_allocation", 0)))
self.rwkv_cuda_checkbox.setChecked(settings.get("rwkv_cuda", False))
# API
self.api_settings_checkbox.setChecked(settings.get("api_settings", False))
self.api_checkbox.setChecked(settings.get("use_api", False))
self.api_blocking_port_checkbox.setChecked(settings.get("api_blocking_port_enabled", False))