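# Chainlit chat app for the Bielik-11B model served through NVIDIA NIM.
# Features: optional web search (Tavily with a DDGS fallback), step-by-step
# reasoning via <think> tags, RAG over user-uploaded files (LlamaIndex), and
# harmful-content screening with the Bielik-Guard classifier.
# Typically launched with the Chainlit CLI, e.g. `chainlit run app.py`
# (the file name here is an assumption).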
import os
import shutil
import time
import re
import chainlit as cl
from chainlit.input_widget import Slider
import tokeniser
from tavily import TavilyClient
from ddgs import DDGS
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader, Settings, PromptTemplate
from llama_index.core.callbacks import CallbackManager
from llama_index.core.callbacks.schema import CBEventType
from llama_index.core.llms import ChatMessage
from llama_index.llms.nvidia import NVIDIA
from llama_index.embeddings.nvidia import NVIDIAEmbedding
from transformers import pipeline
from collections import deque
from anyascii import anyascii
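# Model endpoints and tuning knobs. MAX_CONTEXT_WINDOW_TOKENS caps the estimated
# token budget for chat history sent to the model; TOP_K is the number of
# retrieved chunks per RAG query; GUARD_THRESHOLD is the per-label score above
# which the guard classifier flags content as harmful.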
MAX_CONTEXT_WINDOW_TOKENS = 32000
MODEL = "speakleash/bielik-11b-v2.6-instruct"
GUARD_MODEL = "speakleash/Bielik-Guard-0.1B-v1.0"
EMBEDDING_MODEL = "baai/bge-m3"
TOP_K = 5
GUARD_THRESHOLD = 0.5
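# Persistent command buttons shown in the Chainlit UI: "Wyszukaj" = search the
# web, "Rozumuj" = reason before answering, "W+R" = search and reason,
# "Chroń" = guard against harmful content.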
COMMANDS = [
{"id": "Wyszukaj", "icon": "globe", "description": "Wyszukaj w Internecie", "button": True, "persistent": True},
{"id": "Rozumuj", "icon": "brain", "description": "Rozumuj przed odpowiedzi膮", "button": True, "persistent": True},
{"id": "W+R", "icon": "badge-plus", "description": "Wyszukaj i Rozumuj", "button": True, "persistent": True},
{"id": "Chro艅", "icon": "shield", "description": "Chro艅 przed szkodliwymi tre艣ciami", "button": True, "persistent": True}
]
SYSTEM_PROMPT = "Jesteś pomocnym asystentem. Odpowiadaj wyczerpująco w języku polskim na pytania użytkownika. Dzisiaj jest " + time.strftime("%d.%m.%Y") + " r. "
SEARCH_SYSTEM_PROMPT = "Wykorzystaj poniższe wyniki wyszukiwania w Internecie. Na końcu odpowiedzi umieść odnośniki do stron źródłowych, na podstawie których udzieliłeś odpowiedzi. "
THINK_SYSTEM_PROMPT = "Dokładnie przeanalizuj pytania krok po kroku, wyraźnie pokazując swój proces rozumowania przed udzieleniem ostatecznej odpowiedzi. Ustrukturyzuj swoją odpowiedź w następujący sposób: użyj znaczników <think> i </think>, aby pokazać szczegółowe etapy rozumowania, w tym analizę, podsumowanie, burzę mózgów, weryfikację dokładności, poprawę błędów i ponowne przeanalizowanie wcześniejszych punktów, np. <think>{myśli krok po kroku}</think>. Użyj maksymalnie 500 słów na rozumowanie. Następnie wyraźnie przedstaw ostateczną, dokładną, wyczerpującą odpowiedź w oparciu o swoje rozumowanie. Odpowiadaj w języku polskim. "
TEXT_QA_SYSTEM_PROMPT = "Poniżej znajduje się kontekst.\n---------------------\n{context_str}\n---------------------\nBiorąc pod uwagę kontekst, a nie wcześniejszą wiedzę, odpowiedz na pytanie.\nPytanie: {query_str}\nOdpowiedź: "
REFINE_SYSTEM_PROMPT = "Oryginalne zapytanie wygląda następująco: {query_str}\nPodaliśmy taką odpowiedź: {existing_answer}\nMamy możliwość doprecyzowania istniejącej odpowiedzi (tylko w razie potrzeby) o dodatkowy kontekst poniżej.\n------------\n{context_msg}\n------------\nBiorąc pod uwagę nowy kontekst, doprecyzuj oryginalną odpowiedź, aby lepiej odpowiedzieć na zapytanie. Jeśli kontekst jest nieprzydatny, zwróć oryginalną odpowiedź.\nDoprecyzowana odpowiedź: "
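# Callback events hidden from the Chainlit step view (only retrieval and
# query-level steps remain visible).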
CB_IGNORE = [
CBEventType.CHUNKING,
CBEventType.SYNTHESIZE,
CBEventType.EMBEDDING,
CBEventType.NODE_PARSING,
CBEventType.TREE,
CBEventType.LLM
]
tavilyClient = TavilyClient(os.getenv("TAVILY_API_KEY"))
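# Bielik-Guard safety classifier. With return_all_scores=True the pipeline
# returns, per input, a list of {"label", "score"} dicts covering every guard
# category, e.g. [{"label": ..., "score": 0.02}, ...] (illustrative shape only;
# the actual label names come from the model config). Note that
# return_all_scores is deprecated in newer transformers in favour of top_k=None,
# which also changes the output nesting, so the original flag is kept here.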
classifier = pipeline("text-classification", model=GUARD_MODEL, return_all_scores=True)
class BielikCallbackHandler(cl.LlamaIndexCallbackHandler):
def __init__(self):
super().__init__(event_starts_to_ignore=CB_IGNORE, event_ends_to_ignore=CB_IGNORE)
    def on_event_start(self, event_type, payload=None, event_id="", parent_id="", **kwargs):
        # Hide raw inputs on intermediate steps rendered in the UI.
        step_id = super().on_event_start(event_type, payload, event_id, parent_id, **kwargs)
        if step_id in self.steps:
            self.steps[step_id].show_input = False
        return step_id
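# Trim chat history to the token budget, keeping the most recent messages and
# dropping a trailing assistant turn so the list ends with the user's message.
# Token counts are rough estimates from the `tokeniser` package.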
def truncate_messages(messages, max_tokens=MAX_CONTEXT_WINDOW_TOKENS):
if not messages:
return []
truncated = messages.copy()
if truncated and truncated[-1]["role"] == "assistant":
truncated = truncated[:-1]
total_tokens = 0
for i in range(len(truncated) - 1, -1, -1):
message_tokens = tokeniser.estimate_tokens(truncated[i]["content"])
total_tokens += message_tokens
if total_tokens > max_tokens:
truncated = truncated[i + 1:]
break
return truncated
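# Web search step: Tavily is the primary backend; on failure it falls back to
# DDGS. Results are formatted as numbered markdown links plus snippets and
# injected into the system prompt.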
@cl.step(name="wyszukiwanie", type="tool", show_input=False)
async def search_web(query):
try:
search_results = tavilyClient.search(query=query[:400], country="poland")["results"]
except Exception as e:
print(f"Tavily search failed: {e}. Falling back to DDGS.")
try:
search = DDGS().text(query, region="pl-pl", backend="duckduckgo, brave, google, mullvad_google, mullvad_brave", max_results=5)
search_results = [{"title": r["title"], "url": r["href"], "content": r["body"]} for r in search]
except Exception as e:
print(f"DDGS search failed: {e}")
return f"B艂膮d wyszukiwania: {str(e)}"
formatted_text = "Wyniki wyszukiwania:\n"
for i, result in enumerate(search_results, 1):
formatted_text += f"{i}. [{result['title']}]({result['url']})\n {result['content']}\n"
return formatted_text
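# Reasoning step: streams the model's <think> block into a collapsible Chainlit
# step. It stops echoing once "</think>" (or a blank-line heuristic, in case the
# model omits the closing tag) is seen, then hands the still-open stream back so
# run_chat can continue streaming the final answer into the chat message.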
@cl.step(name="rozumowanie", type="tool", show_input=False)
async def think(messages, llm):
current_step = cl.context.current_step
current_step.output = ""
stream = await infer(messages, llm)
think_content = ""
async for chunk in stream:
if chunk.delta:
think_content += chunk.delta
await current_step.stream_token(chunk.delta)
if think_content.endswith("</think>") or think_content.endswith("\n \n"):
break
return stream
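# Thin wrapper: converts plain dict messages to ChatMessage objects and opens an
# async streaming chat completion.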
async def infer(messages, llm):
return await llm.astream_chat([ChatMessage(role=m["role"], content=m["content"]) for m in messages])
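# RAG over uploaded files: indexes the upload directory into an in-memory vector
# store and answers with the Polish QA/refine prompt templates defined above.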
async def ask_files(message, files):
dir = os.path.dirname(files[0].path)
cl.user_session.set("dir", dir)
documents = SimpleDirectoryReader(dir, exclude_hidden=False).load_data(show_progress=True)
index = VectorStoreIndex.from_documents(documents)
# index.storage_context.persist()
    # The global Settings (LLM, embedding model, callback manager) configured in
    # start_chat apply automatically; no per-engine service context is needed.
    query_engine = index.as_query_engine(
        streaming=True,
        similarity_top_k=TOP_K,
        text_qa_template=PromptTemplate(TEXT_QA_SYSTEM_PROMPT),
        refine_template=PromptTemplate(REFINE_SYSTEM_PROMPT)
    )
return await query_engine.aquery(message)
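# Guard pre-processing: anyascii transliterates the text to plain ASCII and the
# regex collapses single . , _ - separators between letters into spaces, so
# words disguised with separators (e.g. "b-a-d") remain visible to the
# classifier.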
@cl.step(name="klasyfikowanie", type="tool", show_input=False)
async def classify(message):
return classifier(re.sub(r"(?<=[A-Za-z])[\.,_-](?=[A-Za-z])", " ", anyascii(message)))[0]
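# Push the current slider values from the chat settings onto the shared LLM
# instance.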
def update_llm_settings(llm, settings):
llm.temperature = settings["Temp"]
llm.max_tokens = settings["MaxTokens"]
llm.additional_kwargs = {
"top_p": settings["TopP"],
"frequency_penalty": settings["FreqPenalty"],
"presence_penalty": settings["PresPenalty"]
}
return llm
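# Main chat pipeline: file Q&A takes priority; otherwise the user message is
# optionally guard-checked, the system prompt is extended for reasoning and/or
# search, the trimmed history is streamed through the model, LaTeX delimiters
# are normalised, and the finished answer is guard-checked again.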
async def run_chat(messages, files=None, search_enabled=False, think_enabled=False, guard_enabled=False):
llm = update_llm_settings(Settings.llm, cl.user_session.get("settings"))
msg = cl.Message(content="", author="Bielik")
response_content = ""
system_prompt = SYSTEM_PROMPT
curr_message = messages[-1]["content"].strip()
if not curr_message:
return
try:
if files:
stream = await ask_files(curr_message, files)
async for chunk in stream.response_gen:
response_content += chunk
await msg.stream_token(chunk)
await msg.send()
return response_content
if guard_enabled:
guard_results = await classify(curr_message)
if any(r["score"] > GUARD_THRESHOLD for r in guard_results):
                msg.content = response_content = "Wykryłem szkodliwe treści! Koniec rozmowy!"
await msg.send()
return response_content
if think_enabled:
system_prompt += THINK_SYSTEM_PROMPT
if search_enabled:
search_result = await search_web(curr_message)
system_prompt += SEARCH_SYSTEM_PROMPT + "\n\n" + search_result
context_messages = truncate_messages(messages)
messages = [{"role": "system", "content": system_prompt}, *context_messages]
print(messages)
stream = await think(messages, llm) if think_enabled else await infer(messages, llm)
async for chunk in stream:
if chunk.delta:
response_content += chunk.delta
await msg.stream_token(chunk.delta)
await msg.send()
response_content = re.sub(r'\\\[(.*?)\\\]', r'$$\1$$', re.sub(r'\\\((.*?)\\\)', r'$\1$', response_content, flags=re.S), flags=re.S) # LaTeX format
if guard_enabled:
guard_results = await classify(response_content)
if any(r["score"] > GUARD_THRESHOLD for r in guard_results):
response_content = "W mojej odpowiedzi wykry艂em szkodliwe tre艣ci. Gryz臋 si臋 w j臋zyk!"
msg.content = response_content
await msg.update()
return response_content
except Exception as e:
print(f"Response failed: {e}")
error_msg = f"B艂膮d generowania odpowiedzi: {str(e)}"
await cl.Message(content=error_msg).send()
return error_msg
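# Session setup: register sampling-parameter sliders and command buttons, then
# configure the global LlamaIndex Settings (Bielik LLM and BGE-M3 embeddings on
# NVIDIA endpoints, plus the custom callback handler).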
@cl.on_chat_start
async def start_chat():
settings = await cl.ChatSettings([
Slider(id="Temp", label="Temperatura", initial=0.2, min=0, max=1, step=0.1),
Slider(id="TopP", label="Top P", initial=0.7, min=0.01, max=1, step=0.01),
Slider(id="FreqPenalty", label="Frequency Penalty", initial=0, min=-2, max=2, step=0.1),
Slider(id="PresPenalty", label="Presence Penalty", initial=0, min=-2, max=2, step=0.1),
Slider(id="MaxTokens", label="Max Token贸w", initial=4096, min=1, max=4096, step=64)
]).send()
await cl.context.emitter.set_commands(COMMANDS)
cl.user_session.set("chat_messages", [])
cl.user_session.set("settings", settings)
Settings.llm = NVIDIA(
model=MODEL,
is_chat_model=True,
context_window=32768,
temperature=settings["Temp"],
top_p=settings["TopP"],
max_tokens=settings["MaxTokens"],
frequency_penalty=settings["FreqPenalty"],
presence_penalty=settings["PresPenalty"],
streaming=True
)
Settings.embed_model = NVIDIAEmbedding(
model=EMBEDDING_MODEL
)
Settings.callback_manager = CallbackManager([BielikCallbackHandler()])
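# Remove the temporary upload directory when the session ends.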
@cl.on_chat_end
def end_chat():
dir = cl.user_session.get("dir")
if dir and os.path.exists(dir):
shutil.rmtree(dir)
@cl.on_settings_update
async def setup_agent(settings):
cl.user_session.set("settings", settings)
@cl.on_message
async def on_message(msg):
chat_messages = cl.user_session.get("chat_messages", [])
chat_messages.append({"role": "user", "content": msg.content})
files = [el for el in msg.elements if el.path] or None
search_enabled = msg.command in ["Wyszukaj", "W+R"]
think_enabled = msg.command in ["Rozumuj", "W+R"]
    guard_enabled = msg.command == "Chroń"
    response = await run_chat(chat_messages, files, search_enabled, think_enabled, guard_enabled)
    # run_chat can return None (e.g. for empty input); skip storing an empty assistant turn.
    if response:
        chat_messages.append({"role": "assistant", "content": response})
    cl.user_session.set("chat_messages", chat_messages)