Merge branch 'dev_branch' into text_extraction
Changed files:
- .github/workflows/push_to_hf_space_prototype.yml (+14 -13)
- .vscode/launch.json (+35 -0)
- .vscode/tasks.json (+13 -0)
- code/.chainlit/translations/en-US.json (+0 -229)
- code/main.py (+18 -23)
- code/modules/chat/chat_model_loader.py (+13 -0)
- code/modules/chat/helpers.py (+5 -0)
- code/modules/config/config.yml (+4 -1)
- code/modules/config/constants.py (+1 -1)
- code/modules/dataloader/data_loader.py (+1 -1)
.github/workflows/push_to_hf_space_prototype.yml
CHANGED
@@ -1,20 +1,21 @@
 name: Push Prototype to HuggingFace

 on:
+  push:
+    branches: [dev_branch]
+
+  # run this workflow manually from the Actions tab
+  workflow_dispatch:

 jobs:
+  sync-to-hub:
     runs-on: ubuntu-latest
     steps:
+      - uses: actions/checkout@v4
+        with:
+          fetch-depth: 0
+          lfs: true
+      - name: Deploy Prototype to HuggingFace
+        env:
+          HF_TOKEN: ${{ secrets.HF_TOKEN }}
+        run: git push https://trgardos:$HF_TOKEN@huggingface.co/spaces/dl4ds/tutor_dev dev_branch:main
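For local testing, the same deploy step can be reproduced outside of GitHub Actions. The following is a minimal Python sketch, not part of this commit, that assumes HF_TOKEN is exported in the environment and that the Space remote shown in the workflow is unchanged.

# Hypothetical local equivalent of the workflow's deploy step (illustrative only).
import os
import subprocess

token = os.environ["HF_TOKEN"]  # same secret the workflow injects
remote = f"https://trgardos:{token}@huggingface.co/spaces/dl4ds/tutor_dev"

# Push the local dev_branch to the Space's main branch, as the workflow does.
subprocess.run(["git", "push", remote, "dev_branch:main"], check=True)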
.vscode/launch.json
ADDED
@@ -0,0 +1,35 @@
{
    // Use IntelliSense to learn about possible attributes.
    // Hover to view descriptions of existing attributes.
    // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
    "version": "0.2.0",
    "configurations": [
        {
            "name": "Python Debugger: Chainlit run main.py",
            "type": "debugpy",
            "request": "launch",
            "program": "${workspaceFolder}/.venv/bin/chainlit",
            "console": "integratedTerminal",
            "args": ["run", "main.py"],
            "cwd": "${workspaceFolder}/code",
            "justMyCode": true
        },
        {
            "name": "Python Debugger: Module store_manager",
            "type": "debugpy",
            "request": "launch",
            "module": "modules.vectorstore.store_manager",
            "env": {"PYTHONPATH": "${workspaceFolder}/code"},
            "cwd": "${workspaceFolder}/code",
            "justMyCode": true
        },
        {
            "name": "Python Debugger: Module data_loader",
            "type": "debugpy",
            "request": "launch",
            "module": "modules.dataloader.data_loader",
            "env": {"PYTHONPATH": "${workspaceFolder}/code"},
            "cwd": "${workspaceFolder}/code",
            "justMyCode": true
        }
    ]
}
.vscode/tasks.json
ADDED
@@ -0,0 +1,13 @@
{
    // See https://go.microsoft.com/fwlink/?LinkId=733558
    // for the documentation about the tasks.json format
    "version": "2.0.0",
    "tasks": [
        {
            "label": "echo",
            "type": "shell",
            "command": "echo ${workspaceFolder}; ls ${workspaceFolder}/code",
            "problemMatcher": []
        }
    ]
}
code/.chainlit/translations/en-US.json
DELETED
@@ -1,229 +0,0 @@
{
  "components": {
    "atoms": {
      "buttons": {
        "userButton": {
          "menu": {
            "settings": "Settings",
            "settingsKey": "S",
            "APIKeys": "API Keys",
            "logout": "Logout"
          }
        }
      }
    },
    "molecules": {
      "newChatButton": {
        "newChat": "New Chat"
      },
      "tasklist": {
        "TaskList": {
          "title": "\ud83d\uddd2\ufe0f Task List",
          "loading": "Loading...",
          "error": "An error occurred"
        }
      },
      "attachments": {
        "cancelUpload": "Cancel upload",
        "removeAttachment": "Remove attachment"
      },
      "newChatDialog": {
        "createNewChat": "Create new chat?",
        "clearChat": "This will clear the current messages and start a new chat.",
        "cancel": "Cancel",
        "confirm": "Confirm"
      },
      "settingsModal": {
        "settings": "Settings",
        "expandMessages": "Expand Messages",
        "hideChainOfThought": "Hide Chain of Thought",
        "darkMode": "Dark Mode"
      },
      "detailsButton": {
        "using": "Using",
        "used": "Used"
      },
      "auth": {
        "authLogin": {
          "title": "Login to access the app.",
          "form": {
            "email": "Email address",
            "password": "Password",
            "noAccount": "Don't have an account?",
            "alreadyHaveAccount": "Already have an account?",
            "signup": "Sign Up",
            "signin": "Sign In",
            "or": "OR",
            "continue": "Continue",
            "forgotPassword": "Forgot password?",
            "passwordMustContain": "Your password must contain:",
            "emailRequired": "email is a required field",
            "passwordRequired": "password is a required field"
          },
          "error": {
            "default": "Unable to sign in.",
            "signin": "Try signing in with a different account.",
            "oauthsignin": "Try signing in with a different account.",
            "redirect_uri_mismatch": "The redirect URI is not matching the oauth app configuration.",
            "oauthcallbackerror": "Try signing in with a different account.",
            "oauthcreateaccount": "Try signing in with a different account.",
            "emailcreateaccount": "Try signing in with a different account.",
            "callback": "Try signing in with a different account.",
            "oauthaccountnotlinked": "To confirm your identity, sign in with the same account you used originally.",
            "emailsignin": "The e-mail could not be sent.",
            "emailverify": "Please verify your email, a new email has been sent.",
            "credentialssignin": "Sign in failed. Check the details you provided are correct.",
            "sessionrequired": "Please sign in to access this page."
          }
        },
        "authVerifyEmail": {
          "almostThere": "You're almost there! We've sent an email to ",
          "verifyEmailLink": "Please click on the link in that email to complete your signup.",
          "didNotReceive": "Can't find the email?",
          "resendEmail": "Resend email",
          "goBack": "Go Back",
          "emailSent": "Email sent successfully.",
          "verifyEmail": "Verify your email address"
        },
        "providerButton": {
          "continue": "Continue with {{provider}}",
          "signup": "Sign up with {{provider}}"
        },
        "authResetPassword": {
          "newPasswordRequired": "New password is a required field",
          "passwordsMustMatch": "Passwords must match",
          "confirmPasswordRequired": "Confirm password is a required field",
          "newPassword": "New password",
          "confirmPassword": "Confirm password",
          "resetPassword": "Reset Password"
        },
        "authForgotPassword": {
          "email": "Email address",
          "emailRequired": "email is a required field",
          "emailSent": "Please check the email address {{email}} for instructions to reset your password.",
          "enterEmail": "Enter your email address and we will send you instructions to reset your password.",
          "resendEmail": "Resend email",
          "continue": "Continue",
          "goBack": "Go Back"
        }
      }
    },
    "organisms": {
      "chat": {
        "history": {
          "index": {
            "showHistory": "Show history",
            "lastInputs": "Last Inputs",
            "noInputs": "Such empty...",
            "loading": "Loading..."
          }
        },
        "inputBox": {
          "input": {
            "placeholder": "Type your message here..."
          },
          "speechButton": {
            "start": "Start recording",
            "stop": "Stop recording"
          },
          "SubmitButton": {
            "sendMessage": "Send message",
            "stopTask": "Stop Task"
          },
          "UploadButton": {
            "attachFiles": "Attach files"
          },
          "waterMark": {
            "text": "Built with"
          }
        },
        "Messages": {
          "index": {
            "running": "Running",
            "executedSuccessfully": "executed successfully",
            "failed": "failed",
            "feedbackUpdated": "Feedback updated",
            "updating": "Updating"
          }
        },
        "dropScreen": {
          "dropYourFilesHere": "Drop your files here"
        },
        "index": {
          "failedToUpload": "Failed to upload",
          "cancelledUploadOf": "Cancelled upload of",
          "couldNotReachServer": "Could not reach the server",
          "continuingChat": "Continuing previous chat"
        },
        "settings": {
          "settingsPanel": "Settings panel",
          "reset": "Reset",
          "cancel": "Cancel",
          "confirm": "Confirm"
        }
      },
      "threadHistory": {
        "sidebar": {
          "filters": {
            "FeedbackSelect": {
              "feedbackAll": "Feedback: All",
              "feedbackPositive": "Feedback: Positive",
              "feedbackNegative": "Feedback: Negative"
            },
            "SearchBar": {
              "search": "Search"
            }
          },
          "DeleteThreadButton": {
            "confirmMessage": "This will delete the thread as well as it's messages and elements.",
            "cancel": "Cancel",
            "confirm": "Confirm",
            "deletingChat": "Deleting chat",
            "chatDeleted": "Chat deleted"
          },
          "index": {
            "pastChats": "Past Chats"
          },
          "ThreadList": {
            "empty": "Empty...",
            "today": "Today",
            "yesterday": "Yesterday",
            "previous7days": "Previous 7 days",
            "previous30days": "Previous 30 days"
          },
          "TriggerButton": {
            "closeSidebar": "Close sidebar",
            "openSidebar": "Open sidebar"
          }
        },
        "Thread": {
          "backToChat": "Go back to chat",
          "chatCreatedOn": "This chat was created on"
        }
      },
      "header": {
        "chat": "Chat",
        "readme": "Readme"
      }
    }
  },
  "hooks": {
    "useLLMProviders": {
      "failedToFetchProviders": "Failed to fetch providers:"
    }
  },
  "pages": {
    "Design": {},
    "Env": {
      "savedSuccessfully": "Saved successfully",
      "requiredApiKeys": "Required API Keys",
      "requiredApiKeysInfo": "To use this app, the following API keys are required. The keys are stored on your device's local storage."
    },
    "Page": {
      "notPartOfProject": "You are not part of this project."
    },
    "ResumeButton": {
      "resumeChat": "Resume Chat"
    }
  }
}
code/main.py
CHANGED
@@ -17,6 +17,7 @@ from modules.chat.helpers import (
     get_sources,
     get_history_chat_resume,
     get_history_setup_llm,
+    get_last_config,
 )
 import copy
 from typing import Optional

@@ -55,7 +56,7 @@ class Chatbot:
         """
         self.config = config

-    def _load_config(self):
+    async def _load_config(self):
         """
         Load the configuration from a YAML file.
         """

@@ -277,7 +278,7 @@ class Chatbot:
         rename_dict = {"Chatbot": "AI Tutor"}
         return rename_dict.get(orig_author, orig_author)

-    async def start(self):
+    async def start(self, config=None):
         """
         Start the chatbot, initialize settings widgets,
         and display and load previous conversation if chat logging is enabled.

@@ -285,6 +286,12 @@

         start_time = time.time()

+        self.config = (
+            await self._load_config() if config is None else config
+        )  # Reload the configuration on chat resume
+
+        await self.make_llm_settings_widgets(self.config)  # Reload the settings widgets
+
         await self.make_llm_settings_widgets(self.config)
         user = cl.user_session.get("user")
         self.user = {

@@ -370,25 +377,6 @@

         answer = res.get("answer", res.get("result"))

-        if cl_data._data_layer is not None:
-            with cl_data._data_layer.client.step(
-                type="run",
-                name="step_info",
-                thread_id=cl.context.session.thread_id,
-                # tags=self.tags,
-            ) as step:
-
-                step.input = {"question": user_query_dict["input"]}
-
-                step.output = {
-                    "chat_history": res.get("chat_history"),
-                    "context": res.get("context"),
-                    "answer": answer,
-                    "rephrase_prompt": res.get("rephrase_prompt"),
-                    "qa_prompt": res.get("qa_prompt"),
-                }
-                step.metadata = self.config
-
         answer_with_sources, source_elements, sources_dict = get_sources(
             res, answer, stream=stream, view_sources=view_sources
         )

@@ -425,14 +413,21 @@
             elements=source_elements,
             author=LLM,
             actions=actions,
+            metadata=self.config,
         ).send()

     async def on_chat_resume(self, thread: ThreadDict):
+        thread_config = None
         steps = thread["steps"]
-        k = self.config["llm_params"][
+        k = self.config["llm_params"][
+            "memory_window"
+        ]  # on resume, always use the default memory window
         conversation_list = get_history_chat_resume(steps, k, SYSTEM, LLM)
+        thread_config = get_last_config(
+            steps
+        )  # TODO: Returns None for now - which causes config to be reloaded with default values
         cl.user_session.set("memory", conversation_list)
+        await self.start(config=thread_config)

     @cl.oauth_callback
     def auth_callback(
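The resume path now funnels back through start(): when get_last_config() recovers a per-thread config it is reused, otherwise the defaults are reloaded. A minimal standalone sketch of that reload-unless-supplied pattern, with hypothetical stand-in names and no Chainlit dependency:

# Sketch of the pattern behind start(config=None); load_default_config and Session
# are hypothetical stand-ins, not code from this commit.
import asyncio

async def load_default_config() -> dict:
    # Stand-in for Chatbot._load_config(), which reads config.yml.
    return {"llm_params": {"memory_window": 3}}

class Session:
    def __init__(self):
        self.config = None

    async def start(self, config=None):
        # Reload defaults only when no per-thread config was recovered.
        self.config = await load_default_config() if config is None else config
        return self.config

async def main():
    session = Session()
    print(await session.start())                                       # fresh chat: defaults
    print(await session.start({"llm_params": {"memory_window": 10}}))  # resumed chat

asyncio.run(main())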
code/modules/chat/chat_model_loader.py
CHANGED
@@ -5,6 +5,8 @@ from langchain_community.llms import LlamaCpp
 import torch
 import transformers
 import os
+from pathlib import Path
+from huggingface_hub import hf_hub_download
 from langchain.callbacks.manager import CallbackManager
 from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
 from modules.config.constants import LLAMA_PATH

@@ -15,6 +17,14 @@ class ChatModelLoader:
         self.config = config
         self.huggingface_token = os.getenv("HUGGINGFACEHUB_API_TOKEN")

+    def _verify_model_cache(self, model_cache_path):
+        hf_hub_download(
+            repo_id=self.config["llm_params"]["local_llm_params"]["repo_id"],
+            filename=self.config["llm_params"]["local_llm_params"]["filename"],
+            cache_dir=model_cache_path,
+        )
+        return str(list(Path(model_cache_path).glob("*/snapshots/*/*.gguf"))[0])
+
     def load_chat_model(self):
         if self.config["llm_params"]["llm_loader"] in [
             "gpt-3.5-turbo-1106",

@@ -24,6 +34,9 @@
             llm = ChatOpenAI(model_name=self.config["llm_params"]["llm_loader"])
         elif self.config["llm_params"]["llm_loader"] == "local_llm":
             n_batch = 512  # Should be between 1 and n_ctx, consider the amount of VRAM in your GPU.
+            model_path = self._verify_model_cache(
+                self.config["llm_params"]["local_llm_params"]["model"]
+            )
             llm = LlamaCpp(
                 model_path=LLAMA_PATH,
                 n_batch=n_batch,
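_verify_model_cache() leans on huggingface_hub's local cache: hf_hub_download is effectively a no-op when the file is already cached, and the snapshot directory is then globbed for the .gguf. A rough standalone sketch of the same idea, using the repo and filename values introduced in config.yml below (illustrative, not the project's API):

# Sketch of the cache-then-glob approach used by _verify_model_cache.
from pathlib import Path
from huggingface_hub import hf_hub_download

def resolve_gguf(repo_id: str, filename: str, cache_dir: str) -> str:
    # Downloads on first call, reuses the cached snapshot afterwards.
    hf_hub_download(repo_id=repo_id, filename=filename, cache_dir=cache_dir)
    # Cache layout: <cache_dir>/models--<org>--<name>/snapshots/<rev>/<file>
    matches = list(Path(cache_dir).glob("*/snapshots/*/*.gguf"))
    if not matches:
        raise FileNotFoundError(f"No .gguf found under {cache_dir}")
    return str(matches[0])

if __name__ == "__main__":
    path = resolve_gguf(
        "TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF",
        "tinyllama-1.1b-chat-v1.0.Q5_0.gguf",
        "../storage/models/tinyllama",
    )
    print(path)

Note that hf_hub_download itself returns the resolved file path, so the glob could in principle be replaced by that return value.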
code/modules/chat/helpers.py
CHANGED
@@ -162,3 +162,8 @@ def get_history_setup_llm(memory_list):
             raise ValueError("Invalid message type")

     return conversation_list
+
+
+def get_last_config(steps):
+    # TODO: Implement this function
+    return None
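get_last_config() is a stub for now: per the TODO it returns None, so resumed chats fall back to the default config. One possible implementation, purely hypothetical, would scan the thread's steps for the config stored via metadata=self.config in main.py:

# Hypothetical implementation of get_last_config (the commit itself returns None).
# Assumes steps is the thread["steps"] list Chainlit provides on resume and that
# assistant messages carry the config dict under their "metadata" key, as set in main.py.
def get_last_config(steps):
    for step in reversed(steps):
        metadata = step.get("metadata") or {}
        if "llm_params" in metadata:  # looks like a stored config
            return metadata
    return None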
code/modules/config/config.yml
CHANGED
@@ -35,6 +35,9 @@ llm_params:
   temperature: 0.7 # float
   local_llm_params:
     temperature: 0.7 # float
+    repo_id: 'TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF' # HuggingFace repo id
+    filename: 'tinyllama-1.1b-chat-v1.0.Q5_0.gguf' # Specific name of gguf file in the repo
+    pdf_reader: 'pymupdf' # str [llama, pymupdf, gpt]
   stream: False # bool
   pdf_reader: 'gpt' # str [llama, pymupdf, gpt]

@@ -54,4 +57,4 @@ splitter_options:
   chunk_separators : ["\n\n", "\n", " ", ""] # list of strings
   front_chunks_to_remove : null # int or None
   last_chunks_to_remove : null # int or None
-  delimiters_to_remove : ['\t', '\n', ' ', ' '] # list of strings
+  delimiters_to_remove : ['\t', '\n', ' ', ' '] # list of strings
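The new repo_id and filename keys are what _verify_model_cache() reads to locate the GGUF. A small sketch of how they come out of the YAML; it assumes PyYAML and that the script runs from the repository root (the path is otherwise relative to the caller's working directory):

# Sketch: reading the new local_llm_params keys from config.yml (illustrative only).
import yaml

with open("code/modules/config/config.yml") as f:
    config = yaml.safe_load(f)

local_params = config["llm_params"]["local_llm_params"]
print(local_params["repo_id"])   # 'TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF'
print(local_params["filename"])  # 'tinyllama-1.1b-chat-v1.0.Q5_0.gguf'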
code/modules/config/constants.py
CHANGED
@@ -18,6 +18,6 @@ opening_message = f"Hey, What Can I Help You With?\n\nYou can me ask me question

 # Model Paths

-LLAMA_PATH = "../storage/models/tinyllama
+LLAMA_PATH = "../storage/models/tinyllama"

 RETRIEVER_HF_PATHS = {"RAGatouille": "XThomasBU/Colbert_Index"}
code/modules/dataloader/data_loader.py
CHANGED
@@ -98,7 +98,6 @@ class FileReader:
         self.web_reader = HTMLReader()
         self.logger.info(f"Initialized FileReader with {kind} PDF reader and HTML reader")

-
     def extract_text_from_pdf(self, pdf_path):
         text = ""
         with open(pdf_path, "rb") as file:

@@ -315,6 +314,7 @@ class ChunkProcessor:
             return

         try:
+
             if file_path in self.document_data:
                 self.logger.warning(f"File {file_name} already processed")
                 documents = [Document(page_content=content) for content in self.document_data[file_path].values()]