Spaces:
Runtime error
Runtime error
Allen Park
committed on
Commit
·
e34f0a0
1
Parent(s):
6283f19
feat(check token size of context)
Browse files
* add a function that returns a boolean indicating whether the approximate token size is under 8000
* raise gr.Error if file exceeds token size
.DS_Store
ADDED
|
Binary file (6.15 kB). View file
|
|
|
app.py
CHANGED
|
@@ -189,6 +189,12 @@ def model_call(question, document, answer, client_base_url):
|
|
| 189 |
combined_reasoning = " ".join(reasoning)[1:-1]
|
| 190 |
return combined_reasoning, score
|
| 191 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 192 |
def get_filetype(filename):
    """Return the text that follows the final "." in *filename*.

    A name that contains no "." is returned unchanged, and a name that
    ends with "." yields an empty string.
    """
    _, _, extension = filename.rpartition(".")
    return extension
|
| 194 |
|
|
@@ -218,6 +224,11 @@ def upload_file(filepath):
|
|
| 218 |
extracted_file_text = extract_text_pymupdf(filepath)
|
| 219 |
elif filetype == "docx":
|
| 220 |
extracted_file_text = extract_text_python_docx(filepath)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 221 |
return [gr.UploadButton(visible=False), gr.Group(visible=True), gr.Markdown(f"**Uploaded file:** {name}"), extracted_file_text]
|
| 222 |
else:
|
| 223 |
return [gr.UploadButton(visible=True, file_count="single", file_types=UPLOADABLE_FILE_TYPES), gr.Group(visible=False), gr.Markdown(""), extracted_file_text]
|
|
|
|
| 189 |
combined_reasoning = " ".join(reasoning)[1:-1]
|
| 190 |
return combined_reasoning, score
|
| 191 |
|
| 192 |
+
def return_approximate_token_size(text, max_token_length=8000, chars_per_token=4):
    """Report whether *text* fits under an approximate token budget.

    Despite its name, this function returns a boolean rather than a size.
    No tokenizer is run: the token count is estimated as
    ``len(text) / chars_per_token``.

    Args:
        text: The string to measure. ``None`` or an empty string is treated
            as containing zero tokens.
        max_token_length: Exclusive upper bound on the estimated token count.
        chars_per_token: Assumed average number of characters per token.

    Returns:
        True when the estimated token count is strictly below
        ``max_token_length``, otherwise False.
    """
    # Nothing to measure means nothing can exceed the budget; this also
    # avoids calling len() on None.
    if not text:
        return True
    estimated_tokens = len(text) / chars_per_token
    return estimated_tokens < max_token_length
|
| 197 |
+
|
| 198 |
def get_filetype(filename):
    """Return the text that follows the final "." in *filename*.

    A name that contains no "." is returned unchanged, and a name that
    ends with "." yields an empty string.
    """
    _, _, extension = filename.rpartition(".")
    return extension
|
| 200 |
|
|
|
|
| 224 |
extracted_file_text = extract_text_pymupdf(filepath)
|
| 225 |
elif filetype == "docx":
|
| 226 |
extracted_file_text = extract_text_python_docx(filepath)
|
| 227 |
+
|
| 228 |
+
# return warning if file is too large
|
| 229 |
+
if not return_approximate_token_size(extracted_file_text):
|
| 230 |
+
raise gr.Error("File is too large to process. Please upload a smaller file.")
|
| 231 |
+
|
| 232 |
return [gr.UploadButton(visible=False), gr.Group(visible=True), gr.Markdown(f"**Uploaded file:** {name}"), extracted_file_text]
|
| 233 |
else:
|
| 234 |
return [gr.UploadButton(visible=True, file_count="single", file_types=UPLOADABLE_FILE_TYPES), gr.Group(visible=False), gr.Markdown(""), extracted_file_text]
|