jedick committed · Commit 1130c52 · Parent: 9d0646a

Download data from Dropbox

Files changed:
- app.py +35 -5
- eval.py +10 -9
- requirements.txt +4 -3
app.py
CHANGED
@@ -6,11 +6,13 @@ from langgraph.checkpoint.memory import MemorySaver
 from dotenv import load_dotenv
 from main import openai_model, model_id
 from util import get_sources, get_start_end_months
+import requests
 import zipfile
 import shutil
 import spaces
 import torch
-import boto3
+
+# import boto3
 import uuid
 import ast
 import os
@@ -245,7 +247,7 @@ with gr.Blocks(
             value=("local" if torch.cuda.is_available() else "remote"),
             label="Compute Mode",
             info=(
-                "NOTE: remote mode
+                "NOTE: remote mode **does not** use ZeroGPU"
                 if torch.cuda.is_available()
                 else "NOTE: local mode requires GPU"
             ),
@@ -359,7 +361,7 @@ with gr.Blocks(
         if compute_mode == "local":
             status_text = f"""
             π Now in **local** mode, using ZeroGPU hardware<br>
-            ⌛ Response time is
+            ⌛ Response time is about 1 minute<br>
             ✨ [nomic-embed-text-v1.5](https://huggingface.co/nomic-ai/nomic-embed-text-v1.5) and [{model_id.split("/")[-1]}](https://huggingface.co/{model_id})<br>
             π See the project's [GitHub repository](https://github.com/jedick/R-help-chat)
             """
@@ -549,7 +551,8 @@ with gr.Blocks(
     def download():
         """Download the application data"""

-        # Code from
+        # NOTUSED: Code for file download from AWS S3 bucket
+        # https://thecodinginterface.com/blog/aws-s3-python-boto3

         def aws_session(region_name="us-east-1"):
             return boto3.session.Session(
@@ -564,9 +567,36 @@ with gr.Blocks(
             bucket = s3_resource.Bucket(bucket_name)
             bucket.download_file(Key=s3_key, Filename=dst_path)

+        def download_dropbox_file(shared_url, output_file):
+            """Download file from Dropbox"""
+
+            # Modify the shared URL to enable direct download
+            direct_url = shared_url.replace(
+                "www.dropbox.com", "dl.dropboxusercontent.com"
+            ).replace("?dl=0", "")
+
+            # Send a GET request to the direct URL
+            response = requests.get(direct_url, stream=True)
+
+            if response.status_code == 200:
+                # Write the content to a local file
+                with open(output_file, "wb") as file:
+                    for chunk in response.iter_content(chunk_size=8192):
+                        file.write(chunk)
+                print(f"File downloaded successfully as '{output_file}'")
+            else:
+                print(
+                    f"Failed to download file. HTTP Status Code: {response.status_code}"
+                )
+
         if not os.path.isdir(db_dir):
             if not os.path.exists("db.zip"):
-                download_file_from_bucket("r-help-chat", "db.zip", "db.zip")
+                ## For S3 (need AWS_ACCESS_KEY_ID and AWS_ACCESS_KEY_SECRET)
+                # download_file_from_bucket("r-help-chat", "db.zip", "db.zip")
+                # For Dropbox (shared file - key is in URL)
+                shared_link = "https://www.dropbox.com/scl/fi/jx90g5lorpgkkyyzeurtc/db.zip?rlkey=wvqa3p9hdy4rmod1r8yf2am09&st=l9tsam56&dl=0"
+                output_filename = "db.zip"
+                download_dropbox_file(shared_link, output_filename)

         return None
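The new download_dropbox_file relies on a Dropbox convention: a shared link serves the raw file when the host is swapped to dl.dropboxusercontent.com. A minimal standalone sketch of that rewrite, using a made-up placeholder link rather than the commit's real one:

    # Sketch of the URL rewrite used above; the shared link is hypothetical.
    shared_url = "https://www.dropbox.com/s/abc123/db.zip?dl=0"
    direct_url = shared_url.replace(
        "www.dropbox.com", "dl.dropboxusercontent.com"
    ).replace("?dl=0", "")
    print(direct_url)  # https://dl.dropboxusercontent.com/s/abc123/db.zip

Two side notes: in the commit's actual link the flag appears as "&dl=0" after other query parameters, so the second replace appears to be a no-op there and the host rewrite alone enables the direct download; and stream=True with iter_content(chunk_size=8192) writes the zip to disk in 8 KB chunks, so the whole archive never has to fit in memory.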
eval.py
CHANGED
@@ -101,16 +101,17 @@ def run_evals_with_csv(csv_path):
     for question, reference, retrieved_email, answer in zip(
         questions, references, retrieved_emails, answers
     ):
-        retrieved_contexts = [
-            "\n\n\nFrom" + email for email in retrieved_email.split("\n\n\nFrom")
-        ]
         # Remove the source file names (e.g. R-help/2022-September.txt) as it confuses the evaluator
-        retrieved_contexts =
+        retrieved_contexts = (
+            [
+                "\n\n\nFrom" + email.split("\n\n\nFrom")[1]
+                for email in retrieved_email.split(
+                    "\n\n--- --- --- --- Next Email --- --- --- ---\n\n"
+                )
+            ]
+            if retrieved_email != ""
+            else []
+        )
         dataset.append(
             {
                 "user_input": question,
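To make the new comprehension concrete, here is a small self-contained sketch; the email text is invented for illustration, while the separator string matches the one in the diff:

    # Hypothetical retrieved_email blob: two emails, each prefixed with a
    # source file name, joined by the "Next Email" separator used above.
    sep = "\n\n--- --- --- --- Next Email --- --- --- ---\n\n"
    retrieved_email = sep.join(
        [
            "R-help/2022-September.txt\n\n\nFrom alice: how do I sort a list?",
            "R-help/2023-January.txt\n\n\nFrom bob: use order() on the keys.",
        ]
    )
    retrieved_contexts = (
        [
            "\n\n\nFrom" + email.split("\n\n\nFrom")[1]
            for email in retrieved_email.split(sep)
        ]
        if retrieved_email != ""
        else []
    )
    # Each context now starts at "From ...", with the file name stripped.
    assert retrieved_contexts[0] == "\n\n\nFrom alice: how do I sort a list?"

The `if retrieved_email != ""` guard matters because "".split(sep) returns [""], and "".split("\n\n\nFrom")[1] would raise an IndexError on that empty element.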
requirements.txt
CHANGED
@@ -11,7 +11,7 @@ torch==2.5.1
 # Stated requirements:
 # SmolLM3: transformers>=4.53
 # Gemma 3: transformers>=4.50
-# Gemma 3 with transformers==4.54.0 gives:
+# NOTE: Gemma 3 with transformers==4.54.0 gives:
 # ValueError: Max cache length is not consistent across layers
 transformers==4.51.3
 # Commented because we have local modifications
@@ -24,7 +24,8 @@ posthog==5.4.0
 # Gradio for the web interface
 gradio==5.38.2
 spaces==0.37.1
-# For downloading data
-boto3==1.39.14
+# For downloading data from S3
+# Commented because we're using Dropbox
+#boto3==1.39.14
 # Others
 python-dotenv