Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
|
@@ -236,22 +236,18 @@ from fastapi.templating import Jinja2Templates
|
|
| 236 |
from simple_salesforce import Salesforce, SalesforceLogin
|
| 237 |
from langchain_groq import ChatGroq
|
| 238 |
from langchain_core.prompts import ChatPromptTemplate
|
| 239 |
-
from llama_index.core import StorageContext, VectorStoreIndex, SimpleDirectoryReader, Settings
|
| 240 |
-
from llama_index.core import load_index_from_storage # Added missing import
|
| 241 |
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
|
| 242 |
|
| 243 |
# Configure logging
|
| 244 |
logging.basicConfig(level=logging.INFO)
|
| 245 |
logger = logging.getLogger(__name__)
|
| 246 |
|
| 247 |
-
# Define Pydantic model for incoming request body
|
| 248 |
class MessageRequest(BaseModel):
|
| 249 |
message: str
|
| 250 |
|
| 251 |
-
# Initialize FastAPI app
|
| 252 |
app = FastAPI()
|
| 253 |
|
| 254 |
-
# Allow CORS requests (restrict in production)
|
| 255 |
app.add_middleware(
|
| 256 |
CORSMiddleware,
|
| 257 |
allow_origins=["*"],
|
|
@@ -260,61 +256,40 @@ app.add_middleware(
|
|
| 260 |
allow_headers=["*"],
|
| 261 |
)
|
| 262 |
|
| 263 |
-
# Mount static files
|
| 264 |
app.mount("/static", StaticFiles(directory="static"), name="static")
|
| 265 |
templates = Jinja2Templates(directory="static")
|
| 266 |
|
| 267 |
-
# Validate environment variables
|
| 268 |
required_env_vars = ["CHATGROQ_API_KEY", "username", "password", "security_token", "domain", "HF_TOKEN"]
|
| 269 |
for var in required_env_vars:
|
| 270 |
if not os.getenv(var):
|
| 271 |
-
logger.error(f"
|
| 272 |
raise ValueError(f"Environment variable {var} is not set")
|
| 273 |
|
| 274 |
-
#
|
| 275 |
GROQ_API_KEY = os.getenv("CHATGROQ_API_KEY")
|
| 276 |
-
|
| 277 |
-
try:
|
| 278 |
-
llm = ChatGroq(
|
| 279 |
-
model_name=GROQ_MODEL,
|
| 280 |
-
api_key=GROQ_API_KEY,
|
| 281 |
-
temperature=0.1,
|
| 282 |
-
max_tokens=50
|
| 283 |
-
)
|
| 284 |
-
except Exception as e:
|
| 285 |
-
logger.error(f"Failed to initialize Groq model: {e}")
|
| 286 |
-
raise HTTPException(status_code=500, detail="Failed to initialize Groq model")
|
| 287 |
-
|
| 288 |
-
# Configure LlamaIndex settings
|
| 289 |
Settings.embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-small-en-v1.5")
|
| 290 |
|
| 291 |
-
# Salesforce
|
| 292 |
-
username = os.getenv("username")
|
| 293 |
-
password = os.getenv("password")
|
| 294 |
-
security_token = os.getenv("security_token")
|
| 295 |
-
domain = os.getenv("domain") # e.g., 'test' for sandbox
|
| 296 |
-
|
| 297 |
-
# Initialize Salesforce connection (allow failure)
|
| 298 |
sf = None
|
| 299 |
try:
|
| 300 |
session_id, sf_instance = SalesforceLogin(
|
| 301 |
-
username=username,
|
|
|
|
|
|
|
|
|
|
| 302 |
)
|
| 303 |
sf = Salesforce(instance=sf_instance, session_id=session_id)
|
| 304 |
-
logger.info("Salesforce
|
| 305 |
except Exception as e:
|
| 306 |
-
logger.
|
| 307 |
|
| 308 |
-
# Chat history
|
| 309 |
chat_history = []
|
| 310 |
current_chat_history = []
|
| 311 |
-
MAX_HISTORY_SIZE = 100
|
| 312 |
|
| 313 |
-
# Directories for data ingestion
|
| 314 |
PDF_DIRECTORY = "data"
|
| 315 |
PERSIST_DIR = "db"
|
| 316 |
-
|
| 317 |
-
# Ensure directories exist
|
| 318 |
os.makedirs(PDF_DIRECTORY, exist_ok=True)
|
| 319 |
os.makedirs(PERSIST_DIR, exist_ok=True)
|
| 320 |
|
|
@@ -327,70 +302,68 @@ def data_ingestion_from_directory():
|
|
| 327 |
storage_context = StorageContext.from_defaults()
|
| 328 |
index = VectorStoreIndex.from_documents(documents, storage_context=storage_context)
|
| 329 |
index.storage_context.persist(persist_dir=PERSIST_DIR)
|
| 330 |
-
logger.info("Data ingestion
|
| 331 |
except Exception as e:
|
| 332 |
-
logger.error(f"
|
| 333 |
-
raise HTTPException(status_code=500, detail=
|
| 334 |
|
| 335 |
def initialize():
|
| 336 |
try:
|
| 337 |
-
data_ingestion_from_directory()
|
| 338 |
except Exception as e:
|
| 339 |
-
logger.error(f"Initialization
|
| 340 |
-
raise HTTPException(status_code=500, detail="
|
| 341 |
|
| 342 |
-
initialize()
|
| 343 |
|
| 344 |
-
def handle_query(query):
|
| 345 |
-
# Prepare context from chat history
|
| 346 |
chat_context = ""
|
| 347 |
-
for past_query, response in reversed(current_chat_history[-10:]):
|
| 348 |
-
|
| 349 |
-
chat_context += f"User asked: '{past_query}'\nBot answered: '{response}'\n"
|
| 350 |
|
| 351 |
-
# Load
|
| 352 |
try:
|
| 353 |
storage_context = StorageContext.from_defaults(persist_dir=PERSIST_DIR)
|
| 354 |
index = load_index_from_storage(storage_context)
|
| 355 |
query_engine = index.as_query_engine(similarity_top_k=2)
|
| 356 |
retrieved = query_engine.query(query)
|
| 357 |
-
doc_context =
|
| 358 |
-
logger.info(f"Retrieved documents for query '{query}': {doc_context[:100]}...")
|
| 359 |
except Exception as e:
|
| 360 |
-
logger.error(f"
|
| 361 |
-
doc_context = "
|
| 362 |
|
| 363 |
-
#
|
| 364 |
prompt_template = ChatPromptTemplate.from_messages([
|
| 365 |
("system", """
|
| 366 |
-
|
| 367 |
-
|
| 368 |
-
|
| 369 |
|
| 370 |
-
|
| 371 |
-
|
| 372 |
|
| 373 |
-
|
| 374 |
-
|
| 375 |
|
| 376 |
-
|
| 377 |
-
|
| 378 |
-
""")
|
| 379 |
])
|
| 380 |
prompt = prompt_template.format(doc_context=doc_context, chat_context=chat_context, query=query)
|
| 381 |
|
| 382 |
-
# Query Groq model
|
| 383 |
try:
|
| 384 |
response = llm.invoke(prompt)
|
| 385 |
response_text = response.content.strip()
|
|
|
|
|
|
|
| 386 |
except Exception as e:
|
| 387 |
-
logger.error(f"
|
| 388 |
-
response_text = "
|
| 389 |
|
| 390 |
-
# Update chat history
|
| 391 |
if len(current_chat_history) >= MAX_HISTORY_SIZE:
|
| 392 |
current_chat_history.pop(0)
|
| 393 |
current_chat_history.append((query, response_text))
|
|
|
|
| 394 |
return response_text
|
| 395 |
|
| 396 |
@app.get("/ch/{id}", response_class=HTMLResponse)
|
|
@@ -400,62 +373,52 @@ async def load_chat(request: Request, id: str):
|
|
| 400 |
@app.post("/hist/")
|
| 401 |
async def save_chat_history(history: dict):
|
| 402 |
if not sf:
|
| 403 |
-
|
| 404 |
-
return {"error": "Salesforce integration is unavailable"}, 503
|
| 405 |
|
| 406 |
user_id = history.get('userId')
|
| 407 |
if not user_id:
|
| 408 |
-
|
| 409 |
-
return {"error": "userId is required"}, 400
|
| 410 |
|
| 411 |
-
hist = ''.join([f"
|
| 412 |
-
|
| 413 |
|
| 414 |
try:
|
| 415 |
-
sf.Lead.update(user_id, {'Description':
|
| 416 |
-
|
| 417 |
except Exception as e:
|
| 418 |
-
|
| 419 |
-
return {"error": f"Failed to update lead: {str(e)}"}, 500
|
| 420 |
-
|
| 421 |
-
return {"summary": hist, "message": "Chat history saved"}
|
| 422 |
|
| 423 |
@app.post("/webhook")
|
| 424 |
async def receive_form_data(request: Request):
|
| 425 |
if not sf:
|
| 426 |
-
|
| 427 |
-
return {"error": "Salesforce integration is unavailable"}, 503
|
| 428 |
|
| 429 |
try:
|
| 430 |
form_data = await request.json()
|
| 431 |
except json.JSONDecodeError:
|
| 432 |
-
|
| 433 |
-
|
| 434 |
-
|
| 435 |
-
|
| 436 |
-
|
| 437 |
-
|
| 438 |
-
|
| 439 |
-
|
| 440 |
-
|
| 441 |
-
|
| 442 |
-
'Email': form_data.get('email', ''),
|
| 443 |
}
|
| 444 |
|
| 445 |
try:
|
| 446 |
-
result = sf.Lead.create(
|
| 447 |
-
|
| 448 |
-
logger.info(f"Lead created with ID {unique_id}")
|
| 449 |
-
return JSONResponse({"id": unique_id})
|
| 450 |
except Exception as e:
|
| 451 |
-
|
| 452 |
-
return {"error": f"Failed to create lead: {str(e)}"}, 500
|
| 453 |
|
| 454 |
@app.post("/chat/")
|
| 455 |
async def chat(request: MessageRequest):
|
| 456 |
message = request.message
|
| 457 |
response = handle_query(message)
|
| 458 |
-
|
| 459 |
"sender": "User",
|
| 460 |
"message": message,
|
| 461 |
"response": response,
|
|
@@ -463,30 +426,24 @@ async def chat(request: MessageRequest):
|
|
| 463 |
}
|
| 464 |
if len(chat_history) >= MAX_HISTORY_SIZE:
|
| 465 |
chat_history.pop(0)
|
| 466 |
-
chat_history.append(
|
| 467 |
-
logger.info(f"Chat message processed: {message}")
|
| 468 |
return {"response": response}
|
| 469 |
|
| 470 |
@app.get("/health")
|
| 471 |
async def health_check():
|
| 472 |
try:
|
| 473 |
storage_context = StorageContext.from_defaults(persist_dir=PERSIST_DIR)
|
| 474 |
-
|
| 475 |
-
|
| 476 |
-
return {"status": "healthy", "pdf_ingestion": "successful"}
|
| 477 |
except Exception as e:
|
| 478 |
-
logger.error(f"Health check failed: {e}")
|
| 479 |
return {"status": "unhealthy", "error": str(e)}
|
| 480 |
|
| 481 |
@app.get("/")
|
| 482 |
def read_root():
|
| 483 |
-
return {"message": "Welcome to the
|
| 484 |
|
| 485 |
def split_name(full_name):
|
| 486 |
-
|
| 487 |
-
if len(
|
| 488 |
-
return '',
|
| 489 |
-
|
| 490 |
-
return words[0], words[1]
|
| 491 |
-
else:
|
| 492 |
-
return words[0], ' '.join(words[1:])
|
|
|
|
| 236 |
from simple_salesforce import Salesforce, SalesforceLogin
|
| 237 |
from langchain_groq import ChatGroq
|
| 238 |
from langchain_core.prompts import ChatPromptTemplate
|
| 239 |
+
from llama_index.core import StorageContext, VectorStoreIndex, SimpleDirectoryReader, Settings, load_index_from_storage
|
|
|
|
| 240 |
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
|
| 241 |
|
| 242 |
# Configure logging
|
| 243 |
logging.basicConfig(level=logging.INFO)
|
| 244 |
logger = logging.getLogger(__name__)
|
| 245 |
|
|
|
|
| 246 |
class MessageRequest(BaseModel):
|
| 247 |
message: str
|
| 248 |
|
|
|
|
| 249 |
app = FastAPI()
|
| 250 |
|
|
|
|
| 251 |
app.add_middleware(
|
| 252 |
CORSMiddleware,
|
| 253 |
allow_origins=["*"],
|
|
|
|
| 256 |
allow_headers=["*"],
|
| 257 |
)
|
| 258 |
|
|
|
|
| 259 |
app.mount("/static", StaticFiles(directory="static"), name="static")
|
| 260 |
templates = Jinja2Templates(directory="static")
|
| 261 |
|
|
|
|
| 262 |
required_env_vars = ["CHATGROQ_API_KEY", "username", "password", "security_token", "domain", "HF_TOKEN"]
|
| 263 |
for var in required_env_vars:
|
| 264 |
if not os.getenv(var):
|
| 265 |
+
logger.error(f"Missing environment variable: {var}")
|
| 266 |
raise ValueError(f"Environment variable {var} is not set")
|
| 267 |
|
| 268 |
+
# LLM & Embedding Setup
|
| 269 |
GROQ_API_KEY = os.getenv("CHATGROQ_API_KEY")
|
| 270 |
+
llm = ChatGroq(model_name="llama3-8b-8192", api_key=GROQ_API_KEY, temperature=0.1, max_tokens=50)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 271 |
Settings.embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-small-en-v1.5")
|
| 272 |
|
| 273 |
+
# Salesforce setup
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 274 |
sf = None
|
| 275 |
try:
|
| 276 |
session_id, sf_instance = SalesforceLogin(
|
| 277 |
+
username=os.getenv("username"),
|
| 278 |
+
password=os.getenv("password"),
|
| 279 |
+
security_token=os.getenv("security_token"),
|
| 280 |
+
domain=os.getenv("domain")
|
| 281 |
)
|
| 282 |
sf = Salesforce(instance=sf_instance, session_id=session_id)
|
| 283 |
+
logger.info("Salesforce connected.")
|
| 284 |
except Exception as e:
|
| 285 |
+
logger.warning(f"Salesforce connection failed: {e}")
|
| 286 |
|
|
|
|
| 287 |
chat_history = []
|
| 288 |
current_chat_history = []
|
| 289 |
+
MAX_HISTORY_SIZE = 100
|
| 290 |
|
|
|
|
| 291 |
PDF_DIRECTORY = "data"
|
| 292 |
PERSIST_DIR = "db"
|
|
|
|
|
|
|
| 293 |
os.makedirs(PDF_DIRECTORY, exist_ok=True)
|
| 294 |
os.makedirs(PERSIST_DIR, exist_ok=True)
|
| 295 |
|
|
|
|
| 302 |
storage_context = StorageContext.from_defaults()
|
| 303 |
index = VectorStoreIndex.from_documents(documents, storage_context=storage_context)
|
| 304 |
index.storage_context.persist(persist_dir=PERSIST_DIR)
|
| 305 |
+
logger.info("Data ingestion and embedding complete.")
|
| 306 |
except Exception as e:
|
| 307 |
+
logger.error(f"Data ingestion failed: {e}")
|
| 308 |
+
raise HTTPException(status_code=500, detail="Data ingestion failed")
|
| 309 |
|
| 310 |
def initialize():
|
| 311 |
try:
|
| 312 |
+
data_ingestion_from_directory()
|
| 313 |
except Exception as e:
|
| 314 |
+
logger.error(f"Initialization error: {e}")
|
| 315 |
+
raise HTTPException(status_code=500, detail="Startup initialization failed")
|
| 316 |
|
| 317 |
+
initialize()
|
| 318 |
|
| 319 |
+
def handle_query(query: str) -> str:
|
|
|
|
| 320 |
chat_context = ""
|
| 321 |
+
for past_query, response in reversed(current_chat_history[-10:]):
|
| 322 |
+
chat_context += f"User: {past_query}\nBot: {response}\n"
|
|
|
|
| 323 |
|
| 324 |
+
# Load index
|
| 325 |
try:
|
| 326 |
storage_context = StorageContext.from_defaults(persist_dir=PERSIST_DIR)
|
| 327 |
index = load_index_from_storage(storage_context)
|
| 328 |
query_engine = index.as_query_engine(similarity_top_k=2)
|
| 329 |
retrieved = query_engine.query(query)
|
| 330 |
+
doc_context = getattr(retrieved, 'response', "No relevant documents found.")
|
|
|
|
| 331 |
except Exception as e:
|
| 332 |
+
logger.error(f"Retrieval error: {e}")
|
| 333 |
+
doc_context = "No relevant documents found."
|
| 334 |
|
| 335 |
+
# Prompt template
|
| 336 |
prompt_template = ChatPromptTemplate.from_messages([
|
| 337 |
("system", """
|
| 338 |
+
You are a helpful and professional company chatbot.
|
| 339 |
+
Answer user queries based on the provided document context and chat history.
|
| 340 |
+
If you are unsure about the answer, politely respond with "I'm sorry, I don't know that yet."
|
| 341 |
|
| 342 |
+
Document Context:
|
| 343 |
+
{doc_context}
|
| 344 |
|
| 345 |
+
Chat History:
|
| 346 |
+
{chat_context}
|
| 347 |
|
| 348 |
+
Question:
|
| 349 |
+
{query}
|
| 350 |
+
""")
|
| 351 |
])
|
| 352 |
prompt = prompt_template.format(doc_context=doc_context, chat_context=chat_context, query=query)
|
| 353 |
|
|
|
|
| 354 |
try:
|
| 355 |
response = llm.invoke(prompt)
|
| 356 |
response_text = response.content.strip()
|
| 357 |
+
if "I'm sorry" not in response_text and len(response_text.strip()) < 3:
|
| 358 |
+
response_text = "I'm sorry, I don't know that yet."
|
| 359 |
except Exception as e:
|
| 360 |
+
logger.error(f"Groq API Error: {e}")
|
| 361 |
+
response_text = "I'm sorry, I don't know that yet."
|
| 362 |
|
|
|
|
| 363 |
if len(current_chat_history) >= MAX_HISTORY_SIZE:
|
| 364 |
current_chat_history.pop(0)
|
| 365 |
current_chat_history.append((query, response_text))
|
| 366 |
+
|
| 367 |
return response_text
|
| 368 |
|
| 369 |
@app.get("/ch/{id}", response_class=HTMLResponse)
|
|
|
|
| 373 |
@app.post("/hist/")
|
| 374 |
async def save_chat_history(history: dict):
|
| 375 |
if not sf:
|
| 376 |
+
return JSONResponse({"error": "Salesforce not connected"}, status_code=503)
|
|
|
|
| 377 |
|
| 378 |
user_id = history.get('userId')
|
| 379 |
if not user_id:
|
| 380 |
+
return JSONResponse({"error": "userId missing"}, status_code=400)
|
|
|
|
| 381 |
|
| 382 |
+
hist = '\n'.join([f"{entry['sender']}: {entry['message']}" for entry in history.get("history", [])])
|
| 383 |
+
summary = "This is the chat summary: " + hist
|
| 384 |
|
| 385 |
try:
|
| 386 |
+
sf.Lead.update(user_id, {'Description': summary})
|
| 387 |
+
return {"summary": summary, "message": "Chat history saved"}
|
| 388 |
except Exception as e:
|
| 389 |
+
return JSONResponse({"error": str(e)}, status_code=500)
|
|
|
|
|
|
|
|
|
|
| 390 |
|
| 391 |
@app.post("/webhook")
|
| 392 |
async def receive_form_data(request: Request):
|
| 393 |
if not sf:
|
| 394 |
+
return JSONResponse({"error": "Salesforce not connected"}, status_code=503)
|
|
|
|
| 395 |
|
| 396 |
try:
|
| 397 |
form_data = await request.json()
|
| 398 |
except json.JSONDecodeError:
|
| 399 |
+
return JSONResponse({"error": "Invalid JSON"}, status_code=400)
|
| 400 |
+
|
| 401 |
+
first_name, last_name = split_name(form_data.get("name", ""))
|
| 402 |
+
lead_data = {
|
| 403 |
+
"FirstName": first_name,
|
| 404 |
+
"LastName": last_name,
|
| 405 |
+
"Company": form_data.get("company", ""),
|
| 406 |
+
"Phone": form_data.get("phone", ""),
|
| 407 |
+
"Email": form_data.get("email", ""),
|
| 408 |
+
"Description": "Lead from website form"
|
|
|
|
| 409 |
}
|
| 410 |
|
| 411 |
try:
|
| 412 |
+
result = sf.Lead.create(lead_data)
|
| 413 |
+
return {"id": result.get("id")}
|
|
|
|
|
|
|
| 414 |
except Exception as e:
|
| 415 |
+
return JSONResponse({"error": str(e)}, status_code=500)
|
|
|
|
| 416 |
|
| 417 |
@app.post("/chat/")
|
| 418 |
async def chat(request: MessageRequest):
|
| 419 |
message = request.message
|
| 420 |
response = handle_query(message)
|
| 421 |
+
chat_entry = {
|
| 422 |
"sender": "User",
|
| 423 |
"message": message,
|
| 424 |
"response": response,
|
|
|
|
| 426 |
}
|
| 427 |
if len(chat_history) >= MAX_HISTORY_SIZE:
|
| 428 |
chat_history.pop(0)
|
| 429 |
+
chat_history.append(chat_entry)
|
|
|
|
| 430 |
return {"response": response}
|
| 431 |
|
| 432 |
@app.get("/health")
|
| 433 |
async def health_check():
|
| 434 |
try:
|
| 435 |
storage_context = StorageContext.from_defaults(persist_dir=PERSIST_DIR)
|
| 436 |
+
load_index_from_storage(storage_context)
|
| 437 |
+
return {"status": "healthy"}
|
|
|
|
| 438 |
except Exception as e:
|
|
|
|
| 439 |
return {"status": "unhealthy", "error": str(e)}
|
| 440 |
|
| 441 |
@app.get("/")
|
| 442 |
def read_root():
|
| 443 |
+
return {"message": "Welcome to the company chatbot API"}
|
| 444 |
|
| 445 |
def split_name(full_name):
|
| 446 |
+
parts = full_name.strip().split()
|
| 447 |
+
if len(parts) == 1:
|
| 448 |
+
return '', parts[0]
|
| 449 |
+
return parts[0], ' '.join(parts[1:])
|
|
|
|
|
|
|
|
|