Divyansh Kushwaha
committed on
Commit
·
2199ce9
1
Parent(s):
5352272
Updated
Browse files
- .gitignore +2 -0
- api.py +28 -17
- app.py +19 -24
- requirements.txt +4 -1
.gitignore
CHANGED
|
@@ -1 +1,3 @@
|
|
| 1 |
.env
|
|
|
|
|
|
|
|
|
| 1 |
.env
|
| 2 |
+
gcp_key.json
|
| 3 |
+
.gitignore
|
api.py
CHANGED
|
@@ -1,6 +1,7 @@
|
|
| 1 |
from fastapi import FastAPI, Query,HTTPException
|
| 2 |
from fastapi.responses import JSONResponse, FileResponse
|
| 3 |
-
from
|
|
|
|
| 4 |
from langchain.schema import HumanMessage
|
| 5 |
from langchain_groq import ChatGroq
|
| 6 |
import json
|
|
@@ -15,8 +16,8 @@ from utils import (
|
|
| 15 |
|
| 16 |
load_dotenv()
|
| 17 |
GROQ_API_KEY = os.getenv('GROQ_API_KEY')
|
| 18 |
-
|
| 19 |
-
|
| 20 |
|
| 21 |
app = FastAPI(title="Company Sentiment API", description="Get company news summaries with sentiment analysis")
|
| 22 |
|
|
@@ -25,13 +26,23 @@ llm=ChatGroq(api_key=GROQ_API_KEY, model="llama-3.1-8b-instant")
|
|
| 25 |
JSON_FILE_PATH = "final_summary.json"
|
| 26 |
AUDIO_FILE_PATH = "hindi_summary.mp3"
|
| 27 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 28 |
def generate_summary(company_name):
|
| 29 |
news_articles = extract_titles_and_summaries(company_name)
|
| 30 |
news_articles, sentiment_counts = perform_sentiment_analysis(news_articles)
|
| 31 |
news_articles = extract_topics_with_hf(news_articles)
|
| 32 |
final_summary = compare_articles(news_articles, sentiment_counts)
|
| 33 |
hindi_text = ""
|
| 34 |
-
if
|
| 35 |
hindi_prompt = f"Just Translate this text into Hindi: {final_summary['Final Sentiment Analysis']}"
|
| 36 |
hindi_response = llm.invoke([HumanMessage(content=hindi_prompt)]).content
|
| 37 |
hindi_text = hindi_response.strip() if hindi_response else "Translation not available."
|
|
@@ -41,24 +52,24 @@ def generate_summary(company_name):
|
|
| 41 |
print("Hindi Text not generated")
|
| 42 |
|
| 43 |
try:
|
| 44 |
-
client =
|
| 45 |
-
|
| 46 |
-
|
| 47 |
-
|
| 48 |
-
|
| 49 |
-
model_id="eleven_multilingual_v2",
|
| 50 |
)
|
| 51 |
-
|
| 52 |
-
|
| 53 |
-
with open(AUDIO_FILE_PATH, "wb") as
|
| 54 |
-
|
|
|
|
| 55 |
|
| 56 |
except Exception as e:
|
| 57 |
print(f"Error generating audio: {e}")
|
| 58 |
-
audio_bytes = None
|
| 59 |
if not os.path.exists(AUDIO_FILE_PATH):
|
| 60 |
print(f"Audio file could not be found at {AUDIO_FILE_PATH}.")
|
| 61 |
-
|
|
|
|
| 62 |
|
| 63 |
with open(JSON_FILE_PATH,"w",encoding="utf-8") as f:
|
| 64 |
json.dump(final_summary,f,ensure_ascii=False, indent=4)
|
|
@@ -84,7 +95,7 @@ def generate_summary(company_name):
|
|
| 84 |
}
|
| 85 |
},
|
| 86 |
'Final Sentiment Analysis': final_summary["Final Sentiment Analysis"],
|
| 87 |
-
'
|
| 88 |
}
|
| 89 |
|
| 90 |
@app.get("/")
|
|
|
|
| 1 |
from fastapi import FastAPI, Query,HTTPException
|
| 2 |
from fastapi.responses import JSONResponse, FileResponse
|
| 3 |
+
from google.cloud import texttospeech
|
| 4 |
+
from google.oauth2.service_account import Credentials
|
| 5 |
from langchain.schema import HumanMessage
|
| 6 |
from langchain_groq import ChatGroq
|
| 7 |
import json
|
|
|
|
| 16 |
|
| 17 |
load_dotenv()
|
| 18 |
GROQ_API_KEY = os.getenv('GROQ_API_KEY')
|
| 19 |
+
PRIVATE_KEY = os.getenv('PRIVATE_KEY').replace("\\n", "\n")
|
| 20 |
+
CLIENT_EMAIL = os.getenv('CLIENT_EMAIL')
|
| 21 |
|
| 22 |
app = FastAPI(title="Company Sentiment API", description="Get company news summaries with sentiment analysis")
|
| 23 |
|
|
|
|
| 26 |
JSON_FILE_PATH = "final_summary.json"
|
| 27 |
AUDIO_FILE_PATH = "hindi_summary.mp3"
|
| 28 |
|
| 29 |
+
|
| 30 |
+
def get_tts_client():
|
| 31 |
+
credentials = Credentials.from_service_account_info({
|
| 32 |
+
"type": "service_account",
|
| 33 |
+
"private_key": PRIVATE_KEY,
|
| 34 |
+
"client_email": CLIENT_EMAIL,
|
| 35 |
+
"token_uri": "https://oauth2.googleapis.com/token"
|
| 36 |
+
})
|
| 37 |
+
return texttospeech.TextToSpeechClient(credentials=credentials)
|
| 38 |
+
|
| 39 |
def generate_summary(company_name):
|
| 40 |
news_articles = extract_titles_and_summaries(company_name)
|
| 41 |
news_articles, sentiment_counts = perform_sentiment_analysis(news_articles)
|
| 42 |
news_articles = extract_topics_with_hf(news_articles)
|
| 43 |
final_summary = compare_articles(news_articles, sentiment_counts)
|
| 44 |
hindi_text = ""
|
| 45 |
+
if PRIVATE_KEY and CLIENT_EMAIL:
|
| 46 |
hindi_prompt = f"Just Translate this text into Hindi: {final_summary['Final Sentiment Analysis']}"
|
| 47 |
hindi_response = llm.invoke([HumanMessage(content=hindi_prompt)]).content
|
| 48 |
hindi_text = hindi_response.strip() if hindi_response else "Translation not available."
|
|
|
|
| 52 |
print("Hindi Text not generated")
|
| 53 |
|
| 54 |
try:
|
| 55 |
+
client = get_tts_client()
|
| 56 |
+
input_text = texttospeech.SynthesisInput(text=hindi_text)
|
| 57 |
+
voice = texttospeech.VoiceSelectionParams(
|
| 58 |
+
language_code="hi-IN",
|
| 59 |
+
name="hi-IN-Chirp3-HD-Kore"
|
|
|
|
| 60 |
)
|
| 61 |
+
audio_config = texttospeech.AudioConfig(audio_encoding=texttospeech.AudioEncoding.MP3)
|
| 62 |
+
response = client.synthesize_speech(input=input_text, voice=voice, audio_config=audio_config)
|
| 63 |
+
with open(AUDIO_FILE_PATH, "wb") as out:
|
| 64 |
+
out.write(response.audio_content)
|
| 65 |
+
print(f"Audio content written to file: {AUDIO_FILE_PATH}")
|
| 66 |
|
| 67 |
except Exception as e:
|
| 68 |
print(f"Error generating audio: {e}")
|
|
|
|
| 69 |
if not os.path.exists(AUDIO_FILE_PATH):
|
| 70 |
print(f"Audio file could not be found at {AUDIO_FILE_PATH}.")
|
| 71 |
+
|
| 72 |
+
final_summary["Audio"] = AUDIO_FILE_PATH
|
| 73 |
|
| 74 |
with open(JSON_FILE_PATH,"w",encoding="utf-8") as f:
|
| 75 |
json.dump(final_summary,f,ensure_ascii=False, indent=4)
|
|
|
|
| 95 |
}
|
| 96 |
},
|
| 97 |
'Final Sentiment Analysis': final_summary["Final Sentiment Analysis"],
|
| 98 |
+
'Audio': AUDIO_FILE_PATH
|
| 99 |
}
|
| 100 |
|
| 101 |
@app.get("/")
|
app.py
CHANGED
|
@@ -1,26 +1,23 @@
|
|
| 1 |
import streamlit as st
|
| 2 |
import requests
|
| 3 |
|
| 4 |
-
BASE_URL = "
|
| 5 |
st.title("Company Sentiment Analysis")
|
| 6 |
|
| 7 |
company_name = st.text_input(
|
| 8 |
-
"Enter the company name:",
|
| 9 |
placeholder="Example: Microsoft, Apple, Tesla"
|
| 10 |
)
|
| 11 |
|
| 12 |
def display_articles(articles):
|
| 13 |
for i, article in enumerate(articles, start=1):
|
| 14 |
-
st.markdown(f"
|
| 15 |
-
st.write(f"- **Title:** {article['Title']}")
|
| 16 |
st.write(f"- **Summary:** {article['Summary']}")
|
| 17 |
-
st.write(f"- **Sentiment:** {article['Sentiment']}")
|
| 18 |
-
st.write(f"- **Score:** {article['Score']:.2f}")
|
| 19 |
st.write(f"- **Topics:** {', '.join(article['Topics'])}")
|
| 20 |
-
st.markdown("---")
|
| 21 |
|
| 22 |
def display_sentiment_distribution(sentiment_distribution):
|
| 23 |
-
st.
|
| 24 |
sentiment_data = {
|
| 25 |
"Sentiment": list(sentiment_distribution.keys()),
|
| 26 |
"Count": list(sentiment_distribution.values())
|
|
@@ -28,38 +25,35 @@ def display_sentiment_distribution(sentiment_distribution):
|
|
| 28 |
st.table(sentiment_data)
|
| 29 |
|
| 30 |
def display_coverage_differences(coverage_differences):
|
| 31 |
-
|
| 32 |
-
|
| 33 |
-
|
| 34 |
-
|
| 35 |
-
st.markdown("---")
|
| 36 |
|
| 37 |
def display_topic_overlap(topic_overlap):
|
| 38 |
-
st.
|
| 39 |
st.write(f"- **Common Topics:** {', '.join(topic_overlap['Common Topics'])}")
|
| 40 |
-
st.
|
| 41 |
for article, topics in topic_overlap["Unique Topics"].items():
|
| 42 |
st.write(f" - **{article}:** {', '.join(topics)}")
|
| 43 |
-
st.markdown("---")
|
| 44 |
|
| 45 |
if st.button("Generate Summary"):
|
| 46 |
if company_name:
|
| 47 |
try:
|
| 48 |
summary_url = f"{BASE_URL}/generateSummary?company_name={company_name}"
|
| 49 |
response = requests.post(summary_url)
|
| 50 |
-
|
| 51 |
if response.status_code == 200:
|
| 52 |
data = response.json()
|
| 53 |
-
|
| 54 |
-
|
| 55 |
-
st.markdown(f"### **Company: {data.get('Company', 'Unknown')}**")
|
| 56 |
|
| 57 |
# Articles
|
| 58 |
-
st.markdown("
|
| 59 |
display_articles(data.get("Articles", []))
|
| 60 |
|
| 61 |
# Comparative Sentiment Score
|
| 62 |
-
st.markdown("
|
| 63 |
sentiment_distribution = data.get("Comparative Sentiment Score", {}).get("Sentiment Distribution", {})
|
| 64 |
display_sentiment_distribution(sentiment_distribution)
|
| 65 |
|
|
@@ -70,12 +64,13 @@ if st.button("Generate Summary"):
|
|
| 70 |
display_topic_overlap(topic_overlap)
|
| 71 |
|
| 72 |
# Final Sentiment Analysis
|
| 73 |
-
st.markdown("
|
| 74 |
st.write(data.get("Final Sentiment Analysis", "No sentiment analysis available."))
|
| 75 |
|
| 76 |
# Hindi Summary
|
| 77 |
-
st.markdown("
|
| 78 |
st.write(data.get("Hindi Summary", "No Hindi summary available."))
|
|
|
|
| 79 |
|
| 80 |
else:
|
| 81 |
st.error(f"Error: {response.status_code}, {response.text}")
|
|
|
|
| 1 |
import streamlit as st
|
| 2 |
import requests
|
| 3 |
|
| 4 |
+
BASE_URL = "http://127.0.0.1:8000"
|
| 5 |
st.title("Company Sentiment Analysis")
|
| 6 |
|
| 7 |
company_name = st.text_input(
|
| 8 |
+
"Enter the company name:",
|
| 9 |
placeholder="Example: Microsoft, Apple, Tesla"
|
| 10 |
)
|
| 11 |
|
| 12 |
def display_articles(articles):
|
| 13 |
for i, article in enumerate(articles, start=1):
|
| 14 |
+
st.markdown(f"##### **Article {i}: {article['Title']}**")
|
|
|
|
| 15 |
st.write(f"- **Summary:** {article['Summary']}")
|
| 16 |
+
st.write(f"- **Sentiment:** {article['Sentiment']} | **Score:** {article['Score']:.2f}")
|
|
|
|
| 17 |
st.write(f"- **Topics:** {', '.join(article['Topics'])}")
|
|
|
|
| 18 |
|
| 19 |
def display_sentiment_distribution(sentiment_distribution):
|
| 20 |
+
st.markdown("#### **Sentiment Distribution:**")
|
| 21 |
sentiment_data = {
|
| 22 |
"Sentiment": list(sentiment_distribution.keys()),
|
| 23 |
"Count": list(sentiment_distribution.values())
|
|
|
|
| 25 |
st.table(sentiment_data)
|
| 26 |
|
| 27 |
def display_coverage_differences(coverage_differences):
|
| 28 |
+
if coverage_differences:
|
| 29 |
+
st.markdown("#### **Coverage Differences:**")
|
| 30 |
+
for diff in coverage_differences:
|
| 31 |
+
st.write(f"- **{diff['Comparison']}:** {diff['Impact']}")
|
|
|
|
| 32 |
|
| 33 |
def display_topic_overlap(topic_overlap):
|
| 34 |
+
st.markdown("#### **Topic Overlap:**")
|
| 35 |
st.write(f"- **Common Topics:** {', '.join(topic_overlap['Common Topics'])}")
|
| 36 |
+
st.markdown("- **Unique Topics by Article:**")
|
| 37 |
for article, topics in topic_overlap["Unique Topics"].items():
|
| 38 |
st.write(f" - **{article}:** {', '.join(topics)}")
|
|
|
|
| 39 |
|
| 40 |
if st.button("Generate Summary"):
|
| 41 |
if company_name:
|
| 42 |
try:
|
| 43 |
summary_url = f"{BASE_URL}/generateSummary?company_name={company_name}"
|
| 44 |
response = requests.post(summary_url)
|
| 45 |
+
|
| 46 |
if response.status_code == 200:
|
| 47 |
data = response.json()
|
| 48 |
+
|
| 49 |
+
st.markdown(f"#### **Company: {data.get('Company', 'Unknown')}**")
|
|
|
|
| 50 |
|
| 51 |
# Articles
|
| 52 |
+
st.markdown("#### **Articles:**")
|
| 53 |
display_articles(data.get("Articles", []))
|
| 54 |
|
| 55 |
# Comparative Sentiment Score
|
| 56 |
+
st.markdown("#### **Comparative Sentiment Score:**")
|
| 57 |
sentiment_distribution = data.get("Comparative Sentiment Score", {}).get("Sentiment Distribution", {})
|
| 58 |
display_sentiment_distribution(sentiment_distribution)
|
| 59 |
|
|
|
|
| 64 |
display_topic_overlap(topic_overlap)
|
| 65 |
|
| 66 |
# Final Sentiment Analysis
|
| 67 |
+
st.markdown("#### **Final Sentiment Analysis:**")
|
| 68 |
st.write(data.get("Final Sentiment Analysis", "No sentiment analysis available."))
|
| 69 |
|
| 70 |
# Hindi Summary
|
| 71 |
+
st.markdown("#### **Hindi Summary:**")
|
| 72 |
st.write(data.get("Hindi Summary", "No Hindi summary available."))
|
| 73 |
+
st.audio(f"{BASE_URL}/downloadHindiAudio", format="audio/mp3")
|
| 74 |
|
| 75 |
else:
|
| 76 |
st.error(f"Error: {response.status_code}, {response.text}")
|
requirements.txt
CHANGED
|
@@ -7,4 +7,7 @@ langchain
|
|
| 7 |
langchain_groq
|
| 8 |
elevenlabs
|
| 9 |
torch
|
| 10 |
-
python-dotenv
|
|
|
|
|
|
|
|
|
|
|
|
| 7 |
langchain_groq
|
| 8 |
elevenlabs
|
| 9 |
torch
|
| 10 |
+
python-dotenv
|
| 11 |
+
google-cloud-texttospeech
|
| 12 |
+
google-auth
|
| 13 |
+
streamlit
|