Spaces:

Ehrii
/

sentiment-analysis

Running

App Files Files Community

Ehrii committed on Mar 13

Commit

7eced3d

1 Parent(s): 589cfa5

Update main.py

Browse files

Files changed (1) hide show

main.py +21 -52

main.py CHANGED Viewed

@@ -1,5 +1,5 @@
 import os
-from fastapi import FastAPI, HTTPException
 from pydantic import BaseModel
 from transformers import pipeline, AutoTokenizer
 from langdetect import detect, DetectorFactory
@@ -7,52 +7,24 @@ from langdetect import detect, DetectorFactory
 # Ensure consistent language detection results
 DetectorFactory.seed = 0
-# Set Hugging Face cache directory
-os.environ["HF_HOME"] = "/tmp/huggingface_cache"
-os.environ["TRANSFORMERS_CACHE"] = "/tmp/huggingface_cache"
-# Create cache directory if it doesn't exist
-cache_dir = os.environ["HF_HOME"]
-os.makedirs(cache_dir, exist_ok=True)
-# Retrieve Hugging Face token from environment variable
-HF_TOKEN = os.getenv("HF_TOKEN")
-if not HF_TOKEN:
-    raise RuntimeError("Hugging Face token is missing! Please set the HF_TOKEN environment variable.")
-# Set the Hugging Face token in the environment variable
-os.environ["HUGGINGFACE_HUB_TOKEN"] = HF_TOKEN
 app = FastAPI()
-# Model names
-MULTILINGUAL_MODEL_NAME = "Ehrii/sentiment"
-MULTILINGUAL_TOKENIZER_NAME = "tabularisai/multilingual-sentiment-analysis"
-ENGLISH_MODEL_NAME = "siebert/sentiment-roberta-large-english"
-# Load multilingual sentiment model
-try:
-    multilingual_tokenizer = AutoTokenizer.from_pretrained(
-        MULTILINGUAL_TOKENIZER_NAME,
-        cache_dir=cache_dir
-    )
-    multilingual_model = pipeline(
-        "sentiment-analysis",
-        model=MULTILINGUAL_MODEL_NAME,
-        tokenizer=multilingual_tokenizer
-    )
-except Exception as e:
-    raise RuntimeError(f"Failed to load multilingual model: {e}")
-# Load English sentiment model
-try:
-    english_model = pipeline(
-        "sentiment-analysis",
-        model=ENGLISH_MODEL_NAME
-    )
-except Exception as e:
-    raise RuntimeError(f"Failed to load English sentiment model: {e}")
 class SentimentRequest(BaseModel):
     text: str
@@ -64,7 +36,6 @@ class SentimentResponse(BaseModel):
     confidence_score: float
 def detect_language(text):
-    """Detect the language of the given text."""
     try:
         return detect(text)
     except Exception:
@@ -76,17 +47,15 @@ def home():
 @app.post("/analyze/", response_model=SentimentResponse)
 def analyze_sentiment(request: SentimentRequest):
-    text = request.text.strip()
-    if not text:
-        raise HTTPException(status_code=400, detail="Text input cannot be empty.")
     language = detect_language(text)
-    # Use English model if detected language is English; otherwise, use multilingual model
-    model = english_model if language == "en" else multilingual_model
-    result = model(text)
     return SentimentResponse(
         original_text=text,
         language_detected=language,

 import os
+from fastapi import FastAPI
 from pydantic import BaseModel
 from transformers import pipeline, AutoTokenizer
 from langdetect import detect, DetectorFactory
 # Ensure consistent language detection results
 DetectorFactory.seed = 0
+# Set Hugging Face cache directory to a writable location
+os.environ["HF_HOME"] = "/tmp/huggingface"
+os.makedirs(os.environ["HF_HOME"], exist_ok=True)
 app = FastAPI()
+# Load the original tokenizer from the base model
+original_tokenizer = AutoTokenizer.from_pretrained("tabularisai/multilingual-sentiment-analysis")
+# Load the fine-tuned model and pass the tokenizer explicitly
+multilingual_model = pipeline(
+    "sentiment-analysis",
+    model="Ehrii/sentiment",
+    tokenizer=original_tokenizer
+)
+# English model remains unchanged
+english_model = pipeline("sentiment-analysis", model="siebert/sentiment-roberta-large-english")
 class SentimentRequest(BaseModel):
     text: str
     confidence_score: float
 def detect_language(text):
     try:
         return detect(text)
     except Exception:
 @app.post("/analyze/", response_model=SentimentResponse)
 def analyze_sentiment(request: SentimentRequest):
+    text = request.text
     language = detect_language(text)
+    # Choose the appropriate model based on language
+    if language == "en":
+        result = english_model(text)
+    else:
+        result = multilingual_model(text)
     return SentimentResponse(
         original_text=text,
         language_detected=language,