Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
|
@@ -21,7 +21,7 @@ except ImportError:
|
|
| 21 |
# --- Configuration ---
|
| 22 |
# Model path is set to sojka
|
| 23 |
MODEL_PATH = os.getenv("MODEL_PATH", "AndromedaPL/sojka")
|
| 24 |
-
TOKENIZER_PATH = os.getenv("
|
| 25 |
|
| 26 |
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
|
| 27 |
LABELS = ["self-harm", "hate", "vulgar", "sex", "crime"]
|
|
@@ -45,9 +45,10 @@ logger = logging.getLogger(__name__)
|
|
| 45 |
|
| 46 |
def load_model_and_tokenizer(model_path: str, tokenizer_path: str, device: str) -> Tuple[AutoModelForSequenceClassification, AutoTokenizer]:
|
| 47 |
"""Load the trained model and tokenizer"""
|
| 48 |
-
logger.info(f"Loading
|
| 49 |
|
| 50 |
tokenizer = AutoTokenizer.from_pretrained(tokenizer_path, use_fast=True)
|
|
|
|
| 51 |
|
| 52 |
if tokenizer.pad_token is None:
|
| 53 |
if tokenizer.eos_token:
|
|
@@ -57,6 +58,8 @@ def load_model_and_tokenizer(model_path: str, tokenizer_path: str, device: str)
|
|
| 57 |
|
| 58 |
tokenizer.truncation_side = "right"
|
| 59 |
|
|
|
|
|
|
|
| 60 |
model_load_kwargs = {
|
| 61 |
"torch_dtype": torch.float16 if device == 'cuda' else torch.float32,
|
| 62 |
"device_map": 'auto' if device == 'cuda' else None,
|
|
@@ -92,7 +95,7 @@ try:
|
|
| 92 |
model, tokenizer = load_model_and_tokenizer(MODEL_PATH, TOKENIZER_PATH, DEVICE)
|
| 93 |
model_loaded = True
|
| 94 |
except Exception as e:
|
| 95 |
-
logger.error(f"FATAL: Failed to load the model from {MODEL_PATH}: {e}")
|
| 96 |
model, tokenizer, model_loaded = None, None, False
|
| 97 |
|
| 98 |
def predict(text: str) -> Dict[str, Any]:
|
|
|
|
| 21 |
# --- Configuration ---
|
| 22 |
# Model path is set to sojka
|
| 23 |
MODEL_PATH = os.getenv("MODEL_PATH", "AndromedaPL/sojka")
|
| 24 |
+
TOKENIZER_PATH = os.getenv("TOKENIZER_PATH", "sdadas/mmlw-roberta-base")
|
| 25 |
|
| 26 |
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
|
| 27 |
LABELS = ["self-harm", "hate", "vulgar", "sex", "crime"]
|
|
|
|
| 45 |
|
| 46 |
def load_model_and_tokenizer(model_path: str, tokenizer_path: str, device: str) -> Tuple[AutoModelForSequenceClassification, AutoTokenizer]:
|
| 47 |
"""Load the trained model and tokenizer"""
|
| 48 |
+
logger.info(f"Loading tokenizer from {tokenizer_path}")
|
| 49 |
|
| 50 |
tokenizer = AutoTokenizer.from_pretrained(tokenizer_path, use_fast=True)
|
| 51 |
+
logger.info(f"Tokenizer loaded: {tokenizer.name_or_path}")
|
| 52 |
|
| 53 |
if tokenizer.pad_token is None:
|
| 54 |
if tokenizer.eos_token:
|
|
|
|
| 58 |
|
| 59 |
tokenizer.truncation_side = "right"
|
| 60 |
|
| 61 |
+
logger.info(f"Loading model from {model_path}")
|
| 62 |
+
|
| 63 |
model_load_kwargs = {
|
| 64 |
"torch_dtype": torch.float16 if device == 'cuda' else torch.float32,
|
| 65 |
"device_map": 'auto' if device == 'cuda' else None,
|
|
|
|
| 95 |
model, tokenizer = load_model_and_tokenizer(MODEL_PATH, TOKENIZER_PATH, DEVICE)
|
| 96 |
model_loaded = True
|
| 97 |
except Exception as e:
|
| 98 |
+
# The message is already fully formatted by the f-string, so no positional
# %-style args may be passed (a stray arg makes logging fail to format the
# record and the message is lost). exc_info=True attaches the traceback of
# the exception currently being handled.
logger.error(f"FATAL: Failed to load the model from {MODEL_PATH} or tokenizer from {TOKENIZER_PATH}: {e}", exc_info=True)
|
| 99 |
model, tokenizer, model_loaded = None, None, False
|
| 100 |
|
| 101 |
def predict(text: str) -> Dict[str, Any]:
|