Update app.py
app.py CHANGED
```diff
@@ -48,17 +48,11 @@ def load_model(model_name, progress=gr.Progress()):
             device_map="auto",
             load_in_8bit=True
         )
-    elif "llama" in model_name.lower() or "mistral" in model_name.lower():
-        model = AutoModelForCausalLM.from_pretrained(
-            model_name,
-            torch_dtype=torch.float16,
-            device_map="cpu"
-        )
     else:
         model = AutoModelForCausalLM.from_pretrained(
             model_name,
             torch_dtype=torch.float16,
-            device_map="cpu"
+            device_map="auto"
         )
 
     if tokenizer.pad_token is None:
```
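After this change, checkpoints that previously hit the removed `elif` branch (Llama/Mistral models pinned to `device_map="cpu"` in float16) go through the same `else` path as everything else, with `device_map="auto"`. A minimal sketch of that unified loading path, assuming the `accelerate` package is installed (required for `device_map="auto"`) and using an illustrative checkpoint name:

```python
# Sketch of the loading path after this commit; the checkpoint name is
# illustrative, not taken from the Space's code.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

model_name = "mistralai/Mistral-7B-v0.1"  # hypothetical example
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.float16,
    device_map="auto",  # Accelerate places layers on available GPU(s), falling back to CPU
)
```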
```diff
@@ -87,7 +81,7 @@ def analyze_next_token(input_text, temperature, top_p, top_k):
     if model is None or tokenizer is None:
         return "Veuillez d'abord charger un modèle.", None, None
 
-    inputs = tokenizer(input_text, return_tensors="pt", padding=True, truncation=True, max_length=512)
+    inputs = tokenizer(input_text, return_tensors="pt", padding=True, truncation=True, max_length=512).to(model.device)
 
     try:
         with torch.no_grad():
```
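With `device_map="auto"` the model's weights may now sit on a GPU, so the tokenized inputs have to follow them; `BatchEncoding.to()` moves every tensor in the encoding (`input_ids`, `attention_mask`) in one call. A sketch of the pattern, assuming a loaded `model` and `tokenizer`:

```python
# Without .to(model.device), a CUDA-resident model would raise a
# device-mismatch error on the forward pass.
inputs = tokenizer(
    input_text, return_tensors="pt",
    padding=True, truncation=True, max_length=512,
).to(model.device)
with torch.no_grad():
    outputs = model(**inputs)
```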
```diff
@@ -106,7 +100,7 @@ def analyze_next_token(input_text, temperature, top_p, top_k):
             prob_text += f"{word}: {prob:.2%}\n"
 
         prob_plot = plot_probabilities(prob_data)
-        attention_plot = plot_attention(inputs["input_ids"][0], last_token_logits)
+        attention_plot = plot_attention(inputs["input_ids"][0].cpu(), last_token_logits.cpu())
 
         return prob_text, attention_plot, prob_plot
     except Exception as e:
```
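The complementary fix on the output side: once tensors can live on a GPU, they must come back to host memory before NumPy/Matplotlib (which `plot_attention` presumably uses) can read them. `.cpu()` is a no-op for tensors already on the CPU, so the call is safe either way:

```python
# .numpy() raises on a CUDA tensor; calling .cpu() first makes it safe.
token_ids = inputs["input_ids"][0].cpu()
logits = last_token_logits.cpu()
values = logits.numpy()  # fine now, regardless of where the model runs
```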
```diff
@@ -118,7 +112,7 @@ def generate_text(input_text, temperature, top_p, top_k):
     if model is None or tokenizer is None:
         return "Veuillez d'abord charger un modèle."
 
-    inputs = tokenizer(input_text, return_tensors="pt", padding=True, truncation=True, max_length=512)
+    inputs = tokenizer(input_text, return_tensors="pt", padding=True, truncation=True, max_length=512).to(model.device)
 
     try:
         with torch.no_grad():
```