Spaces:

hotmemeh
/

newspace

Build error

hotmemeh committed on Sep 30

Commit

144f336

verified ·

1 Parent(s): 741e6dd

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -1,36 +1,43 @@
 import gradio as gr
-from transformers import pipeline
 import torch
-# Auto-select model based on device
-if torch.cuda.is_available():
 MODEL_NAME = "darkc0de/XortronCriminalComputingConfig"
-    device = 0
-else:
-    MODEL_NAME = "gpt2"  # CPU fallback
-    device = -1
-print(f"Loading model: {MODEL_NAME} on {'GPU' if device == 0 else 'CPU'}")
-# Load Hugging Face pipeline
-generator = pipeline("text-generation", model=MODEL_NAME, device=device)
-# Streaming response function
 def respond(message, history):
     output = generator(
         message,
-        max_new_tokens=256,   # use this instead of max_length
-        num_return_sequences=1,
         do_sample=True,
         temperature=0.7,
-        truncation=True,      # fixes truncation warning
     )[0]["generated_text"]
-    # Stream output in chunks
     for i in range(0, len(output), 20):
         yield {"role": "assistant", "content": output[: i + 20]}
-# Build the Gradio chat
 chat = gr.ChatInterface(
     fn=respond,
     type="messages",

 import gradio as gr
 import torch
+from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
 MODEL_NAME = "darkc0de/XortronCriminalComputingConfig"
+print(f"Loading model: {MODEL_NAME}")
+# Load tokenizer & model
+tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
+# device_map="auto" lets it use GPU if available, otherwise CPU (warning: very slow on CPU)
+model = AutoModelForCausalLM.from_pretrained(
+    MODEL_NAME,
+    device_map="auto",
+    torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
+    low_cpu_mem_usage=True,
+)
+generator = pipeline(
+    "text-generation",
+    model=model,
+    tokenizer=tokenizer,
+    device=0 if torch.cuda.is_available() else -1,
+)
+# Streaming response
 def respond(message, history):
     output = generator(
         message,
+        max_new_tokens=256,
         do_sample=True,
         temperature=0.7,
+        truncation=True,
     )[0]["generated_text"]
     for i in range(0, len(output), 20):
         yield {"role": "assistant", "content": output[: i + 20]}
+# Build Gradio chat
 chat = gr.ChatInterface(
     fn=respond,
     type="messages",