Spaces: Running on Zero
app.py
CHANGED
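
This change comments out the `@spaces.GPU` decorator and reworks `generate_text_local`: instead of assigning `model_pipeline.device`, it moves the underlying model to the GPU and back with `.to(...)`, and it adds logging of the model name and the device the model actually lands on.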
```diff
@@ -16,42 +16,39 @@ model_options = {
     "Foundation-Sec-8B": pipeline("text-generation", model="fdtn-ai/Foundation-Sec-8B"),
 }
 
-
-@spaces.GPU
+#@spaces.GPU
 def generate_text_local(model_pipeline, prompt):
     """Local text generation"""
     try:
-
-
+        # Get the model name (fall back to 'unknown')
+        model_name = getattr(getattr(model_pipeline, "model", None), "name_or_path", "unknown")
+        logger.info(f"Running local text generation with {model_name}")
+
         # Move model to GPU (entire pipeline)
         device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-        model_pipeline
-
-
-        if hasattr(model_pipeline, "device"):
-            model_pipeline.device = device
-
+        if hasattr(model_pipeline, "model"):
+            model_pipeline.model = model_pipeline.model.to(device)
+
         # Record device information
-        device_info = next(model_pipeline.model.parameters()).device
-        logger.info(f"Model {
-
+        device_info = next(model_pipeline.model.parameters()).device if hasattr(model_pipeline, "model") else "unknown"
+        logger.info(f"Model {model_name} is running on device: {device_info}")
+
         outputs = model_pipeline(
             prompt,
-            max_new_tokens=3,
+            max_new_tokens=3,
             do_sample=True,
             temperature=0.1,
             top_p=0.9,
-            clean_up_tokenization_spaces=True,
+            clean_up_tokenization_spaces=True,
         )
 
         # Move model back to CPU
-        model_pipeline
-
-
-
+        if hasattr(model_pipeline, "model"):
+            model_pipeline.model = model_pipeline.model.to("cpu")
+
         return outputs[0]["generated_text"].replace(prompt, "").strip()
     except Exception as e:
-        logger.error(f"Error in local text generation with {
+        logger.error(f"Error in local text generation with {model_name}: {str(e)}")
         return f"Error: {str(e)}"
 
 # Build Gradio app
```
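
For context, below is a minimal, self-contained sketch of the helper as it reads after this change. The logging setup and the tiny stand-in checkpoint `sshleifer/tiny-gpt2` are assumptions made so the example runs anywhere (the Space itself loads `fdtn-ai/Foundation-Sec-8B`). The `pipeline.device` re-assignment is also an addition not present in the diff: the transformers pipeline uses that attribute to place input tensors, so moving only the model can otherwise produce a device mismatch once the model is on CUDA.

```python
# Hedged sketch, not the Space's exact code: the logging setup and the
# tiny stand-in checkpoint are assumptions so the example runs anywhere.
import logging

import torch
from transformers import pipeline

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)


def generate_text_local(model_pipeline, prompt):
    """Local text generation"""
    model_name = getattr(getattr(model_pipeline, "model", None), "name_or_path", "unknown")
    try:
        logger.info(f"Running local text generation with {model_name}")

        # Move the underlying model (not just the pipeline wrapper) to the GPU.
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        if hasattr(model_pipeline, "model"):
            model_pipeline.model = model_pipeline.model.to(device)
            # Assumption beyond the diff: keep the pipeline's device attribute
            # in sync, since the pipeline uses it to place input tensors.
            model_pipeline.device = device

        device_info = (
            next(model_pipeline.model.parameters()).device
            if hasattr(model_pipeline, "model")
            else "unknown"
        )
        logger.info(f"Model {model_name} is running on device: {device_info}")

        outputs = model_pipeline(
            prompt,
            max_new_tokens=3,  # as in the diff; unusually small for real use
            do_sample=True,
            temperature=0.1,
            top_p=0.9,
            clean_up_tokenization_spaces=True,
        )

        # Release the GPU between calls by moving the model back to the CPU.
        if hasattr(model_pipeline, "model"):
            model_pipeline.model = model_pipeline.model.to("cpu")
            model_pipeline.device = torch.device("cpu")

        return outputs[0]["generated_text"].replace(prompt, "").strip()
    except Exception as e:
        logger.error(f"Error in local text generation with {model_name}: {str(e)}")
        return f"Error: {str(e)}"


if __name__ == "__main__":
    # Tiny stand-in model (assumption) instead of fdtn-ai/Foundation-Sec-8B.
    pipe = pipeline("text-generation", model="sshleifer/tiny-gpt2")
    print(generate_text_local(pipe, "Hello"))
```

Note also the commented-out `@spaces.GPU`: on ZeroGPU Spaces that decorator is what attaches a GPU to a call, so with it disabled `torch.cuda.is_available()` should report False inside the function and generation falls back to the CPU branch of the device check.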