Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
|
@@ -60,16 +60,19 @@ def glb_to_data_url(glb_path: str) -> str:
|
|
| 60 |
b64_data = base64.b64encode(data).decode("utf-8")
|
| 61 |
return f"data:model/gltf-binary;base64,{b64_data}"
|
| 62 |
|
| 63 |
-
def
|
| 64 |
"""
|
| 65 |
-
|
| 66 |
-
Otherwise,
|
| 67 |
"""
|
| 68 |
if isinstance(file, str):
|
| 69 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 70 |
else:
|
| 71 |
-
|
| 72 |
-
return audio, samplerate
|
| 73 |
|
| 74 |
# Model class for Text-to-3D Generation (ShapE)
|
| 75 |
|
|
@@ -468,15 +471,18 @@ def process_phi4(input_type: str, file, question: str, max_new_tokens: int = 200
|
|
| 468 |
yield "Please upload a file and provide a question."
|
| 469 |
return
|
| 470 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 471 |
if input_type.lower() == "image":
|
| 472 |
prompt = f'{user_prompt}<|image_1|>{question}{prompt_suffix}{assistant_prompt}'
|
| 473 |
-
|
| 474 |
-
image = load_image(file)
|
| 475 |
inputs = phi4_processor(text=prompt, images=image, return_tensors='pt').to(phi4_model.device)
|
| 476 |
elif input_type.lower() == "audio":
|
| 477 |
prompt = f'{user_prompt}<|audio_1|>{question}{prompt_suffix}{assistant_prompt}'
|
| 478 |
-
|
| 479 |
-
audio, samplerate = load_audio_file(file)
|
| 480 |
inputs = phi4_processor(text=prompt, audios=[(audio, samplerate)], return_tensors='pt').to(phi4_model.device)
|
| 481 |
else:
|
| 482 |
yield "Invalid input type selected."
|
|
@@ -565,7 +571,6 @@ def generate(
|
|
| 565 |
# --- Web Search/Visit branch ---
|
| 566 |
if text.strip().lower().startswith("@web"):
|
| 567 |
web_command = text[len("@web"):].strip()
|
| 568 |
-
# If the command starts with "visit", then treat the rest as a URL
|
| 569 |
if web_command.lower().startswith("visit"):
|
| 570 |
url = web_command[len("visit"):].strip()
|
| 571 |
yield "🌍 Visiting webpage..."
|
|
@@ -573,7 +578,6 @@ def generate(
|
|
| 573 |
content = visitor.forward(url)
|
| 574 |
yield content
|
| 575 |
else:
|
| 576 |
-
# Otherwise, treat the rest as a search query.
|
| 577 |
query = web_command
|
| 578 |
yield "🧤 Performing a web search ..."
|
| 579 |
searcher = DuckDuckGoSearchTool()
|
|
@@ -585,7 +589,6 @@ def generate(
|
|
| 585 |
if text.strip().lower().startswith("@ragent"):
|
| 586 |
prompt = text[len("@ragent"):].strip()
|
| 587 |
yield "📝 Initiating reasoning chain using Llama mode..."
|
| 588 |
-
# Pass the current chat history (cleaned) to help inform the chain.
|
| 589 |
for partial in ragent_reasoning(prompt, clean_chat_history(chat_history)):
|
| 590 |
yield partial
|
| 591 |
return
|
|
@@ -596,13 +599,12 @@ def generate(
|
|
| 596 |
if not files or len(files) == 0:
|
| 597 |
yield "Error: Please attach an image for YOLO object detection."
|
| 598 |
return
|
| 599 |
-
# Use the first attached image
|
| 600 |
input_file = files[0]
|
| 601 |
try:
|
| 602 |
if isinstance(input_file, str):
|
| 603 |
pil_image = Image.open(input_file)
|
| 604 |
else:
|
| 605 |
-
pil_image = input_file
|
| 606 |
except Exception as e:
|
| 607 |
yield f"Error loading image: {str(e)}"
|
| 608 |
return
|
|
@@ -613,7 +615,6 @@ def generate(
|
|
| 613 |
|
| 614 |
# --- Phi-4 Multimodal branch with text streaming ---
|
| 615 |
if text.strip().lower().startswith("@phi4"):
|
| 616 |
-
# Expected format: "@phi4 [image|audio] <your question>"
|
| 617 |
parts = text.strip().split(maxsplit=2)
|
| 618 |
if len(parts) < 3:
|
| 619 |
yield "Error: Please provide input type and a question. Format: '@phi4 [image|audio] <your question>'"
|
|
@@ -646,9 +647,9 @@ def generate(
|
|
| 646 |
|
| 647 |
if files:
|
| 648 |
if len(files) > 1:
|
| 649 |
-
images = [load_image(image) for image in files]
|
| 650 |
elif len(files) == 1:
|
| 651 |
-
images = [load_image(files[0])]
|
| 652 |
else:
|
| 653 |
images = []
|
| 654 |
messages = [{
|
|
|
|
| 60 |
b64_data = base64.b64encode(data).decode("utf-8")
|
| 61 |
return f"data:model/gltf-binary;base64,{b64_data}"
|
| 62 |
|
| 63 |
+
def get_file_path(file):
    """
    Normalize a file input to a filesystem path.

    Accepted inputs, checked in this order:

    * ``str`` -- assumed to already be a path and returned unchanged.
    * ``os.PathLike`` (e.g. ``pathlib.Path``) -- converted to its full
      string path.
    * any object with a ``name`` attribute -- ``file.name`` is returned.
    * ``dict`` -- the value stored under ``"name"``, or, failing that,
      under ``"path"``.
    * any object with a ``path`` attribute -- ``file.path`` is returned.

    Returns ``None`` when no path can be determined, so callers can report
    the problem instead of crashing.
    """
    # Function-scope import: the module's import block is not touched here.
    import os

    if isinstance(file, str):
        return file

    # Path objects must be handled before the generic ``name`` check below:
    # ``pathlib.Path.name`` is only the final path component, so relying on
    # it would silently drop the directory part of the path.
    if isinstance(file, os.PathLike):
        return os.fsdecode(file)

    if hasattr(file, "name"):
        return file.name

    if isinstance(file, dict):
        # "name" keeps priority for backward compatibility; "path" is a
        # fallback for payloads that store the location under that key
        # (presumably newer Gradio file dicts -- verify against the caller).
        for key in ("name", "path"):
            if key in file:
                return file[key]
        return None

    # Last resort: objects that expose the location as a ``path`` attribute.
    return getattr(file, "path", None)
|
|
|
|
| 76 |
|
| 77 |
# Model class for Text-to-3D Generation (ShapE)
|
| 78 |
|
|
|
|
| 471 |
yield "Please upload a file and provide a question."
|
| 472 |
return
|
| 473 |
|
| 474 |
+
file_path = get_file_path(file)
|
| 475 |
+
if file_path is None:
|
| 476 |
+
yield "Could not determine the file path."
|
| 477 |
+
return
|
| 478 |
+
|
| 479 |
if input_type.lower() == "image":
|
| 480 |
prompt = f'{user_prompt}<|image_1|>{question}{prompt_suffix}{assistant_prompt}'
|
| 481 |
+
image = Image.open(file_path)
|
|
|
|
| 482 |
inputs = phi4_processor(text=prompt, images=image, return_tensors='pt').to(phi4_model.device)
|
| 483 |
elif input_type.lower() == "audio":
|
| 484 |
prompt = f'{user_prompt}<|audio_1|>{question}{prompt_suffix}{assistant_prompt}'
|
| 485 |
+
audio, samplerate = sf.read(file_path)
|
|
|
|
| 486 |
inputs = phi4_processor(text=prompt, audios=[(audio, samplerate)], return_tensors='pt').to(phi4_model.device)
|
| 487 |
else:
|
| 488 |
yield "Invalid input type selected."
|
|
|
|
| 571 |
# --- Web Search/Visit branch ---
|
| 572 |
if text.strip().lower().startswith("@web"):
|
| 573 |
web_command = text[len("@web"):].strip()
|
|
|
|
| 574 |
if web_command.lower().startswith("visit"):
|
| 575 |
url = web_command[len("visit"):].strip()
|
| 576 |
yield "🌍 Visiting webpage..."
|
|
|
|
| 578 |
content = visitor.forward(url)
|
| 579 |
yield content
|
| 580 |
else:
|
|
|
|
| 581 |
query = web_command
|
| 582 |
yield "🧤 Performing a web search ..."
|
| 583 |
searcher = DuckDuckGoSearchTool()
|
|
|
|
| 589 |
if text.strip().lower().startswith("@ragent"):
|
| 590 |
prompt = text[len("@ragent"):].strip()
|
| 591 |
yield "📝 Initiating reasoning chain using Llama mode..."
|
|
|
|
| 592 |
for partial in ragent_reasoning(prompt, clean_chat_history(chat_history)):
|
| 593 |
yield partial
|
| 594 |
return
|
|
|
|
| 599 |
if not files or len(files) == 0:
|
| 600 |
yield "Error: Please attach an image for YOLO object detection."
|
| 601 |
return
|
|
|
|
| 602 |
input_file = files[0]
|
| 603 |
try:
|
| 604 |
if isinstance(input_file, str):
|
| 605 |
pil_image = Image.open(input_file)
|
| 606 |
else:
|
| 607 |
+
pil_image = Image.open(get_file_path(input_file))
|
| 608 |
except Exception as e:
|
| 609 |
yield f"Error loading image: {str(e)}"
|
| 610 |
return
|
|
|
|
| 615 |
|
| 616 |
# --- Phi-4 Multimodal branch with text streaming ---
|
| 617 |
if text.strip().lower().startswith("@phi4"):
|
|
|
|
| 618 |
parts = text.strip().split(maxsplit=2)
|
| 619 |
if len(parts) < 3:
|
| 620 |
yield "Error: Please provide input type and a question. Format: '@phi4 [image|audio] <your question>'"
|
|
|
|
| 647 |
|
| 648 |
if files:
|
| 649 |
if len(files) > 1:
|
| 650 |
+
images = [load_image(get_file_path(image)) for image in files]
|
| 651 |
elif len(files) == 1:
|
| 652 |
+
images = [load_image(get_file_path(files[0]))]
|
| 653 |
else:
|
| 654 |
images = []
|
| 655 |
messages = [{
|