Spaces:
Sleeping
Sleeping
mchinea
committed on
Commit
·
e71f323
1
Parent(s):
f8e3605
add new tool
Browse files
- requirements.txt +1 -0
- tools.py +32 -1
requirements.txt
CHANGED
|
@@ -13,3 +13,4 @@ pydub
|
|
| 13 |
tavily-python
|
| 14 |
wikipedia
|
| 15 |
pytesseract
|
|
|
|
|
|
| 13 |
tavily-python
|
| 14 |
wikipedia
|
| 15 |
pytesseract
|
| 16 |
+
openai-whisper
|
tools.py
CHANGED
|
@@ -8,6 +8,7 @@ from typing import Dict
|
|
| 8 |
from pathlib import Path
|
| 9 |
#from markitdown import MarkItDown
|
| 10 |
from urllib.parse import urlparse
|
|
|
|
| 11 |
|
| 12 |
from langchain_core.tools import tool
|
| 13 |
|
|
@@ -364,6 +365,35 @@ def extract_text_from_image(image_path: str) -> str:
|
|
| 364 |
return f"Unexpected error during OCR: {str(e)}"
|
| 365 |
|
| 366 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 367 |
level1_tools = [
|
| 368 |
multiply,
|
| 369 |
add,
|
|
@@ -379,5 +409,6 @@ level1_tools = [
|
|
| 379 |
download_file_from_url,
|
| 380 |
save_and_read_file,
|
| 381 |
read_python_file,
|
| 382 |
-
extract_text_from_image
|
|
|
|
| 383 |
]
|
|
|
|
| 8 |
from pathlib import Path
|
| 9 |
#from markitdown import MarkItDown
|
| 10 |
from urllib.parse import urlparse
|
| 11 |
+
from smolagents import Tool
|
| 12 |
|
| 13 |
from langchain_core.tools import tool
|
| 14 |
|
|
|
|
| 365 |
return f"Unexpected error during OCR: {str(e)}"
|
| 366 |
|
| 367 |
|
| 368 |
+
@tool
|
| 369 |
+
def transcribe_audio(audio_path: str) -> str:
|
| 370 |
+
"""
|
| 371 |
+
Transcribes speech from an audio file using Whisper (local).
|
| 372 |
+
|
| 373 |
+
Args:
|
| 374 |
+
audio_path: Path to the audio file (e.g., .mp3, .wav, .m4a).
|
| 375 |
+
|
| 376 |
+
Returns:
|
| 377 |
+
The transcribed text or an error message.
|
| 378 |
+
"""
|
| 379 |
+
try:
|
| 380 |
+
import whisper
|
| 381 |
+
|
| 382 |
+
if not os.path.exists(audio_path):
|
| 383 |
+
return f"Error: File not found at '{audio_path}'."
|
| 384 |
+
|
| 385 |
+
model = whisper.load_model("base") # You can use "small", "medium", "large"
|
| 386 |
+
result = model.transcribe(audio_path)
|
| 387 |
+
|
| 388 |
+
return result["text"].strip()
|
| 389 |
+
except ImportError:
|
| 390 |
+
return (
|
| 391 |
+
"Error: 'whisper' library is not installed. "
|
| 392 |
+
"Install it using 'pip install openai-whisper'."
|
| 393 |
+
)
|
| 394 |
+
except Exception as e:
|
| 395 |
+
return f"Error during transcription: {str(e)}"
|
| 396 |
+
|
| 397 |
level1_tools = [
|
| 398 |
multiply,
|
| 399 |
add,
|
|
|
|
| 409 |
download_file_from_url,
|
| 410 |
save_and_read_file,
|
| 411 |
read_python_file,
|
| 412 |
+
extract_text_from_image,
|
| 413 |
+
transcribe_audio
|
| 414 |
]
|