nPeppon committed
Commit 0584af4 · 1 Parent(s): 81917a3

Enhance app.py with Langfuse integration and file handling; update requirements.txt with additional dependencies.

.gitignore ADDED
@@ -0,0 +1,120 @@
+ # Byte-compiled / optimized / DLL files
+ __pycache__/
+ *.py[cod]
+ *$py.class
+
+ # C extensions
+ *.so
+
+ # Distribution / packaging
+ .Python
+ build/
+ develop-eggs/
+ dist/
+ downloads/
+ eggs/
+ .eggs/
+ lib/
+ lib64/
+ parts/
+ sdist/
+ var/
+ wheels/
+ pip-wheel-metadata/
+ share/python-wheels/
+ *.egg-info/
+ .installed.cfg
+ *.egg
+ MANIFEST
+
+ # PyInstaller
+ # Usually these files are written by a python script from a template
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
+ *.manifest
+ *.spec
+
+ # Installer logs
+ pip-log.txt
+ pip-delete-this-directory.txt
+
+ # Unit test / coverage reports
+ htmlcov/
+ .tox/
+ .nox/
+ .coverage
+ .coverage.*
+ .cache
+ nosetests.xml
+ coverage.xml
+ *.cover
+ *.py,cover
+ .hypothesis/
+ .pytest_cache/
+
+ # Translations
+ *.mo
+ *.pot
+
+ # Django stuff:
+ *.log
+ local_settings.py
+ db.sqlite3
+ db.sqlite3-journal
+
+ # Flask stuff:
+ instance/
+ .webassets-cache
+
+ # Scrapy stuff:
+ .scrapy
+
+ # Sphinx documentation
+ docs/_build/
+
+ # PyBuilder
+ target/
+
+ # Jupyter Notebook
+ .ipynb_checkpoints
+
+ # IPython
+ profile_default/
+ ipython_config.py
+
+ # pyenv
+ .python-version
+
+ # PEP 582; __pypackages__
+ __pypackages__/
+
+ # Celery stuff
+ celerybeat-schedule
+ celerybeat.pid
+
+ # SageMath parsed files
+ *.sage.py
+
+ # Environments
+ .env
+ .venv
+ env/
+ venv/
+ ENV/
+ env.bak/
+ venv.bak/
+
+ # IDE / Editor specific files
+ .idea/
+ .vscode/
+ *.project
+ *.pydevproject
+ .project
+ .settings/
+ *.sublime-workspace
+
+ # dotenv
+ .env
+
+ # OS specific files
+ .DS_Store
+ Thumbs.db
app.py CHANGED
@@ -3,23 +3,43 @@ import gradio as gr
  import requests
  import inspect
  import pandas as pd
-
+ from graph.graph_builder import graph
+ from langfuse.callback import CallbackHandler
+ from typing import Optional
+ from langchain_core.messages import AnyMessage, HumanMessage, AIMessage
  # (Keep Constants as is)
  # --- Constants ---
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"

+
+ langfuse_secret_key = os.getenv("LANGFUSE_SECRET_KEY")
+ langfuse_public_key = os.getenv("LANGFUSE_PUBLIC_KEY")
+
+ # Initialize Langfuse CallbackHandler for LangGraph/Langchain (tracing)
+ langfuse_handler = CallbackHandler(
+     public_key=langfuse_public_key,
+     secret_key=langfuse_secret_key,
+     host="https://cloud.langfuse.com"
+ )
+
  # --- Basic Agent Definition ---
  # ----- THIS IS WHERE YOU CAN BUILD WHAT YOU WANT ------
- class BasicAgent:
+ """ class BasicAgent:
      def __init__(self):
          print("BasicAgent initialized.")
-     def __call__(self, question: str) -> str:
+     def __call__(self, question: str, file_name: str | None = None) -> str:
          print(f"Agent received question (first 50 chars): {question[:50]}...")
+         if file_name:
+             print(f"Agent received file_name: {file_name}")
+             # Here you could add logic that uses file_name when it is provided.
+             # For now, it is appended to the default answer for demonstration.
          fixed_answer = "This is a default answer."
+         if file_name:
+             fixed_answer += f" (File to use: {file_name})"
          print(f"Agent returning fixed answer: {fixed_answer}")
-         return fixed_answer
+         return fixed_answer """

- def run_and_submit_all( profile: gr.OAuthProfile | None):
+ def run_and_submit_all( profile: Optional[gr.OAuthProfile]):
      """
      Fetches all questions, runs the BasicAgent on them, submits all answers,
      and displays the results.
@@ -40,7 +60,7 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):

      # 1. Instantiate Agent ( modify this part to create your agent)
      try:
-         agent = BasicAgent()
+         agent = graph
      except Exception as e:
          print(f"Error instantiating agent: {e}")
          return f"Error initializing agent: {e}", None
@@ -76,13 +96,24 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
      for item in questions_data:
          task_id = item.get("task_id")
          question_text = item.get("question")
+         file_name = item.get("file_name")  # Extract file_name
+
          if not task_id or question_text is None:
              print(f"Skipping item with missing task_id or question: {item}")
              continue
          try:
-             submitted_answer = agent(question_text)
-             answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
-             results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
+             if file_name and isinstance(file_name, str) and file_name.strip():
+                 messages = HumanMessage(content=question_text + " Path: files/" + file_name)
+             else:
+                 messages = HumanMessage(content=question_text)
+             submitted_answer = graph.invoke(input={"messages": messages}, config={"callbacks": [langfuse_handler]})
+             answers_payload.append({
+                 "task_id": task_id,
+                 "submitted_answer": submitted_answer['messages'][-1].content[-1]
+                     if isinstance(submitted_answer['messages'][-1].content, list)
+                     else submitted_answer['messages'][-1].content
+             })
+             results_log.append({"Task ID": task_id, "Question": question_text, "File Name": file_name if file_name and file_name.strip() else "N/A", "Submitted Answer": submitted_answer['messages'][-1].content})
          except Exception as e:
              print(f"Error running agent on task {task_id}: {e}")
              results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {e}"})
@@ -146,11 +177,9 @@ with gr.Blocks() as demo:
      gr.Markdown(
          """
          **Instructions:**
-
          1. Please clone this space, then modify the code to define your agent's logic, the tools, the necessary packages, etc ...
          2. Log in to your Hugging Face account using the button below. This uses your HF username for submission.
          3. Click 'Run Evaluation & Submit All Answers' to fetch questions, run your agent, submit answers, and see the score.
-
          ---
          **Disclaimers:**
          Once clicking on the "Submit" button, it can take quite some time (this is the time for the agent to go through all the questions).
@@ -193,4 +222,5 @@ if __name__ == "__main__":
      print("-"*(60 + len(" App Starting ")) + "\n")

      print("Launching Gradio Interface for Basic Agent Evaluation...")
-     demo.launch(debug=True, share=False)
+     demo.launch(debug=True, share=False)
+
graph/graph_builder.py ADDED
@@ -0,0 +1,23 @@
+ from langgraph.graph import START, StateGraph
+ from langgraph.prebuilt import tools_condition
+ from langgraph.prebuilt import ToolNode
+ from nodes.core import assistant, tools
+ from states.state import AgentState
+
+ ## The graph
+ builder = StateGraph(AgentState)
+
+ # Define nodes: these do the work
+ builder.add_node("assistant", assistant)
+ builder.add_node("tools", ToolNode(tools))
+
+ # Define edges: these determine how the control flow moves
+ builder.add_edge(START, "assistant")
+ builder.add_conditional_edges(
+     "assistant",
+     # If the latest message requires a tool, route to tools
+     # Otherwise, provide a direct response
+     tools_condition,
+ )
+ builder.add_edge("tools", "assistant")
+ graph = builder.compile()
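
For orientation, here is a minimal sketch (not part of this commit) of how the compiled graph can be exercised on its own, assuming the nodes/ and states/ packages above are importable and GEMINI_API_KEY is set; the question text is just a placeholder:

# Sketch: invoke the compiled graph directly, outside the Gradio app.
from langchain_core.messages import HumanMessage
from graph.graph_builder import graph

result = graph.invoke({"messages": [HumanMessage(content="What is 2 + 2?")]})
print(result["messages"][-1].content)  # final answer from the assistant node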
graph_builder.py ADDED
@@ -0,0 +1,23 @@
+ from langgraph.graph import START, StateGraph
+ from langgraph.prebuilt import tools_condition
+ from langgraph.prebuilt import ToolNode
+ from nodes.core import assistant, tools
+ from states.state import AgentState
+
+ ## The graph
+ builder = StateGraph(AgentState)
+
+ # Define nodes: these do the work
+ builder.add_node("assistant", assistant)
+ builder.add_node("tools", ToolNode(tools))
+
+ # Define edges: these determine how the control flow moves
+ builder.add_edge(START, "assistant")
+ builder.add_conditional_edges(
+     "assistant",
+     # If the latest message requires a tool, route to tools
+     # Otherwise, provide a direct response
+     tools_condition,
+ )
+ builder.add_edge("tools", "assistant")
+ graph = builder.compile()
nodes/core.py ADDED
@@ -0,0 +1,54 @@
+ from states.state import AgentState
+ import os
+ # Import the load_dotenv function from the dotenv library
+ from dotenv import load_dotenv
+ from langchain_google_genai import ChatGoogleGenerativeAI
+ from tools.multimodal_tools import extract_text, analyze_image_tool, analyze_audio_tool
+ from tools.math_tools import add, subtract, multiply, divide
+ from tools.search_tools import search_tool, serpapi_search
+ from tools.youtube_tools import extract_youtube_transcript
+ from langfuse.callback import CallbackHandler
+
+ load_dotenv()
+
+ # Read your API key from the environment variable or set it manually
+ api_key = os.getenv("GEMINI_API_KEY")
+ langfuse_secret_key = os.getenv("LANGFUSE_SECRET_KEY")
+ langfuse_public_key = os.getenv("LANGFUSE_PUBLIC_KEY")
+
+ # Initialize Langfuse CallbackHandler for LangGraph/Langchain (tracing)
+ langfuse_handler = CallbackHandler(
+     public_key=langfuse_public_key,
+     secret_key=langfuse_secret_key,
+     host="http://localhost:3000"
+ )
+
+ chat = ChatGoogleGenerativeAI(
+     model="gemini-2.5-pro-preview-05-06",
+     temperature=0,
+     max_retries=2,
+     google_api_key=api_key,
+     thinking_budget=0
+ )
+
+ tools = [
+     extract_text,
+     analyze_image_tool,
+     analyze_audio_tool,
+     extract_youtube_transcript,
+     add,
+     subtract,
+     multiply,
+     divide,
+     search_tool
+ ]
+
+ chat_with_tools = chat.bind_tools(tools)
+
+ def assistant(state: AgentState):
+     sys_msg = "You are a helpful assistant with access to tools. Understand user requests accurately. Use your tools when needed to answer effectively. Strictly follow all user instructions and constraints. " \
+         "Pay attention: your output needs to contain only the final answer without any reasoning since it will be strictly evaluated against a dataset which contains only the specific response. " \
+         "Your final output needs to be just the string or integer containing the answer, not an array or technical stuff."
+     return {
+         "messages": [chat_with_tools.invoke([sys_msg] + state["messages"])]
+     }
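
Note that sys_msg is passed as a plain string; LangChain generally coerces a bare string in a message list into a human-role message rather than a system prompt. If system-role instructions are the intent, a small variant (a sketch under that assumption, not what this commit does) would be:

# Sketch: carry the instructions as an explicit SystemMessage.
from langchain_core.messages import SystemMessage

def assistant(state: AgentState):
    sys_msg = SystemMessage(content=(
        "You are a helpful assistant with access to tools. "
        "Return only the final answer, as a plain string or integer, with no reasoning."
    ))
    return {"messages": [chat_with_tools.invoke([sys_msg] + state["messages"])]}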
requirements.txt CHANGED
@@ -1,2 +1,121 @@
- gradio
- requests
+ aiofiles==24.1.0
+ aiohappyeyeballs==2.6.1
+ aiohttp==3.12.7
+ aiosignal==1.3.2
+ annotated-types==0.7.0
+ anyio==4.9.0
+ asttokens==3.0.0
+ async-timeout==4.0.3
+ attrs==25.3.0
+ backoff==2.2.1
+ cachetools==5.5.2
+ certifi==2025.4.26
+ charset-normalizer==3.4.2
+ click==8.2.1
+ colorama==0.4.6
+ dataclasses-json==0.6.7
+ decorator==5.2.1
+ defusedxml==0.7.1
+ exceptiongroup==1.3.0
+ executing==2.2.0
+ fastapi==0.115.12
+ ffmpy==0.6.0
+ filelock==3.18.0
+ filetype==1.2.0
+ frozenlist==1.6.0
+ fsspec==2025.5.1
+ google-ai-generativelanguage==0.6.18
+ google-api-core==2.25.0
+ google-auth==2.40.2
+ google-search-results==2.4.2
+ googleapis-common-protos==1.70.0
+ gradio==5.32.1
+ gradio_client==1.10.2
+ greenlet==3.2.2
+ groovy==0.1.2
+ grpcio==1.72.1
+ grpcio-status==1.72.1
+ h11==0.16.0
+ httpcore==1.0.9
+ httpx==0.28.1
+ httpx-sse==0.4.0
+ huggingface-hub==0.32.4
+ idna==3.10
+ ipython==8.37.0
+ jedi==0.19.2
+ Jinja2==3.1.6
+ jsonpatch==1.33
+ jsonpointer==3.0.0
+ langchain==0.3.25
+ langchain-community==0.3.24
+ langchain-core==0.3.63
+ langchain-google-genai==2.1.5
+ langchain-text-splitters==0.3.8
+ langfuse==2.60.7
+ langgraph==0.4.8
+ langgraph-checkpoint==2.0.26
+ langgraph-prebuilt==0.2.2
+ langgraph-sdk==0.1.70
+ langsmith==0.3.44
+ markdown-it-py==3.0.0
+ MarkupSafe==3.0.2
+ marshmallow==3.26.1
+ matplotlib-inline==0.1.7
+ mdurl==0.1.2
+ multidict==6.4.4
+ mypy_extensions==1.1.0
+ numpy==2.2.6
+ orjson==3.10.18
+ ormsgpack==1.10.0
+ packaging==24.2
+ pandas==2.2.3
+ parso==0.8.4
+ pillow==11.2.1
+ prompt_toolkit==3.0.51
+ propcache==0.3.1
+ proto-plus==1.26.1
+ protobuf==6.31.1
+ pure_eval==0.2.3
+ pyasn1==0.6.1
+ pyasn1_modules==0.4.2
+ pydantic==2.11.5
+ pydantic-settings==2.9.1
+ pydantic_core==2.33.2
+ pydub==0.25.1
+ Pygments==2.19.1
+ python-dateutil==2.9.0.post0
+ python-dotenv==1.1.0
+ python-multipart==0.0.20
+ pytz==2025.2
+ PyYAML==6.0.2
+ requests==2.32.3
+ requests-toolbelt==1.0.0
+ rich==14.0.0
+ rsa==4.9.1
+ ruff==0.11.12
+ safehttpx==0.1.6
+ semantic-version==2.10.0
+ shellingham==1.5.4
+ six==1.17.0
+ sniffio==1.3.1
+ SQLAlchemy==2.0.41
+ stack-data==0.6.3
+ starlette==0.46.2
+ tenacity==9.1.2
+ tomlkit==0.13.2
+ tqdm==4.67.1
+ traitlets==5.14.3
+ typer==0.16.0
+ typing-inspect==0.9.0
+ typing-inspection==0.4.1
+ typing_extensions==4.14.0
+ tzdata==2025.2
+ urllib3==2.4.0
+ uvicorn==0.34.3
+ wcwidth==0.2.13
+ websockets==15.0.1
+ wrapt==1.17.2
+ xxhash==3.5.0
+ yarl==1.20.0
+ youtube-transcript-api==1.0.3
+ zstandard==0.23.0
states/state.py ADDED
@@ -0,0 +1,7 @@
+ from typing import TypedDict, Annotated
+ from langchain_core.messages import AnyMessage
+ from langgraph.graph.message import add_messages
+
+
+ class AgentState(TypedDict):
+     messages: Annotated[list[AnyMessage], add_messages]
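
add_messages is the reducer that makes the messages channel append new messages instead of overwriting them on every node update; a rough illustration of that behavior (hypothetical messages, shown only to make the reducer concrete):

# Sketch: add_messages merges successive updates rather than replacing them.
from langgraph.graph.message import add_messages
from langchain_core.messages import HumanMessage, AIMessage

history = add_messages([HumanMessage(content="Hi")], [AIMessage(content="Hello!")])
print(len(history))  # 2 -- both messages are kept, in order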
tools/math_tools.py ADDED
@@ -0,0 +1,61 @@
+ from langchain_core.tools import tool
+ import operator
+
+ @tool("add_tool", parse_docstring=True)
+ def add(a: float, b: float) -> float:
+     """
+     Adds two numbers.
+
+     Args:
+         a: The first number.
+         b: The second number.
+
+     Returns:
+         The sum of a and b.
+     """
+     return operator.add(a, b)
+
+ @tool("subtract_tool", parse_docstring=True)
+ def subtract(a: float, b: float) -> float:
+     """
+     Subtracts the second number from the first.
+
+     Args:
+         a: The first number (minuend).
+         b: The second number (subtrahend).
+
+     Returns:
+         The result of subtracting b from a.
+     """
+     return operator.sub(a, b)
+
+ @tool("multiply_tool", parse_docstring=True)
+ def multiply(a: float, b: float) -> float:
+     """
+     Multiplies two numbers.
+
+     Args:
+         a: The first number.
+         b: The second number.
+
+     Returns:
+         The product of a and b.
+     """
+     return operator.mul(a, b)
+
+ @tool("divide_tool", parse_docstring=True)
+ def divide(a: float, b: float) -> float:
+     """
+     Divides the first number by the second.
+
+     Args:
+         a: The numerator.
+         b: The denominator.
+
+     Returns:
+         The result of dividing a by b.
+         Returns an error message string if division by zero occurs.
+     """
+     if b == 0:
+         return "Error: Cannot divide by zero."
+     return operator.truediv(a, b)
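
These @tool-decorated functions are regular LangChain tools, so besides being bound to the model they can be called directly through the standard tool interface; a quick sanity check might look like this (a sketch, not part of the commit):

# Sketch: invoking the decorated tools directly with their argument schemas.
print(add.invoke({"a": 2, "b": 3}))     # 5.0
print(divide.invoke({"a": 1, "b": 0}))  # "Error: Cannot divide by zero."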
tools/multimodal_tools.py ADDED
@@ -0,0 +1,179 @@
+ import base64
+ import os
+ from langchain_core.messages import AnyMessage, SystemMessage, HumanMessage
+ from langchain_google_genai import ChatGoogleGenerativeAI
+ from langchain.tools import Tool
+ from langchain_core.tools import tool
+
+ api_key = os.getenv("GEMINI_API_KEY")
+
+ # Create LLM class
+ vision_llm = ChatGoogleGenerativeAI(
+     model="gemini-2.5-flash-preview-05-20",
+     temperature=0,
+     max_retries=2,
+     google_api_key=api_key
+ )
+
+ @tool("extract_text_tool", parse_docstring=True)
+ def extract_text(img_path: str) -> str:
+     """
+     Extract text from an image file using a multimodal model.
+
+     Args:
+         img_path: The path to the image file from which to extract text.
+
+     Returns:
+         The extracted text from the image, or an empty string if an error occurs.
+     """
+     all_text = ""
+     try:
+         # Read image and encode as base64
+         with open(img_path, "rb") as image_file:
+             image_bytes = image_file.read()
+
+         image_base64 = base64.b64encode(image_bytes).decode("utf-8")
+
+         # Prepare the prompt including the base64 image data
+         message = [
+             HumanMessage(
+                 content=[
+                     {
+                         "type": "text",
+                         "text": (
+                             "Extract all the text from this image. "
+                             "Return only the extracted text, no explanations."
+                         ),
+                     },
+                     {
+                         "type": "image_url",
+                         "image_url": {
+                             "url": f"data:image/png;base64,{image_base64}"
+                         },
+                     },
+                 ]
+             )
+         ]
+
+         # Call the vision-capable model
+         response = vision_llm.invoke(message)
+
+         # Append extracted text
+         all_text += response.content + "\n\n"
+
+         return all_text.strip()
+     except Exception as e:
+         # A butler should handle errors gracefully
+         error_msg = f"Error extracting text: {str(e)}"
+         print(error_msg)
+         return ""
+
+ @tool("analyze_image_tool", parse_docstring=True)
+ def analyze_image_tool(user_query: str, img_path: str) -> str:
+     """
+     Answer the question by reasoning on the image.
+
+     Args:
+         user_query: The question to be answered based on the image.
+         img_path: Path to the image file to be analyzed.
+
+     Returns:
+         The answer to the query based on image content, or an empty string if an error occurs.
+     """
+     all_text = ""
+     try:
+         # Read image and encode as base64
+         with open(img_path, "rb") as image_file:
+             image_bytes = image_file.read()
+
+         image_base64 = base64.b64encode(image_bytes).decode("utf-8")
+
+         # Prepare the prompt including the base64 image data
+         message = [
+             HumanMessage(
+                 content=[
+                     {
+                         "type": "text",
+                         "text": (
+                             f"User query: {user_query}"
+                         ),
+                     },
+                     {
+                         "type": "image_url",
+                         "image_url": {
+                             "url": f"data:image/png;base64,{image_base64}"
+                         },
+                     },
+                 ]
+             )
+         ]
+
+         # Call the vision-capable model
+         response = vision_llm.invoke(message)
+
+         # Append extracted text
+         all_text += response.content + "\n\n"
+
+         return all_text.strip()
+     except Exception as e:
+         # A butler should handle errors gracefully
+         error_msg = f"Error analyzing image: {str(e)}"
+         print(error_msg)
+         return ""
+
+ @tool("analyze_audio_tool", parse_docstring=True)
+ def analyze_audio_tool(user_query: str, audio_path: str) -> str:
+     """Answer the question by reasoning on the provided audio file.
+
+     Args:
+         user_query: The question to be answered based on the audio content.
+         audio_path: Path to the audio file (e.g., .mp3, .wav, .flac, .aac, .ogg).
+
+     Returns:
+         The answer to the query based on audio content, or an error message/empty string if an error occurs.
+     """
+     try:
+         # Determine MIME type from file extension
+         _filename, file_extension = os.path.splitext(audio_path)
+         file_extension = file_extension.lower()
+
+         supported_formats = {
+             ".mp3": "audio/mp3", ".wav": "audio/wav", ".flac": "audio/flac",
+             ".aac": "audio/aac", ".ogg": "audio/ogg"
+         }
+
+         if file_extension not in supported_formats:
+             return (f"Error: Unsupported audio file format '{file_extension}'. "
+                     f"Supported extensions: {', '.join(supported_formats.keys())}.")
+         mime_type = supported_formats[file_extension]
+
+         # Read audio file and encode as base64
+         with open(audio_path, "rb") as audio_file:
+             audio_bytes = audio_file.read()
+         audio_base64 = base64.b64encode(audio_bytes).decode("utf-8")
+
+         # Prepare the prompt including the base64 audio data
+         message = [
+             HumanMessage(
+                 content=[
+                     {
+                         "type": "text",
+                         "text": f"User query: {user_query}",
+                     },
+                     {
+                         "type": "audio",
+                         "source_type": "base64",
+                         "mime_type": mime_type,
+                         "data": audio_base64
+                     },
+                 ]
+             )
+         ]
+
+         # Call the multimodal model
+         response = vision_llm.invoke(message)
+         return response.content.strip()
+     except Exception as e:
+         error_msg = f"Error analyzing audio: {str(e)}"
+         print(error_msg)
+         return ""
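
As with the other tools, these can be smoke-tested outside the graph; a short sketch (requires GEMINI_API_KEY, and the file path below is hypothetical):

# Sketch: direct call to the image-analysis tool with a local image.
answer = analyze_image_tool.invoke({
    "user_query": "How many chess pieces are on the board?",
    "img_path": "files/board.png",  # hypothetical path
})
print(answer)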
tools/search_tools.py ADDED
@@ -0,0 +1,54 @@
+ import os
+ from langchain.tools import Tool
+ from serpapi import GoogleSearch
+ from dotenv import load_dotenv
+ from langchain_community.tools.tavily_search import TavilySearchResults
+ from langchain_core.tools import tool
+
+ # Load environment variables if the API key lives in a .env file
+ load_dotenv()
+
+ SERPAPI_API_KEY = os.getenv("SERPAPI_API_KEY")
+
+ search_tool = TavilySearchResults(
+     name="tavily_web_search",  # You can customize the name if you want
+     description="Runs an advanced web search with Tavily for up-to-date, comprehensive information. Useful for complex questions or ones that need recent data. Running multiple searches with a reworded query can yield better results.",  # Description for the LLM
+     max_results=5
+ )
+
+ @tool("serpapi_search_tool", parse_docstring=True)
+ def serpapi_search(query: str, num_results: int = 5, gl: str = "it", hl: str = "it") -> str:
+     """
+     Runs a web search through SerpAPI (Google Search) and returns the formatted results.
+     This tool is expensive, so prefer other tools when they are available.
+     Call this tool only if the other tools did not give satisfactory results.
+
+     Args:
+         query: The search query.
+         num_results: The number of results to return.
+         gl: Country code used to geolocate the results (e.g. "it" for Italy).
+         hl: Language code for the search results (e.g. "it" for Italian).
+
+     Returns:
+         A formatted string with the search results, or an error message.
+     """
+     if not SERPAPI_API_KEY:
+         return "Error: the SERPAPI_API_KEY environment variable is not set."
+
+     params = {
+         "engine": "google",
+         "q": query,
+         "api_key": SERPAPI_API_KEY,
+         "num": num_results,
+         "gl": gl,
+         "hl": hl
+     }
+     search = GoogleSearch(params)
+     results = search.get_dict()
+     organic_results = results.get("organic_results", [])
+
+     if not organic_results:
+         return f"No results found for '{query}'."
+
+     formatted_results = "\n\n".join([f"Title: {res.get('title')}\nLink: {res.get('link')}\nSnippet: {res.get('snippet')}" for res in organic_results])
+     return formatted_results
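
Both search tools expose the standard tool interface, so they can be tried directly; a sketch (assumes TAVILY_API_KEY and SERPAPI_API_KEY are set; the query is a placeholder):

# Sketch: direct invocation of the two search tools.
print(search_tool.invoke({"query": "current stable Python release"}))
print(serpapi_search.invoke({"query": "current stable Python release", "num_results": 3}))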
tools/youtube_tools.py ADDED
@@ -0,0 +1,26 @@
+ from langchain_core.tools import tool
+ from youtube_transcript_api import YouTubeTranscriptApi, NoTranscriptFound, TranscriptsDisabled
+
+ @tool("youtube_transcript_extractor", parse_docstring=True)
+ def extract_youtube_transcript(youtube_url: str) -> str:
+     """
+     Extracts the transcript from a given YouTube video URL.
+
+     Args:
+         youtube_url: The URL of the YouTube video.
+
+     Returns:
+         The transcript as a single string, or an error message if the transcript
+         cannot be found or an error occurs.
+     """
+     try:
+         video_id = youtube_url.split("v=")[1].split("&")[0]
+         transcript_list = YouTubeTranscriptApi.get_transcript(video_id)
+         transcript = " ".join([item['text'] for item in transcript_list])
+         return transcript
+     except NoTranscriptFound:
+         return "Error: No transcript found for this video. It might be disabled or not available in English."
+     except TranscriptsDisabled:
+         return "Error: Transcripts are disabled for this video."
+     except Exception as e:
+         return f"Error extracting transcript: {str(e)}"
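
A direct call follows the same pattern as the other tools (the URL below is a placeholder; any public video with captions works):

# Sketch: extracting a transcript through the tool interface.
transcript = extract_youtube_transcript.invoke(
    {"youtube_url": "https://www.youtube.com/watch?v=VIDEO_ID"}  # placeholder URL
)
print(transcript[:200])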