|
|
|
|
|
from smolagents import tool |
|
|
from youtube_transcript_api import YouTubeTranscriptApi |
|
|
from bs4 import BeautifulSoup |
|
|
from tools.fetch import fetch_webpage |
|
|
|
|
|
@tool
def get_youtube_transcript(video_id: str) -> str:
    """
    Retrieves the transcript of a YouTube video as plain text, given the video's ID.

    Args:
        video_id (str): The ID of the YouTube video. Pass in the video ID, NOT the video URL. For a video with the URL https://www.youtube.com/watch?v=12345 the ID is 12345.

    Returns:
        str: The transcript of the YouTube video as a single string, with each line separated by a newline character.
    """
    # Each raw entry is indexed by its "text" key; any other fields are ignored.
    entries = YouTubeTranscriptApi().fetch(video_id).to_raw_data()
    return "\n".join(entry["text"] for entry in entries)
|
|
|
|
|
|
|
|
@tool
def get_youtube_title_description(video_url: str) -> str:
    """
    Fetches the title and description of a YouTube video given its URL.

    Args:
        video_url (str): The url of the YouTube video.

    Returns:
        str: The title and description of the YouTube video, as a "Title: ..." line followed by a "Description: ..." line. A field whose meta tag or content attribute is missing is reported as "No title found" or "No description found".
    """
    # Request the raw HTML (no markdown conversion) so the <meta> tags can be parsed.
    html = fetch_webpage(video_url, convert_to_markdown=False)
    soup = BeautifulSoup(html, "html.parser")

    found = {}
    for field in ("title", "description"):
        tag = soup.find("meta", {"name": field})
        # Tag.get avoids a KeyError when the tag exists but carries no
        # `content` attribute; an empty content string is kept as-is.
        value = tag.get("content") if tag is not None else None
        found[field] = value if value is not None else f"No {field} found"

    title = found["title"]
    description = found["description"]
    return f"Title: {title}\nDescription: {description}"
|
|
|
|
|
|
|
|
if __name__ == "__main__":
    # Manual smoke test: fetch the metadata and transcript of a sample video.
    from dotenv import load_dotenv

    # Call the loader; a bare `load_dotenv` reference is a no-op expression.
    load_dotenv()

    video_id = "1htKBjuUWec"
    video_url = "https://www.youtube.com/watch?v=" + video_id

    try:
        title_description = get_youtube_title_description(video_url)
        print(title_description)
    except Exception as e:
        print(f"Error fetching title and description: {e}")

    try:
        transcript = get_youtube_transcript(video_id)
    except Exception as e:
        print(f"Error fetching transcript: {e}")
    else:
        # Print only on success: `transcript` is unbound when the fetch raised,
        # so printing unconditionally would fail with a NameError.
        print(transcript)