Spaces:

OEvortex
/

Webscout-API

Paused

KingNish committed on Jul 18, 2024

Commit

1ee12e5

verified ·

1 Parent(s): 95a34b4

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -177,7 +177,10 @@ async def chat(
 def extract_text_from_webpage(html_content):
     """Extracts visible text from HTML content using BeautifulSoup."""
-    return BeautifulSoup(html_content).get_text(strip=True)
 async def fetch_and_extract(url, max_chars, proxy: Optional[str] = None):
     """Fetches a URL and extracts text asynchronously."""
@@ -242,7 +245,10 @@ async def web_search_and_extract(
 def extract_text_from_webpage2(html_content):
     """Extracts visible text from HTML content using BeautifulSoup."""
-    return BeautifulSoup(html_content).get_text(strip=True)
 def fetch_and_extract2(url, max_chars):
     """Fetches a URL and extracts text using threading."""

 def extract_text_from_webpage(html_content):
     """Extracts visible text from HTML content using BeautifulSoup."""
+    soup = BeautifulSoup(html_content)
+    for tag in soup(["script", "style", "header", "footer"]):
+        tag.extract()
+    return soup.get_text(strip=True)
 async def fetch_and_extract(url, max_chars, proxy: Optional[str] = None):
     """Fetches a URL and extracts text asynchronously."""
 def extract_text_from_webpage2(html_content):
     """Extracts visible text from HTML content using BeautifulSoup."""
+    soup = BeautifulSoup(html_content)
+    for tag in soup(["script", "style", "header", "footer"]):
+        tag.extract()
+    return soup.get_text(strip=True)
 def fetch_and_extract2(url, max_chars):
     """Fetches a URL and extracts text using threading."""