Spaces:
Paused
Paused
Update app.py
Browse files
app.py
CHANGED
|
@@ -177,7 +177,10 @@ async def chat(
|
|
| 177 |
|
| 178 |
def extract_text_from_webpage(html_content):
|
| 179 |
"""Extracts visible text from HTML content using BeautifulSoup."""
|
| 180 |
-
|
|
|
|
|
|
|
|
|
|
| 181 |
|
| 182 |
async def fetch_and_extract(url, max_chars, proxy: Optional[str] = None):
|
| 183 |
"""Fetches a URL and extracts text asynchronously."""
|
|
@@ -242,7 +245,10 @@ async def web_search_and_extract(
|
|
| 242 |
|
| 243 |
def extract_text_from_webpage2(html_content):
|
| 244 |
"""Extracts visible text from HTML content using BeautifulSoup."""
|
| 245 |
-
|
|
|
|
|
|
|
|
|
|
| 246 |
|
| 247 |
def fetch_and_extract2(url, max_chars):
|
| 248 |
"""Fetches a URL and extracts text using threading."""
|
|
|
|
| 177 |
|
| 178 |
def extract_text_from_webpage(html_content):
    """Extract the visible text from an HTML document using BeautifulSoup.

    ``<script>``, ``<style>``, ``<header>`` and ``<footer>`` elements are
    removed before the text is collected.

    Args:
        html_content: HTML markup to parse (``str`` or ``bytes``).

    Returns:
        The document's text with each fragment stripped of surrounding
        whitespace and fragments joined by a single space. Returns an empty
        string when ``html_content`` is empty or ``None``.
    """
    if not html_content:
        return ""

    # Name the parser explicitly: without it bs4 emits GuessedAtParserWarning
    # and the result depends on which parser happens to be installed.
    soup = BeautifulSoup(html_content, "html.parser")

    # The removed tags are never reused, so destroy them instead of merely
    # detaching them from the tree.
    for tag in soup(["script", "style", "header", "footer"]):
        tag.decompose()

    # A separator keeps text from adjacent elements from being fused together
    # (e.g. "<p>Hello</p><p>World</p>" -> "Hello World", not "HelloWorld").
    return soup.get_text(separator=" ", strip=True)
|
| 184 |
|
| 185 |
async def fetch_and_extract(url, max_chars, proxy: Optional[str] = None):
|
| 186 |
"""Fetches a URL and extracts text asynchronously."""
|
|
|
|
| 245 |
|
| 246 |
def extract_text_from_webpage2(html_content):
    """Extract the visible text from an HTML document using BeautifulSoup.

    ``<script>``, ``<style>``, ``<header>`` and ``<footer>`` elements are
    removed before the text is collected.

    Args:
        html_content: HTML markup to parse (``str`` or ``bytes``).

    Returns:
        The document's text with each fragment stripped of surrounding
        whitespace and fragments joined by a single space. Returns an empty
        string when ``html_content`` is empty or ``None``.
    """
    if not html_content:
        return ""

    # Name the parser explicitly: without it bs4 emits GuessedAtParserWarning
    # and the result depends on which parser happens to be installed.
    soup = BeautifulSoup(html_content, "html.parser")

    # The removed tags are never reused, so destroy them instead of merely
    # detaching them from the tree.
    for tag in soup(["script", "style", "header", "footer"]):
        tag.decompose()

    # A separator keeps text from adjacent elements from being fused together
    # (e.g. "<p>Hello</p><p>World</p>" -> "Hello World", not "HelloWorld").
    return soup.get_text(separator=" ", strip=True)
|
| 252 |
|
| 253 |
def fetch_and_extract2(url, max_chars):
|
| 254 |
"""Fetches a URL and extracts text using threading."""
|