Spaces:
Running
Running
| import re | |
| import requests | |
| from bs4 import BeautifulSoup | |
def read_web(url: str, *, timeout: float = 10.0) -> str:
    """Download a web page and return its text content.

    Args:
        url: Address of the page to fetch. A falsy value (empty string or
            ``None``) returns ``''`` immediately without any network access.
        timeout: Seconds to wait for the server before giving up; forwarded
            to ``requests.get``. Without a timeout, ``requests`` waits
            indefinitely on an unresponsive server.

    Returns:
        The text extracted from the response HTML, with every run of three
        or more consecutive newlines collapsed to exactly two.

    Raises:
        requests.RequestException: If the request cannot be completed,
            including when ``timeout`` elapses.

    Note:
        The HTTP status code is not inspected, so the body of an error page
        (4xx/5xx) is parsed and returned like any other response.
    """
    if not url:
        return ''

    # Bound the wait so a stalled server cannot hang the caller forever.
    resp = requests.get(url, timeout=timeout)
    soup = BeautifulSoup(resp.text, 'html.parser')
    text = soup.get_text()
    # Squeeze large vertical gaps left behind by stripped markup.
    return re.sub('\n{3,}', '\n\n', text)
if __name__ == '__main__':
    # Manual smoke test: fetch a sample article and dump its text to stdout.
    demo_url = 'https://en.wikipedia.org/wiki/Wiki'
    print(read_web(demo_url))