Spaces:
Running
Running
| import asyncio | |
| from playwright.async_api import async_playwright | |
| from playwright_stealth.stealth import Stealth | |
| from bs4 import BeautifulSoup | |
DEFAULT_URL = "https://www.fragrantica.com.br/perfume/Natura/Frescor-de-Cacau-25963.html"


def _print_target_div(html_content: str) -> None:
    """Parse *html_content* and print the first ``grid-x grid-margin-x`` div."""
    soup = BeautifulSoup(html_content, 'html.parser')
    # A multi-word ``class_`` string is compared against the full value of the
    # class attribute, so divs that carry extra classes are not matched here.
    target_div = soup.find('div', class_='grid-x grid-margin-x')
    if target_div is None:
        print("β Could not find the <div class=\"grid-x grid-margin-x\"> tag.")
        return
    div_string = target_div.prettify()
    print("\n--- Targeted Div HTML Content ---")
    print(div_string)


async def _save_error_screenshot(page) -> None:
    """Try to save a debugging screenshot of *page*; report instead of raising."""
    try:
        await page.screenshot(path='error_screenshot.png')
    except Exception as screenshot_error:
        # The page or browser may already be gone; the primary error has been
        # reported by the caller, so a failed screenshot must not replace it.
        print(f"Could not save 'error_screenshot.png': {screenshot_error}")
    else:
        print("Saved 'error_screenshot.png' for debugging.")


async def main(url: str = DEFAULT_URL) -> None:
    """Open *url* in headless Chromium and print one div of the loaded page.

    The page is opened through a Playwright instance wrapped by ``Stealth``.
    Once the ``h1[itemprop="name"]`` element is present, a screenshot is saved
    to ``success_screenshot.png`` and the first ``<div class="grid-x
    grid-margin-x">`` of the page HTML is pretty-printed to stdout.

    Errors raised while navigating, waiting or parsing are printed rather than
    propagated, and a best-effort ``error_screenshot.png`` is written. The
    browser is closed on every path once it has been launched.

    Args:
        url: Page to load. Defaults to the perfume page this script was
            written for.
    """
    # NOTE(review): the leading "β" in two of the status messages below looks
    # like a mis-encoded status glyph -- confirm the intended character.
    async with Stealth().use_async(async_playwright()) as p:
        browser = await p.chromium.launch(headless=True)
        try:
            # Created inside the outer try so that a failure here still
            # reaches browser.close() in the finally clause.
            page = await browser.new_page()
            try:
                print("Navigating to page with corrected stealth logic...")
                await page.goto(url, timeout=120000)  # milliseconds

                print("Waiting for Cloudflare check/content load...")
                main_content_selector = 'h1[itemprop="name"]'
                await page.wait_for_selector(main_content_selector, timeout=60000)
                print("β Cloudflare passed! Main content is visible.")
                await page.screenshot(path='success_screenshot.png')

                _print_target_div(await page.content())
            except Exception as e:
                print(f"An error occurred: {e}")
                await _save_error_screenshot(page)
        finally:
            await browser.close()
            print("\nBrowser closed.")
# Script entry point: run the scraper only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    asyncio.run(main())