| import asyncio | |
| from crawl4ai import * | |
async def main() -> None:
    """Crawl https://www.helloworld.org and print a preview of its markdown.

    A headless, verbose browser is configured, the page is fetched with the
    cache mode set to ``CacheMode.BYPASS``, and markdown is generated through
    a ``DefaultMarkdownGenerator`` whose content filter is a
    ``PruningContentFilter``. The first 500 characters of the raw markdown
    are printed; if the crawl reports failure, the error message is printed
    instead.
    """
    browser_config = BrowserConfig(headless=True, verbose=True)

    # The run configuration does not depend on the crawler instance, so it is
    # built before the browser is launched.
    crawler_config = CrawlerRunConfig(
        cache_mode=CacheMode.BYPASS,
        markdown_generator=DefaultMarkdownGenerator(
            content_filter=PruningContentFilter(
                threshold=0.48,
                threshold_type="fixed",
                min_word_threshold=0,
            )
        ),
    )

    async with AsyncWebCrawler(config=browser_config) as crawler:
        result = await crawler.arun(
            url="https://www.helloworld.org",
            config=crawler_config,
        )

    # A failed crawl is reported through the result object; dereferencing the
    # markdown without this check would hide the real error behind an
    # AttributeError.
    if not result.success:
        print(f"Crawl failed: {result.error_message}")
        return

    # NOTE(review): newer crawl4ai releases appear to expose this object as
    # `result.markdown` instead of `markdown_v2` — confirm against the
    # installed version before changing the attribute.
    print(result.markdown_v2.raw_markdown[:500])
# Run the crawl only when this file is executed as a script, not on import.
if __name__ == "__main__":
    asyncio.run(main())