Spaces:
Running
Running
| import unittest | |
| from mediaunmasked.scrapers.article_scraper import ArticleScraper | |
| import logging | |
class TestArticleScraper(unittest.TestCase):
    """Tests for ``ArticleScraper.scrape_article``.

    NOTE(review): the tests pass real URLs to the scraper, so they
    presumably require network access -- confirm before enabling in CI.
    """

    def setUp(self):
        """Create a fresh scraper and a module-named logger for each test."""
        self.scraper = ArticleScraper()
        # Configure logging for tests. basicConfig() does nothing once the
        # root logger already has handlers, so repeating it per test is safe.
        logging.basicConfig(level=logging.INFO)
        self.logger = logging.getLogger(__name__)

    def test_cna_article(self):
        """Test scraping a Channel News Asia article"""
        url = "https://www.channelnewsasia.com/singapore/singapore-mccy-sg-culture-pass-arts-culture-heritage-4951451"
        result = self.scraper.scrape_article(url)

        # Validate the result BEFORE touching it: if the scrape fails and
        # returns None, the test must report an assertion failure rather
        # than crash with AttributeError/TypeError in the logging code.
        self.assertIsNotNone(result)
        self.assertIn('headline', result)
        self.assertIn('content', result)
        # Truthiness rejects both '' and None, so the slices below are safe.
        self.assertTrue(result['headline'], "headline is empty")
        self.assertTrue(result['content'], "content is empty")

        headline = result['headline']
        content = result['content']

        # Log the result (lazy %-style args; formatting only happens if the
        # record is actually emitted).
        self.logger.info("Scraping Result:")
        self.logger.info("Headline: %s", headline)
        self.logger.info("Content Preview: %s...", content[:200])

        # Print full result for manual inspection
        print("\nFull Scraping Result:")
        print(f"Headline: {headline}")
        print(f"\nContent Preview (first 500 chars):\n{content[:500]}...")

    def test_invalid_url(self):
        """Test scraping an invalid URL"""
        url = "https://invalid.url.that.doesnt.exist"
        result = self.scraper.scrape_article(url)
        self.assertIsNone(result)

    def test_empty_url(self):
        """Test scraping with empty URL"""
        url = ""
        result = self.scraper.scrape_article(url)
        self.assertIsNone(result)
# Run the test suite through unittest's CLI runner when this file is
# executed directly (not when it is imported).
if __name__ == '__main__':
    unittest.main()