Commit
·
820622d
1
Parent(s):
99b81db
Building new logic
Browse files- screenshot.py +65 -62
screenshot.py
CHANGED
|
@@ -3,74 +3,77 @@ from pydantic import BaseModel
|
|
| 3 |
from playwright.async_api import async_playwright
|
| 4 |
import asyncio
|
| 5 |
import base64
|
| 6 |
-
import
|
| 7 |
-
from typing import List, Optional
|
| 8 |
-
from urllib.parse import urlparse
|
| 9 |
|
| 10 |
-
app = FastAPI(
|
| 11 |
-
logger = logging.getLogger("browser-api")
|
| 12 |
|
| 13 |
-
# Pydantic Models
|
| 14 |
-
class ScreenshotRequest(BaseModel):
    """Request payload describing one screenshot capture."""

    # Address of the page to capture.
    url: str
    # Defaults to True; presumably forwarded to the capture call - confirm at the call site.
    full_page: bool = True
    # Device profile name: mobile/tablet/desktop.
    device: Optional[str] = "desktop"
    # Output format: png/jpeg/pdf.
    format: str = "png"
    # Wait after load (milliseconds, per the field name).
    delay_ms: int = 2000
|
| 20 |
|
| 21 |
-
class
|
| 22 |
url: str
|
| 23 |
-
|
| 24 |
-
|
| 25 |
-
|
| 26 |
-
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
|
| 30 |
-
"desktop": {"width": 1366, "height": 768, "mobile": False}
|
| 31 |
-
}
|
| 32 |
|
| 33 |
-
@app.on_event("startup")
async def init_browser():
    """Start Playwright and launch one Chromium instance at application startup.

    Both handles are stored on ``app.state`` (``playwright`` and ``browser``).
    """
    driver = await async_playwright().start()
    app.state.playwright = driver
    app.state.browser = await driver.chromium.launch()
|
| 37 |
|
| 38 |
-
@app.
|
| 39 |
-
async def
|
| 40 |
-
"""Capture website screenshot with device emulation"""
|
| 41 |
-
if not valid_url(req.url):
|
| 42 |
-
raise HTTPException(400, "Invalid URL")
|
| 43 |
-
|
| 44 |
-
device = DEVICES.get(req.device, DEVICES["desktop"])
|
| 45 |
-
browser = app.state.browser
|
| 46 |
-
|
| 47 |
try:
|
| 48 |
-
|
| 49 |
-
|
| 50 |
-
|
| 51 |
-
|
| 52 |
-
|
| 53 |
-
|
| 54 |
-
|
| 55 |
-
|
| 56 |
-
|
| 57 |
-
|
| 58 |
-
|
| 59 |
-
|
| 60 |
-
except Exception as e:
|
| 61 |
-
logger.error(f"Screenshot failed: {str(e)}")
|
| 62 |
-
raise HTTPException(500, "Capture failed")
|
| 63 |
|
| 64 |
-
|
| 65 |
-
|
| 66 |
-
|
| 67 |
-
# Implementation similar to screenshot but:
|
| 68 |
-
# 1. Execute provided JS scripts
|
| 69 |
-
# 2. Extract DOM elements by CSS selectors
|
| 70 |
-
# 3. Return structured JSON data
|
| 71 |
-
pass
|
| 72 |
|
| 73 |
-
#
|
| 74 |
-
|
| 75 |
-
|
| 76 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3 |
from playwright.async_api import async_playwright

import asyncio
import base64
import time
from typing import List, Optional
from urllib.parse import urlparse
|
|
|
|
|
|
|
| 7 |
|
| 8 |
+
app = FastAPI()
|
|
|
|
| 9 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 10 |
|
| 11 |
+
class AnalysisResult(BaseModel):
    """Response model returned by the ``/analyze`` endpoint.

    The nullable fields carry an explicit ``None`` default so the model
    behaves the same under Pydantic v1 (where ``Optional[X]`` implied a
    ``None`` default) and Pydantic v2 (where it does not).
    """

    # URL that was analyzed, as supplied by the caller.
    url: str
    # Seconds spent in the initial navigation, rounded to two decimals.
    load_time: float
    # Document title; may be empty or None when the page has none.
    title: Optional[str] = None
    # ``content`` of <meta name="description">, or None when the tag is absent.
    meta_description: Optional[str] = None
    # ``content`` of <meta property="og:image">, or None when the tag is absent.
    og_image: Optional[str] = None
    # Human-readable SEO findings (e.g. "Missing <title>").
    seo_flags: List[str]
    # Human-readable accessibility findings.
    accessibility_flags: List[str]
    # Full-page screenshot, base64-encoded.
    screenshot_base64: str
|
|
|
|
|
|
|
| 20 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 21 |
|
| 22 |
+
def _is_http_url(url: str) -> bool:
    """Return True when *url* is an absolute http(s) URL that names a host."""
    try:
        parts = urlparse(url)
    except ValueError:
        # urlparse rejects some malformed inputs (e.g. unbalanced IPv6 brackets).
        return False
    return parts.scheme in ("http", "https") and bool(parts.netloc)


async def _meta_content(page, selector: str):
    """Return ``el.content`` of the first element matching *selector*, or None if absent."""
    handle = await page.query_selector(selector)
    if handle is None:
        return None
    return await handle.evaluate("el => el.content")


@app.get("/analyze", response_model=AnalysisResult)
async def analyze_website(url: str):
    """Load *url* in headless Chromium and report basic SEO/accessibility findings.

    Args:
        url: Absolute http(s) URL of the page to analyze.

    Returns:
        AnalysisResult with the navigation time, title, meta description,
        Open Graph image, SEO flags, accessibility flags and a base64-encoded
        full-page screenshot.

    Raises:
        HTTPException: 400 when *url* is not an http(s) URL with a host;
            500 (with the underlying error text as detail) when the
            browser-side analysis fails.
    """
    # The URL is caller-controlled: refuse file://, data:, etc. so the browser
    # cannot be pointed at local resources. Done outside the try block so the
    # 400 is not rewrapped as a 500 by the generic handler below.
    if not _is_http_url(url):
        raise HTTPException(status_code=400, detail="Invalid URL")

    try:
        async with async_playwright() as p:
            browser = await p.chromium.launch(headless=True)
            try:
                page = await browser.new_page()

                # Time only the initial navigation; perf_counter is monotonic.
                start_time = time.perf_counter()
                await page.goto(url, timeout=30000)
                load_time = round(time.perf_counter() - start_time, 2)

                # Wait for content
                await page.wait_for_load_state("networkidle")

                # Screenshot
                screenshot = await page.screenshot(full_page=True)
                screenshot_base64 = base64.b64encode(screenshot).decode("utf-8")

                # Title and meta info
                title = await page.title()
                meta_description = await _meta_content(page, "meta[name='description']")
                og_image = await _meta_content(page, "meta[property='og:image']")

                # SEO flags
                seo_flags = []
                if not title:
                    seo_flags.append("Missing <title>")
                if not meta_description:
                    seo_flags.append("Missing meta description")
                if not await page.query_selector("h1"):
                    seo_flags.append("Missing <h1> tag")
                if not og_image:
                    seo_flags.append("Missing Open Graph image")

                # Accessibility flags: one selector lookup finds any <img> that
                # lacks the alt attribute entirely. An empty alt="" is not
                # flagged, matching the message text.
                accessibility_flags = []
                if await page.query_selector("img:not([alt])"):
                    accessibility_flags.append("Image without alt attribute")
            finally:
                # Close the browser on failure paths too.
                await browser.close()

        return AnalysisResult(
            url=url,
            load_time=load_time,
            title=title,
            meta_description=meta_description,
            og_image=og_image,
            seo_flags=seo_flags,
            accessibility_flags=accessibility_flags,
            screenshot_base64=screenshot_base64,
        )
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e)) from e
|