app.py
CHANGED
@@ -473,14 +473,40 @@ def extract_website_content(url: str) -> str:
         if not parsed_url.netloc:
             return "Error: Invalid URL provided"
 
-        # Set headers to mimic a browser request
+        # Set comprehensive headers to mimic a real browser request
         headers = {
-            'User-Agent': 'Mozilla/5.0 (
+            'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
+            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
+            'Accept-Language': 'en-US,en;q=0.9',
+            'Accept-Encoding': 'gzip, deflate, br',
+            'DNT': '1',
+            'Connection': 'keep-alive',
+            'Upgrade-Insecure-Requests': '1',
+            'Sec-Fetch-Dest': 'document',
+            'Sec-Fetch-Mode': 'navigate',
+            'Sec-Fetch-Site': 'none',
+            'Sec-Fetch-User': '?1',
+            'Cache-Control': 'max-age=0'
         }
 
-        #
-
-
+        # Create a session to maintain cookies and handle redirects
+        session = requests.Session()
+        session.headers.update(headers)
+
+        # Make the request with retry logic
+        max_retries = 3
+        for attempt in range(max_retries):
+            try:
+                response = session.get(url, timeout=15, allow_redirects=True)
+                response.raise_for_status()
+                break
+            except requests.exceptions.HTTPError as e:
+                if e.response.status_code == 403 and attempt < max_retries - 1:
+                    # Try with different User-Agent on 403
+                    session.headers['User-Agent'] = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36'
+                    continue
+                else:
+                    raise
 
         # Parse HTML content
         soup = BeautifulSoup(response.content, 'html.parser')
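Note: the retry loop above only swaps the User-Agent when an attempt comes back 403. Below is a minimal standalone sketch of the same session-plus-retry pattern, handy for checking whether a target site accepts these headers before wiring it into the app. The fetch_html name, the trimmed header set, and the example URL are placeholders for illustration, not part of this diff.

import requests

def fetch_html(url: str, max_retries: int = 3) -> str:
    # Placeholder helper: fetch a page with browser-like headers,
    # retrying with a different User-Agent if the server returns 403.
    user_agents = [
        'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
    ]
    session = requests.Session()
    session.headers.update({'Accept-Language': 'en-US,en;q=0.9'})
    for attempt in range(max_retries):
        session.headers['User-Agent'] = user_agents[min(attempt, len(user_agents) - 1)]
        try:
            response = session.get(url, timeout=15, allow_redirects=True)
            response.raise_for_status()
            return response.text
        except requests.exceptions.HTTPError as e:
            # Only a 403 is worth retrying with a different User-Agent; re-raise anything else.
            if e.response.status_code != 403 or attempt == max_retries - 1:
                raise
    raise RuntimeError("unreachable: the loop always returns or raises")

# Example (placeholder URL):
# print(fetch_html("https://example.com")[:200])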
@@ -572,6 +598,19 @@ PAGE STRUCTURE:
 
         return website_content.strip()
 
+    except requests.exceptions.HTTPError as e:
+        if e.response.status_code == 403:
+            return f"Error: Website blocked access (403 Forbidden). This website may have anti-bot protection. Try a different website or provide a description of what you want to build instead."
+        elif e.response.status_code == 404:
+            return f"Error: Website not found (404). Please check the URL and try again."
+        elif e.response.status_code >= 500:
+            return f"Error: Website server error ({e.response.status_code}). Please try again later."
+        else:
+            return f"Error accessing website: HTTP {e.response.status_code} - {str(e)}"
+    except requests.exceptions.Timeout:
+        return "Error: Request timed out. The website may be slow or unavailable."
+    except requests.exceptions.ConnectionError:
+        return "Error: Could not connect to the website. Please check your internet connection and the URL."
     except requests.exceptions.RequestException as e:
         return f"Error accessing website: {str(e)}"
     except Exception as e:
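With these handlers in place, extract_website_content is meant never to raise to its caller: every failure path returns a string starting with "Error", which is exactly what the next hunk in generation_code branches on. A small usage sketch under that assumption (the URL is a placeholder):

# Assumes extract_website_content from app.py is in scope, as defined in this diff.
content = extract_website_content("https://example.com")  # placeholder URL
if content.startswith("Error"):
    # Blocked (403), missing (404), server error, timeout, or connection failure.
    print(f"Extraction failed: {content}")
else:
    print(f"Extracted {len(content)} characters of page text")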
@@ -603,7 +642,17 @@ def generation_code(query: Optional[str], image: Optional[gr.Image], file: Optio
             website_text = website_text[:8000]  # Limit to 8000 chars for prompt size
             query = f"{query}\n\n[Website content to redesign below]\n{website_text}"
         elif website_text.startswith("Error"):
-
+            # Provide helpful guidance when website extraction fails
+            fallback_guidance = """
+Since I couldn't extract the website content, please provide additional details about what you'd like to build:
+
+1. What type of website is this? (e.g., e-commerce, blog, portfolio, dashboard)
+2. What are the main features you want?
+3. What's the target audience?
+4. Any specific design preferences? (colors, style, layout)
+
+This will help me create a better design for you."""
+            query = f"{query}\n\n[Error extracting website: {website_text}]{fallback_guidance}"
 
     # Enhance query with search if enabled
     enhanced_query = enhance_query_with_search(query, enable_search)