Spaces:
Sleeping
Sleeping
| #!/usr/bin/env python3 | |
| import requests | |
| import re | |
| import argparse | |
| import os | |
| from urllib.parse import urljoin | |
| from pypdf import PdfReader, PdfWriter | |
def get_maze_generator_page():
    """Fetch the maze generator landing page.

    Returns:
        tuple: ``(html, cookies)`` -- the response body as text and the
        cookies the server set on this response, so that follow-up form
        posts can reuse them.

    Raises:
        requests.RequestException: If the request fails or times out.
    """
    url = "https://www.mazegenerator.net/"
    headers = {
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
    }
    # requests has no default timeout; without one a stalled connection
    # would block the script forever.
    response = requests.get(url, headers=headers, timeout=30)
    return response.text, response.cookies
def extract_form_data(html_content):
    """Extract the form action URL and hidden form fields from an HTML page.

    The page is scanned with the standard-library HTML parser rather than
    regular expressions, so attribute order, quoting style and letter case
    do not matter, and HTML entities in attribute values are decoded the
    way a browser would before submitting the form.

    Args:
        html_content: The HTML document as a string.

    Returns:
        tuple: ``(form_action, form_data)`` where ``form_action`` is the
        ``action`` attribute of the first ``<form>`` tag that has one
        (``""`` if none is found) and ``form_data`` maps the ``name`` of
        each hidden ``<input>`` to its ``value``. The ASP.NET state fields
        ``__VIEWSTATE``, ``__VIEWSTATEGENERATOR`` and ``__EVENTVALIDATION``
        are collected even if their ``type`` attribute is not ``hidden``.
    """
    # Function-scope import keeps this block self-contained.
    from html.parser import HTMLParser

    aspnet_fields = {'__VIEWSTATE', '__VIEWSTATEGENERATOR', '__EVENTVALIDATION'}

    class _FormScanner(HTMLParser):
        """Collect the first form action and all hidden input fields."""

        def __init__(self):
            super().__init__()
            self.action = None
            self.fields = {}

        def handle_starttag(self, tag, attrs):
            # HTMLParser lower-cases tag and attribute names and unescapes
            # attribute values. Keep the first occurrence of a duplicated
            # attribute, as browsers do.
            attr_map = {}
            for key, value in attrs:
                attr_map.setdefault(key, value)

            if tag == 'form':
                if self.action is None and 'action' in attr_map:
                    # A bare ``action`` attribute has value None.
                    self.action = attr_map['action'] or ""
            elif tag == 'input':
                name = attr_map.get('name')
                if not name:
                    return
                is_hidden = (attr_map.get('type') or '').lower() == 'hidden'
                if is_hidden or name in aspnet_fields:
                    self.fields[name] = attr_map.get('value') or ""

    scanner = _FormScanner()
    scanner.feed(html_content)
    scanner.close()
    return scanner.action or "", scanner.fields
def generate_maze(width=25, height=35):
    """Generate a maze with the specified dimensions.

    Fetches the generator page, copies its hidden form fields, adds the
    maze parameters and posts the form back to the site.

    Args:
        width: Maze width in cells, sent as ``S1WidthTextBox``.
        height: Maze height in cells, sent as ``S1HeightTextBox``.

    Returns:
        tuple: ``(html, cookies)`` -- the HTML of the page returned by the
        form post and the cookie jar to use for the follow-up download
        request (cookies from the initial page load plus any set by the
        post).

    Raises:
        requests.RequestException: If either HTTP request fails or times out.
    """
    base_url = 'https://www.mazegenerator.net/'

    # Get the initial page and the hidden fields the form expects back
    html_content, cookies = get_maze_generator_page()
    form_action, form_data = extract_form_data(html_content)

    # Add maze parameters with correct field names from the HTML
    form_data.update({
        'ShapeDropDownList': '1',  # Rectangular
        'S1TesselationDropDownList': '1',  # Orthogonal (Square cells)
        'S1WidthTextBox': str(width),
        'S1HeightTextBox': str(height),
        'S1InnerWidthTextBox': '0',
        'S1InnerHeightTextBox': '0',
        'S1StartsAtDropDownList': '1',  # Top
        'AlgorithmParameter1TextBox': '50',  # E parameter
        'AlgorithmParameter2TextBox': '100',  # R parameter
        'GenerateButton': 'Generate',
    })

    headers = {
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
        'Content-Type': 'application/x-www-form-urlencoded',
        'Referer': 'https://www.mazegenerator.net/',
    }

    # Resolve a relative form action against the site root
    submit_url = urljoin(base_url, form_action) if form_action else base_url

    # A timeout prevents a stalled connection from hanging the script.
    response = requests.post(
        submit_url, data=form_data, cookies=cookies, headers=headers, timeout=30
    )

    # response.cookies only contains cookies set by *this* response. Merge
    # them into the jar from the initial page load so cookies issued there
    # are still sent with the download request.
    cookies.update(response.cookies)
    return response.text, cookies
def download_pdf(html_content, cookies, filename="maze.pdf"):
    """Download the maze as PDF by submitting the download form.

    Args:
        html_content: HTML of the page returned after generating the maze;
            its hidden form fields are posted back with the download request.
        cookies: Cookies to send with the request.
        filename: Path the PDF is written to.

    Returns:
        bool: True if a PDF was received and saved to ``filename``. False if
        the request failed or the response was not a PDF; in the latter case
        the response body is saved to ``download_response.html`` for
        inspection.
    """
    base_url = 'https://www.mazegenerator.net/'

    # Extract form data for the download request
    form_action, form_data = extract_form_data(html_content)
    form_data.update({
        'FileFormatSelectorList': '1',  # PDF (A4 size)
        'DownloadFileButton': 'Download',
    })

    headers = {
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
        'Content-Type': 'application/x-www-form-urlencoded',
        'Referer': 'https://www.mazegenerator.net/',
    }

    # Resolve a relative form action against the site root
    submit_url = urljoin(base_url, form_action) if form_action else base_url

    # This function reports failure through its boolean return value, so
    # network errors and timeouts are turned into False instead of escaping.
    try:
        response = requests.post(
            submit_url, data=form_data, cookies=cookies, headers=headers, timeout=60
        )
    except requests.RequestException as e:
        print(f"Download request failed: {e}")
        return False

    # Check if the response is a PDF file
    content_type = response.headers.get('content-type', '').lower()
    if 'pdf' not in content_type:
        print(f"Response is not a PDF. Content-Type: {content_type}")
        # Save the response for debugging
        with open("download_response.html", "w", encoding="utf-8") as f:
            f.write(response.text)
        print("Download response saved to download_response.html for inspection")
        return False

    with open(filename, 'wb') as f:
        f.write(response.content)
    print(f"PDF downloaded successfully as {filename}")
    return True
def get_unique_filename(base_path):
    """Return a path that does not exist yet, derived from ``base_path``.

    If ``base_path`` itself is free it is returned unchanged. Otherwise:

    * for names of the form ``maze_{width}x{height}_{number}.ext`` the
      trailing number is incremented until an unused name is found;
    * for any other name a ``_{counter}`` suffix (starting at 1) is
      appended before the extension.

    The original file extension is always kept.

    Args:
        base_path: Desired output path.

    Returns:
        str: ``base_path`` or the first unused variant of it.
    """
    if not os.path.exists(base_path):
        return base_path

    directory, filename = os.path.split(base_path)
    stem, ext = os.path.splitext(filename)

    # Expected format: maze_{width}x{height}_{number}
    match = re.fullmatch(r'maze_(\d+)x(\d+)_(\d+)', stem)
    if match:
        width, height, current_num = match.groups()
        prefix = f"maze_{width}x{height}_"
        counter = int(current_num) + 1
    else:
        # Unrecognised name: fall back to appending a numeric suffix
        prefix = f"{stem}_"
        counter = 1

    # Probe successive numbers until a free name is found
    while True:
        candidate = os.path.join(directory, f"{prefix}{counter}{ext}")
        if not os.path.exists(candidate):
            return candidate
        counter += 1
def extract_first_page(input_pdf_path, output_pdf_path):
    """Copy the first page of a PDF into a new single-page PDF.

    The output name is passed through ``get_unique_filename`` so an existing
    file is not overwritten.

    Args:
        input_pdf_path: Path of the PDF to read.
        output_pdf_path: Preferred path for the single-page PDF.

    Returns:
        The path actually written on success, or False if the input has no
        pages or any error occurred (the error is printed).
    """
    try:
        with open(input_pdf_path, 'rb') as source:
            pdf = PdfReader(source)

            # Nothing to extract from an empty document
            if len(pdf.pages) == 0:
                print("No pages found in the PDF")
                return False

            single_page = PdfWriter()
            single_page.add_page(pdf.pages[0])

            # Written while the input is still open, as in the original flow
            destination = get_unique_filename(output_pdf_path)
            with open(destination, 'wb') as sink:
                single_page.write(sink)

            print(f"Extracted first page to {destination}")
            return destination
    except Exception as e:
        print(f"Error extracting first page: {e}")
        return False
def generate_multiple_mazes(num_files, width, height):
    """Generate multiple single-page maze PDFs in the ``pdfs/`` directory.

    For each maze the full PDF is downloaded to a temporary file in the
    current directory, its first page is extracted into ``pdfs/`` and the
    temporary file is removed. A failure of one maze (including a network
    error) is reported and does not stop the remaining ones.

    Args:
        num_files: Number of mazes to generate.
        width: Maze width passed to ``generate_maze``.
        height: Maze height passed to ``generate_maze``.

    Returns:
        int: Number of mazes successfully written to ``pdfs/``.
    """
    # Create pdfs directory if it doesn't exist
    os.makedirs('pdfs', exist_ok=True)

    success_count = 0
    for i in range(num_files):
        index = i + 1
        print(f"\nGenerating maze {index}/{num_files}...")

        temp_filename = f"temp_maze_{index}.pdf"
        downloaded = False
        try:
            maze_html, maze_cookies = generate_maze(width, height)
            downloaded = download_pdf(maze_html, maze_cookies, temp_filename)
            if not downloaded:
                print(f"Failed to download maze {index}")
                continue

            # Start with this index; get_unique_filename will move on to the
            # next available number if the file already exists.
            final_filename = os.path.join('pdfs', f"maze_{width}x{height}_{index}.pdf")
            if extract_first_page(temp_filename, final_filename):
                success_count += 1
            else:
                print(f"Failed to extract first page for maze {index}")
        except requests.RequestException as e:
            # A network error on one maze must not abort the whole batch.
            print(f"Failed to download maze {index}: {e}")
        finally:
            # Only remove the temp file if this iteration created it, so a
            # pre-existing file of the same name is not deleted on failure.
            if downloaded and os.path.exists(temp_filename):
                os.remove(temp_filename)

    print(f"\nSuccessfully generated {success_count}/{num_files} maze PDFs in pdfs/ directory")
    return success_count
def merge_pdfs(pdf_directory, output_filename="merged_mazes.pdf"):
    """Merge all PDFs in a directory into one file and delete the inputs.

    Files are merged in lexicographic name order. Only files whose pages
    were read successfully are deleted afterwards; unreadable files are
    left in place. If the output file already exists in the directory its
    pages are merged like any other input, but the freshly written output
    is never deleted.

    Args:
        pdf_directory: Directory containing the ``.pdf`` files.
        output_filename: Name of the merged file, created inside
            ``pdf_directory``.

    Returns:
        bool: True if a merged PDF was written, False if there was nothing
        to merge or an error occurred (the error is printed).
    """
    try:
        # Get all PDF files in directory, sorted by name
        pdf_files = sorted(f for f in os.listdir(pdf_directory) if f.endswith('.pdf'))
        if not pdf_files:
            print("No PDF files found to merge")
            return False
        print(f"Found {len(pdf_files)} PDF files to merge")

        writer = PdfWriter()
        merged_files = []  # inputs that were read without error

        for pdf_file in pdf_files:
            pdf_path = os.path.join(pdf_directory, pdf_file)
            try:
                with open(pdf_path, 'rb') as f:
                    reader = PdfReader(f)
                    for page in reader.pages:
                        writer.add_page(page)
            except Exception as e:
                # Best effort: skip unreadable files but keep them on disk
                print(f"Error reading {pdf_file}: {e}")
                continue
            merged_files.append(pdf_file)
            print(f"Added {pdf_file} to merge")

        if not merged_files:
            # Do not write an empty PDF or delete anything
            print("No PDF files could be read; nothing merged")
            return False

        # Write merged PDF
        output_path = os.path.join(pdf_directory, output_filename)
        with open(output_path, 'wb') as f:
            writer.write(f)
        print(f"Successfully merged {len(merged_files)} PDFs into {output_path}")

        # Delete the individual files that made it into the merged PDF
        deleted_count = 0
        for pdf_file in merged_files:
            pdf_path = os.path.join(pdf_directory, pdf_file)
            if pdf_path == output_path:
                # A previous merge result was used as input; the file at
                # this path is now the new output and must be kept.
                continue
            try:
                os.remove(pdf_path)
                deleted_count += 1
                print(f"Deleted {pdf_file}")
            except OSError as e:
                print(f"Error deleting {pdf_file}: {e}")
        print(f"Deleted {deleted_count} individual PDF files")
        return True
    except Exception as e:
        print(f"Error merging PDFs: {e}")
        return False
def main():
    """Command-line entry point.

    With ``--merge`` the PDFs already present in ``pdfs/`` are merged into
    one file. Otherwise ``--number`` mazes of ``--width`` x ``--height``
    are generated.

    Returns:
        int: 0 on full success, 1 on invalid arguments or any failure.
    """
    cli = argparse.ArgumentParser(description='Generate maze PDFs with specified dimensions')
    cli.add_argument('-n', '--number', type=int, default=1, help='Number of PDF files to generate (default: 1)')
    cli.add_argument('-w', '--width', type=int, default=25, help='Maze width (default: 25)')
    cli.add_argument('--height', type=int, default=35, help='Maze height (default: 35)')
    cli.add_argument('--merge', action='store_true', help='Merge all existing PDFs in pdfs/ directory into a single file and delete individual files')
    options = cli.parse_args()

    # Merge mode short-circuits everything else
    if options.merge:
        print("Merge mode: Merging all existing PDFs in pdfs/ directory")
        if not os.path.exists('pdfs'):
            print("Error: pdfs/ directory does not exist")
            return 1
        merged_ok = merge_pdfs('pdfs')
        return 0 if merged_ok else 1

    # Generation mode
    print(f"Generating {options.number} maze(s) with width={options.width}, height={options.height}")

    # Reject out-of-range arguments before doing any work
    if options.number < 1:
        print("Error: Number of files must be at least 1")
        return 1
    if options.width < 2 or options.width > 200:
        print("Error: Width must be between 2 and 200")
        return 1
    if options.height < 2 or options.height > 200:
        print("Error: Height must be between 2 and 200")
        return 1

    generated = generate_multiple_mazes(options.number, options.width, options.height)
    if generated != options.number:
        print(f"Warning: Only {generated} out of {options.number} mazes were generated successfully")
        return 1

    print("All mazes generated successfully!")
    return 0
if __name__ == "__main__":
    # The bare `exit` helper is installed by the `site` module for
    # interactive use and may be missing (e.g. `python -S`); raising
    # SystemExit needs no import and propagates main()'s return code.
    raise SystemExit(main())