#!/usr/bin/env python3
"""Generate printable maze PDFs by driving the web form on mazegenerator.net.

The script fetches the generator page, replays its ASP.NET form (hidden
fields included) to generate a maze, downloads the result as a PDF, keeps
only the first page, and stores it under ``pdfs/``.  With ``--merge`` it
instead concatenates the PDFs already present in ``pdfs/`` into one file.
"""
import argparse
import html
import os
import re
from urllib.parse import urljoin

import requests
from pypdf import PdfReader, PdfWriter

BASE_URL = "https://www.mazegenerator.net/"
USER_AGENT = (
    'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 '
    '(KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
)
# Seconds to wait for the server; without a timeout requests can block forever.
REQUEST_TIMEOUT = 30

# ASP.NET state fields that are looked up individually in case the generic
# hidden-input pattern misses them (e.g. different attribute order).
_ASPNET_FIELDS = ('__VIEWSTATE', '__VIEWSTATEGENERATOR', '__EVENTVALIDATION')


def get_maze_generator_page():
    """Fetch the generator landing page.

    Returns:
        A ``(html_text, cookies)`` tuple for the GET response.

    Raises:
        requests.RequestException: On connection failure or timeout.
    """
    response = requests.get(
        BASE_URL,
        headers={'User-Agent': USER_AGENT},
        timeout=REQUEST_TIMEOUT,
    )
    return response.text, response.cookies


def extract_form_data(html_content):
    """Extract the form action and hidden fields from a page.

    Args:
        html_content: HTML text of the generator page.

    Returns:
        A ``(form_action, form_data)`` tuple.  ``form_action`` is the value of
        the first ``<form action=...>`` attribute (``""`` when absent) and
        ``form_data`` maps hidden-input names to their values.  Values are
        HTML-unescaped, matching what a browser would submit.
    """
    action_match = re.search(
        r'<form[^>]*action=["\']([^"\']*)["\']', html_content, re.IGNORECASE
    )
    form_action = html.unescape(action_match.group(1)) if action_match else ""

    hidden_inputs = re.findall(
        r'<input[^>]*type=["\']hidden["\'][^>]*name=["\']([^"\']*)["\']'
        r'[^>]*value=["\']([^"\']*)["\']',
        html_content,
        re.IGNORECASE,
    )
    form_data = {name: html.unescape(value) for name, value in hidden_inputs}

    for field in _ASPNET_FIELDS:
        field_match = re.search(
            rf'name=["\']{field}["\'][^>]*value=["\']([^"\']*)["\']',
            html_content,
        )
        if field_match:
            form_data[field] = html.unescape(field_match.group(1))

    return form_action, form_data


def _post_form(form_action, form_data, cookies):
    """POST ``form_data`` to the generator form and return the response."""
    headers = {
        'User-Agent': USER_AGENT,
        'Content-Type': 'application/x-www-form-urlencoded',
        'Referer': BASE_URL,
    }
    # A relative or empty action resolves against the site root.
    submit_url = urljoin(BASE_URL, form_action) if form_action else BASE_URL
    return requests.post(
        submit_url,
        data=form_data,
        cookies=cookies,
        headers=headers,
        timeout=REQUEST_TIMEOUT,
    )


def generate_maze(width=25, height=35):
    """Generate a rectangular, orthogonal maze with the given dimensions.

    Args:
        width: Maze width in cells.
        height: Maze height in cells.

    Returns:
        A ``(html_text, cookies)`` tuple for the page showing the generated
        maze.  ``cookies`` holds the cookies of the initial GET updated with
        those set by the POST, so a follow-up request keeps the same session.

    Raises:
        requests.RequestException: On connection failure or timeout.
    """
    html_content, cookies = get_maze_generator_page()
    form_action, form_data = extract_form_data(html_content)

    # Field names come from the site's HTML form.
    form_data.update({
        'ShapeDropDownList': '1',  # Rectangular
        'S1TesselationDropDownList': '1',  # Orthogonal (Square cells)
        'S1WidthTextBox': str(width),
        'S1HeightTextBox': str(height),
        'S1InnerWidthTextBox': '0',
        'S1InnerHeightTextBox': '0',
        'S1StartsAtDropDownList': '1',  # Top
        'AlgorithmParameter1TextBox': '50',  # E parameter
        'AlgorithmParameter2TextBox': '100',  # R parameter
        'GenerateButton': 'Generate',
    })

    response = _post_form(form_action, form_data, cookies)
    # The POST response may not repeat cookies set by the GET; keep both.
    cookies.update(response.cookies)
    return response.text, cookies


def download_pdf(html_content, cookies, filename="maze.pdf"):
    """Download the generated maze as a PDF by submitting the download form.

    Args:
        html_content: HTML of the page returned by :func:`generate_maze`.
        cookies: Cookies to send with the download request.
        filename: Path the PDF is written to.

    Returns:
        ``True`` when a PDF was received and saved.  Otherwise ``False``; the
        response body is then written to ``download_response.html`` for
        inspection.

    Raises:
        requests.RequestException: On connection failure or timeout.
    """
    form_action, form_data = extract_form_data(html_content)
    form_data.update({
        'FileFormatSelectorList': '1',  # PDF (A4 size)
        'DownloadFileButton': 'Download',
    })

    response = _post_form(form_action, form_data, cookies)

    content_type = response.headers.get('content-type', '').lower()
    if 'pdf' in content_type:
        with open(filename, 'wb') as f:
            f.write(response.content)
        print(f"PDF downloaded successfully as {filename}")
        return True

    print(f"Response is not a PDF. Content-Type: {content_type}")
    # Save the response for debugging
    with open("download_response.html", "w", encoding="utf-8") as f:
        f.write(response.text)
    print("Download response saved to download_response.html for inspection")
    return False


def get_unique_filename(base_path):
    """Return a path that does not exist yet, derived from ``base_path``.

    If ``base_path`` is free it is returned unchanged.  Names of the form
    ``[prefix]maze_{width}x{height}_{number}.ext`` get their trailing number
    incremented until the name is free; any other name gets ``_1``, ``_2``,
    ... appended to its stem.  The original extension is kept.

    Args:
        base_path: Desired output path.

    Returns:
        ``base_path`` or the first free variant of it.
    """
    if not os.path.exists(base_path):
        return base_path

    directory, filename = os.path.split(base_path)
    stem, ext = os.path.splitext(filename)

    # Accept an optional prefix (e.g. a session id) before "maze_".
    match = re.fullmatch(r'(.*maze_\d+x\d+_)(\d+)', stem)
    if match:
        prefix = match.group(1)
        counter = int(match.group(2)) + 1
    else:
        prefix = f"{stem}_"
        counter = 1

    while True:
        candidate = os.path.join(directory, f"{prefix}{counter}{ext}")
        if not os.path.exists(candidate):
            return candidate
        counter += 1


def extract_first_page(input_pdf_path, output_pdf_path):
    """Write the first page of a PDF to a new, non-clashing file.

    Args:
        input_pdf_path: PDF to read.
        output_pdf_path: Desired output path; passed through
            :func:`get_unique_filename` so existing files are not overwritten.

    Returns:
        The path actually written, or ``False`` when the input has no pages
        or any error occurs (the error is printed, not raised).
    """
    try:
        with open(input_pdf_path, 'rb') as input_file:
            reader = PdfReader(input_file)
            if len(reader.pages) == 0:
                print("No pages found in the PDF")
                return False

            writer = PdfWriter()
            writer.add_page(reader.pages[0])

            unique_output_path = get_unique_filename(output_pdf_path)
            with open(unique_output_path, 'wb') as output_file:
                writer.write(output_file)

            print(f"Extracted first page to {unique_output_path}")
            return unique_output_path
    except Exception as e:
        # Best-effort: report and let the caller count this maze as failed.
        print(f"Error extracting first page: {e}")
        return False


def generate_multiple_mazes(num_files, width, height, session_id=None):
    """Generate several single-page maze PDFs in the ``pdfs/`` directory.

    A failure for one maze (network error, non-PDF response, unreadable PDF)
    is reported and does not stop the remaining mazes.

    Args:
        num_files: Number of mazes to generate.
        width: Maze width in cells.
        height: Maze height in cells.
        session_id: Optional prefix for the output file names.

    Returns:
        The number of mazes that were generated successfully.
    """
    os.makedirs('pdfs', exist_ok=True)
    session_prefix = f"{session_id}_" if session_id else ""

    success_count = 0
    for index in range(1, num_files + 1):
        print(f"\nGenerating maze {index}/{num_files}...")
        temp_filename = f"temp_maze_{index}.pdf"

        try:
            maze_html, maze_cookies = generate_maze(width, height)
            downloaded = download_pdf(maze_html, maze_cookies, temp_filename)
        except requests.RequestException as e:
            print(f"Network error while generating maze {index}: {e}")
            downloaded = False

        if not downloaded:
            print(f"Failed to download maze {index}")
            continue

        # Start with the loop index; get_unique_filename moves on to the next
        # free number when that file already exists.
        final_filename = os.path.join(
            'pdfs', f"{session_prefix}maze_{width}x{height}_{index}.pdf"
        )
        try:
            result_path = extract_first_page(temp_filename, final_filename)
        finally:
            # Clean up temp file
            if os.path.exists(temp_filename):
                os.remove(temp_filename)

        if result_path:
            success_count += 1
        else:
            print(f"Failed to extract first page for maze {index}")

    print(f"\nSuccessfully generated {success_count}/{num_files} maze PDFs in pdfs/ directory")
    return success_count


def merge_pdfs(pdf_directory, session_id=None, output_filename=None):
    """Merge PDFs in a directory into one file and delete the merged inputs.

    Only inputs that were read successfully are deleted, and the merged output
    itself is never deleted, even when a previous merge result is among the
    inputs.

    Args:
        pdf_directory: Directory containing the PDFs.
        session_id: When given, only files starting with ``{session_id}_``
            (and not ending in ``_merged_mazes.pdf``) are merged.
        output_filename: Name of the merged file inside ``pdf_directory``.
            Defaults to ``{session_id}_merged_mazes.pdf`` or
            ``merged_mazes.pdf``.

    Returns:
        ``True`` when a merged file was written, ``False`` otherwise (errors
        are printed, not raised).
    """
    try:
        all_pdfs = [f for f in os.listdir(pdf_directory) if f.endswith('.pdf')]
        if session_id:
            # Only merge files with the session prefix
            session_prefix = f"{session_id}_"
            pdf_files = [
                f for f in all_pdfs
                if f.startswith(session_prefix)
                and not f.endswith("_merged_mazes.pdf")
            ]
            output_filename = output_filename or f"{session_id}_merged_mazes.pdf"
        else:
            # Merge all PDF files (backward compatibility)
            pdf_files = all_pdfs
            output_filename = output_filename or "merged_mazes.pdf"

        pdf_files.sort()  # Sort files by name

        if not pdf_files:
            print("No PDF files found to merge")
            return False

        print(f"Found {len(pdf_files)} PDF files to merge")

        writer = PdfWriter()
        merged_files = []
        for pdf_file in pdf_files:
            pdf_path = os.path.join(pdf_directory, pdf_file)
            try:
                # Parse the page list before adding anything so an unreadable
                # file is less likely to contribute only some of its pages.
                pages = list(PdfReader(pdf_path).pages)
                for page in pages:
                    writer.add_page(page)
            except Exception as e:
                print(f"Error reading {pdf_file}: {e}")
                continue
            merged_files.append(pdf_file)
            print(f"Added {pdf_file} to merge")

        if not merged_files:
            print("No PDF files could be read; nothing was merged")
            return False

        # Write to a temporary name first so a failed write cannot destroy an
        # existing merged file (which may itself be one of the inputs).
        output_path = os.path.join(pdf_directory, output_filename)
        temp_path = output_path + ".tmp"
        try:
            with open(temp_path, 'wb') as f:
                writer.write(f)
            os.replace(temp_path, output_path)
        finally:
            if os.path.exists(temp_path):
                os.remove(temp_path)

        print(f"Successfully merged {len(merged_files)} PDFs into {output_path}")

        # Delete individual PDF files, but never the file just written.
        output_abspath = os.path.abspath(output_path)
        deleted_count = 0
        for pdf_file in merged_files:
            pdf_path = os.path.join(pdf_directory, pdf_file)
            if os.path.abspath(pdf_path) == output_abspath:
                continue
            try:
                os.remove(pdf_path)
                deleted_count += 1
                print(f"Deleted {pdf_file}")
            except OSError as e:
                print(f"Error deleting {pdf_file}: {e}")

        print(f"Deleted {deleted_count} individual PDF files")
        return True

    except Exception as e:
        print(f"Error merging PDFs: {e}")
        return False


def main():
    """Parse the command line and run generation or merge mode.

    Returns:
        Process exit status: ``0`` on full success, ``1`` otherwise.
    """
    parser = argparse.ArgumentParser(description='Generate maze PDFs with specified dimensions')
    parser.add_argument('-n', '--number', type=int, default=1,
                        help='Number of PDF files to generate (default: 1)')
    parser.add_argument('-w', '--width', type=int, default=25,
                        help='Maze width (default: 25)')
    # No short option: -h is taken by argparse's help.
    parser.add_argument('--height', type=int, default=35,
                        help='Maze height (default: 35)')
    parser.add_argument('--merge', action='store_true',
                        help='Merge all existing PDFs in pdfs/ directory into a single file and delete individual files')

    args = parser.parse_args()

    # Handle merge mode
    if args.merge:
        print("Merge mode: Merging all existing PDFs in pdfs/ directory")
        if not os.path.exists('pdfs'):
            print("Error: pdfs/ directory does not exist")
            return 1
        return 0 if merge_pdfs('pdfs', session_id=None) else 1

    # Handle generation mode
    print(f"Generating {args.number} maze(s) with width={args.width}, height={args.height}")

    # Validate parameters
    if args.number < 1:
        print("Error: Number of files must be at least 1")
        return 1

    if not (2 <= args.width <= 200):
        print("Error: Width must be between 2 and 200")
        return 1

    if not (2 <= args.height <= 200):
        print("Error: Height must be between 2 and 200")
        return 1

    # Generate the mazes
    success_count = generate_multiple_mazes(args.number, args.width, args.height)

    if success_count == args.number:
        print("All mazes generated successfully!")
        return 0

    print(f"Warning: Only {success_count} out of {args.number} mazes were generated successfully")
    return 1


if __name__ == "__main__":
    # SystemExit instead of exit(): the latter is a helper injected by the
    # site module and is not guaranteed to exist.
    raise SystemExit(main())