maze-generator / main.py
utarn's picture
Initial commit of maze generator app
f48fe4d
raw
history blame
12.5 kB
#!/usr/bin/env python3
import argparse
import os
import re
from html.parser import HTMLParser
from urllib.parse import urljoin

import requests
from pypdf import PdfReader, PdfWriter
def get_maze_generator_page(timeout=30):
    """Fetch the maze generator landing page.

    Args:
        timeout: Seconds to wait for the server before giving up.

    Returns:
        A ``(html, cookies)`` tuple: the response body as text and the
        cookies attached to the response.

    Raises:
        requests.exceptions.RequestException: On connection failures or
            when the timeout expires.
    """
    url = "https://www.mazegenerator.net/"
    headers = {
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
    }
    # requests never times out on its own; without an explicit timeout a
    # stalled server would block the script indefinitely.
    response = requests.get(url, headers=headers, timeout=timeout)
    return response.text, response.cookies
# ASP.NET state fields that are picked up by name even when the element
# carrying them is not declared as a hidden input.
_ASPNET_STATE_FIELDS = ('__VIEWSTATE', '__VIEWSTATEGENERATOR', '__EVENTVALIDATION')


class _FormFieldParser(HTMLParser):
    """Collect the first form action and the hidden/state fields of a page."""

    def __init__(self):
        super().__init__(convert_charrefs=True)
        self.form_action = None   # action of the first <form> that declares one
        self.hidden_fields = {}   # name -> value for every hidden <input>
        self.state_fields = {}    # first value seen for each ASP.NET state field

    def handle_starttag(self, tag, attrs):
        # HTMLParser lower-cases tag and attribute names and decodes
        # character references in attribute values before calling us.
        attributes = dict(attrs)

        if tag == 'form':
            if self.form_action is None and 'action' in attributes:
                self.form_action = attributes['action'] or ''
            return

        name = attributes.get('name')
        if not name:
            return
        value = attributes.get('value')

        if tag == 'input' and (attributes.get('type') or '').lower() == 'hidden':
            # A hidden input without a value is submitted as an empty string.
            self.hidden_fields[name] = value or ''

        if name in _ASPNET_STATE_FIELDS and value is not None:
            self.state_fields.setdefault(name, value)


def extract_form_data(html_content):
    """Extract the form action and hidden fields from an HTML page.

    The page is parsed with the standard-library HTML parser, so fields are
    found regardless of attribute order or quoting style, and HTML entities
    in the action URL and field values (e.g. ``&amp;``) are decoded.

    Args:
        html_content: The HTML document as a string.

    Returns:
        A ``(form_action, form_data)`` tuple. ``form_action`` is the
        ``action`` of the first form that declares one, or ``""`` when none
        does. ``form_data`` maps each hidden input's name to its value; the
        ``__VIEWSTATE``, ``__VIEWSTATEGENERATOR`` and ``__EVENTVALIDATION``
        entries take the first value found for that name on the page.
    """
    parser = _FormFieldParser()
    parser.feed(html_content)
    parser.close()

    form_data = dict(parser.hidden_fields)
    # The first occurrence of each state field overrides any later hidden
    # input that reuses the same name.
    form_data.update(parser.state_fields)
    return parser.form_action or "", form_data
def generate_maze(width=25, height=35, timeout=30):
    """Ask the site to generate a rectangular maze of the given size.

    Loads the generator page, copies its hidden form fields, fills in the
    maze parameters and submits the form.

    Args:
        width: Maze width, sent as ``S1WidthTextBox``.
        height: Maze height, sent as ``S1HeightTextBox``.
        timeout: Seconds to wait for the form submission before giving up.

    Returns:
        A ``(html, cookies)`` tuple taken from the response to the
        submission.

    Raises:
        requests.exceptions.RequestException: On connection failures or
            when the timeout expires.
    """
    base_url = 'https://www.mazegenerator.net/'

    # Get the initial page and the hidden fields the form expects back
    html_content, cookies = get_maze_generator_page()
    form_action, form_data = extract_form_data(html_content)

    # Add maze parameters with correct field names from the HTML
    form_data.update({
        'ShapeDropDownList': '1',  # Rectangular
        'S1TesselationDropDownList': '1',  # Orthogonal (Square cells)
        'S1WidthTextBox': str(width),
        'S1HeightTextBox': str(height),
        'S1InnerWidthTextBox': '0',
        'S1InnerHeightTextBox': '0',
        'S1StartsAtDropDownList': '1',  # Top
        'AlgorithmParameter1TextBox': '50',  # E parameter
        'AlgorithmParameter2TextBox': '100',  # R parameter
        'GenerateButton': 'Generate'
    })

    headers = {
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
        'Content-Type': 'application/x-www-form-urlencoded',
        'Referer': 'https://www.mazegenerator.net/'
    }

    # urljoin returns the base URL unchanged when the action is empty, so no
    # separate fallback branch is needed.
    submit_url = urljoin(base_url, form_action)

    # An explicit timeout keeps a stalled server from hanging the script.
    response = requests.post(
        submit_url,
        data=form_data,
        cookies=cookies,
        headers=headers,
        timeout=timeout,
    )
    return response.text, response.cookies
def download_pdf(html_content, cookies, filename="maze.pdf", timeout=30):
    """Download the generated maze as a PDF by submitting the download form.

    Args:
        html_content: HTML of the page returned after generating a maze; its
            hidden form fields are sent back with the download request.
        cookies: Cookies to send with the download request.
        filename: Path the PDF is written to.
        timeout: Seconds to wait for the download before giving up.

    Returns:
        True when the response's Content-Type mentions ``pdf`` and the body
        was written to ``filename``. False otherwise; in that case the
        response body is saved to ``download_response.html`` for inspection.

    Raises:
        requests.exceptions.RequestException: On connection failures or
            when the timeout expires.
    """
    base_url = 'https://www.mazegenerator.net/'

    # Extract form data for the download request
    form_action, form_data = extract_form_data(html_content)

    # Add download parameters
    form_data.update({
        'FileFormatSelectorList': '1',  # PDF (A4 size)
        'DownloadFileButton': 'Download'
    })

    headers = {
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
        'Content-Type': 'application/x-www-form-urlencoded',
        'Referer': 'https://www.mazegenerator.net/'
    }

    # urljoin returns the base URL unchanged when the action is empty.
    submit_url = urljoin(base_url, form_action)

    # An explicit timeout keeps a stalled server from hanging the script.
    response = requests.post(
        submit_url,
        data=form_data,
        cookies=cookies,
        headers=headers,
        timeout=timeout,
    )

    # Check if the response is a PDF file
    content_type = response.headers.get('content-type', '').lower()
    if 'pdf' not in content_type:
        print(f"Response is not a PDF. Content-Type: {content_type}")
        # Save the response for debugging
        with open("download_response.html", "w", encoding="utf-8") as f:
            f.write(response.text)
        print("Download response saved to download_response.html for inspection")
        return False

    with open(filename, 'wb') as f:
        f.write(response.content)
    print(f"PDF downloaded successfully as {filename}")
    return True
def get_unique_filename(base_path):
    """Return a path that does not exist yet, derived from ``base_path``.

    If ``base_path`` is free it is returned unchanged. Otherwise:

    * names shaped like ``maze_{width}x{height}_{number}`` get the trailing
      number incremented until an unused name is found;
    * any other name gets ``_1``, ``_2``, ... appended to its stem.

    The original directory and file extension are always preserved.

    Args:
        base_path: Desired output path.

    Returns:
        ``base_path`` itself, or the first unused variant in the same
        directory.
    """
    if not os.path.exists(base_path):
        return base_path

    directory, filename = os.path.split(base_path)
    stem, ext = os.path.splitext(filename)

    # Expected format: maze_{width}x{height}_{number}
    match = re.match(r'maze_(\d+)x(\d+)_(\d+)$', stem)
    if match:
        width, height, current_num = match.groups()
        prefix = f"maze_{width}x{height}"
        number = int(current_num) + 1
    else:
        # Unrecognised name: fall back to appending a counter to the stem
        prefix = stem
        number = 1

    while True:
        # Reuse the caller's extension rather than assuming ".pdf"
        candidate = os.path.join(directory, f"{prefix}_{number}{ext}")
        if not os.path.exists(candidate):
            return candidate
        number += 1
def extract_first_page(input_pdf_path, output_pdf_path):
    """Copy the first page of a PDF into a new single-page PDF.

    The output name is passed through ``get_unique_filename`` so an existing
    file is never overwritten.

    Args:
        input_pdf_path: Path of the PDF to read.
        output_pdf_path: Preferred path for the single-page PDF.

    Returns:
        The path actually written on success, or False when the input has
        no pages or any error occurs (the error is printed, not raised).
    """
    try:
        with open(input_pdf_path, 'rb') as source:
            pdf = PdfReader(source)
            page_writer = PdfWriter()

            if len(pdf.pages) == 0:
                print("No pages found in the PDF")
                return False

            # Keep only the first page
            page_writer.add_page(pdf.pages[0])

            # Pick a destination that does not clash with an existing file
            destination = get_unique_filename(output_pdf_path)
            with open(destination, 'wb') as sink:
                page_writer.write(sink)

            print(f"Extracted first page to {destination}")
            return destination
    except Exception as e:
        print(f"Error extracting first page: {e}")
        return False
def generate_multiple_mazes(num_files, width, height):
    """Generate several single-page maze PDFs in the ``pdfs/`` directory.

    For each maze the full PDF is downloaded to a temporary file in the
    current directory, its first page is extracted into ``pdfs/``, and the
    temporary file is removed. A failure on one maze (including a network
    error) is reported and the loop moves on to the next maze.

    Args:
        num_files: Number of mazes to generate.
        width: Maze width passed to ``generate_maze``.
        height: Maze height passed to ``generate_maze``.

    Returns:
        The number of mazes successfully written to ``pdfs/``.
    """
    # Create pdfs directory if it doesn't exist
    os.makedirs('pdfs', exist_ok=True)

    success_count = 0
    for index in range(1, num_files + 1):
        print(f"\nGenerating maze {index}/{num_files}...")
        temp_filename = f"temp_maze_{index}.pdf"

        try:
            maze_html, maze_cookies = generate_maze(width, height)
            downloaded = download_pdf(maze_html, maze_cookies, temp_filename)
        except requests.RequestException as error:
            # A transient network problem should cost one maze, not the
            # whole batch.
            print(f"Network error while generating maze {index}: {error}")
            downloaded = False

        if not downloaded:
            print(f"Failed to download maze {index}")
            continue

        try:
            # Start with the loop index; extract_first_page picks the next
            # free number if that name is already taken.
            final_filename = os.path.join('pdfs', f"maze_{width}x{height}_{index}.pdf")
            result_path = extract_first_page(temp_filename, final_filename)
            if result_path:
                success_count += 1
            else:
                print(f"Failed to extract first page for maze {index}")
        finally:
            # Clean up the temp file even if extraction raised
            if os.path.exists(temp_filename):
                os.remove(temp_filename)

    print(f"\nSuccessfully generated {success_count}/{num_files} maze PDFs in pdfs/ directory")
    return success_count
def merge_pdfs(pdf_directory, output_filename="merged_mazes.pdf"):
    """Merge the PDFs in a directory into one file and delete the inputs.

    Files are merged in name order. An existing output file from an earlier
    merge is included as an input so its pages are kept, but it is never
    deleted afterwards. Files that could not be read are skipped and left on
    disk. The merged document is written to a temporary file first and then
    moved into place, so a failed write does not destroy an existing output.

    Args:
        pdf_directory: Directory containing the ``.pdf`` files.
        output_filename: Name of the merged file, created inside
            ``pdf_directory``.

    Returns:
        True when a merged file was written, False when there was nothing
        to merge or an error occurred (the error is printed, not raised).
    """
    try:
        # Get all PDF files in directory, sorted by name
        pdf_files = sorted(f for f in os.listdir(pdf_directory) if f.endswith('.pdf'))
        if not pdf_files:
            print("No PDF files found to merge")
            return False
        print(f"Found {len(pdf_files)} PDF files to merge")

        writer = PdfWriter()
        merged_files = []
        for pdf_file in pdf_files:
            pdf_path = os.path.join(pdf_directory, pdf_file)
            try:
                with open(pdf_path, 'rb') as f:
                    reader = PdfReader(f)
                    for page in reader.pages:
                        writer.add_page(page)
            except Exception as e:
                # Broad on purpose: one unreadable file must not abort the
                # merge. It is skipped and kept on disk.
                print(f"Error reading {pdf_file}: {e}")
                continue
            merged_files.append(pdf_file)
            print(f"Added {pdf_file} to merge")

        if not merged_files:
            print("No PDF files could be read; nothing was merged")
            return False

        # Write to a temporary name first, then move it into place.
        output_path = os.path.join(pdf_directory, output_filename)
        temp_path = output_path + ".tmp"
        try:
            with open(temp_path, 'wb') as f:
                writer.write(f)
            os.replace(temp_path, output_path)
        finally:
            # Only present if the write or the replace failed
            if os.path.exists(temp_path):
                os.remove(temp_path)
        print(f"Successfully merged {len(merged_files)} PDFs into {output_path}")

        # Delete only the inputs that made it into the merge, and never the
        # merged file itself.
        output_abspath = os.path.normcase(os.path.abspath(output_path))
        deleted_count = 0
        for pdf_file in merged_files:
            pdf_path = os.path.join(pdf_directory, pdf_file)
            if os.path.normcase(os.path.abspath(pdf_path)) == output_abspath:
                continue
            try:
                os.remove(pdf_path)
                deleted_count += 1
                print(f"Deleted {pdf_file}")
            except OSError as e:
                print(f"Error deleting {pdf_file}: {e}")
        print(f"Deleted {deleted_count} individual PDF files")
        return True
    except Exception as e:
        print(f"Error merging PDFs: {e}")
        return False
def main(argv=None):
    """Command-line entry point.

    With ``--merge``, merges the PDFs already present in ``pdfs/``.
    Otherwise validates the arguments and generates the requested number of
    mazes.

    Args:
        argv: Argument list to parse. ``None`` (the default) makes argparse
            read ``sys.argv[1:]``.

    Returns:
        Process exit code: 0 on full success, 1 on invalid arguments, a
        missing ``pdfs/`` directory in merge mode, or any failed maze.
    """
    parser = argparse.ArgumentParser(description='Generate maze PDFs with specified dimensions')
    parser.add_argument('-n', '--number', type=int, default=1, help='Number of PDF files to generate (default: 1)')
    parser.add_argument('-w', '--width', type=int, default=25, help='Maze width (default: 25)')
    parser.add_argument('--height', type=int, default=35, help='Maze height (default: 35)')
    parser.add_argument('--merge', action='store_true', help='Merge all existing PDFs in pdfs/ directory into a single file and delete individual files')
    args = parser.parse_args(argv)

    # Handle merge mode
    if args.merge:
        print("Merge mode: Merging all existing PDFs in pdfs/ directory")
        if not os.path.exists('pdfs'):
            print("Error: pdfs/ directory does not exist")
            return 1
        return 0 if merge_pdfs('pdfs') else 1

    # Validate parameters before announcing any work
    if args.number < 1:
        print("Error: Number of files must be at least 1")
        return 1
    if not (2 <= args.width <= 200):
        print("Error: Width must be between 2 and 200")
        return 1
    if not (2 <= args.height <= 200):
        print("Error: Height must be between 2 and 200")
        return 1

    # Handle generation mode
    print(f"Generating {args.number} maze(s) with width={args.width}, height={args.height}")
    success_count = generate_multiple_mazes(args.number, args.width, args.height)

    if success_count == args.number:
        print("All mazes generated successfully!")
        return 0

    print(f"Warning: Only {success_count} out of {args.number} mazes were generated successfully")
    return 1
if __name__ == "__main__":
    # The bare `exit` helper is injected by the `site` module and is missing
    # under `python -S` or in frozen builds; raising SystemExit always works
    # and passes main()'s return value on as the process exit code.
    raise SystemExit(main())