Yaz Hobooti
Increase PDF resolution: DPI from 300 to 600, scaling factors improved for better OCR and barcode detection
e7a28e8
| #!/usr/bin/env python3 | |
| """ | |
| Test script to verify PDF Comparison Tool setup | |
| """ | |
| import sys | |
| import importlib | |
def test_imports(packages=None):
    """Check that each required package can be imported.

    Args:
        packages: Optional iterable of importable module names to check.
            When omitted, the packages this tool depends on are checked.

    Returns:
        list: Names of the packages that failed to import, in the order
        they were checked. Empty when every import succeeded.
    """
    if packages is None:
        packages = [
            'flask',
            'cv2',
            'numpy',
            'PIL',
            'pytesseract',
            'pdf2image',
            'pyzbar',
            'spellchecker',
            'nltk',
            'skimage',
            'matplotlib',
            'pandas',
        ]

    print("Testing package imports...")
    failed_imports = []
    for package in packages:
        try:
            importlib.import_module(package)
        except Exception as e:
            # Deliberately broad: packages backed by native libraries can
            # fail with errors other than ImportError (for example OSError
            # when a shared library cannot be loaded). A diagnostic script
            # should report that and keep checking, not abort.
            print(f"❌ {package}: {e}")
            failed_imports.append(package)
        else:
            print(f"✅ {package}")
    return failed_imports
def test_tesseract():
    """Check that Tesseract OCR is reachable through pytesseract.

    Returns:
        bool: True when pytesseract imports and reports a Tesseract
        version. False otherwise, after printing installation hints.
    """
    print("\nTesting Tesseract OCR...")
    try:
        # Imported here so a missing pytesseract package is reported as a
        # failed check instead of breaking the import of this script.
        import pytesseract

        # Asking for the version is the probe for the Tesseract install.
        version = pytesseract.get_tesseract_version()
    except Exception as e:
        print(f"❌ Tesseract not found: {e}")
        print("Please install Tesseract OCR:")
        print(" macOS: brew install tesseract")
        print(" Ubuntu: sudo apt-get install tesseract-ocr")
        print(" Windows: Download from https://github.com/UB-Mannheim/tesseract/wiki")
        return False

    print(f"✅ Tesseract version: {version}")
    return True
def test_pdf_comparator():
    """Check that the PDFComparator class can be imported and instantiated.

    Returns:
        bool: True when ``PDFComparator()`` is constructed without raising,
        False when the import or the constructor fails.
    """
    print("\nTesting PDFComparator...")
    try:
        from pdf_comparator import PDFComparator

        # Only construction is being checked; the instance is not needed.
        PDFComparator()
    except Exception as e:
        print(f"❌ PDFComparator error: {e}")
        return False

    print("✅ PDFComparator initialized successfully")
    return True
def test_flask_app():
    """Check that the Flask application object can be imported.

    Returns:
        bool: True when ``from app import app`` succeeds, False when it
        raises for any reason.
    """
    print("\nTesting Flask application...")
    try:
        # The import itself is the check; the name is not used afterwards.
        from app import app  # noqa: F401
    except Exception as e:
        print(f"❌ Flask app error: {e}")
        return False

    print("✅ Flask app imported successfully")
    return True
def main():
    """Run every setup check, print a summary, and exit non-zero on failure.

    Raises:
        SystemExit: With status 1 when any check did not pass.
    """
    print("PDF Comparison Tool - Setup Test")
    print("=" * 40)

    # Run all checks up front so the summary reflects every result, even
    # when an early check fails.
    failed_imports = test_imports()
    tesseract_ok = test_tesseract()
    comparator_ok = test_pdf_comparator()
    flask_ok = test_flask_app()

    print("\n" + "=" * 40)
    print("SETUP SUMMARY")
    print("=" * 40)

    if failed_imports:
        print(f"❌ Missing packages: {', '.join(failed_imports)}")
        print("Run: pip install -r requirements.txt")
    else:
        print("✅ All packages imported successfully")

    # (passed, message on success, message on failure) per boolean check.
    checks = (
        (tesseract_ok, "Tesseract OCR is available", "Tesseract OCR is not available"),
        (comparator_ok, "PDFComparator is working", "PDFComparator has issues"),
        (flask_ok, "Flask application is ready", "Flask application has issues"),
    )
    for passed, ok_message, fail_message in checks:
        print(f"✅ {ok_message}" if passed else f"❌ {fail_message}")

    all_ok = not failed_imports and all(passed for passed, _, _ in checks)
    if all_ok:
        print("\n🎉 Setup is complete! You can run the application with:")
        print(" python app.py")
    else:
        print("\n⚠️ Setup is incomplete. Please fix the issues above.")
        # Non-zero status lets shells and CI detect the failed setup.
        sys.exit(1)
# Run the setup checks only when this file is executed as a script, so
# importing it does not trigger them.
if __name__ == "__main__":
    main()