""" Utilities for working with PDF files """ import PyPDF2 import io import os from config import get_settings import logging settings = get_settings() logger = logging.getLogger(__name__) def extract_text_from_pdf(pdf_path): """ Extracts text from a PDF file. Args: pdf_path (str): The path to the PDF file. Returns: str: The extracted text. Returns an empty string if extraction fails. """ text = "" try: with open(pdf_path, 'rb') as file: reader = PyPDF2.PdfReader(file) for page_num in range(len(reader.pages)): page = reader.pages[page_num] text += page.extract_text() if not text.strip(): logger.warning(f"Extracted empty text from PDF: {pdf_path}") logger.info(f"Extracted text are {text}") return text except Exception as e: logger.error(f"Error extracting text from PDF: {e}") return "" # Return empty string on failure def save_temp_pdf(file_data, filename="temp.pdf"): """ Save uploaded file data to a temporary PDF file Args: file_data: The binary data of the file filename: The name to save the file as Returns: Path to the saved file """ # Use the /app/data directory for saving temporary files filepath = os.path.join("/app/data", filename) try: with open(filepath, 'wb') as f: f.write(file_data) return filepath except Exception as e: logger.error(f"Error saving temporary PDF: {e}") raise