Spaces:

Soufianesejjari
/

ibuilder

Sleeping

ibuilder / pdfextractor.py

working

7c71548 8 months ago

833 Bytes

	import PyPDF2
	import io
	import os
	from dotenv import load_dotenv
	import groq
	import streamlit as st
	from typing import List, Dict, Optional
	from pydantic import BaseModel

	# Runs at import time: python-dotenv's load_dotenv() loads variables from a
	# .env file (if one is found) into the process environment.
	load_dotenv()

	def extract_text_from_pdf(pdf_path: str) -> str:
	    """Extract the text of every page of a PDF file.

	    Args:
	        pdf_path: The path to the PDF file.

	    Returns:
	        The text of all pages concatenated in page order, with no
	        separator inserted between pages. Returns an empty string if the
	        file cannot be opened or if reading/extraction raises.
	    """
	    try:
	        with open(pdf_path, 'rb') as file:
	            reader = PyPDF2.PdfReader(file)
	            # A page without extractable text (e.g. a scanned image) may
	            # yield None or "" depending on the PyPDF2 version; treat both
	            # as empty so one such page does not discard the whole document.
	            page_texts = [page.extract_text() or "" for page in reader.pages]
	    except Exception as e:
	        # Deliberate best-effort boundary: callers rely on getting "" back
	        # instead of an exception, so report the problem and bail out.
	        print(f"Error extracting text from PDF: {e}")
	        return ""  # Return empty string on failure

	    # Join once instead of repeated += to avoid quadratic string building.
	    return "".join(page_texts)