Spaces:
Sleeping
Sleeping
File size: 833 Bytes
7c71548 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 |
import PyPDF2
import io
import os
from dotenv import load_dotenv
import groq
import streamlit as st
from typing import List, Dict, Optional
from pydantic import BaseModel
# Load variables from a .env file (if one is found) into the process environment.
load_dotenv()
def extract_text_from_pdf(pdf_path: str) -> str:
    """
    Extracts text from a PDF file.

    Args:
        pdf_path (str): The path to the PDF file.

    Returns:
        str: The text of every page, concatenated in page order with no
            separator. Returns an empty string if extraction fails for any
            reason (e.g. the file cannot be opened or parsed); the error is
            printed rather than raised.
    """
    try:
        with open(pdf_path, 'rb') as file:
            reader = PyPDF2.PdfReader(file)
            # Build the result inside the `with` so the file is still open
            # while the pages are being read.
            # `or ""` keeps a page that yields no text (None or empty) from
            # raising on concatenation and discarding the other pages' text.
            return "".join(page.extract_text() or "" for page in reader.pages)
    except Exception as e:
        # Deliberate best-effort boundary: callers rely on getting "" back
        # instead of an exception, so report the problem and fall through.
        print(f"Error extracting text from PDF: {e}")
        return ""  # Return empty string on failure
|