import os
import tempfile
from typing import List

from langchain_core.documents import Document as LangchainDocument
from llama_index import Document
from llama_parse import LlamaParse, ResultType

from _utils.langchain_utils.splitter_util import SplitterUtils
from setup.logging import Axiom

# LlamaParse API keys, tried in order until one succeeds (either may be unset)
llama_parser_keys = [
    os.getenv("LLAMA_CLOUD_API_KEY_POPS"),
    os.getenv("LLAMA_CLOUD_API_KEY_PEIXE"),
]

def handle_pdf_files_from_serializer(files, axiom_instance: Axiom):
    listaPDFs = []
    for file in files:
        file_extension = file.name.split(".")[-1]
        file.seek(0)
        # Create a temporary file to save the uploaded PDF
        with tempfile.NamedTemporaryFile(
            delete=False, suffix=f".{file_extension}"
        ) as temp_file:
            # Write the uploaded file content to the temporary file
            for chunk in file.chunks():
                temp_file.write(chunk)
            temp_file_path = temp_file.name  # Get the path of the temporary file
        listaPDFs.append(temp_file_path)
    axiom_instance.send_axiom(f"listaPDFs: {listaPDFs}")
    return listaPDFs
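
# Usage sketch (an assumption, not part of the original module): the .name,
# .seek() and .chunks() calls above imply Django UploadedFile objects, e.g.
# from a DRF serializer. The temp paths returned here are meant to be cleaned
# up with remove_pdf_temp_files below:
#
#     lista_pdfs = handle_pdf_files_from_serializer(files, axiom_instance)
#     try:
#         ...  # parse the PDFs
#     finally:
#         remove_pdf_temp_files(lista_pdfs)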

def remove_pdf_temp_files(listaPDFs):
    print("\nREMOVING TEMPORARY PDF FILES")
    for file in listaPDFs:
        os.remove(file)

async def return_document_list_with_llama_parser(file: str):
    for key in llama_parser_keys:
        documents: List[LangchainDocument] = []
        if key:
            parser = LlamaParse(
                api_key=key,
                result_type=ResultType.JSON,  # Options: 'text', 'markdown', 'json', 'structured'
                language="pt",
                verbose=True,
            )
            try:
                parsed_document = await parser.aget_json(file)
            except Exception:
                print(f"Error with llama parser key ending with {key[-4:]}")
                continue  # Skip to the next key
            if len(parsed_document) == 0:
                continue
            for doc in parsed_document[0].get("pages"):  # type: ignore
                # documents.append(doc.to_langchain_format())
                langchain_document = LangchainDocument(
                    page_content=doc.get("md"),  # type: ignore
                    metadata={
                        "page": doc.get("page"),  # type: ignore
                        # **doc.get("metadata", {}),  # type: ignore
                    },  # Include page number in metadata
                )
                documents.append(langchain_document)
            return documents
    # Only reached if the loop above finished without ever returning a value
    raise ValueError("SOMETHING WENT WRONG IN THE LLAMA PARSE PARSER")
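

# Minimal usage sketch (an assumption, not part of the original module): drive
# the async parser from a script entry point. "example.pdf" is a hypothetical
# path such as one returned by handle_pdf_files_from_serializer.
if __name__ == "__main__":
    import asyncio

    async def _demo() -> None:
        docs = await return_document_list_with_llama_parser("example.pdf")  # hypothetical path
        for doc in docs:
            print(doc.metadata["page"], doc.page_content[:80])

    asyncio.run(_demo())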