Space: summarize text
app.py
CHANGED
@@ -6,8 +6,8 @@ from pydantic_ai.models.groq import GroqModel
 import nest_asyncio
 from pydantic_ai.messages import ModelMessage
 import pdfplumber
-
-
+from transformers import pipeline
+import torch
 import os
 import presentation as customClass
 from streamlit_pdf_viewer import pdf_viewer
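The two new imports back the summarizer this change introduces below. For reference, the `transformers` summarization pipeline can be exercised on its own like this (a minimal sketch; the variable names are illustrative, not from app.py):

    from transformers import pipeline

    # Same checkpoint the diff wires in below.
    summarizer = pipeline("summarization", model="facebook/bart-large-cnn")

    text = "Text extracted from the PDF goes here ..."
    # The pipeline returns a list of dicts with a 'summary_text' key.
    summary = summarizer(text, max_length=400, min_length=100, truncation=True, do_sample=False)
    print(summary[0]["summary_text"])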
@@ -26,14 +26,14 @@ model = GroqModel('llama-3.1-70b-versatile', api_key = api_key)
 
 
 # to summarize
-
+summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
 #summarizer = pipeline('text2text-generation', model='describeai/gemini')
 #nlpaueb/legal-bert-base-uncased
 
 
 
 
-def split_into_token_chunks(text: str, max_tokens: int =
+def split_into_token_chunks(text: str, max_tokens: int = 5000) -> list:
     """
     Splits a long string into chunks of a specified maximum number of tokens (words).
 
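The diff shows only the signature and docstring of `split_into_token_chunks`; per the docstring it treats whitespace-separated words as tokens. A body consistent with that description could look like this (an assumed implementation, not the one in the file):

    def split_into_token_chunks(text: str, max_tokens: int = 5000) -> list:
        """Splits a long string into chunks of a specified maximum number of tokens (words)."""
        words = text.split()
        return [" ".join(words[i:i + max_tokens])
                for i in range(0, len(words), max_tokens)]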
@@ -87,16 +87,13 @@ async def ppt_content(data):
     # for i, chunk in enumerate(listOfString):
     #     print(f"Chunk {i}:\n{chunk}\n")
 
-
-
-
-
-
-
-
-    message_history = result.all_messages()
-    result_data.append(result.data)
-    print(result_data[-1])
+
+    result = agent.run_sync(user_prompt = f"Create me a powerpoint presentation {data}",
+                            message_history = message_history,
+                            deps=deps,
+                            )
+
+    print(result.data)
 
 
 
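For context, `agent.run_sync` is PydanticAI's synchronous run entry point: it takes the user prompt plus optional `message_history` (prior `ModelMessage`s, e.g. from a previous run's `result.all_messages()`, matching the removed line above) and `deps` for the agent's tools, and returns a result whose payload is `result.data`. A standalone sketch of that conversation-threading pattern (the Agent construction here is assumed, not copied from app.py):

    import os
    from pydantic_ai import Agent
    from pydantic_ai.models.groq import GroqModel

    model = GroqModel('llama-3.1-70b-versatile', api_key=os.environ["GROQ_API_KEY"])
    agent = Agent(model)

    # First turn: no history yet.
    result = agent.run_sync("Create me a powerpoint presentation about solar power")
    print(result.data)

    # Follow-up turn: feed the accumulated messages back in.
    followup = agent.run_sync("Condense it to five slides",
                              message_history=result.all_messages())
    print(followup.data)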
@@ -109,12 +106,16 @@ async def ppt_content(data):
 
 
 def ai_ppt(data):
-    #call summerizer to summerize pdf
-
+    #call summerizer to summerize pdf
+    summary_texts = []
+    listOfString = split_into_token_chunks("".join(data))
+    for x in listOfString:
+        summary = summarizer("".join(data), max_length=400, min_length=100, truncation=True, do_sample=False)
+        summary_texts.append([item['summary_text'] for item in summary])
+    print(summary_texts)
 
-    # summary_texts = [item['summary_text'] for item in summary]
     #summary_texts = [item['generated_text'] for item in summary]
-    asyncio.run(ppt_content(data=
+    asyncio.run(ppt_content(data=summary_texts))
 
 
 def extract_data(feed):
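One issue worth flagging in the new `ai_ppt`: the loop iterates over the chunks in `listOfString` but never uses the loop variable `x`; every pass re-summarizes the entire `"".join(data)` string, so the chunking has no effect and the same summary is appended each time. The presumably intended per-chunk version (a suggested fix, not what the diff contains) would be:

    def ai_ppt(data):
        # Summarize the PDF text chunk by chunk, then build slides from the summaries.
        summary_texts = []
        for chunk in split_into_token_chunks("".join(data)):
            summary = summarizer(chunk, max_length=400, min_length=100,
                                 truncation=True, do_sample=False)
            summary_texts.extend(item['summary_text'] for item in summary)
        print(summary_texts)
        asyncio.run(ppt_content(data=summary_texts))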