Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
|
@@ -781,46 +781,32 @@ with gr.Blocks(css="""
|
|
| 781 |
|
| 782 |
# PDF Processing Handlers
|
| 783 |
def handle_pdf_process(pdf, fmt, ctx_size):
|
| 784 |
-
"""Process PDF and
|
| 785 |
if not pdf:
|
| 786 |
-
return (
|
| 787 |
-
|
| 788 |
-
"", # processed_text
|
| 789 |
-
"", # pdf_content
|
| 790 |
-
[], # snippets
|
| 791 |
-
gr.update(choices=[], value=None), # snippet_selector
|
| 792 |
-
None # download_files
|
| 793 |
-
)
|
| 794 |
-
|
| 795 |
try:
|
| 796 |
-
# Extract and format text
|
| 797 |
text = extract_text_from_pdf(pdf.name)
|
| 798 |
if text.startswith("Error"):
|
| 799 |
-
return (
|
| 800 |
-
|
| 801 |
-
|
| 802 |
-
|
| 803 |
-
[],
|
| 804 |
-
gr.update(choices=[], value=None),
|
| 805 |
-
None
|
| 806 |
-
)
|
| 807 |
-
|
| 808 |
-
formatted_text = format_content(text, fmt)
|
| 809 |
snippets_list = split_into_snippets(formatted_text, ctx_size)
|
| 810 |
-
|
| 811 |
-
|
| 812 |
-
|
| 813 |
-
f.write(formatted_text)
|
| 814 |
download_file = f.name
|
| 815 |
-
|
|
|
|
|
|
|
| 816 |
return (
|
| 817 |
f"PDF processed successfully! Generated {len(snippets_list)} snippets.",
|
| 818 |
-
formatted_text,
|
| 819 |
-
formatted_text,
|
| 820 |
snippets_list,
|
| 821 |
-
gr.update(choices=
|
| 822 |
-
download_file
|
| 823 |
-
#[download_file]
|
| 824 |
)
|
| 825 |
|
| 826 |
except Exception as e:
|
|
|
|
| 781 |
|
| 782 |
# PDF Processing Handlers
|
| 783 |
def handle_pdf_process(pdf, fmt, ctx_size):
|
| 784 |
+
"""Process PDF, format text, and return formatted text and snippets."""
|
| 785 |
if not pdf:
|
| 786 |
+
return "Please upload a PDF file.", "", "", [], gr.update(choices=[], value=None), None
|
| 787 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 788 |
try:
|
|
|
|
| 789 |
text = extract_text_from_pdf(pdf.name)
|
| 790 |
if text.startswith("Error"):
|
| 791 |
+
return text, "", "", [], gr.update(choices=[], value=None), None
|
| 792 |
+
|
| 793 |
+
# Format the text *before* splitting into snippets:
|
| 794 |
+
formatted_text = format_content(text, fmt) # Call format_content here!
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 795 |
snippets_list = split_into_snippets(formatted_text, ctx_size)
|
| 796 |
+
|
| 797 |
+
with tempfile.NamedTemporaryFile(delete=False, mode='w', suffix=f'.{fmt}') as f: # Correct suffix
|
| 798 |
+
f.write(formatted_text) # Write the *formatted* text
|
|
|
|
| 799 |
download_file = f.name
|
| 800 |
+
|
| 801 |
+
snippet_choices = update_snippet_choices(snippets_list) # Pre-calculate choices
|
| 802 |
+
|
| 803 |
return (
|
| 804 |
f"PDF processed successfully! Generated {len(snippets_list)} snippets.",
|
| 805 |
+
formatted_text, # Return the *formatted* text
|
| 806 |
+
formatted_text, # Update the state with formatted text
|
| 807 |
snippets_list,
|
| 808 |
+
gr.update(choices=snippet_choices, value=snippet_choices[0] if snippet_choices else None),
|
| 809 |
+
download_file
|
|
|
|
| 810 |
)
|
| 811 |
|
| 812 |
except Exception as e:
|