Spaces:

infly
/

Infinity-Parser-Demo

Running

App Files Files Community

zuminghuang committed on Aug 4

Commit

95e350e

verified ·

1 Parent(s): 46820dd

Update app.py

Browse files

Files changed (1) hide show

app.py +166 -49

app.py CHANGED Viewed

@@ -19,13 +19,16 @@ from pdf2image import convert_from_path
 from loguru import logger
 from openai import OpenAI, AsyncOpenAI
 from gradio_pdf import PDF
 import aiohttp
 import uuid
 import tqdm
 import requests
-import httpx
 def setup_poppler_linux():
@@ -64,9 +67,10 @@ def send_pdf_to_parse(file_path, server_ip, port, route="/upload", api_key=None)
     return response
 async def send_pdf_async_aiohttp(file_path, server_ip, route="/upload", Authorization=None):
     """使用aiohttp异步发送PDF"""
-    # url = f"http://{server_ip}:{port}{route}"
     url = f"{server_ip}{route}"
     headers = {}
     if Authorization:
@@ -105,12 +109,12 @@ Authorization = os.environ.get("Authorization")
 client = AsyncOpenAI(
     api_key=openai_api_key,
     base_url=openai_api_base + "/v1",
-    http_client=httpx.AsyncClient(verify=False),
 )
 async def request(messages):
     chat_completion_from_base64 = await client.chat.completions.create(
         messages=messages,
         extra_headers={
@@ -122,20 +126,20 @@ async def request(messages):
         temperature=0.0,
         top_p=0.95
     )
     page = ""
     async for chunk in chat_completion_from_base64:
         if chunk.choices[0].delta.content:
             content = chunk.choices[0].delta.content
             choice = chunk.choices[0]
             if choice.finish_reason is not None:
                 print(f"end reason = {choice.finish_reason}")
                 break
             page += content
             yield content
 def images_to_pdf(img_paths, pdf_path):
@@ -168,9 +172,8 @@ def encode_image(image_path):
     with open(image_path, "rb") as image_file:
         return base64.b64encode(image_file.read()).decode("utf-8")
 def build_message(image_path, prompt):
     content = [
         {
             "type": "image_url",
@@ -180,17 +183,18 @@ def build_message(image_path, prompt):
         },
         {"type": "text", 'text': prompt}
     ]
     messages = [
         {"role": "system", "content": "You are a helpful assistant."},
         {'role': 'user', 'content': content}
     ]
     return messages
 def download_markdown_file(md_text):
     filename = f"markdown_{uuid.uuid4().hex[:8]}.md"
     filepath = Path("downloads") / filename
@@ -215,14 +219,14 @@ async def doc_parser(doc_path, prompt):
             for idx, page in enumerate(pages, start=1):
                 img_path = tmpdir / f"page_{idx}.png"
                 page.save(img_path, "PNG")
                 messages = build_message(img_path, prompt)
                 queries.append(messages)
         else:
             messages = build_message(doc_path, prompt)
             queries.append(messages)
     all_pages = []
     all_pages_raw = []
     for query in queries:
@@ -235,26 +239,25 @@ async def doc_parser(doc_path, prompt):
         print(all_pages)
         yield "\n---\n".join(all_pages), "\n\n".join(all_pages_raw)
 def compress_directory_to_zip(directory_path, output_zip_path):
     """Write every file under ``directory_path`` into a deflate-compressed zip.

     Each entry is stored under its path relative to ``directory_path``. When
     ``output_zip_path`` is a filesystem path located inside ``directory_path``,
     the archive itself is skipped so it is never added to its own contents.

     Returns:
         0 on success. On any exception the error is logged with
         ``logger.exception`` and -1 is returned.
     """
     try:
         # The self-inclusion guard only makes sense for filesystem targets;
         # ZipFile also accepts file-like objects, which are left alone.
         archive_abs = None
         if isinstance(output_zip_path, (str, os.PathLike)):
             archive_abs = os.path.abspath(output_zip_path)
         with zipfile.ZipFile(output_zip_path, 'w', zipfile.ZIP_DEFLATED) as zipf:
             for root, _dirs, files in os.walk(directory_path):
                 for file in files:
                     file_path = os.path.join(root, file)
                     if archive_abs is not None and os.path.abspath(file_path) == archive_abs:
                         continue
                     arcname = os.path.relpath(file_path, directory_path)
                     zipf.write(file_path, arcname)
         return 0
     except Exception as e:
         logger.exception(e)
         return -1
 latex_delimiters = [
     {'left': '$$', 'right': '$$', 'display': True},
     {'left': '$', 'right': '$', 'display': False},
@@ -262,31 +265,94 @@ latex_delimiters = [
     {'left': '\\[', 'right': '\\]', 'display': True},
 ]
 def check_prompt(prompt):
     """Return ``prompt`` unchanged, rejecting missing or whitespace-only input.

     Raises:
         gr.Error: when ``prompt`` is falsy or contains only whitespace.
     """
     has_text = bool(prompt) and prompt.strip() != ""
     if has_text:
         return prompt
     raise gr.Error("Please select or enter a prompt before parsing.")
 def to_file(image_path):
     """Map the Academic Papers example thumbnail path to its PDF counterpart.

     A path ending in ``Academic_Papers.png`` has that trailing file name
     swapped for ``Academic_Papers.pdf``; any other path is returned unchanged.
     """
     png_name = "Academic_Papers.png"
     if image_path.endswith(png_name):
         # Rewrite only the trailing name: str.replace would also touch an
         # earlier occurrence of the same text elsewhere in the path.
         image_path = image_path[:-len(png_name)] + "Academic_Papers.pdf"
     return image_path
-# async def process_file(file_path):
-#     if not file_path.endswith(".pdf"):
-#         tmp_path = Path(file_path).with_suffix(".pdf")
-#         images_to_pdf(file_path, tmp_path)
-#     else:
-#         tmp_path = Path(file_path)
-#     async with httpx.AsyncClient() as client:
-#         await send_pdf_to_parse_async(client, str(tmp_path), IP, PORT)
-#     return str(tmp_path)
 async def process_file(file_path):
@@ -309,7 +375,7 @@ if __name__ == '__main__':
     with gr.Blocks() as demo:
         with gr.Row():
             with gr.Column(variant='panel', scale=5):
                 file = gr.File(label='Please upload a PDF or image', file_types=['.pdf', '.png', '.jpeg', '.jpg'], type="filepath")
                 prompts = gr.Dropdown(
                     choices=preset_prompts,
@@ -324,9 +390,13 @@ if __name__ == '__main__':
                 with gr.Row():
                     change_bu = gr.Button('Parse')
                     clear_bu = gr.ClearButton(value='Clear')
-                pdf_show = PDF(label='Preview', interactive=False, visible=True, height=800)
                 example_root = os.path.join(os.path.dirname(__file__), 'examples')
                 images = [
@@ -341,9 +411,9 @@ if __name__ == '__main__':
                     file_path = [
                         os.path.join(example_root, f)
                         for f in ["Financial_Reports.png", "Books.png", "Magazines.png", "Academic_Papers.png"]
                     ]
                     with gr.Row():
                         for i, label in enumerate(["Financial Reports(IMG)", "Books(IMG)", "Magazines(IMG)", "Academic Papers(PDF)"]):
                             with gr.Column(scale=1, min_width=120):
@@ -355,11 +425,11 @@ if __name__ == '__main__':
                                     show_download_button=False
                                 )
                                 gr.Button(label).click(fn=to_file, inputs=gr.State(file_path[i]), outputs=file)
                 download_btn = gr.Button("⬇️ Generate download link", size="sm")
                 output_file = gr.File(label='Parse result', interactive=False, elem_id="down-file-box",visible=False)
                 gr.HTML("""
                 <style>
                 #down-file-box {
@@ -375,9 +445,56 @@ if __name__ == '__main__':
                     with gr.Tab('Markdown text'):
                         md_text = gr.TextArea(lines=45, show_copy_button=True)
-        file.change(fn=process_file, inputs=file, outputs=pdf_show)
         change_bu.click(
@@ -393,9 +510,9 @@ if __name__ == '__main__':
             inputs=[file, prompts],
             outputs=[md, md_text]
         )
-        clear_bu.add([file, md, pdf_show, md_text])
         download_btn.click(
             fn=download_markdown_file,
             inputs=md_text,

 from loguru import logger
 from openai import OpenAI, AsyncOpenAI
 from gradio_pdf import PDF
+import certifi
+import httpx
 import aiohttp
 import uuid
 import tqdm
+import base64, pathlib
+from io import BytesIO
+from pdf2image import convert_from_bytes, convert_from_path     # pip install pdf2image
 import requests
 def setup_poppler_linux():
     return response
 async def send_pdf_async_aiohttp(file_path, server_ip, route="/upload", Authorization=None):
     """使用aiohttp异步发送PDF"""
     url = f"{server_ip}{route}"
     headers = {}
     if Authorization:
 client = AsyncOpenAI(
     api_key=openai_api_key,
     base_url=openai_api_base + "/v1",
+    http_client=httpx.AsyncClient(verify=False)
 )
 async def request(messages):
     chat_completion_from_base64 = await client.chat.completions.create(
         messages=messages,
         extra_headers={
         temperature=0.0,
         top_p=0.95
     )
     page = ""
     async for chunk in chat_completion_from_base64:
         if chunk.choices[0].delta.content:
             content = chunk.choices[0].delta.content
             choice = chunk.choices[0]
             if choice.finish_reason is not None:
                 print(f"end reason = {choice.finish_reason}")
                 break
             page += content
             yield content
 def images_to_pdf(img_paths, pdf_path):
     with open(image_path, "rb") as image_file:
         return base64.b64encode(image_file.read()).decode("utf-8")
 def build_message(image_path, prompt):
     content = [
         {
             "type": "image_url",
         },
         {"type": "text", 'text': prompt}
     ]
     messages = [
         {"role": "system", "content": "You are a helpful assistant."},
         {'role': 'user', 'content': content}
     ]
     return messages
 def download_markdown_file(md_text):
     filename = f"markdown_{uuid.uuid4().hex[:8]}.md"
     filepath = Path("downloads") / filename
             for idx, page in enumerate(pages, start=1):
                 img_path = tmpdir / f"page_{idx}.png"
                 page.save(img_path, "PNG")
                 messages = build_message(img_path, prompt)
                 queries.append(messages)
         else:
             messages = build_message(doc_path, prompt)
             queries.append(messages)
     all_pages = []
     all_pages_raw = []
     for query in queries:
         print(all_pages)
         yield "\n---\n".join(all_pages), "\n\n".join(all_pages_raw)
 def compress_directory_to_zip(directory_path, output_zip_path):
     """Write every file under ``directory_path`` into a deflate-compressed zip.

     Each entry is stored under its path relative to ``directory_path``. When
     ``output_zip_path`` is a filesystem path located inside ``directory_path``,
     the archive itself is skipped so it is never added to its own contents.

     Returns:
         0 on success. On any exception the error is logged with
         ``logger.exception`` and -1 is returned.
     """
     try:
         # The self-inclusion guard only makes sense for filesystem targets;
         # ZipFile also accepts file-like objects, which are left alone.
         archive_abs = None
         if isinstance(output_zip_path, (str, os.PathLike)):
             archive_abs = os.path.abspath(output_zip_path)
         with zipfile.ZipFile(output_zip_path, 'w', zipfile.ZIP_DEFLATED) as zipf:
             for root, _dirs, files in os.walk(directory_path):
                 for file in files:
                     file_path = os.path.join(root, file)
                     if archive_abs is not None and os.path.abspath(file_path) == archive_abs:
                         continue
                     arcname = os.path.relpath(file_path, directory_path)
                     zipf.write(file_path, arcname)
         return 0
     except Exception as e:
         logger.exception(e)
         return -1
 latex_delimiters = [
     {'left': '$$', 'right': '$$', 'display': True},
     {'left': '$', 'right': '$', 'display': False},
     {'left': '\\[', 'right': '\\]', 'display': True},
 ]
 def check_prompt(prompt):
     """Return ``prompt`` unchanged, rejecting missing or whitespace-only input.

     Raises:
         gr.Error: when ``prompt`` is falsy or contains only whitespace.
     """
     has_text = bool(prompt) and prompt.strip() != ""
     if has_text:
         return prompt
     raise gr.Error("Please select or enter a prompt before parsing.")
 def to_file(image_path):
     """Map the Academic Papers example thumbnail path to its PDF counterpart.

     A path ending in ``Academic_Papers.png`` has that trailing file name
     swapped for ``Academic_Papers.pdf``; any other path is returned unchanged.
     """
     png_name = "Academic_Papers.png"
     if image_path.endswith(png_name):
         # Rewrite only the trailing name: str.replace would also touch an
         # earlier occurrence of the same text elsewhere in the path.
         image_path = image_path[:-len(png_name)] + "Academic_Papers.pdf"
     return image_path
def render_img(b64_list, idx, scale):
    """Render the HTML viewer for one image at the given zoom factor.

    Args:
        b64_list: Sequence of strings used as the ``<img>`` ``src`` value.
        idx: Requested index; it is wrapped modulo ``len(b64_list)``, so
            negative or past-the-end values cycle around.
        scale: Zoom factor; the image width is set to ``scale * 100`` percent.

    Returns:
        A placeholder ``<p>`` when ``b64_list`` is empty, otherwise a
        scrollable 800px-high ``<div>`` wrapping a single ``<img>``.
    """
    from html import escape

    if not b64_list:
        return "<p style='color:gray'>请先上传图片</p>"
    idx %= len(b64_list)
    # Escape before placing the value inside a double-quoted attribute so a
    # stray quote in the source string cannot break out of ``src="..."``.
    src = escape(str(b64_list[idx]), quote=True)
    # Width is expressed as a percentage; height follows automatically.
    percent = scale * 100
    # At scale <= 1 the image is centred with flexbox. When zoomed in the
    # centring rules are omitted (presumably so the enlarged image can be
    # scrolled in full -- confirm against the intended UX).
    centering = (
        'display:flex;justify-content:center;align-items:center;'
        if scale <= 1
        else ''
    )
    return (
        f'<div style="overflow:auto;border:1px solid #ccc;'
        f'{centering}'
        f'width:100%;height:800px;">'
        f'  <img src="{src}" '
        f'       style="width:{percent}%;max-width:none;'
        f'              height:auto;display:block;" />'
        f'</div>'
    )
def files_to_b64(file, pdf_dpi: int = 200):
    """Convert an uploaded PDF or image into a list of base64 ``data:`` URIs.

    Args:
        file: Either an object exposing ``.data`` (raw bytes) and ``.name``,
            or a value accepted by ``pathlib.Path`` naming a file on disk.
        pdf_dpi: ``dpi`` value forwarded to ``convert_from_bytes`` /
            ``convert_from_path`` when the input is a PDF.

    Returns:
        For a ``.pdf`` input, one PNG data URI per converted page. Otherwise a
        single-element list holding the file's bytes, labelled
        ``image/<extension>`` (with ``jpg`` normalised to ``jpeg``).
    """
    def _png_uri(page) -> str:
        # Serialise one converted page to PNG in memory and wrap it as a data URI.
        buf = BytesIO()
        page.save(buf, format="PNG")
        return "data:image/png;base64," + base64.b64encode(buf.getvalue()).decode()

    in_memory = hasattr(file, "data")
    path = pathlib.Path(file.name if in_memory else file)
    suffix = path.suffix.lower()

    if suffix == ".pdf":
        if in_memory:
            pages = convert_from_bytes(file.data, dpi=pdf_dpi)
        else:
            pages = convert_from_path(str(path), dpi=pdf_dpi)
        return [_png_uri(page) for page in pages]

    raw_bytes = file.data if in_memory else path.read_bytes()
    # "image/jpg" is not a registered media type; use the canonical "image/jpeg".
    extension = suffix[1:]
    subtype = {"jpg": "jpeg"}.get(extension, extension)
    b64 = base64.b64encode(raw_bytes).decode()
    return [f"data:image/{subtype};base64,{b64}"]
 async def process_file(file_path):
     with gr.Blocks() as demo:
         with gr.Row():
             with gr.Column(variant='panel', scale=5):
                 file = gr.File(label='Please upload a PDF or image', file_types=['.pdf', '.png', '.jpeg', '.jpg'], type="filepath")
                 prompts = gr.Dropdown(
                     choices=preset_prompts,
                 with gr.Row():
                     change_bu = gr.Button('Parse')
                     clear_bu = gr.ClearButton(value='Clear')
+                zoom = gr.Slider(0.5, 3, value=1, step=0.1, label="Image Scale")
+                with gr.Row():
+                    prev_btn = gr.Button("⬅️ Pre")
+                    next_btn = gr.Button("Next ➡️")
+                viewer = gr.HTML()
                 example_root = os.path.join(os.path.dirname(__file__), 'examples')
                 images = [
                     file_path = [
                         os.path.join(example_root, f)
                         for f in ["Financial_Reports.png", "Books.png", "Magazines.png", "Academic_Papers.png"]
                     ]
                     with gr.Row():
                         for i, label in enumerate(["Financial Reports(IMG)", "Books(IMG)", "Magazines(IMG)", "Academic Papers(PDF)"]):
                             with gr.Column(scale=1, min_width=120):
                                     show_download_button=False
                                 )
                                 gr.Button(label).click(fn=to_file, inputs=gr.State(file_path[i]), outputs=file)
                 download_btn = gr.Button("⬇️ Generate download link", size="sm")
                 output_file = gr.File(label='Parse result', interactive=False, elem_id="down-file-box",visible=False)
                 gr.HTML("""
                 <style>
                 #down-file-box {
                     with gr.Tab('Markdown text'):
                         md_text = gr.TextArea(lines=45, show_copy_button=True)
+        img_list_state = gr.State([])
+        idx_state = gr.State(0)
+        async def upload_handler(files):
+            if files is None:
+                return [], 0, ""
+            if files.lower().endswith(".pdf"):
+                asyncio.create_task(send_pdf_async_aiohttp(files, server_ip=openai_api_base, Authorization=Authorization))
+            b64s = files_to_b64(files)
+            return b64s, 0, render_img(b64s, 0, 1)
+        file.change(
+            upload_handler,
+            inputs=file,
+            outputs=[img_list_state, idx_state, viewer],
+        ).then(
+            lambda: gr.update(value=1),   # 无输入，直接把 zoom 设为 1
+            None,                                # inputs=None
+            zoom                                 # outputs=[zoom]
+        )
+        def show_prev(b64s, idx, scale):
+            idx -= 1
+            return idx, render_img(b64s, idx, scale)
+        prev_btn.click(
+            show_prev,
+            inputs=[img_list_state, idx_state, zoom],
+            outputs=[idx_state, viewer],
+        )
+        def show_next(b64s, idx, scale):
+            idx += 1
+            return idx, render_img(b64s, idx, scale)
+        next_btn.click(
+            show_next,
+            inputs=[img_list_state, idx_state, zoom],
+            outputs=[idx_state, viewer],
+        )
+        zoom.change(
+            lambda b64s, idx, scale: render_img(b64s, idx, scale),
+            inputs=[img_list_state, idx_state, zoom],
+            outputs=viewer,
+        )
         change_bu.click(
             inputs=[file, prompts],
             outputs=[md, md_text]
         )
+        clear_bu.add([file, md, md_text])
         download_btn.click(
             fn=download_markdown_file,
             inputs=md_text,