Spaces:

awacke1
/

TorchTransformers-CV-SFT

Sleeping

App Files Files Community

awacke1 committed on Mar 22

Commit

9218bcd

verified ·

1 Parent(s): a9df450

Update app.py

Browse files

Files changed (1) hide show

app.py +168 -38

app.py CHANGED Viewed

@@ -13,7 +13,7 @@ from transformers import AutoModelForCausalLM, AutoTokenizer, AutoModel
 from diffusers import StableDiffusionPipeline
 from torch.utils.data import Dataset, DataLoader
 import csv
-import fitz  # PyMuPDF, pure Python library
 import requests
 from PIL import Image
 import cv2
@@ -27,6 +27,7 @@ from typing import Optional, Tuple
 import zipfile
 import math
 import random
 # Logging setup with custom buffer
 logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")
@@ -61,6 +62,10 @@ if 'model_loaded' not in st.session_state:
     st.session_state['model_loaded'] = False
 if 'processing' not in st.session_state:
     st.session_state['processing'] = {}
 # Model Configuration Classes
 @dataclass
@@ -311,11 +316,16 @@ def generate_filename(sequence, ext="png"):
     timestamp = time.strftime("%d%m%Y%H%M%S")
     return f"{sequence}_{timestamp}.{ext}"
-def get_download_link(file_path, mime_type="text/plain", label="Download"):
     with open(file_path, 'rb') as f:
         data = f.read()
     b64 = base64.b64encode(data).decode()
-    return f'<a href="data:{mime_type};base64,{b64}" download="{os.path.basename(file_path)}">{label} 📥</a>'
 def zip_directory(directory_path, zip_path):
     with zipfile.ZipFile(zip_path, 'w', zipfile.ZIP_DEFLATED) as zipf:
@@ -330,6 +340,9 @@ def get_model_files(model_type="causal_lm"):
 def get_gallery_files(file_types=["png"]):
     return sorted([f for ext in file_types for f in glob.glob(f"*.{ext}")])
 def download_pdf(url, output_path):
     try:
         response = requests.get(url, stream=True, timeout=10)
@@ -343,26 +356,33 @@ def download_pdf(url, output_path):
     return False
 # Async Processing Functions
-async def process_pdf_snapshot(pdf_path, mode="thumbnail"):
     start_time = time.time()
     status = st.empty()
     status.text(f"Processing PDF Snapshot ({mode})... (0s)")
     try:
         doc = fitz.open(pdf_path)
         output_files = []
-        if mode == "thumbnail":
             page = doc[0]
-            pix = page.get_pixmap(matrix=fitz.Matrix(0.5, 0.5))  # 50% scale
-            output_file = generate_filename("thumbnail", "png")
             pix.save(output_file)
             output_files.append(output_file)
         elif mode == "twopage":
             for i in range(min(2, len(doc))):
                 page = doc[i]
-                pix = page.get_pixmap(matrix=fitz.Matrix(1.0, 1.0))  # Full scale
                 output_file = generate_filename(f"twopage_{i}", "png")
                 pix.save(output_file)
                 output_files.append(output_file)
         doc.close()
         elapsed = int(time.time() - start_time)
         status.text(f"PDF Snapshot ({mode}) completed in {elapsed}s!")
@@ -502,11 +522,16 @@ st.sidebar.header("Captured Files 📜")
 gallery_size = st.sidebar.slider("Gallery Size", 1, 10, 2)  # Default to 2
 def update_gallery():
     media_files = get_gallery_files(["png"])
-    if media_files:
         cols = st.sidebar.columns(2)
         for idx, file in enumerate(media_files[:gallery_size * 2]):  # Limit by gallery size
             with cols[idx % 2]:
                 st.image(Image.open(file), caption=os.path.basename(file), use_container_width=True)
 update_gallery()
 st.sidebar.subheader("Model Management 🗂️")
@@ -570,23 +595,98 @@ with tab1:
 with tab2:
     st.header("Download PDFs 📥")
-    url_input = st.text_area("Enter PDF URLs (one per line)", height=100)
-    mode = st.selectbox("Snapshot Mode", ["Thumbnail", "Two-Page View"], key="download_mode")
-    if st.button("Download & Snapshot 📸"):
         urls = url_input.strip().split("\n")
-        for url in urls:
             if url:
-                pdf_path = generate_filename("downloaded", "pdf")
-                if download_pdf(url, pdf_path):
-                    logger.info(f"Downloaded PDF from {url} to {pdf_path}")
-                    entry = f"Downloaded PDF: {pdf_path}"
-                    if entry not in st.session_state['history']:
-                        st.session_state['history'].append(entry)
-                    snapshots = asyncio.run(process_pdf_snapshot(pdf_path, mode.lower().replace(" ", "")))
-                    for snapshot in snapshots:
-                        st.image(Image.open(snapshot), caption=snapshot, use_container_width=True)
                 else:
-                    st.error(f"Failed to download {url}")
 with tab3:
     st.header("Build Titan 🌱")
@@ -647,11 +747,14 @@ with tab4:
                 st.rerun()
         elif isinstance(st.session_state['builder'], DiffusionBuilder):
             captured_files = get_gallery_files(["png"])
-            if len(captured_files) >= 2:
                 demo_data = [{"image": img, "text": f"Superhero {os.path.basename(img).split('.')[0]}"} for img in captured_files]
                 edited_data = st.data_editor(pd.DataFrame(demo_data), num_rows="dynamic")
                 if st.button("Fine-Tune with Dataset 🔄"):
-                    images = [Image.open(row["image"]) for _, row in edited_data.iterrows()]
                     texts = [row["text"] for _, row in edited_data.iterrows()]
                     new_model_name = f"{st.session_state['builder'].config.name}-sft-{int(time.time())}"
                     new_config = DiffusionConfig(name=new_model_name, base_model=st.session_state['builder'].config.base_model, size="small")
@@ -701,6 +804,7 @@ with tab5:
                 status_container.empty()
         elif isinstance(st.session_state['builder'], DiffusionBuilder):
             test_prompt = st.text_area("Enter Test Prompt", "Neon Batman")
             if st.button("Run Test ▶️"):
                 image = st.session_state['builder'].generate(test_prompt)
                 output_file = generate_filename("diffusion_test", "png")
@@ -744,10 +848,18 @@ with tab6:
 with tab7:
     st.header("Test OCR 🔍")
     captured_files = get_gallery_files(["png"])
-    if captured_files:
-        selected_file = st.selectbox("Select Image", captured_files, key="ocr_select")
         if selected_file:
-            image = Image.open(selected_file)
             st.image(image, caption="Input Image", use_container_width=True)
             if st.button("Run OCR 🚀", key="ocr_run"):
                 output_file = generate_filename("ocr_output", "txt")
@@ -760,15 +872,23 @@ with tab7:
                 st.success(f"OCR output saved to {output_file}")
                 st.session_state['processing']['ocr'] = False
     else:
-        st.warning("No images captured yet. Use Camera Snap or Download PDFs first!")
 with tab8:
     st.header("Test Image Gen 🎨")
     captured_files = get_gallery_files(["png"])
-    if captured_files:
-        selected_file = st.selectbox("Select Image", captured_files, key="gen_select")
         if selected_file:
-            image = Image.open(selected_file)
             st.image(image, caption="Reference Image", use_container_width=True)
             prompt = st.text_area("Prompt", "Generate a similar superhero image", key="gen_prompt")
             if st.button("Run Image Gen 🚀", key="gen_run"):
@@ -782,16 +902,26 @@ with tab8:
                 st.success(f"Image saved to {output_file}")
                 st.session_state['processing']['gen'] = False
     else:
-        st.warning("No images captured yet. Use Camera Snap or Download PDFs first!")
 with tab9:
     st.header("Custom Diffusion 🎨🤓")
     st.write("Unleash your inner artist with our tiny diffusion models!")
     captured_files = get_gallery_files(["png"])
-    if captured_files:
-        st.subheader("Select Images to Train")
-        selected_files = st.multiselect("Pick Images", captured_files, key="diffusion_select")
-        images = [Image.open(file) for file in selected_files]
         model_options = [
             ("PixelTickler 🎨✨", "OFA-Sys/small-stable-diffusion-v0"),
@@ -818,7 +948,7 @@ with tab9:
             st.success(f"Image saved to {output_file}")
             st.session_state['processing']['diffusion'] = False
     else:
-        st.warning("No images captured yet. Use Camera Snap or Download PDFs first!")
 # Initial Gallery Update
 update_gallery()

 from diffusers import StableDiffusionPipeline
 from torch.utils.data import Dataset, DataLoader
 import csv
+import fitz  # PyMuPDF
 import requests
 from PIL import Image
 import cv2
 import zipfile
 import math
 import random
+import re
 # Logging setup with custom buffer
 logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")
     st.session_state['model_loaded'] = False
 if 'processing' not in st.session_state:
     st.session_state['processing'] = {}
+if 'pdf_checkboxes' not in st.session_state:
+    st.session_state['pdf_checkboxes'] = {}  # Shared cache for PDF checkboxes
+if 'downloaded_pdfs' not in st.session_state:
+    st.session_state['downloaded_pdfs'] = {}  # Cache for downloaded PDF paths
 # Model Configuration Classes
 @dataclass
     timestamp = time.strftime("%d%m%Y%H%M%S")
     return f"{sequence}_{timestamp}.{ext}"
def pdf_url_to_filename(url):
    """Convert a PDF URL into a filesystem-safe ``.pdf`` file name.

    Surrounding whitespace is stripped first, so a URL pasted with a trailing
    space or carriage return maps to the same file as the clean URL. Every
    character that is illegal in file names on common platforms (angle
    brackets, colon, double quote, slash, backslash, pipe, question mark,
    asterisk and ASCII control characters) is replaced with an underscore.
    Over-long results are shortened and tagged with a short hash of the URL so
    that distinct long URLs still get distinct names.

    Args:
        url: The source URL of the PDF.

    Returns:
        The sanitized name with a ``.pdf`` suffix appended.
    """
    import hashlib  # local import: only this helper needs it

    # Keep the stem well under the usual 255-character file-name limit.
    max_stem_len = 200
    cleaned = url.strip()
    safe_name = re.sub(r'[<>:"/\\|?*\x00-\x1f]', '_', cleaned)
    if len(safe_name) > max_stem_len:
        digest = hashlib.sha1(cleaned.encode("utf-8")).hexdigest()[:12]
        # 12 hash characters plus the joining underscore replace the tail.
        safe_name = f"{safe_name[:max_stem_len - 13]}_{digest}"
    return f"{safe_name}.pdf"
def get_download_link(file_path, mime_type="application/pdf", label="Download"):
    """Build an HTML anchor that downloads *file_path* through a data URI.

    The file is read in binary mode and embedded base64-encoded in the
    ``href``. The file name, MIME type and label are HTML-escaped because the
    result is meant to be rendered as raw HTML and file names may originate
    from user-supplied URLs.

    Args:
        file_path: Path of the file to embed.
        mime_type: MIME type placed in the data URI.
        label: Visible link text (treated as plain text, not markup).

    Returns:
        An ``<a ...>`` element as a string.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    import html  # local import: only this helper needs it

    with open(file_path, 'rb') as f:
        b64 = base64.b64encode(f.read()).decode("ascii")
    file_name = html.escape(os.path.basename(file_path), quote=True)
    safe_mime = html.escape(mime_type, quote=True)
    return f'<a href="data:{safe_mime};base64,{b64}" download="{file_name}">{html.escape(label)}</a>'
 def zip_directory(directory_path, zip_path):
     with zipfile.ZipFile(zip_path, 'w', zipfile.ZIP_DEFLATED) as zipf:
 def get_gallery_files(file_types=("png",)):
     """Return the files in the current directory matching the given extensions.

     Args:
         file_types: Iterable of extensions without the leading dot. The
             default is an immutable tuple so it can never be altered
             between calls; lists passed by callers work as before.

     Returns:
         A sorted list of matching file names.
     """
     return sorted(f for ext in file_types for f in glob.glob(f"*.{ext}"))
def get_pdf_files():
    """Return the ``*.pdf`` files in the current working directory, sorted."""
    pdf_paths = glob.glob("*.pdf")
    pdf_paths.sort()
    return pdf_paths
 def download_pdf(url, output_path):
     try:
         response = requests.get(url, stream=True, timeout=10)
     return False
 # Async Processing Functions
+async def process_pdf_snapshot(pdf_path, mode="single"):
     start_time = time.time()
     status = st.empty()
     status.text(f"Processing PDF Snapshot ({mode})... (0s)")
     try:
         doc = fitz.open(pdf_path)
         output_files = []
+        if mode == "single":
             page = doc[0]
+            pix = page.get_pixmap(matrix=fitz.Matrix(2.0, 2.0))  # High-res: 200% scale
+            output_file = generate_filename("single", "png")
             pix.save(output_file)
             output_files.append(output_file)
         elif mode == "twopage":
             for i in range(min(2, len(doc))):
                 page = doc[i]
+                pix = page.get_pixmap(matrix=fitz.Matrix(2.0, 2.0))  # High-res: 200% scale
                 output_file = generate_filename(f"twopage_{i}", "png")
                 pix.save(output_file)
                 output_files.append(output_file)
+        elif mode == "allthumbs":
+            for i in range(len(doc)):
+                page = doc[i]
+                pix = page.get_pixmap(matrix=fitz.Matrix(0.5, 0.5))  # Thumbnail: 50% scale
+                output_file = generate_filename(f"thumb_{i}", "png")
+                pix.save(output_file)
+                output_files.append(output_file)
         doc.close()
         elapsed = int(time.time() - start_time)
         status.text(f"PDF Snapshot ({mode}) completed in {elapsed}s!")
 gallery_size = st.sidebar.slider("Gallery Size", 1, 10, 2)  # Default to 2
 def update_gallery():
     """Render the sidebar gallery of captured PNG images and downloaded PDFs.

     Shows up to ``gallery_size * 2`` images in a two-column sidebar grid,
     followed by up to the same number of PDF download links. Nothing is
     rendered when there are neither images nor PDFs.
     """
     media_files = get_gallery_files(["png"])
     pdf_files = get_pdf_files()
     if not (media_files or pdf_files):
         return
     limit = gallery_size * 2  # gallery_size is the module-level sidebar slider value
     st.sidebar.subheader("Images 📸")
     cols = st.sidebar.columns(2)
     for idx, file in enumerate(media_files[:limit]):
         # Open the image in a context manager so its file handle is released after rendering.
         with cols[idx % 2], Image.open(file) as img:
             st.image(img, caption=os.path.basename(file), use_container_width=True)
     st.sidebar.subheader("PDF Downloads 📖")
     for pdf_file in pdf_files[:limit]:
         # Use st.sidebar.markdown so the links stay under the sidebar heading;
         # plain st.markdown renders into whatever container is active at the call site.
         st.sidebar.markdown(get_download_link(pdf_file, "application/pdf", f"📥 Grab {os.path.basename(pdf_file)}"), unsafe_allow_html=True)
 update_gallery()
 st.sidebar.subheader("Model Management 🗂️")
 with tab2:
     # "Download PDFs" tab: fetch PDFs from a list of URLs, show them in a
     # thumbnail gallery with per-file select/download/delete controls, and
     # snapshot the selected PDFs to PNG images.
     st.header("Download PDFs 📥")
     # Examples button with arXiv PDF links from README.md
     if st.button("Examples 📚"):
         example_urls = [
             "https://arxiv.org/pdf/2308.03892",  # Streamlit
             "https://arxiv.org/pdf/1912.01703",  # PyTorch
             "https://arxiv.org/pdf/2408.11039",  # Qwen2-VL
             "https://arxiv.org/pdf/2109.10282",  # TrOCR
             "https://arxiv.org/pdf/2112.10752",  # LDM
             "https://arxiv.org/pdf/2308.11236",  # OpenCV
             "https://arxiv.org/pdf/1706.03762",  # Attention is All You Need
             "https://arxiv.org/pdf/2006.11239",  # DDPM
             "https://arxiv.org/pdf/2305.11207",  # Pandas
             "https://arxiv.org/pdf/2106.09685",  # LoRA
             "https://arxiv.org/pdf/2005.11401",  # RAG
             "https://arxiv.org/pdf/2106.10504"   # Fine-Tuning Vision Transformers
         ]
         st.session_state['pdf_urls'] = "\n".join(example_urls)
     # Robo-Downloader
     url_input = st.text_area("Enter PDF URLs (one per line)", value=st.session_state.get('pdf_urls', ""), height=200)
     if st.button("Robo-Download 🤖"):
         # Strip each line and drop blanks so stray spaces or "\r" never reach the
         # request or the file name, and so the progress total counts real URLs only.
         urls = [line.strip() for line in url_input.splitlines() if line.strip()]
         progress_bar = st.progress(0)
         status_text = st.empty()
         total_urls = len(urls)
         existing_pdfs = set(get_pdf_files())
         for idx, url in enumerate(urls, start=1):
             output_path = pdf_url_to_filename(url)
             status_text.text(f"Fetching {idx}/{total_urls}: {os.path.basename(output_path)}...")
             if output_path in existing_pdfs:
                 st.info(f"Already got {os.path.basename(output_path)}! Skipping... 🐾")
                 st.session_state['downloaded_pdfs'][url] = output_path
             elif download_pdf(url, output_path):
                 # Remember the new file so a repeated URL in the same batch is skipped.
                 existing_pdfs.add(output_path)
                 st.session_state['downloaded_pdfs'][url] = output_path
                 logger.info(f"Downloaded PDF from {url} to {output_path}")
                 entry = f"Downloaded PDF: {output_path}"
                 if entry not in st.session_state['history']:
                     st.session_state['history'].append(entry)
             else:
                 st.error(f"Failed to nab {url} 😿")
             progress_bar.progress(idx / total_urls)
         status_text.text("Robo-Download complete! 🚀")
         update_gallery()
     # PDF Gallery with Thumbnails and Checkboxes
     st.subheader("PDF Gallery 📖")
     # De-duplicate paths (order preserved): two URLs mapping to one file would
     # otherwise create widgets with identical keys.
     downloaded_pdfs = list(dict.fromkeys(st.session_state['downloaded_pdfs'].values()))
     if downloaded_pdfs:
         cols_per_row = 3
         for i in range(0, len(downloaded_pdfs), cols_per_row):
             cols = st.columns(cols_per_row)
             for j, pdf_path in enumerate(downloaded_pdfs[i:i + cols_per_row]):
                 with cols[j]:
                     # Render the thumbnail and close the document right away so the
                     # file is not held open when it is deleted or the script reruns.
                     doc = fitz.open(pdf_path)
                     try:
                         pix = doc[0].get_pixmap(matrix=fitz.Matrix(0.5, 0.5))  # Thumbnail at 50% scale
                         img = Image.frombytes("RGB", [pix.width, pix.height], pix.samples)
                     finally:
                         doc.close()
                     st.image(img, caption=os.path.basename(pdf_path), use_container_width=True)
                     # Checkbox for SFT/Input use
                     checkbox_key = f"pdf_{pdf_path}"
                     st.session_state['pdf_checkboxes'][checkbox_key] = st.checkbox(
                         "Use for SFT/Input",
                         value=st.session_state['pdf_checkboxes'].get(checkbox_key, False),
                         key=checkbox_key
                     )
                     # Download and Delete Buttons
                     st.markdown(get_download_link(pdf_path, "application/pdf", "Snag It! 📥"), unsafe_allow_html=True)
                     if st.button("Zap It! 🗑️", key=f"delete_{pdf_path}"):
                         os.remove(pdf_path)
                         # Forget every URL that points at the removed file, not just the first.
                         stale_urls = [k for k, v in st.session_state['downloaded_pdfs'].items() if v == pdf_path]
                         for url_key in stale_urls:
                             del st.session_state['downloaded_pdfs'][url_key]
                         st.session_state['pdf_checkboxes'].pop(checkbox_key, None)
                         st.success(f"PDF {os.path.basename(pdf_path)} vaporized! 💨")
                         st.rerun()
     else:
         st.info("No PDFs captured yet. Feed the robo-downloader some URLs! 🤖")
     mode = st.selectbox("Snapshot Mode", ["Single Page (High-Res)", "Two Pages (High-Res)", "All Pages (Thumbnails)"], key="download_mode")
     if st.button("Snapshot Selected 📸"):
         selected_pdfs = [path for path in st.session_state['downloaded_pdfs'].values() if st.session_state['pdf_checkboxes'].get(f"pdf_{path}", False)]
         if selected_pdfs:
             # The mode does not change per file, so translate the label once.
             mode_key = {"Single Page (High-Res)": "single", "Two Pages (High-Res)": "twopage", "All Pages (Thumbnails)": "allthumbs"}[mode]
             for pdf_path in selected_pdfs:
                 snapshots = asyncio.run(process_pdf_snapshot(pdf_path, mode_key))
                 for snapshot in snapshots:
                     st.image(Image.open(snapshot), caption=snapshot, use_container_width=True)
         else:
             st.warning("No PDFs selected for snapshotting! Check some boxes first. 📝")
 with tab3:
     st.header("Build Titan 🌱")
                 st.rerun()
         elif isinstance(st.session_state['builder'], DiffusionBuilder):
             captured_files = get_gallery_files(["png"])
+            selected_pdfs = [path for key, path in st.session_state['downloaded_pdfs'].items() if st.session_state['pdf_checkboxes'].get(f"pdf_{path}", False)]
+            if len(captured_files) + len(selected_pdfs) >= 2:
                 demo_data = [{"image": img, "text": f"Superhero {os.path.basename(img).split('.')[0]}"} for img in captured_files]
+                for pdf_path in selected_pdfs:
+                    demo_data.append({"image": pdf_path, "text": f"PDF {os.path.basename(pdf_path)}"})
                 edited_data = st.data_editor(pd.DataFrame(demo_data), num_rows="dynamic")
                 if st.button("Fine-Tune with Dataset 🔄"):
+                    images = [Image.open(row["image"]) if row["image"].endswith('.png') else Image.frombytes("RGB", fitz.open(row["image"])[0].get_pixmap(matrix=fitz.Matrix(2.0, 2.0)).size, fitz.open(row["image"])[0].get_pixmap(matrix=fitz.Matrix(2.0, 2.0)).samples) for _, row in edited_data.iterrows()]
                     texts = [row["text"] for _, row in edited_data.iterrows()]
                     new_model_name = f"{st.session_state['builder'].config.name}-sft-{int(time.time())}"
                     new_config = DiffusionConfig(name=new_model_name, base_model=st.session_state['builder'].config.base_model, size="small")
                 status_container.empty()
         elif isinstance(st.session_state['builder'], DiffusionBuilder):
             test_prompt = st.text_area("Enter Test Prompt", "Neon Batman")
+            selected_pdfs = [path for key, path in st.session_state['downloaded_pdfs'].items() if st.session_state['pdf_checkboxes'].get(f"pdf_{path}", False)]
             if st.button("Run Test ▶️"):
                 image = st.session_state['builder'].generate(test_prompt)
                 output_file = generate_filename("diffusion_test", "png")
 with tab7:
     st.header("Test OCR 🔍")
     captured_files = get_gallery_files(["png"])
+    selected_pdfs = [path for key, path in st.session_state['downloaded_pdfs'].items() if st.session_state['pdf_checkboxes'].get(f"pdf_{path}", False)]
+    all_files = captured_files + selected_pdfs
+    if all_files:
+        selected_file = st.selectbox("Select Image or PDF", all_files, key="ocr_select")
         if selected_file:
+            if selected_file.endswith('.png'):
+                image = Image.open(selected_file)
+            else:
+                doc = fitz.open(selected_file)
+                pix = doc[0].get_pixmap(matrix=fitz.Matrix(2.0, 2.0))
+                image = Image.frombytes("RGB", [pix.width, pix.height], pix.samples)
+                doc.close()
             st.image(image, caption="Input Image", use_container_width=True)
             if st.button("Run OCR 🚀", key="ocr_run"):
                 output_file = generate_filename("ocr_output", "txt")
                 st.success(f"OCR output saved to {output_file}")
                 st.session_state['processing']['ocr'] = False
     else:
+        st.warning("No images or PDFs captured yet. Use Camera Snap or Download PDFs first!")
 with tab8:
     st.header("Test Image Gen 🎨")
     captured_files = get_gallery_files(["png"])
+    selected_pdfs = [path for key, path in st.session_state['downloaded_pdfs'].items() if st.session_state['pdf_checkboxes'].get(f"pdf_{path}", False)]
+    all_files = captured_files + selected_pdfs
+    if all_files:
+        selected_file = st.selectbox("Select Image or PDF", all_files, key="gen_select")
         if selected_file:
+            if selected_file.endswith('.png'):
+                image = Image.open(selected_file)
+            else:
+                doc = fitz.open(selected_file)
+                pix = doc[0].get_pixmap(matrix=fitz.Matrix(2.0, 2.0))
+                image = Image.frombytes("RGB", [pix.width, pix.height], pix.samples)
+                doc.close()
             st.image(image, caption="Reference Image", use_container_width=True)
             prompt = st.text_area("Prompt", "Generate a similar superhero image", key="gen_prompt")
             if st.button("Run Image Gen 🚀", key="gen_run"):
                 st.success(f"Image saved to {output_file}")
                 st.session_state['processing']['gen'] = False
     else:
+        st.warning("No images or PDFs captured yet. Use Camera Snap or Download PDFs first WAV!")
 with tab9:
     st.header("Custom Diffusion 🎨🤓")
     st.write("Unleash your inner artist with our tiny diffusion models!")
     captured_files = get_gallery_files(["png"])
+    selected_pdfs = [path for key, path in st.session_state['downloaded_pdfs'].items() if st.session_state['pdf_checkboxes'].get(f"pdf_{path}", False)]
+    all_files = captured_files + selected_pdfs
+    if all_files:
+        st.subheader("Select Images or PDFs to Train")
+        selected_files = st.multiselect("Pick Images or PDFs", all_files, key="diffusion_select")
+        images = []
+        for file in selected_files:
+            if file.endswith('.png'):
+                images.append(Image.open(file))
+            else:
+                doc = fitz.open(file)
+                pix = doc[0].get_pixmap(matrix=fitz.Matrix(2.0, 2.0))
+                images.append(Image.frombytes("RGB", [pix.width, pix.height], pix.samples))
+                doc.close()
         model_options = [
             ("PixelTickler 🎨✨", "OFA-Sys/small-stable-diffusion-v0"),
             st.success(f"Image saved to {output_file}")
             st.session_state['processing']['diffusion'] = False
     else:
+        st.warning("No images or PDFs captured yet. Use Camera Snap or Download PDFs first!")
 # Initial Gallery Update
 update_gallery()