Update app-backup.py

app-backup.py  CHANGED  (+85 -19)
@@ -14,6 +14,9 @@ from loguru import logger
 from PIL import Image
 from transformers import AutoProcessor, Gemma3ForConditionalGeneration, TextIteratorStreamer
 
+# [PDF] Add PyPDF2
+import PyPDF2
+
 model_id = os.getenv("MODEL_ID", "google/gemma-3-27b-it")
 processor = AutoProcessor.from_pretrained(model_id, padding_side="left")
 model = Gemma3ForConditionalGeneration.from_pretrained(
@@ -48,10 +51,20 @@ def count_files_in_history(history: list[dict]) -> tuple[int, int]:
 
 
 def validate_media_constraints(message: dict, history: list[dict]) -> bool:
+    """
+    Check the number of images/videos and whether they are mixed.
+    PDFs are excluded from these checks and are simply allowed through.
+    """
+    # [PDF] Separate out PDF files
+    pdf_files = [f for f in message["files"] if f.endswith(".pdf")]
+    non_pdf_files = [f for f in message["files"] if not f.endswith(".pdf")]
+
+    # The existing checks apply only to non_pdf_files (i.e. images/videos)
+    new_image_count, new_video_count = count_files_in_new_message(non_pdf_files)
     history_image_count, history_video_count = count_files_in_history(history)
     image_count = history_image_count + new_image_count
     video_count = history_video_count + new_video_count
+
     if video_count > 1:
         gr.Warning("Only one video is supported.")
         return False
@@ -63,12 +76,21 @@ def validate_media_constraints(message: dict, history: list[dict]) -> bool:
            gr.Warning("Using <image> tags with video files is not supported.")
            return False
        # TODO: Add frame count validation for videos similar to image count limits  # noqa: FIX002, TD002, TD003
+
     if video_count == 0 and image_count > MAX_NUM_IMAGES:
         gr.Warning(f"You can upload up to {MAX_NUM_IMAGES} images.")
         return False
+
+    # [PDF] A limit on the number of PDFs could also be added here if needed;
+    # for now no limit is enforced.
+
+    # If <image> tags are present, check that they match the number of images
+    if "<image>" in message["text"]:
+        # new_image_count excludes PDFs
+        if message["text"].count("<image>") != new_image_count:
+            gr.Warning("The number of <image> tags in the text does not match the number of images.")
+            return False
+
     return True
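Not part of the commit: a small self-contained illustration of the new PDF filtering plus the <image>-tag check, using made-up file names. count_files_in_new_message is not shown in this diff, so a simple length check stands in for it here.

# Illustration only -- hypothetical file names; len(non_pdf_files) stands in for
# count_files_in_new_message(non_pdf_files), whose body is not part of this diff.
files = ["slides.pdf", "cat.png", "dog.png"]
non_pdf_files = [f for f in files if not f.endswith(".pdf")]  # ["cat.png", "dog.png"]
text = "Compare <image> and <image>."
new_image_count = len(non_pdf_files)
assert text.count("<image>") == new_image_count  # 2 tags, 2 non-PDF images -> passes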
@@ -127,20 +149,65 @@ def process_interleaved_images(message: dict) -> list[dict]:
     return content
 
 
+# [PDF] Add a PDF -> Markdown conversion helper
+def pdf_to_markdown(pdf_path: str) -> str:
+    """
+    Extract the text from a PDF file and return it as simple Markdown.
+    """
+    text_chunks = []
+    with open(pdf_path, "rb") as f:
+        reader = PyPDF2.PdfReader(f)
+        for page_num, page in enumerate(reader.pages, start=1):
+            page_text = page.extract_text()
+            page_text = page_text.strip() if page_text else ""
+            if page_text:
+                # Combine a simple per-page heading with the page body as Markdown
+                text_chunks.append(f"## Page {page_num}\n\n{page_text}\n")
+    return "\n".join(text_chunks)
+
+
 def process_new_user_message(message: dict) -> list[dict]:
+    """
+    Process the text and the files (images/videos/PDFs) of a new user message.
+    """
     if not message["files"]:
         return [{"type": "text", "text": message["text"]}]
 
+    # [PDF] PDF files
+    pdf_files = [f for f in message["files"] if f.endswith(".pdf")]
+    # Image/video files
+    other_files = [f for f in message["files"] if not f.endswith(".pdf")]
+
+    # Put the user's text first
+    content_list = [{"type": "text", "text": message["text"]}]
+
+    # Convert PDFs and append the result
+    for pdf_path in pdf_files:
+        pdf_markdown = pdf_to_markdown(pdf_path)
+        if pdf_markdown.strip():
+            content_list.append({"type": "text", "text": pdf_markdown})
+        else:
+            content_list.append({"type": "text", "text": "(Failed to extract text from the PDF)"})
+
+    # Check whether there is a video
+    video_files = [f for f in other_files if f.endswith(".mp4")]
+    if video_files:
+        # Only one video is expected (already enforced in validate_media_constraints);
+        # if several were passed, only the first one is processed
+        content_list += process_video(video_files[0])
+        return content_list
+
+    # Interleaved images
     if "<image>" in message["text"]:
         return process_interleaved_images(message)
 
+    # Plain images (possibly several)
+    image_files = [f for f in other_files if not f.endswith(".mp4")]
+    if image_files:
+        content_list += [{"type": "image", "url": path} for path in image_files]
+
+    return content_list
 
 
 def process_history(history: list[dict]) -> list[dict]:
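Not part of the commit: a minimal standalone sketch of the content-list shape that the updated process_new_user_message builds for a mixed upload. "report.pdf" and "photo.png" are made-up paths, and the helper below simply mirrors the pdf_to_markdown logic added above.

import PyPDF2

def pdf_to_markdown(pdf_path: str) -> str:
    # Mirrors the helper added in this commit: one "## Page N" heading per page.
    chunks = []
    with open(pdf_path, "rb") as f:
        reader = PyPDF2.PdfReader(f)
        for page_num, page in enumerate(reader.pages, start=1):
            text = (page.extract_text() or "").strip()
            if text:
                chunks.append(f"## Page {page_num}\n\n{text}\n")
    return "\n".join(chunks)

message = {"text": "Summarize the attachments.", "files": ["report.pdf", "photo.png"]}  # hypothetical
content_list = [{"type": "text", "text": message["text"]}]
for path in (f for f in message["files"] if f.endswith(".pdf")):
    content_list.append({"type": "text", "text": pdf_to_markdown(path)})
content_list += [
    {"type": "image", "url": p}
    for p in message["files"]
    if not p.endswith((".pdf", ".mp4"))
]
# content_list now holds the user prompt, the extracted PDF Markdown, and the image entry,
# which is the structure the chat template consumes downstream.
print(content_list)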
@@ -318,26 +385,25 @@ examples = [
     ],
 ]
 
-DESCRIPTION = """\
-<img src='https://huggingface.co/spaces/huggingface-projects/gemma-3-12b-it/resolve/main/assets/logo.png' id='logo' />
-
-This is a demo of Gemma 3 27B it, a vision language model with outstanding performance on a wide range of tasks.
-You can upload images, interleaved images and videos. Note that video input only supports single-turn conversation and mp4 input.
-"""
+
 
+# [PDF] Allow .pdf uploads
 demo = gr.ChatInterface(
     fn=run,
     type="messages",
     chatbot=gr.Chatbot(type="messages", scale=1, allow_tags=["image"]),
-    textbox=gr.MultimodalTextbox(
+    textbox=gr.MultimodalTextbox(
+        file_types=["image", ".mp4", ".pdf"],  # [PDF] allowed
+        file_count="multiple",
+        autofocus=True
+    ),
     multimodal=True,
     additional_inputs=[
-        gr.Textbox(label="System Prompt", value="
-        gr.Slider(label="Max New Tokens", minimum=100, maximum=
+        gr.Textbox(label="System Prompt", value="You are a deeply thoughtful AI. Consider problems thoroughly and derive correct solutions through systematic reasoning. Please answer in Korean."),
+        gr.Slider(label="Max New Tokens", minimum=100, maximum=8000, step=50, value=2000),
     ],
     stop_btn=False,
     title="Gemma 3 27B IT",
-    description=DESCRIPTION,
     examples=examples,
     run_examples_on_click=False,
     cache_examples=False,
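One thing the diff does not show: the new "import PyPDF2" only works on the Space if the package is installed, so the Space's requirements.txt presumably needs a matching entry, for example:

PyPDF2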