VanguardAI committed on
Commit
f553130
·
verified ·
1 Parent(s): ec099ff

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +118 -38
app.py CHANGED
@@ -228,11 +228,12 @@ def extract_text_from_image(
228
  def create_gradio_interface():
229
  """Create the Gradio interface for AIN OCR."""
230
 
231
- # Custom CSS
232
  css = """
233
  .main-container {
234
- max-width: 1200px;
235
  margin: 0 auto;
 
236
  }
237
 
238
  .header-text {
@@ -248,6 +249,8 @@ def create_gradio_interface():
248
  font-weight: bold !important;
249
  font-size: 1.1em !important;
250
  padding: 12px 24px !important;
 
 
251
  }
252
 
253
  .process-button:hover {
@@ -255,15 +258,27 @@ def create_gradio_interface():
255
  box-shadow: 0 6px 12px rgba(0,0,0,0.2) !important;
256
  }
257
 
258
- .output-text {
259
- background: #f8f9fa;
260
- border: 2px solid #dee2e6;
 
 
 
 
 
 
 
 
 
 
261
  border-radius: 8px;
262
- padding: 20px;
263
- min-height: 300px;
264
- font-family: 'Courier New', monospace;
265
- white-space: pre-wrap;
266
- direction: auto;
 
 
267
  }
268
 
269
  .info-box {
@@ -273,6 +288,63 @@ def create_gradio_interface():
273
  margin: 10px 0;
274
  border-radius: 4px;
275
  }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
276
  """
277
 
278
  with gr.Blocks(theme=gr.themes.Soft(), css=css, title="AIN VLM OCR") as demo:
@@ -299,22 +371,23 @@ def create_gradio_interface():
299
  """)
300
 
301
  # Main interface
302
- with gr.Row():
303
  # Left column - Input
304
- with gr.Column(scale=1):
305
  # Image input
306
  image_input = gr.Image(
307
  label="πŸ“Έ Upload Image",
308
  type="pil",
309
- height=400
 
310
  )
311
 
312
  # Advanced settings
313
- with gr.Accordion("βš™οΈ Advanced Settings", open=False):
314
  custom_prompt = gr.Textbox(
315
  label="Custom Prompt (Optional)",
316
  placeholder="Leave empty to use default OCR prompt...",
317
- lines=4,
318
  info="Customize the prompt if you want specific extraction behavior"
319
  )
320
 
@@ -327,22 +400,24 @@ def create_gradio_interface():
327
  info="Maximum length of extracted text"
328
  )
329
 
330
- gr.Markdown("**Image Resolution Settings**")
331
- gr.Markdown("*Controls the range of visual tokens (4-16384) for balancing quality and speed*")
332
 
333
  with gr.Row():
334
  min_pixels_input = gr.Number(
335
  value=MIN_PIXELS,
336
  label="Min Pixels",
337
- info=f"Default: {MIN_PIXELS:,} (~{MIN_PIXELS//1000}k)"
 
338
  )
339
  max_pixels_input = gr.Number(
340
  value=MAX_PIXELS,
341
  label="Max Pixels",
342
- info=f"Default: {MAX_PIXELS:,} (~{MAX_PIXELS//1000}k)"
 
343
  )
344
 
345
- show_prompt_btn = gr.Button("πŸ‘οΈ Show Default Prompt", size="sm")
346
 
347
  # Process button
348
  process_btn = gr.Button(
@@ -356,35 +431,40 @@ def create_gradio_interface():
356
  clear_btn = gr.Button("πŸ—‘οΈ Clear All", variant="secondary", size="lg")
357
 
358
  # Right column - Output
359
- with gr.Column(scale=1):
360
- # Text output
361
  text_output = gr.Textbox(
362
  label="πŸ“ Extracted Text",
363
  placeholder="Extracted text will appear here...",
364
- lines=20,
365
- max_lines=25,
366
  show_copy_button=True,
367
  interactive=False,
368
- elem_classes=["output-text"]
 
369
  )
370
 
371
  # Status/info
372
  status_output = gr.Markdown(
373
- value="*Ready to process images*",
374
- elem_classes=["info-box"]
375
  )
376
 
377
- # Examples
378
- gr.Markdown("### πŸ“š Example Images")
379
- gr.Examples(
380
- examples=[
381
- ["image/app/1762329983969.png"],
382
- ["image/app/1762330009302.png"],
383
- ["image/app/1762330020168.png"],
384
- ],
385
- inputs=image_input,
386
- label="Try these examples"
387
- )
 
 
 
 
388
 
389
  # Default prompt display
390
  default_prompt_display = gr.Textbox(
 
228
  def create_gradio_interface():
229
  """Create the Gradio interface for AIN OCR."""
230
 
231
+ # Custom CSS for better UI
232
  css = """
233
  .main-container {
234
+ max-width: 1400px;
235
  margin: 0 auto;
236
+ padding: 20px;
237
  }
238
 
239
  .header-text {
 
249
  font-weight: bold !important;
250
  font-size: 1.1em !important;
251
  padding: 12px 24px !important;
252
+ width: 100% !important;
253
+ margin-top: 10px !important;
254
  }
255
 
256
  .process-button:hover {
 
258
  box-shadow: 0 6px 12px rgba(0,0,0,0.2) !important;
259
  }
260
 
261
+ /* Larger font for extracted text */
262
+ .output-textbox textarea {
263
+ font-size: 20px !important;
264
+ line-height: 2.0 !important;
265
+ font-family: 'Segoe UI', 'Tahoma', 'Traditional Arabic', 'Arabic Typesetting', sans-serif !important;
266
+ padding: 24px !important;
267
+ direction: auto !important;
268
+ text-align: start !important;
269
+ }
270
+
271
+ .output-textbox {
272
+ background: #ffffff;
273
+ border: 2px solid #e0e0e0;
274
  border-radius: 8px;
275
+ box-shadow: 0 2px 8px rgba(0,0,0,0.1);
276
+ }
277
+
278
+ /* Better Arabic text support */
279
+ .output-textbox textarea[dir="rtl"] {
280
+ text-align: right !important;
281
+ direction: rtl !important;
282
  }
283
 
284
  .info-box {
 
288
  margin: 10px 0;
289
  border-radius: 4px;
290
  }
291
+
292
+ /* Status box styling */
293
+ .status-box {
294
+ background: #f0f4f8;
295
+ border: 1px solid #d0dae6;
296
+ border-radius: 6px;
297
+ padding: 12px;
298
+ margin-top: 10px;
299
+ text-align: center;
300
+ font-size: 14px;
301
+ }
302
+
303
+ /* Better spacing for rows and columns */
304
+ .gradio-container {
305
+ gap: 20px !important;
306
+ }
307
+
308
+ .contain {
309
+ gap: 15px !important;
310
+ }
311
+
312
+ /* Image preview styling */
313
+ .image-preview {
314
+ border: 2px solid #e0e0e0;
315
+ border-radius: 8px;
316
+ box-shadow: 0 2px 8px rgba(0,0,0,0.1);
317
+ }
318
+
319
+ /* Accordion styling */
320
+ .accordion {
321
+ background: #f8f9fa;
322
+ border-radius: 8px;
323
+ margin-top: 15px;
324
+ padding: 5px;
325
+ }
326
+
327
+ /* Clear button */
328
+ button[variant="secondary"] {
329
+ width: 100% !important;
330
+ margin-top: 10px !important;
331
+ }
332
+
333
+ /* Label styling */
334
+ label {
335
+ font-weight: 600 !important;
336
+ margin-bottom: 8px !important;
337
+ }
338
+
339
+ /* Better component spacing */
340
+ .gr-form {
341
+ gap: 12px !important;
342
+ }
343
+
344
+ /* Example images styling */
345
+ .gr-examples {
346
+ margin-top: 15px;
347
+ }
348
  """
349
 
350
  with gr.Blocks(theme=gr.themes.Soft(), css=css, title="AIN VLM OCR") as demo:
 
371
  """)
372
 
373
  # Main interface
374
+ with gr.Row(equal_height=False):
375
  # Left column - Input
376
+ with gr.Column(scale=1, min_width=400):
377
  # Image input
378
  image_input = gr.Image(
379
  label="πŸ“Έ Upload Image",
380
  type="pil",
381
+ height=400,
382
+ elem_classes=["image-preview"]
383
  )
384
 
385
  # Advanced settings
386
+ with gr.Accordion("βš™οΈ Advanced Settings", open=False, elem_classes=["accordion"]):
387
  custom_prompt = gr.Textbox(
388
  label="Custom Prompt (Optional)",
389
  placeholder="Leave empty to use default OCR prompt...",
390
+ lines=3,
391
  info="Customize the prompt if you want specific extraction behavior"
392
  )
393
 
 
400
  info="Maximum length of extracted text"
401
  )
402
 
403
+ gr.Markdown("**πŸ“ Image Resolution Settings**")
404
+ gr.Markdown("*Controls visual token range (4-16384) - balance quality vs speed*")
405
 
406
  with gr.Row():
407
  min_pixels_input = gr.Number(
408
  value=MIN_PIXELS,
409
  label="Min Pixels",
410
+ info=f"Default: {MIN_PIXELS:,} (~{MIN_PIXELS//1000}k)",
411
+ precision=0
412
  )
413
  max_pixels_input = gr.Number(
414
  value=MAX_PIXELS,
415
  label="Max Pixels",
416
+ info=f"Default: {MAX_PIXELS:,} (~{MAX_PIXELS//1000}k)",
417
+ precision=0
418
  )
419
 
420
+ show_prompt_btn = gr.Button("πŸ‘οΈ Show Default Prompt", size="sm", variant="secondary")
421
 
422
  # Process button
423
  process_btn = gr.Button(
 
431
  clear_btn = gr.Button("πŸ—‘οΈ Clear All", variant="secondary", size="lg")
432
 
433
  # Right column - Output
434
+ with gr.Column(scale=1, min_width=500):
435
+ # Text output with larger font
436
  text_output = gr.Textbox(
437
  label="πŸ“ Extracted Text",
438
  placeholder="Extracted text will appear here...",
439
+ lines=18,
440
+ max_lines=22,
441
  show_copy_button=True,
442
  interactive=False,
443
+ elem_classes=["output-textbox"],
444
+ container=True,
445
  )
446
 
447
  # Status/info
448
  status_output = gr.Markdown(
449
+ value="✨ *Ready to process images*",
450
+ elem_classes=["status-box"]
451
  )
452
 
453
+ # Examples section
454
+ with gr.Row():
455
+ with gr.Column():
456
+ gr.Markdown("### πŸ“š Example Images")
457
+ gr.Markdown("*Click on any example below to load it*")
458
+ gr.Examples(
459
+ examples=[
460
+ ["image/app/1762329983969.png"],
461
+ ["image/app/1762330009302.png"],
462
+ ["image/app/1762330020168.png"],
463
+ ],
464
+ inputs=image_input,
465
+ label="",
466
+ examples_per_page=3
467
+ )
468
 
469
  # Default prompt display
470
  default_prompt_display = gr.Textbox(