Spaces:
Running
Running
update image text extract
Browse files- README.md +18 -0
- app.py +47 -3
- requirements.txt +4 -1
README.md
CHANGED
|
@@ -21,6 +21,7 @@ AnyCoder is an AI-powered code generator that helps you create applications by d
|
|
| 21 |
|
| 22 |
- **Multi-Model Support**: Choose from various AI models including DeepSeek, ERNIE-4.5-VL, MiniMax, and Qwen
|
| 23 |
- **Image-to-Code**: Upload UI design images and get corresponding HTML/CSS code (ERNIE-4.5-VL model)
|
|
|
|
| 24 |
- **Live Preview**: See your generated code in action with the built-in sandbox
|
| 25 |
- **Web Search Integration**: Enable real-time web search to get the latest information and best practices
|
| 26 |
- **Chat History**: Keep track of your conversations and generated code
|
|
@@ -75,6 +76,23 @@ The web search feature uses Tavily to provide real-time information when generat
|
|
| 75 |
|
| 76 |
When enabled, the AI will search the web for the latest information, best practices, and technologies related to your request.
|
| 77 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 78 |
## Available Models
|
| 79 |
|
| 80 |
- **DeepSeek V3**: Advanced code generation model
|
|
|
|
| 21 |
|
| 22 |
- **Multi-Model Support**: Choose from various AI models including DeepSeek, ERNIE-4.5-VL, MiniMax, and Qwen
|
| 23 |
- **Image-to-Code**: Upload UI design images and get corresponding HTML/CSS code (ERNIE-4.5-VL model)
|
| 24 |
+
- **Image Text Extraction**: Upload images and extract text using OCR for processing
|
| 25 |
- **Live Preview**: See your generated code in action with the built-in sandbox
|
| 26 |
- **Web Search Integration**: Enable real-time web search to get the latest information and best practices
|
| 27 |
- **Chat History**: Keep track of your conversations and generated code
|
|
|
|
| 76 |
|
| 77 |
When enabled, the AI will search the web for the latest information, best practices, and technologies related to your request.
|
| 78 |
|
| 79 |
+
## Image Text Extraction
|
| 80 |
+
|
| 81 |
+
The application supports extracting text from images using OCR (Optical Character Recognition). This feature allows you to:
|
| 82 |
+
|
| 83 |
+
1. Upload image files (JPG, PNG, BMP, TIFF, GIF, WebP) through the file input
|
| 84 |
+
2. Automatically extract text from the images using Tesseract OCR
|
| 85 |
+
3. Include the extracted text in your prompts for code generation
|
| 86 |
+
|
| 87 |
+
### Setting up OCR
|
| 88 |
+
|
| 89 |
+
To use the image text extraction feature, you need to install Tesseract OCR on your system. See `install_tesseract.md` for detailed installation instructions.
|
| 90 |
+
|
| 91 |
+
**Example usage:**
|
| 92 |
+
- Upload an image containing text (like a screenshot, document, or handwritten notes)
|
| 93 |
+
- The application will extract the text and include it in your prompt
|
| 94 |
+
- You can then ask the AI to process, summarize, or work with the extracted text
|
| 95 |
+
|
| 96 |
## Available Models
|
| 97 |
|
| 98 |
- **DeepSeek V3**: Advanced code generation model
|
app.py
CHANGED
|
@@ -6,6 +6,10 @@ import base64
|
|
| 6 |
import mimetypes
|
| 7 |
import PyPDF2
|
| 8 |
import docx
|
|
|
|
|
|
|
|
|
|
|
|
|
| 9 |
|
| 10 |
import gradio as gr
|
| 11 |
from huggingface_hub import InferenceClient
|
|
@@ -116,6 +120,10 @@ DEMO_LIST = [
|
|
| 116 |
{
|
| 117 |
"title": "UI from Image",
|
| 118 |
"description": "Upload an image of a UI design and I'll generate the HTML/CSS code for it"
|
|
|
|
|
|
|
|
|
|
|
|
|
| 119 |
}
|
| 120 |
]
|
| 121 |
|
|
@@ -372,6 +380,38 @@ def demo_card_click(e: gr.EventData):
|
|
| 372 |
# Return the first demo description as fallback
|
| 373 |
return DEMO_LIST[0]['description']
|
| 374 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 375 |
def extract_text_from_file(file_path):
|
| 376 |
if not file_path:
|
| 377 |
return ""
|
|
@@ -391,6 +431,8 @@ def extract_text_from_file(file_path):
|
|
| 391 |
elif ext == ".docx":
|
| 392 |
doc = docx.Document(file_path)
|
| 393 |
return "\n".join([para.text for para in doc.paragraphs])
|
|
|
|
|
|
|
| 394 |
else:
|
| 395 |
return ""
|
| 396 |
except Exception as e:
|
|
@@ -471,7 +513,7 @@ with gr.Blocks(theme=gr.themes.Base(), title="AnyCoder - AI Code Generator") as
|
|
| 471 |
gr.Markdown("# AnyCoder\nAI-Powered Code Generator")
|
| 472 |
gr.Markdown("""Describe your app or UI in plain English. Optionally upload a UI image (for ERNIE model). Click Generate to get code and preview.""")
|
| 473 |
gr.Markdown("**Tip:** For best search results about people or entities, include details like profession, company, or location. Example: 'John Smith software engineer at Google.'")
|
| 474 |
-
gr.Markdown("**Tip:** You can attach a file (PDF, TXT, DOCX, CSV, MD) to use as reference for your prompt, e.g. 'Summarize this PDF.'")
|
| 475 |
input = gr.Textbox(
|
| 476 |
label="Describe your application",
|
| 477 |
placeholder="e.g., Create a todo app with add, delete, and mark as complete functionality",
|
|
@@ -482,8 +524,8 @@ with gr.Blocks(theme=gr.themes.Base(), title="AnyCoder - AI Code Generator") as
|
|
| 482 |
visible=False
|
| 483 |
)
|
| 484 |
file_input = gr.File(
|
| 485 |
-
label="Attach a file (PDF, TXT, DOCX, CSV, MD)",
|
| 486 |
-
file_types=[".pdf", ".txt", ".md", ".csv", ".docx"],
|
| 487 |
visible=True
|
| 488 |
)
|
| 489 |
with gr.Row():
|
|
@@ -503,6 +545,8 @@ with gr.Blocks(theme=gr.themes.Base(), title="AnyCoder - AI Code Generator") as
|
|
| 503 |
else:
|
| 504 |
gr.Markdown("✅ **Web Search Available**: Toggle above to enable real-time search")
|
| 505 |
|
|
|
|
|
|
|
| 506 |
gr.Markdown("### Quick Examples")
|
| 507 |
for i, demo_item in enumerate(DEMO_LIST[:5]):
|
| 508 |
demo_card = gr.Button(
|
|
|
|
| 6 |
import mimetypes
|
| 7 |
import PyPDF2
|
| 8 |
import docx
|
| 9 |
+
import cv2
|
| 10 |
+
import numpy as np
|
| 11 |
+
from PIL import Image
|
| 12 |
+
import pytesseract
|
| 13 |
|
| 14 |
import gradio as gr
|
| 15 |
from huggingface_hub import InferenceClient
|
|
|
|
| 120 |
{
|
| 121 |
"title": "UI from Image",
|
| 122 |
"description": "Upload an image of a UI design and I'll generate the HTML/CSS code for it"
|
| 123 |
+
},
|
| 124 |
+
{
|
| 125 |
+
"title": "Extract Text from Image",
|
| 126 |
+
"description": "Upload an image containing text and I'll extract and process the text content"
|
| 127 |
}
|
| 128 |
]
|
| 129 |
|
|
|
|
| 380 |
# Return the first demo description as fallback
|
| 381 |
return DEMO_LIST[0]['description']
|
| 382 |
|
| 383 |
+
def _load_grayscale(image_path):
    """Decode ``image_path`` into a single-channel grayscale array.

    OpenCV is tried first. ``cv2.imread`` signals failure by returning
    ``None`` rather than raising, which happens for formats a given OpenCV
    build may not decode (e.g. GIF -- TODO confirm against the deployed
    OpenCV version). In that case the file is decoded with Pillow instead.

    Returns:
        The grayscale image as a NumPy array, or ``None`` when neither
        library can decode the file.
    """
    image = cv2.imread(image_path)
    if image is not None:
        # imread yields BGR; convert straight to gray instead of via RGB.
        return cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

    try:
        with Image.open(image_path) as pil_image:
            # For multi-frame files Pillow exposes the first frame here.
            rgb = np.array(pil_image.convert("RGB"))
    except (OSError, ValueError):
        return None
    return cv2.cvtColor(rgb, cv2.COLOR_RGB2GRAY)


def extract_text_from_image(image_path):
    """Extract text from an image file using Tesseract OCR.

    The image is decoded, reduced to grayscale, binarized with Otsu's
    threshold and handed to Tesseract with ``--psm 6``.

    Args:
        image_path: Filesystem path of the image to read.

    Returns:
        The recognized text with surrounding whitespace stripped. When no
        text is recognized, or when a step fails, a human-readable message
        is returned instead ("No text found in image" or a string starting
        with "Error"); exceptions are caught and reported as such a message.
    """
    try:
        # Probe for the Tesseract binary first so the user gets an
        # actionable install hint rather than a low-level failure.
        try:
            pytesseract.get_tesseract_version()
        except Exception:
            return "Error: Tesseract OCR is not installed. Please install Tesseract to extract text from images. See install_tesseract.md for instructions."

        gray = _load_grayscale(image_path)
        if gray is None:
            return "Error: Could not read image file"

        # Otsu picks the threshold automatically; the 0 passed here is ignored.
        _, binary = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)

        text = pytesseract.image_to_string(binary, config='--psm 6').strip()
        return text or "No text found in image"

    except Exception as e:
        return f"Error extracting text from image: {e}"
|
| 414 |
+
|
| 415 |
def extract_text_from_file(file_path):
|
| 416 |
if not file_path:
|
| 417 |
return ""
|
|
|
|
| 431 |
elif ext == ".docx":
|
| 432 |
doc = docx.Document(file_path)
|
| 433 |
return "\n".join([para.text for para in doc.paragraphs])
|
| 434 |
+
elif ext.lower() in [".jpg", ".jpeg", ".png", ".bmp", ".tiff", ".tif", ".gif", ".webp"]:
|
| 435 |
+
return extract_text_from_image(file_path)
|
| 436 |
else:
|
| 437 |
return ""
|
| 438 |
except Exception as e:
|
|
|
|
| 513 |
gr.Markdown("# AnyCoder\nAI-Powered Code Generator")
|
| 514 |
gr.Markdown("""Describe your app or UI in plain English. Optionally upload a UI image (for ERNIE model). Click Generate to get code and preview.""")
|
| 515 |
gr.Markdown("**Tip:** For best search results about people or entities, include details like profession, company, or location. Example: 'John Smith software engineer at Google.'")
|
| 516 |
+
gr.Markdown("**Tip:** You can attach a file (PDF, TXT, DOCX, CSV, MD, Images) to use as reference for your prompt, e.g. 'Summarize this PDF' or 'Extract text from this image'.")
|
| 517 |
input = gr.Textbox(
|
| 518 |
label="Describe your application",
|
| 519 |
placeholder="e.g., Create a todo app with add, delete, and mark as complete functionality",
|
|
|
|
| 524 |
visible=False
|
| 525 |
)
|
| 526 |
file_input = gr.File(
|
| 527 |
+
label="Attach a file (PDF, TXT, DOCX, CSV, MD, Images)",
|
| 528 |
+
file_types=[".pdf", ".txt", ".md", ".csv", ".docx", ".jpg", ".jpeg", ".png", ".bmp", ".tiff", ".tif", ".gif", ".webp"],
|
| 529 |
visible=True
|
| 530 |
)
|
| 531 |
with gr.Row():
|
|
|
|
| 545 |
else:
|
| 546 |
gr.Markdown("✅ **Web Search Available**: Toggle above to enable real-time search")
|
| 547 |
|
| 548 |
+
gr.Markdown("📷 **Image Text Extraction**: Upload images to extract text using OCR (requires Tesseract installation)")
|
| 549 |
+
|
| 550 |
gr.Markdown("### Quick Examples")
|
| 551 |
for i, demo_item in enumerate(DEMO_LIST[:5]):
|
| 552 |
demo_card = gr.Button(
|
requirements.txt
CHANGED
|
@@ -2,4 +2,7 @@ git+https://github.com/huggingface/huggingface_hub.git
|
|
| 2 |
gradio[oauth]
|
| 3 |
tavily-python
|
| 4 |
PyPDF2
|
| 5 |
-
python-docx
|
|
|
|
|
|
|
|
|
|
|
|
| 2 |
gradio[oauth]
|
| 3 |
tavily-python
|
| 4 |
PyPDF2
|
| 5 |
+
python-docx
|
| 6 |
+
pytesseract
|
| 7 |
+
Pillow
|
| 8 |
+
opencv-python
|