KillerKing93 committed on
Commit
67c3421
·
verified ·
1 Parent(s): 9447956

Sync from GitHub c60d35d

Browse files
Files changed (6) hide show
  1. .gitignore +2 -0
  2. README.md +28 -0
  3. download_image.py +7 -0
  4. image.jpg +0 -0
  5. main.py +68 -1
  6. tests/test_api.py +34 -1
.gitignore CHANGED
@@ -2,6 +2,8 @@
2
  node_modules
3
 
4
  .env
 
 
5
 
6
  # Python
7
  .venv/
 
2
  node_modules
3
 
4
  .env
5
+ third_party
6
+ hf-cache
7
 
8
  # Python
9
  .venv/
README.md CHANGED
@@ -26,6 +26,28 @@ Model:
26
 
27
  Node.js artifacts and scripts from the previous project have been removed.
28
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
29
  ## Quick Start
30
 
31
  ### Option 1: Run with Docker (with-model images: CPU / NVIDIA / AMD)
@@ -173,6 +195,12 @@ Endpoints (OpenAI-compatible)
173
  -H "Content-Type: application/json" ^
174
  -d "{\"model\":\"qwen-local\",\"messages\":[{\"role\":\"user\",\"content\":\"Describe this image briefly\"}],\"max_tokens\":4096}"
175
 
 
 
 
 
 
 
176
  Example (PowerShell):
177
  $body = @{
178
  model = "qwen-local"
 
26
 
27
  Node.js artifacts and scripts from the previous project have been removed.
28
 
29
+ ## KTP OCR Endpoint
30
+
31
+ This server includes a KTP (Indonesian ID Card) OCR endpoint that extracts structured data from KTP images using the multimodal model.
32
+
33
+ - Endpoint: `POST /ktp-ocr/`
34
+ - Input: Multipart form-data with `image` field (JPEG/PNG)
35
+ - Output: JSON with fields like `nik`, `nama`, `tempat_lahir`, `tgl_lahir`, `jenis_kelamin`, `alamat` (a nested object with `name`, `rt_rw`, `kel_desa`, `kecamatan`), `agama`, `status_perkawinan`, `pekerjaan`, `kewarganegaraan`, `berlaku_hingga`
36
+
37
+ Example usage:
38
+ ```bash
39
+ curl -X POST http://localhost:3000/ktp-ocr/ \
40
+ -F "image=@image.jpg"
41
+ ```
42
+
43
+ This feature is inspired by [raflyryhnsyh/Gemini-OCR-KTP](https://github.com/raflyryhnsyh/Gemini-OCR-KTP), adapted to use our local Qwen3-VL model instead of Gemini AI.
44
+
45
+ ## Hugging Face Space
46
+
47
+ The project is hosted on Hugging Face Spaces for easy access: [KillerKing93/Transformers-InferenceServer-OpenAPI](https://huggingface.co/spaces/KillerKing93/Transformers-InferenceServer-OpenAPI)
48
+
49
+ You can use the Space's API endpoints directly or access the web UI.
50
+
51
  ## Quick Start
52
 
53
  ### Option 1: Run with Docker (with-model images: CPU / NVIDIA / AMD)
 
195
  -H "Content-Type: application/json" ^
196
  -d "{\"model\":\"qwen-local\",\"messages\":[{\"role\":\"user\",\"content\":\"Describe this image briefly\"}],\"max_tokens\":4096}"
197
 
198
+ - KTP OCR
199
+ POST /ktp-ocr/
200
+ Example (Windows CMD):
201
+ curl -X POST http://localhost:3000/ktp-ocr/ ^
202
+ -F "image=@image.jpg"
203
+
204
  Example (PowerShell):
205
  $body = @{
206
  model = "qwen-local"
download_image.py ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
"""Fetch the sample KTP image used by the KTP-OCR endpoint tests.

Downloads the reference image from the Gemini-OCR-KTP repository and
saves it as ``image.jpg`` in the current working directory.
"""
import requests

URL = "https://raw.githubusercontent.com/raflyryhnsyh/Gemini-OCR-KTP/main/image.jpg"


def main() -> None:
    """Download URL and write the bytes to image.jpg."""
    # Bound the request so a stalled connection cannot hang the script,
    # and fail loudly on HTTP errors instead of silently writing an
    # error page's body to image.jpg.
    response = requests.get(URL, timeout=30)
    response.raise_for_status()
    with open("image.jpg", "wb") as f:
        f.write(response.content)


if __name__ == "__main__":
    main()
image.jpg ADDED
main.py CHANGED
@@ -25,7 +25,7 @@ import tempfile
25
  import contextlib
26
  from typing import Any, Dict, List, Optional, Tuple, Deque, Literal
27
 
28
- from fastapi import FastAPI, HTTPException, Request, Header, Query
29
  from fastapi.middleware.cors import CORSMiddleware
30
  from pydantic import BaseModel, ConfigDict, Field
31
  from starlette.responses import JSONResponse
@@ -954,6 +954,7 @@ tags_metadata = [
954
  {"name": "meta", "description": "Service metadata and OpenAPI schema"},
955
  {"name": "health", "description": "Readiness and runtime info including context window report"},
956
  {"name": "chat", "description": "OpenAI-compatible chat completions (non-stream and streaming SSE)"},
 
957
  ]
958
 
959
  app = FastAPI(
@@ -1296,6 +1297,72 @@ def chat_completions(
1296
  return StreamingResponse(sse_generator(), media_type="text/event-stream", headers=headers)
1297
 
1298
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1299
  @app.post("/v1/cancel/{session_id}", tags=["chat"], response_model=CancelResponse, summary="Cancel a streaming session")
1300
  def cancel_session(session_id: str):
1301
  sess = _STORE.get(session_id)
 
25
  import contextlib
26
  from typing import Any, Dict, List, Optional, Tuple, Deque, Literal
27
 
28
+ from fastapi import FastAPI, HTTPException, Request, Header, Query, UploadFile, File
29
  from fastapi.middleware.cors import CORSMiddleware
30
  from pydantic import BaseModel, ConfigDict, Field
31
  from starlette.responses import JSONResponse
 
954
  {"name": "meta", "description": "Service metadata and OpenAPI schema"},
955
  {"name": "health", "description": "Readiness and runtime info including context window report"},
956
  {"name": "chat", "description": "OpenAI-compatible chat completions (non-stream and streaming SSE)"},
957
+ {"name": "ocr", "description": "Optical Character Recognition endpoints"},
958
  ]
959
 
960
  app = FastAPI(
 
1297
  return StreamingResponse(sse_generator(), media_type="text/event-stream", headers=headers)
1298
 
1299
 
1300
+ @app.post("/ktp-ocr/", tags=["ocr"])
1301
+ async def ktp_ocr(image: UploadFile = File(...)):
1302
+ try:
1303
+ engine = get_engine()
1304
+ except Exception as e:
1305
+ raise HTTPException(status_code=503, detail=f"Model not ready: {e}")
1306
+
1307
+ if not image.content_type.startswith("image/"):
1308
+ raise HTTPException(status_code=400, detail="File provided is not an image.")
1309
+
1310
+ try:
1311
+ # Read image contents
1312
+ contents = await image.read()
1313
+ pil_image = Image.open(io.BytesIO(contents)).convert("RGB")
1314
+
1315
+ # The prompt from the reference project
1316
+ prompt = r"""
1317
+ Ekstrak data KTP Indonesia dari gambar dan kembalikan dalam format JSON berikut:
1318
+ {
1319
+ "nik": "",
1320
+ "nama": "",
1321
+ "tempat_lahir": "",
1322
+ "tgl_lahir": "",
1323
+ "jenis_kelamin": "",
1324
+ "alamat": {
1325
+ "name": "",
1326
+ "rt_rw": "",
1327
+ "kel_desa": "",
1328
+ "kecamatan": "",
1329
+ },
1330
+ "agama": "",
1331
+ "status_perkawinan": "",
1332
+ "pekerjaan": "",
1333
+ "kewarganegaraan": "",
1334
+ "berlaku_hingga": ""
1335
+ }
1336
+ """
1337
+ # Prepare messages for the model
1338
+ messages = [
1339
+ {
1340
+ "role": "user",
1341
+ "content": [
1342
+ {"type": "text", "text": prompt},
1343
+ {"type": "image", "image": pil_image}
1344
+ ],
1345
+ }
1346
+ ]
1347
+
1348
+ # Infer
1349
+ content = engine.infer(messages, max_tokens=1024, temperature=0.1)
1350
+
1351
+ # The model might return the JSON in a code block, so we need to extract it.
1352
+ json_match = re.search(r"```json\n(.*?)```", content, re.DOTALL)
1353
+ if json_match:
1354
+ json_str = json_match.group(1)
1355
+ else:
1356
+ json_str = content
1357
+
1358
+ # Parse the JSON string
1359
+ response_data = json.loads(json_str)
1360
+ return JSONResponse(content=response_data)
1361
+
1362
+ except Exception as e:
1363
+ raise HTTPException(status_code=500, detail=f"Error processing image: {e}")
1364
+
1365
+
1366
  @app.post("/v1/cancel/{session_id}", tags=["chat"], response_model=CancelResponse, summary="Cancel a streaming session")
1367
  def cancel_session(session_id: str):
1368
  sess = _STORE.get(session_id)
tests/test_api.py CHANGED
@@ -271,4 +271,37 @@ def test_edge_large_last_event_id_after_finish_yields_done():
271
  headers = {"Last-Event-ID": "done-session:99999"}
272
  with client.stream("POST", "/v1/chat/completions", headers=headers, json=payload) as resp2:
273
  lines2 = read_sse_lines(resp2)
274
- assert "[DONE]" in "\n".join(lines2)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
271
  headers = {"Last-Event-ID": "done-session:99999"}
272
  with client.stream("POST", "/v1/chat/completions", headers=headers, json=payload) as resp2:
273
  lines2 = read_sse_lines(resp2)
274
+ assert "[DONE]" in "\n".join(lines2)
275
+
276
def test_ktp_ocr_success():
    """The KTP OCR endpoint returns the JSON document produced by the engine."""
    with patched_engine() as fake_engine:
        # Canned KTP payload the fake engine will echo back as its completion.
        expected = {
            "nik": "1234567890123456",
            "nama": "JOHN DOE",
            "tempat_lahir": "JAKARTA",
            "tgl_lahir": "01-01-1990",
            "jenis_kelamin": "LAKI-LAKI",
            "alamat": {
                "name": "JL. JEND. SUDIRMAN KAV. 52-53",
                "rt_rw": "001/001",
                "kel_desa": "SENAYAN",
                "kecamatan": "KEBAYORAN BARU",
            },
            "agama": "ISLAM",
            "status_perkawinan": "KAWIN",
            "pekerjaan": "PEGAWAI SWASTA",
            "kewarganegaraan": "WNI",
            "berlaku_hingga": "SEUMUR HIDUP",
        }
        fake_engine.infer = lambda messages, max_tokens, temperature: json.dumps(expected)

        client = get_client()
        # Upload the sample KTP image as multipart form-data.
        with open("image.jpg", "rb") as f:
            resp = client.post("/ktp-ocr/", files={"image": ("image.jpg", f, "image/jpeg")})

        assert resp.status_code == 200
        payload = resp.json()
        assert payload["nik"] == "1234567890123456"
        assert payload["nama"] == "JOHN DOE"