Sync from GitHub c60d35d
- .gitignore +2 -0
- README.md +28 -0
- download_image.py +7 -0
- image.jpg +0 -0
- main.py +68 -1
- tests/test_api.py +34 -1
.gitignore
CHANGED

@@ -2,6 +2,8 @@
 node_modules
 
 .env
+third_party
+hf-cache
 
 # Python
 .venv/

README.md
CHANGED

@@ -26,6 +26,28 @@ Model:
 
 Node.js artifacts and scripts from the previous project have been removed.
 
+## KTP OCR Endpoint
+
+This server includes a KTP (Indonesian ID Card) OCR endpoint that extracts structured data from KTP images using the multimodal model.
+
+- Endpoint: `POST /ktp-ocr/`
+- Input: Multipart form-data with `image` field (JPEG/PNG)
+- Output: JSON with fields like `nik`, `nama`, `tempat_lahir`, `tgl_lahir`, `jenis_kelamin`, `alamat`, `agama`, `status_perkawinan`, `pekerjaan`, `kewarganegaraan`, `berlaku_hingga`
+
+Example usage:
+```bash
+curl -X POST http://localhost:3000/ktp-ocr/ \
+  -F "image=@image.jpg"
+```
+
+This feature is inspired by [raflyryhnsyh/Gemini-OCR-KTP](https://github.com/raflyryhnsyh/Gemini-OCR-KTP), adapted to use our local Qwen3-VL model instead of Gemini AI.
+
+## Hugging Face Space
+
+The project is hosted on Hugging Face Spaces for easy access: [KillerKing93/Transformers-InferenceServer-OpenAPI](https://huggingface.co/spaces/KillerKing93/Transformers-InferenceServer-OpenAPI)
+
+You can use the Space's API endpoints directly or access the web UI.
+
 ## Quick Start
 
 ### Option 1: Run with Docker (with-model images: CPU / NVIDIA / AMD)

@@ -173,6 +195,12 @@ Endpoints (OpenAI-compatible)
   -H "Content-Type: application/json" ^
   -d "{\"model\":\"qwen-local\",\"messages\":[{\"role\":\"user\",\"content\":\"Describe this image briefly\"}],\"max_tokens\":4096}"
 
+- KTP OCR
+  POST /ktp-ocr/
+  Example (Windows CMD):
+  curl -X POST http://localhost:3000/ktp-ocr/ ^
+    -F "image=@image.jpg"
+
 Example (PowerShell):
 $body = @{
   model = "qwen-local"

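The README documents the new endpoint with curl only; calling it from Python is just as direct. A minimal sketch with `requests`, assuming the server is reachable at `http://localhost:3000` as in the README example and that `image.jpg` exists locally:

```python
import requests

# Upload a KTP image as multipart form-data; the field name must be "image",
# matching the UploadFile parameter added in main.py.
with open("image.jpg", "rb") as f:
    resp = requests.post(
        "http://localhost:3000/ktp-ocr/",
        files={"image": ("image.jpg", f, "image/jpeg")},
        timeout=120,  # generous timeout, local inference can be slow
    )

resp.raise_for_status()
data = resp.json()
print(data.get("nik"), data.get("nama"))
```
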
download_image.py
ADDED

@@ -0,0 +1,7 @@
+import requests
+
+url = "https://raw.githubusercontent.com/raflyryhnsyh/Gemini-OCR-KTP/main/image.jpg"
+response = requests.get(url)
+
+with open("image.jpg", "wb") as f:
+    f.write(response.content)

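This helper fetches the sample image that the new test later reads from disk, but it writes whatever the server returns, even an error page. A slightly more defensive variant (a sketch, not part of this commit) fails fast on a bad download:

```python
import requests

# Sample KTP image from the reference project, consumed by test_ktp_ocr_success.
url = "https://raw.githubusercontent.com/raflyryhnsyh/Gemini-OCR-KTP/main/image.jpg"

response = requests.get(url, timeout=30)
response.raise_for_status()  # abort instead of silently saving an HTML error page

with open("image.jpg", "wb") as f:
    f.write(response.content)
```
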
image.jpg
ADDED

main.py
CHANGED

@@ -25,7 +25,7 @@ import tempfile
 import contextlib
 from typing import Any, Dict, List, Optional, Tuple, Deque, Literal
 
-from fastapi import FastAPI, HTTPException, Request, Header, Query
+from fastapi import FastAPI, HTTPException, Request, Header, Query, UploadFile, File
 from fastapi.middleware.cors import CORSMiddleware
 from pydantic import BaseModel, ConfigDict, Field
 from starlette.responses import JSONResponse

@@ -954,6 +954,7 @@ tags_metadata = [
     {"name": "meta", "description": "Service metadata and OpenAPI schema"},
     {"name": "health", "description": "Readiness and runtime info including context window report"},
     {"name": "chat", "description": "OpenAI-compatible chat completions (non-stream and streaming SSE)"},
+    {"name": "ocr", "description": "Optical Character Recognition endpoints"},
 ]
 
 app = FastAPI(

@@ -1296,6 +1297,72 @@ def chat_completions(
     return StreamingResponse(sse_generator(), media_type="text/event-stream", headers=headers)
 
 
+@app.post("/ktp-ocr/", tags=["ocr"])
+async def ktp_ocr(image: UploadFile = File(...)):
+    try:
+        engine = get_engine()
+    except Exception as e:
+        raise HTTPException(status_code=503, detail=f"Model not ready: {e}")
+
+    if not image.content_type.startswith("image/"):
+        raise HTTPException(status_code=400, detail="File provided is not an image.")
+
+    try:
+        # Read image contents
+        contents = await image.read()
+        pil_image = Image.open(io.BytesIO(contents)).convert("RGB")
+
+        # The prompt from the reference project
+        prompt = r"""
+        Ekstrak data KTP Indonesia dari gambar dan kembalikan dalam format JSON berikut:
+        {
+            "nik": "",
+            "nama": "",
+            "tempat_lahir": "",
+            "tgl_lahir": "",
+            "jenis_kelamin": "",
+            "alamat": {
+                "name": "",
+                "rt_rw": "",
+                "kel_desa": "",
+                "kecamatan": "",
+            },
+            "agama": "",
+            "status_perkawinan": "",
+            "pekerjaan": "",
+            "kewarganegaraan": "",
+            "berlaku_hingga": ""
+        }
+        """
+        # Prepare messages for the model
+        messages = [
+            {
+                "role": "user",
+                "content": [
+                    {"type": "text", "text": prompt},
+                    {"type": "image", "image": pil_image}
+                ],
+            }
+        ]
+
+        # Infer
+        content = engine.infer(messages, max_tokens=1024, temperature=0.1)
+
+        # The model might return the JSON in a code block, so we need to extract it.
+        json_match = re.search(r"```json\n(.*?)```", content, re.DOTALL)
+        if json_match:
+            json_str = json_match.group(1)
+        else:
+            json_str = content
+
+        # Parse the JSON string
+        response_data = json.loads(json_str)
+        return JSONResponse(content=response_data)
+
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=f"Error processing image: {e}")
+
+
 @app.post("/v1/cancel/{session_id}", tags=["chat"], response_model=CancelResponse, summary="Cancel a streaming session")
 def cancel_session(session_id: str):
     sess = _STORE.get(session_id)

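The new handler assumes the model returns either bare JSON or JSON wrapped in a fenced `json` code block; anything else falls through to json.loads unmodified and surfaces as a 500. If that proves brittle, a more tolerant extraction helper could be swapped in. A sketch (hypothetical, not part of this commit):

```python
import json
import re
from typing import Any, Dict


def extract_json(content: str) -> Dict[str, Any]:
    """Best-effort extraction of a JSON object from model output.

    Tries, in order: a fenced code block (with or without a "json" tag),
    the raw string, and finally the first {...} span in the text.
    """
    fenced = re.search(r"```(?:json)?\s*(.*?)```", content, re.DOTALL)
    if fenced:
        content = fenced.group(1)
    try:
        return json.loads(content)
    except json.JSONDecodeError:
        start, end = content.find("{"), content.rfind("}")
        if start != -1 and end > start:
            return json.loads(content[start:end + 1])
        raise
```
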
tests/test_api.py
CHANGED

@@ -271,4 +271,37 @@ def test_edge_large_last_event_id_after_finish_yields_done():
     headers = {"Last-Event-ID": "done-session:99999"}
     with client.stream("POST", "/v1/chat/completions", headers=headers, json=payload) as resp2:
         lines2 = read_sse_lines(resp2)
-        assert "[DONE]" in "\n".join(lines2)
+        assert "[DONE]" in "\n".join(lines2)
+
+def test_ktp_ocr_success():
+    with patched_engine() as fake_engine:
+        # Configure the fake engine to return a specific JSON structure for this test
+        expected_json = {
+            "nik": "1234567890123456",
+            "nama": "JOHN DOE",
+            "tempat_lahir": "JAKARTA",
+            "tgl_lahir": "01-01-1990",
+            "jenis_kelamin": "LAKI-LAKI",
+            "alamat": {
+                "name": "JL. JEND. SUDIRMAN KAV. 52-53",
+                "rt_rw": "001/001",
+                "kel_desa": "SENAYAN",
+                "kecamatan": "KEBAYORAN BARU",
+            },
+            "agama": "ISLAM",
+            "status_perkawinan": "KAWIN",
+            "pekerjaan": "PEGAWAI SWASTA",
+            "kewarganegaraan": "WNI",
+            "berlaku_hingga": "SEUMUR HIDUP",
+        }
+        fake_engine.infer = lambda messages, max_tokens, temperature: json.dumps(expected_json)
+
+        client = get_client()
+        with open("image.jpg", "rb") as f:
+            files = {"image": ("image.jpg", f, "image/jpeg")}
+            r = client.post("/ktp-ocr/", files=files)
+
+        assert r.status_code == 200
+        body = r.json()
+        assert body["nik"] == "1234567890123456"
+        assert body["nama"] == "JOHN DOE"