KillerKing93 committed on
Commit
67c3421
·
verified ·
1 Parent(s): 9447956

Sync from GitHub c60d35d

Browse files
Files changed (6) hide show
  1. .gitignore +2 -0
  2. README.md +28 -0
  3. download_image.py +7 -0
  4. image.jpg +0 -0
  5. main.py +68 -1
  6. tests/test_api.py +34 -1
.gitignore CHANGED
@@ -2,6 +2,8 @@
2
  node_modules
3
 
4
  .env
 
 
5
 
6
  # Python
7
  .venv/
 
2
  node_modules
3
 
4
  .env
5
+ third_party
6
+ hf-cache
7
 
8
  # Python
9
  .venv/
README.md CHANGED
@@ -26,6 +26,28 @@ Model:
26
 
27
  Node.js artifacts and scripts from the previous project have been removed.
28
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
29
  ## Quick Start
30
 
31
  ### Option 1: Run with Docker (with-model images: CPU / NVIDIA / AMD)
@@ -173,6 +195,12 @@ Endpoints (OpenAI-compatible)
173
  -H "Content-Type: application/json" ^
174
  -d "{\"model\":\"qwen-local\",\"messages\":[{\"role\":\"user\",\"content\":\"Describe this image briefly\"}],\"max_tokens\":4096}"
175
 
 
 
 
 
 
 
176
  Example (PowerShell):
177
  $body = @{
178
  model = "qwen-local"
 
26
 
27
  Node.js artifacts and scripts from the previous project have been removed.
28
 
29
+ ## KTP OCR Endpoint
30
+
31
+ This server includes a KTP (Indonesian ID Card) OCR endpoint that extracts structured data from KTP images using the multimodal model.
32
+
33
+ - Endpoint: `POST /ktp-ocr/`
34
+ - Input: Multipart form-data with `image` field (JPEG/PNG)
35
+ - Output: JSON with fields like `nik`, `nama`, `tempat_lahir`, `tgl_lahir`, `jenis_kelamin`, `alamat` (a nested object with `name`, `rt_rw`, `kel_desa`, `kecamatan`), `agama`, `status_perkawinan`, `pekerjaan`, `kewarganegaraan`, `berlaku_hingga`
36
+
37
+ Example usage:
38
+ ```bash
39
+ curl -X POST http://localhost:3000/ktp-ocr/ \
40
+ -F "image=@image.jpg"
41
+ ```
42
+
43
+ This feature is inspired by [raflyryhnsyh/Gemini-OCR-KTP](https://github.com/raflyryhnsyh/Gemini-OCR-KTP), adapted to use our local Qwen3-VL model instead of Gemini AI.
44
+
45
+ ## Hugging Face Space
46
+
47
+ The project is hosted on Hugging Face Spaces for easy access: [KillerKing93/Transformers-InferenceServer-OpenAPI](https://huggingface.co/spaces/KillerKing93/Transformers-InferenceServer-OpenAPI)
48
+
49
+ You can use the Space's API endpoints directly or access the web UI.
50
+
51
  ## Quick Start
52
 
53
  ### Option 1: Run with Docker (with-model images: CPU / NVIDIA / AMD)
 
195
  -H "Content-Type: application/json" ^
196
  -d "{\"model\":\"qwen-local\",\"messages\":[{\"role\":\"user\",\"content\":\"Describe this image briefly\"}],\"max_tokens\":4096}"
197
 
198
+ - KTP OCR
199
+ POST /ktp-ocr/
200
+ Example (Windows CMD):
201
+ curl -X POST http://localhost:3000/ktp-ocr/ ^
202
+ -F "image=@image.jpg"
203
+
204
  Example (PowerShell):
205
  $body = @{
206
  model = "qwen-local"
download_image.py ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
"""Fetch the sample KTP image used by the KTP-OCR endpoint tests.

Downloads the reference image from the Gemini-OCR-KTP repository and
saves it as ``image.jpg`` in the current working directory.
"""
import requests

URL = "https://raw.githubusercontent.com/raflyryhnsyh/Gemini-OCR-KTP/main/image.jpg"


def main() -> None:
    """Download URL and write the bytes to image.jpg."""
    # Bound the request so a stalled connection cannot hang the script,
    # and fail loudly on HTTP errors instead of silently writing an
    # error page's body to image.jpg.
    response = requests.get(URL, timeout=30)
    response.raise_for_status()
    with open("image.jpg", "wb") as f:
        f.write(response.content)


if __name__ == "__main__":
    main()
image.jpg ADDED
main.py CHANGED
@@ -25,7 +25,7 @@ import tempfile
25
  import contextlib
26
  from typing import Any, Dict, List, Optional, Tuple, Deque, Literal
27
 
28
- from fastapi import FastAPI, HTTPException, Request, Header, Query
29
  from fastapi.middleware.cors import CORSMiddleware
30
  from pydantic import BaseModel, ConfigDict, Field
31
  from starlette.responses import JSONResponse
@@ -954,6 +954,7 @@ tags_metadata = [
954
  {"name": "meta", "description": "Service metadata and OpenAPI schema"},
955
  {"name": "health", "description": "Readiness and runtime info including context window report"},
956
  {"name": "chat", "description": "OpenAI-compatible chat completions (non-stream and streaming SSE)"},
 
957
  ]
958
 
959
  app = FastAPI(
@@ -1296,6 +1297,72 @@ def chat_completions(
1296
  return StreamingResponse(sse_generator(), media_type="text/event-stream", headers=headers)
1297
 
1298
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1299
  @app.post("/v1/cancel/{session_id}", tags=["chat"], response_model=CancelResponse, summary="Cancel a streaming session")
1300
  def cancel_session(session_id: str):
1301
  sess = _STORE.get(session_id)
 
25
  import contextlib
26
  from typing import Any, Dict, List, Optional, Tuple, Deque, Literal
27
 
28
+ from fastapi import FastAPI, HTTPException, Request, Header, Query, UploadFile, File
29
  from fastapi.middleware.cors import CORSMiddleware
30
  from pydantic import BaseModel, ConfigDict, Field
31
  from starlette.responses import JSONResponse
 
954
  {"name": "meta", "description": "Service metadata and OpenAPI schema"},
955
  {"name": "health", "description": "Readiness and runtime info including context window report"},
956
  {"name": "chat", "description": "OpenAI-compatible chat completions (non-stream and streaming SSE)"},
957
+ {"name": "ocr", "description": "Optical Character Recognition endpoints"},
958
  ]
959
 
960
  app = FastAPI(
 
1297
  return StreamingResponse(sse_generator(), media_type="text/event-stream", headers=headers)
1298
 
1299
 
1300
+ @app.post("/ktp-ocr/", tags=["ocr"])
1301
+ async def ktp_ocr(image: UploadFile = File(...)):
1302
+ try:
1303
+ engine = get_engine()
1304
+ except Exception as e:
1305
+ raise HTTPException(status_code=503, detail=f"Model not ready: {e}")
1306
+
1307
+ if not image.content_type.startswith("image/"):
1308
+ raise HTTPException(status_code=400, detail="File provided is not an image.")
1309
+
1310
+ try:
1311
+ # Read image contents
1312
+ contents = await image.read()
1313
+ pil_image = Image.open(io.BytesIO(contents)).convert("RGB")
1314
+
1315
+ # The prompt from the reference project
1316
+ prompt = r"""
1317
+ Ekstrak data KTP Indonesia dari gambar dan kembalikan dalam format JSON berikut:
1318
+ {
1319
+ "nik": "",
1320
+ "nama": "",
1321
+ "tempat_lahir": "",
1322
+ "tgl_lahir": "",
1323
+ "jenis_kelamin": "",
1324
+ "alamat": {
1325
+ "name": "",
1326
+ "rt_rw": "",
1327
+ "kel_desa": "",
1328
+ "kecamatan": "",
1329
+ },
1330
+ "agama": "",
1331
+ "status_perkawinan": "",
1332
+ "pekerjaan": "",
1333
+ "kewarganegaraan": "",
1334
+ "berlaku_hingga": ""
1335
+ }
1336
+ """
1337
+ # Prepare messages for the model
1338
+ messages = [
1339
+ {
1340
+ "role": "user",
1341
+ "content": [
1342
+ {"type": "text", "text": prompt},
1343
+ {"type": "image", "image": pil_image}
1344
+ ],
1345
+ }
1346
+ ]
1347
+
1348
+ # Infer
1349
+ content = engine.infer(messages, max_tokens=1024, temperature=0.1)
1350
+
1351
+ # The model might return the JSON in a code block, so we need to extract it.
1352
+ json_match = re.search(r"```json\n(.*?)```", content, re.DOTALL)
1353
+ if json_match:
1354
+ json_str = json_match.group(1)
1355
+ else:
1356
+ json_str = content
1357
+
1358
+ # Parse the JSON string
1359
+ response_data = json.loads(json_str)
1360
+ return JSONResponse(content=response_data)
1361
+
1362
+ except Exception as e:
1363
+ raise HTTPException(status_code=500, detail=f"Error processing image: {e}")
1364
+
1365
+
1366
  @app.post("/v1/cancel/{session_id}", tags=["chat"], response_model=CancelResponse, summary="Cancel a streaming session")
1367
  def cancel_session(session_id: str):
1368
  sess = _STORE.get(session_id)
tests/test_api.py CHANGED
@@ -271,4 +271,37 @@ def test_edge_large_last_event_id_after_finish_yields_done():
271
  headers = {"Last-Event-ID": "done-session:99999"}
272
  with client.stream("POST", "/v1/chat/completions", headers=headers, json=payload) as resp2:
273
  lines2 = read_sse_lines(resp2)
274
- assert "[DONE]" in "\n".join(lines2)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
271
  headers = {"Last-Event-ID": "done-session:99999"}
272
  with client.stream("POST", "/v1/chat/completions", headers=headers, json=payload) as resp2:
273
  lines2 = read_sse_lines(resp2)
274
+ assert "[DONE]" in "\n".join(lines2)
275
+
276
def test_ktp_ocr_success():
    """The KTP OCR endpoint returns the JSON document produced by the engine."""
    with patched_engine() as fake_engine:
        # Canned KTP payload the fake engine will echo back as its completion.
        expected = {
            "nik": "1234567890123456",
            "nama": "JOHN DOE",
            "tempat_lahir": "JAKARTA",
            "tgl_lahir": "01-01-1990",
            "jenis_kelamin": "LAKI-LAKI",
            "alamat": {
                "name": "JL. JEND. SUDIRMAN KAV. 52-53",
                "rt_rw": "001/001",
                "kel_desa": "SENAYAN",
                "kecamatan": "KEBAYORAN BARU",
            },
            "agama": "ISLAM",
            "status_perkawinan": "KAWIN",
            "pekerjaan": "PEGAWAI SWASTA",
            "kewarganegaraan": "WNI",
            "berlaku_hingga": "SEUMUR HIDUP",
        }
        fake_engine.infer = lambda messages, max_tokens, temperature: json.dumps(expected)

        client = get_client()
        # Upload the sample KTP image as multipart form-data.
        with open("image.jpg", "rb") as f:
            resp = client.post("/ktp-ocr/", files={"image": ("image.jpg", f, "image/jpeg")})

        assert resp.status_code == 200
        payload = resp.json()
        assert payload["nik"] == "1234567890123456"
        assert payload["nama"] == "JOHN DOE"