|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
import os, re, glob, argparse, sys, time |
|
|
from pathlib import Path |
|
|
import numpy as np |
|
|
from PIL import Image |
|
|
import tensorflow as tf |
|
|
from tensorflow.keras import layers, models, backend as K |
|
|
|
|
|
|
|
|
def parse_args(argv=None):
    """Parse the command-line options of the CRNN+CTC inference test.

    Args:
        argv: Optional list of argument strings. ``None`` (the default) lets
            argparse read ``sys.argv[1:]``, which is what the script entry
            point relies on; passing a list makes the parser usable from
            other code.

    Returns:
        argparse.Namespace with the attributes ``weights``, ``image``, ``gt``,
        ``data_root``, ``samples``, ``height``, ``width``, ``ext`` and ``show``.
    """
    # The text must go in ``description``: the first positional parameter of
    # ArgumentParser is ``prog``, so passing it positionally would replace the
    # program name in every usage/error message.
    parser = argparse.ArgumentParser(
        description="Test inference CRNN+CTC dari weights Keras 3 (model_with_ctc.save_weights)."
    )
    parser.add_argument("--weights", required=True, help="Path ke *.weights.h5 (hasil save_weights).")
    parser.add_argument("--image", help="Uji 1 gambar (PNG/JPG). Nama file jadi GT jika --gt tidak diisi.")
    parser.add_argument("--gt", help="Ground truth untuk --image (opsional, default dari nama file).")
    parser.add_argument("--data-root", help="Root dataset berisi style0..style59/LABEL.png untuk batch test.")
    parser.add_argument("--samples", type=int, default=64, help="Jumlah sampel di batch test.")
    parser.add_argument("--height", type=int, default=50)
    parser.add_argument("--width", type=int, default=250)
    parser.add_argument("--ext", type=str, default="png")
    parser.add_argument("--show", type=int, default=12, help="Banyak baris contoh yang ditampilkan.")
    return parser.parse_args(argv)
|
|
|
|
|
|
|
|
# Alphabet of the recogniser; a character's position in this list is its
# class id (digits first, then upper-case letters).
CHARSET = [*"0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"]

# The CTC blank uses the id immediately after the last real character.
BLANK_ID = len(CHARSET)

# Array form of the alphabet so a list of ids can be mapped to characters
# with a single fancy-index.
ID2CHAR = np.asarray(CHARSET)
|
|
|
|
|
def collapse_and_strip_blanks(seq_ids, blank_id=BLANK_ID):
    """Greedy CTC post-processing of one sequence of per-step class ids.

    Consecutive duplicates are merged into one id and every ``blank_id`` is
    removed. A blank between two equal ids keeps them as two separate
    symbols, because the blank resets the "previous id" tracker.

    Args:
        seq_ids: Iterable of class ids, one per time step.
        blank_id: Id treated as the CTC blank.

    Returns:
        List of the surviving ids, in order.
    """
    decoded = []
    last_seen = -1  # sentinel that never equals a real class id
    for token in seq_ids:
        repeated = token == last_seen
        last_seen = token
        if repeated or token == blank_id:
            continue
        decoded.append(token)
    return decoded
|
|
|
|
|
def ids_to_text(ids):
    """Convert class ids to their string, silently dropping out-of-range ids.

    Args:
        ids: Iterable of integer class ids.

    Returns:
        The characters of ``CHARSET`` at the valid ids, concatenated; an
        empty string when no id is within ``[0, len(CHARSET))``.
    """
    in_range = []
    for idx in ids:
        if 0 <= idx < len(CHARSET):
            in_range.append(idx)
    if not in_range:
        # Indexing the array with an empty list is avoided on purpose.
        return ""
    return "".join(ID2CHAR[in_range])
|
|
|
|
|
def cer(pred, gt):
    """Return the character error rate of ``pred`` against ``gt``.

    The rate is the Levenshtein distance (unit cost for insertion, deletion
    and substitution) divided by ``len(gt)``.

    Args:
        pred: Predicted sequence (typically a ``str``).
        gt: Ground-truth sequence (typically a ``str``).

    Returns:
        A Python ``float``. When ``gt`` is empty the result is ``0.0`` if
        ``pred`` is empty too, otherwise ``1.0``.
    """
    m, n = len(pred), len(gt)
    if n == 0:
        return 0.0 if m == 0 else 1.0

    # Two rolling rows of plain ints: avoids per-cell NumPy scalar indexing
    # from a Python loop and keeps memory at O(n) instead of O(m * n).
    # prev_row[j] is the distance between pred[:i-1] and gt[:j].
    prev_row = list(range(n + 1))
    for i in range(1, m + 1):
        pred_ch = pred[i - 1]
        cur_row = [i] + [0] * n
        for j in range(1, n + 1):
            cur_row[j] = min(
                prev_row[j] + 1,                            # drop pred[i-1]
                cur_row[j - 1] + 1,                         # insert gt[j-1]
                prev_row[j - 1] + (pred_ch != gt[j - 1]),   # match / substitute
            )
        prev_row = cur_row

    # Plain int / int division, so the result is always a built-in float.
    return prev_row[n] / n
|
|
|
|
|
|
|
|
def build_models(h=50, w=250, num_classes=len(CHARSET)+1):
    """Build the CRNN graph twice: once wrapped with a CTC loss, once bare.

    Both returned models are built from the same layer objects, so weights
    loaded into one are visible through the other.

    Args:
        h: Input image height in pixels.
        w: Input image width in pixels.
        num_classes: Size of the softmax output (alphabet plus CTC blank).

    Returns:
        Tuple ``(model_with_ctc, base_model)``:
        ``model_with_ctc`` takes ``[image, labels, input_length,
        label_length]`` and outputs the CTC loss; ``base_model`` takes the
        image only and outputs the per-step softmax.
    """
    image_in = layers.Input(shape=(h, w, 1), name="input")

    # Three identical conv stages that differ only in filter count. Layers
    # are created in the same order as before so auto-generated layer names
    # stay the same.
    features = image_in
    for n_filters in (32, 64, 128):
        features = layers.Conv2D(n_filters, (3, 3), activation="relu", padding="same")(features)
        features = layers.BatchNormalization()(features)
        features = layers.MaxPooling2D((2, 2))(features)

    # Static shape is (batch, height, width, channels); the sequence fed to
    # the LSTMs has `width` steps of `height * channels` features.
    # NOTE(review): this is a plain Reshape with no Permute in front of it;
    # it has to stay exactly like this to match whatever graph produced the
    # saved weights — confirm against the training script.
    _, feat_h, feat_w, feat_c = K.int_shape(features)
    sequence = layers.Reshape((feat_w, feat_h * feat_c))(features)

    for _ in range(2):
        sequence = layers.Bidirectional(
            layers.LSTM(128, return_sequences=True, dropout=0.0, recurrent_dropout=0.0)
        )(sequence)

    probs = layers.Dense(num_classes, activation="softmax", name="predictions")(sequence)

    # Extra inputs that exist only so the CTC loss can be computed in-graph.
    labels = layers.Input(name="labels", shape=(None,), dtype="int32")
    input_len = layers.Input(name="input_length", shape=(1,), dtype="int32")
    label_len = layers.Input(name="label_length", shape=(1,), dtype="int32")

    def ctc_fn(args):
        # Inputs arrive in the order they are wired below; ctc_batch_cost
        # wants the labels first.
        y_hat, y_true, y_hat_len, y_true_len = args
        return K.ctc_batch_cost(y_true, y_hat, y_hat_len, y_true_len)

    ctc_loss = layers.Lambda(ctc_fn, output_shape=(1,), name="ctc_loss", dtype="float32")(
        [probs, labels, input_len, label_len]
    )

    model_with_ctc = models.Model(
        inputs=[image_in, labels, input_len, label_len],
        outputs=ctc_loss,
        name="crnn_ctc_train",
    )
    base_model = models.Model(inputs=image_in, outputs=probs, name="crnn_ctc_base")
    return model_with_ctc, base_model
|
|
|
|
|
|
|
|
def preprocess_gray(img_pil, h=50, w=250):
    """Turn a PIL image into a normalised single-channel float32 array.

    Args:
        img_pil: Image object providing ``convert`` and ``resize``.
        h: Target height in pixels.
        w: Target width in pixels.

    Returns:
        Array of shape ``(h, w, 1)``, dtype float32, with 8-bit grey levels
        mapped from ``[0, 255]`` to ``[-1, 1]``.
    """
    grey = img_pil.convert("L")
    # PIL's resize takes (width, height), the reverse of the array layout.
    resized = grey.resize((w, h), Image.BILINEAR)
    pixels = np.asarray(resized, dtype=np.float32)
    pixels = pixels / 255.0          # -> [0, 1]
    pixels = (pixels - 0.5) / 0.5    # -> [-1, 1]
    return pixels[..., np.newaxis]   # append the channel axis
|
|
|
|
|
def list_files(root, ext="png", max_n=64, num_styles=60):
    """Collect up to ``max_n`` labelled image files from a style-foldered dataset.

    The dataset layout is ``<root>/style<K>/<LABEL>.<ext>`` for
    ``K = 0 .. num_styles - 1``. A file is kept only when its upper-cased
    stem is exactly five characters from ``A-Z0-9``. Folders are visited in
    numeric order and files in name order, so the selection is reproducible
    across runs and filesystems.

    Args:
        root: Dataset root directory (``str`` or path-like).
        ext: File extension, with or without a leading dot.
        max_n: Maximum number of pairs to return; values ``<= 0`` give ``[]``.
        num_styles: Number of ``style<K>`` folders to probe.

    Returns:
        List of ``(path_str, label)`` tuples, at most ``max_n`` long.
    """
    if max_n <= 0:
        return []

    root_dir = Path(root)
    suffix = ext.lstrip(".")  # accept both "png" and ".png"
    label_re = re.compile(r"[A-Z0-9]{5}")

    pairs = []
    for sid in range(num_styles):
        style_dir = root_dir / f"style{sid}"
        if not style_dir.is_dir():
            continue
        # Path.glob only interprets the pattern, never the directory part, so
        # glob metacharacters in ``root`` cannot break the search. Sorting by
        # file name removes the filesystem-dependent ordering.
        candidates = sorted(style_dir.glob(f"*.{suffix}"), key=lambda p: p.name)
        for img_path in candidates:
            if not img_path.is_file():
                continue
            label = img_path.stem.upper()
            # fullmatch: unlike match() with "$", a trailing newline is rejected.
            if label_re.fullmatch(label):
                pairs.append((str(img_path), label))
                if len(pairs) >= max_n:
                    return pairs
    return pairs
|
|
|
|
|
|
|
|
def predict_batch(base_model, batch_imgs):
    """Run the base model on a batch and greedy-decode every sample to text.

    Args:
        base_model: Model whose ``predict`` returns per-step class
            probabilities with the class axis last.
        batch_imgs: np.array (B,H,W,1) float32 [-1,1].

    Returns:
        List with one decoded string per sample in the batch.
    """
    class_probs = base_model.predict(batch_imgs, verbose=0)
    # Best-path decoding: most likely class at each step, then CTC collapse.
    best_ids = np.argmax(class_probs, axis=-1)
    return [
        ids_to_text(collapse_and_strip_blanks(step_ids, blank_id=BLANK_ID))
        for step_ids in best_ids
    ]
|
|
|
|
|
def _run_single_image(args, base_model):
    """Decode the image given by ``--image`` and print it next to its ground truth."""
    img_path = Path(args.image)
    if not img_path.exists():
        print("Image not found:", img_path)
        sys.exit(3)

    with Image.open(img_path) as img:
        tensor = preprocess_gray(img, h=args.height, w=args.width)
    decoded = predict_batch(base_model, np.expand_dims(tensor, 0))[0]

    # Without --gt the upper-cased file stem is taken as the expected text.
    truth = args.gt or img_path.stem.upper()
    print("\nSingle image:")
    print(f"GT : {truth}")
    print(f"PRED: {decoded}")


def _run_batch(args, base_model):
    """Decode a sample of the dataset under ``--data-root`` and print metrics."""
    pairs = list_files(args.data_root, ext=args.ext, max_n=args.samples)
    if not pairs:
        print("No valid files in dataset root.")
        sys.exit(0)
    print(f"Testing on {len(pairs)} samples from {args.data_root} ...")

    images = []
    for path, _ in pairs:
        with Image.open(path) as img:
            images.append(preprocess_gray(img, h=args.height, w=args.width))
    truths = [label for _, label in pairs]
    batch = np.stack(images, 0).astype(np.float32)

    preds = predict_batch(base_model, batch)
    exact = np.mean([int(guess == truth) for guess, truth in zip(preds, truths)])
    cer_vals = [cer(guess, truth) for guess, truth in zip(preds, truths)]

    # Clamp at zero so a negative --show prints nothing instead of slicing
    # from the end.
    shown = max(0, min(args.show, len(preds)))
    for idx, (truth, guess) in enumerate(zip(truths[:shown], preds[:shown])):
        print(f"{idx:02d} GT: {truth} | Pred: {guess}")

    print(f"\nExact match: {exact*100:.2f}% | Mean CER: {float(np.mean(cer_vals)):.4f}\n")
    print(f"Total images tested: {len(preds)}\n")


def main():
    """Script entry point: load the weights, then test one image or a dataset sample.

    ``--image`` takes precedence over ``--data-root`` when both are given.
    The function always terminates through ``sys.exit``:
    1 = weights file missing, 2 = weights could not be loaded,
    3 = image file missing, 0 = every other outcome (including "nothing to
    test" and "no valid files").
    """
    args = parse_args()

    # Default to single-threaded execution; setdefault leaves any value the
    # user already exported untouched.
    # NOTE(review): tensorflow is imported before this point — confirm the
    # runtime still reads these variables this late.
    for var in ("TF_NUM_INTRAOP_THREADS", "TF_NUM_INTEROP_THREADS", "OMP_NUM_THREADS"):
        os.environ.setdefault(var, "1")

    weights_path = Path(args.weights)
    if not weights_path.exists():
        print("Weights not found:", weights_path)
        sys.exit(1)
    info = weights_path.stat()
    size_kb = info.st_size / 1024
    print(f"Found weights: {weights_path} | size: {size_kb:.1f} KB | mtime: {time.ctime(info.st_mtime)}")
    print("TF GPUs:", tf.config.list_physical_devices('GPU'))

    # Weights are loaded into the CTC-wrapped model; base_model is used for
    # inference afterwards.
    model_with_ctc, base_model = build_models(
        h=args.height, w=args.width, num_classes=len(CHARSET)+1
    )
    try:
        model_with_ctc.load_weights(str(weights_path))
        print("OK: weights loaded.")
    except Exception as err:
        print("Failed to load weights:", err)
        sys.exit(2)

    print("Base output shape:", base_model.output_shape)

    if args.image:
        _run_single_image(args, base_model)
        sys.exit(0)

    if args.data_root:
        _run_batch(args, base_model)
        sys.exit(0)

    print("Nothing to test. Provide --image or --data-root.")
    sys.exit(0)
|
|
|
|
|
# Run the CLI only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()