BirdNET_GLOBAL_6K_V2.4_Model_FP32.tflite ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:55f3e4055b1a13bfa9a2452731d0d34f6a02d6b775a334362665892794165e4c
+ size 51726412
README.md CHANGED
@@ -8,4 +8,9 @@ base_model:

ONNX model converted and optimized from `BirdNET_GLOBAL_6K_V2.4_Model_FP32.tflite`.

+ Files:
+
+ - `model.onnx`: Initial model converted with tf2onnx and edited using NVIDIA Nsight DL Designer
+ - `birdnet.onnx`: Model further optimized with the `scripts/optimize.py` script; recommended for use
+
Source: https://github.com/birdnet-team/BirdNET-Analyzer
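(The exact tf2onnx invocation is not recorded in this commit; a minimal sketch of the conversion step, assuming the TFLite frontend of tf2onnx's Python API and opset 17, the first opset that provides the `DFT` op that `scripts/optimize.py` later rewrites, could look like the following.)

```python
# Hedged sketch, not the recorded command: convert the TFLite model with
# tf2onnx's TFLite frontend. Opset 17 is an assumption.
import tf2onnx

model_proto, _ = tf2onnx.convert.from_tflite(
    "BirdNET_GLOBAL_6K_V2.4_Model_FP32.tflite",
    opset=17,
    output_path="model.onnx",  # starting point for the Nsight DL Designer edits
)
```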
birdnet.onnx ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:920e2bd05aa6265e68e21711bc3bab5900cf7c4117f7d958723632ef75956295
+ size 66935346
REALTIME_README.md → scripts/REALTIME_README.md RENAMED
File without changes
USAGE.md → scripts/USAGE.md RENAMED
File without changes
scripts/compare_onnx_tflite.py ADDED
@@ -0,0 +1,667 @@
+ #!/usr/bin/env python3
+ # SPDX-License-Identifier: Apache-2.0
+
+ """
+ Script to compare the results of an ONNX model with a TFLite model given the same input.
+ Optionally also compare with the Tract runtime for ONNX.
+ Created by Copilot.
+
+ Usage:
+     python compare_onnx_tflite.py --onnx model.onnx --tflite model.tflite
+     python compare_onnx_tflite.py --onnx model.onnx --tflite model.tflite --input input.npy
+     python compare_onnx_tflite.py --onnx model.onnx --tflite model.tflite --rtol 1e-5 --atol 1e-5
+     python compare_onnx_tflite.py --onnx model.onnx --tflite model.tflite --benchmark
+     python compare_onnx_tflite.py --onnx model.onnx --tflite model.tflite --use-tract --benchmark
+ """
+
+ import argparse
+ import time
+ import numpy as np
+ import onnxruntime as ort
+ import tensorflow as tf
+ from typing import Dict, List, Tuple, Optional, Any
+
+ try:
+     import tract
+
+     TRACT_AVAILABLE = True
+ except ImportError:
+     TRACT_AVAILABLE = False
+
+
+ def load_onnx_model(onnx_path: str) -> ort.InferenceSession:
+     """Load an ONNX model and return an inference session."""
+     print(f"Loading ONNX model from: {onnx_path}")
+     session = ort.InferenceSession(onnx_path)
+     return session
+
+
+ def load_tflite_model(tflite_path: str) -> tf.lite.Interpreter:
+     """Load a TFLite model and return an interpreter."""
+     print(f"Loading TFLite model from: {tflite_path}")
+     interpreter = tf.lite.Interpreter(model_path=tflite_path)
+     interpreter.allocate_tensors()
+     return interpreter
+
+
+ def load_tract_model(onnx_path: str) -> Optional[Any]:
+     """Load an ONNX model using tract and return a runnable model."""
+     if not TRACT_AVAILABLE:
+         print("Tract is not available. Install with: pip install tract")
+         return None
+     print(f"Loading ONNX model with tract from: {onnx_path}")
+     model = tract.onnx().model_for_path(onnx_path).into_optimized().into_runnable()
+     return model
+
+
+ def get_onnx_model_info(session: ort.InferenceSession) -> Tuple[List, List]:
+     """Get input and output information from an ONNX model."""
+     inputs = session.get_inputs()
+     outputs = session.get_outputs()
+
+     print("\nONNX Model Information:")
+     print("Inputs:")
+     for inp in inputs:
+         print(f"  - Name: {inp.name}, Shape: {inp.shape}, Type: {inp.type}")
+     print("Outputs:")
+     for out in outputs:
+         print(f"  - Name: {out.name}, Shape: {out.shape}, Type: {out.type}")
+
+     return inputs, outputs
+
+
+ def get_tflite_model_info(interpreter: tf.lite.Interpreter) -> Tuple[List, List]:
+     """Get input and output information from a TFLite model."""
+     input_details = interpreter.get_input_details()
+     output_details = interpreter.get_output_details()
+
+     print("\nTFLite Model Information:")
+     print("Inputs:")
+     for inp in input_details:
+         print(f"  - Name: {inp['name']}, Shape: {inp['shape']}, Type: {inp['dtype']}")
+     print("Outputs:")
+     for out in output_details:
+         print(f"  - Name: {out['name']}, Shape: {out['shape']}, Type: {out['dtype']}")
+
+     return input_details, output_details
+
+
+ def generate_random_inputs(onnx_inputs: List, seed: int = 42) -> Dict[str, np.ndarray]:
+     """Generate random inputs based on the ONNX model's input specs."""
+     np.random.seed(seed)
+     inputs = {}
+
+     print("\nGenerating random inputs:")
+     for inp in onnx_inputs:
+         # Handle dynamic dimensions
+         shape = []
+         for dim in inp.shape:
+             if isinstance(dim, str) or dim is None or dim < 0:
+                 # Default to 1 for dynamic dimensions
+                 shape.append(1)
+             else:
+                 shape.append(dim)
+
+         # Generate random data based on type
+         if "float" in inp.type.lower():
+             data = np.random.randn(*shape).astype(np.float32)
+         elif "int64" in inp.type.lower():
+             data = np.random.randint(0, 100, size=shape).astype(np.int64)
+         elif "int32" in inp.type.lower():
+             data = np.random.randint(0, 100, size=shape).astype(np.int32)
+         else:
+             # Default to float32
+             data = np.random.randn(*shape).astype(np.float32)
+
+         inputs[inp.name] = data
+         print(f"  - {inp.name}: shape={data.shape}, dtype={data.dtype}")
+
+     return inputs
+
+
+ def load_inputs_from_file(input_path: str) -> Dict[str, np.ndarray]:
+     """Load inputs from a numpy file (.npy or .npz)."""
+     print(f"\nLoading inputs from: {input_path}")
+
+     if input_path.endswith(".npz"):
+         data = np.load(input_path)
+         inputs = {key: data[key] for key in data.files}
+     elif input_path.endswith(".npy"):
+         data = np.load(input_path)
+         # Assume a single input
+         inputs = {"input": data}
+     else:
+         raise ValueError("Input file must be .npy or .npz format")
+
+     for name, value in inputs.items():
+         print(f"  - {name}: shape={value.shape}, dtype={value.dtype}")
+
+     return inputs
+
+
+ def run_onnx_model(
+     session: ort.InferenceSession, inputs: Dict[str, np.ndarray]
+ ) -> List[np.ndarray]:
+     """Run inference on the ONNX model."""
+     print("\nRunning ONNX model inference...")
+     outputs = session.run(None, inputs)
+     return outputs
+
+
+ def run_tflite_model(
+     interpreter: tf.lite.Interpreter, inputs: Dict[str, np.ndarray], input_details: List
+ ) -> List[np.ndarray]:
+     """Run inference on the TFLite model."""
+     print("Running TFLite model inference...")
+
+     # Set input tensors
+     for i, detail in enumerate(input_details):
+         # Try to match by name, or fall back to positional order
+         input_data = None
+         if detail["name"] in inputs:
+             input_data = inputs[detail["name"]]
+         elif len(inputs) == 1:
+             # If there is only one input, use it
+             input_data = list(inputs.values())[0]
+         elif i < len(inputs):
+             # Match by order
+             input_data = list(inputs.values())[i]
+         else:
+             raise ValueError(f"Cannot match input for TFLite input {detail['name']}")
+
+         # Ensure the correct dtype
+         if input_data.dtype != detail["dtype"]:
+             input_data = input_data.astype(detail["dtype"])
+
+         interpreter.set_tensor(detail["index"], input_data)
+
+     # Run inference
+     interpreter.invoke()
+
+     # Get output tensors
+     output_details = interpreter.get_output_details()
+     outputs = []
+     for detail in output_details:
+         outputs.append(interpreter.get_tensor(detail["index"]))
+
+     return outputs
+
+
+ def run_tract_model(model: Any, inputs: Dict[str, np.ndarray]) -> List[np.ndarray]:
+     """Run inference on the tract model."""
+     if model is None:
+         return []
+     print("Running tract model inference...")
+
+     # Convert inputs to a list (tract expects a list of tensors)
+     input_list = list(inputs.values())
+
+     # Run inference
+     outputs = model.run(input_list)
+
+     # Convert outputs to numpy arrays
+     result = []
+     for output in outputs:
+         result.append(output.to_numpy())
+
+     return result
+
+
+ def benchmark_onnx_model(
+     session: ort.InferenceSession,
+     inputs: Dict[str, np.ndarray],
+     num_runs: int = 100,
+     warmup_runs: int = 10,
+ ) -> Dict[str, float]:
+     """Benchmark ONNX model inference speed."""
+     print(f"\nBenchmarking ONNX model ({warmup_runs} warmup + {num_runs} test runs)...")
+
+     # Warmup runs
+     for _ in range(warmup_runs):
+         session.run(None, inputs)
+
+     # Timed runs
+     times = []
+     for _ in range(num_runs):
+         start = time.perf_counter()
+         session.run(None, inputs)
+         end = time.perf_counter()
+         times.append((end - start) * 1000)  # Convert to ms
+
+     return {
+         "mean": np.mean(times),
+         "median": np.median(times),
+         "std": np.std(times),
+         "min": np.min(times),
+         "max": np.max(times),
+     }
+
+
+ def benchmark_tflite_model(
+     interpreter: tf.lite.Interpreter,
+     inputs: Dict[str, np.ndarray],
+     input_details: List,
+     num_runs: int = 100,
+     warmup_runs: int = 10,
+ ) -> Dict[str, float]:
+     """Benchmark TFLite model inference speed."""
+     print(f"Benchmarking TFLite model ({warmup_runs} warmup + {num_runs} test runs)...")
+
+     # Prepare inputs
+     def set_inputs():
+         for i, detail in enumerate(input_details):
+             input_data = None
+             if detail["name"] in inputs:
+                 input_data = inputs[detail["name"]]
+             elif len(inputs) == 1:
+                 input_data = list(inputs.values())[0]
+             elif i < len(inputs):
+                 input_data = list(inputs.values())[i]
+             else:
+                 raise ValueError(
+                     f"Cannot match input for TFLite input {detail['name']}"
+                 )
+
+             if input_data.dtype != detail["dtype"]:
+                 input_data = input_data.astype(detail["dtype"])
+
+             interpreter.set_tensor(detail["index"], input_data)
+
+     # Warmup runs
+     for _ in range(warmup_runs):
+         set_inputs()
+         interpreter.invoke()
+
+     # Timed runs
+     times = []
+     for _ in range(num_runs):
+         set_inputs()
+         start = time.perf_counter()
+         interpreter.invoke()
+         end = time.perf_counter()
+         times.append((end - start) * 1000)  # Convert to ms
+
+     return {
+         "mean": np.mean(times),
+         "median": np.median(times),
+         "std": np.std(times),
+         "min": np.min(times),
+         "max": np.max(times),
+     }
+
+
+ def benchmark_tract_model(
+     model: Any,
+     inputs: Dict[str, np.ndarray],
+     num_runs: int = 100,
+     warmup_runs: int = 10,
+ ) -> Optional[Dict[str, float]]:
+     """Benchmark tract model inference speed."""
+     if model is None:
+         return None
+     print(f"Benchmarking tract model ({warmup_runs} warmup + {num_runs} test runs)...")
+
+     # Convert inputs to a list
+     input_list = list(inputs.values())
+
+     # Warmup runs
+     for _ in range(warmup_runs):
+         model.run(input_list)
+
+     # Timed runs
+     times = []
+     for _ in range(num_runs):
+         start = time.perf_counter()
+         model.run(input_list)
+         end = time.perf_counter()
+         times.append((end - start) * 1000)  # Convert to ms
+
+     return {
+         "mean": np.mean(times),
+         "median": np.median(times),
+         "std": np.std(times),
+         "min": np.min(times),
+         "max": np.max(times),
+     }
+
+
+ def print_benchmark_results(
+     onnx_stats: Dict[str, float],
+     tflite_stats: Dict[str, float],
+     tract_stats: Optional[Dict[str, float]] = None,
+ ) -> None:
+     """Print benchmark comparison results."""
+     print("\n" + "=" * 80)
+     print("BENCHMARK RESULTS")
+     print("=" * 80)
+
+     print("\nONNX Model:")
+     print(f"  Mean: {onnx_stats['mean']:.3f} ms")
+     print(f"  Median: {onnx_stats['median']:.3f} ms")
+     print(f"  Std: {onnx_stats['std']:.3f} ms")
+     print(f"  Min: {onnx_stats['min']:.3f} ms")
+     print(f"  Max: {onnx_stats['max']:.3f} ms")
+
+     print("\nTFLite Model:")
+     print(f"  Mean: {tflite_stats['mean']:.3f} ms")
+     print(f"  Median: {tflite_stats['median']:.3f} ms")
+     print(f"  Std: {tflite_stats['std']:.3f} ms")
+     print(f"  Min: {tflite_stats['min']:.3f} ms")
+     print(f"  Max: {tflite_stats['max']:.3f} ms")
+
+     if tract_stats:
+         print("\nTract Model:")
+         print(f"  Mean: {tract_stats['mean']:.3f} ms")
+         print(f"  Median: {tract_stats['median']:.3f} ms")
+         print(f"  Std: {tract_stats['std']:.3f} ms")
+         print(f"  Min: {tract_stats['min']:.3f} ms")
+         print(f"  Max: {tract_stats['max']:.3f} ms")
+
+     print("\nComparison:")
+     speedup = tflite_stats["mean"] / onnx_stats["mean"]
+     if speedup > 1:
+         print(f"  ONNX Runtime is {speedup:.2f}x faster than TFLite")
+     else:
+         print(f"  TFLite is {1 / speedup:.2f}x faster than ONNX Runtime")
+     print(f"  Difference: {abs(onnx_stats['mean'] - tflite_stats['mean']):.3f} ms")
+
+     if tract_stats:
+         speedup_tract = tflite_stats["mean"] / tract_stats["mean"]
+         if speedup_tract > 1:
+             print(f"  Tract is {speedup_tract:.2f}x faster than TFLite")
+         else:
+             print(f"  TFLite is {1 / speedup_tract:.2f}x faster than Tract")
+         print(f"  Difference: {abs(tract_stats['mean'] - tflite_stats['mean']):.3f} ms")
+
+         speedup_ort = onnx_stats["mean"] / tract_stats["mean"]
+         if speedup_ort > 1:
+             print(f"  Tract is {speedup_ort:.2f}x faster than ONNX Runtime")
+         else:
+             print(f"  ONNX Runtime is {1 / speedup_ort:.2f}x faster than Tract")
+         print(f"  Difference: {abs(tract_stats['mean'] - onnx_stats['mean']):.3f} ms")
+
+     print("=" * 80)
+
+
+ def compare_outputs(
+     onnx_outputs: List[np.ndarray],
+     tflite_outputs: List[np.ndarray],
+     tract_outputs: Optional[List[np.ndarray]] = None,
+     rtol: float = 1e-5,
+     atol: float = 1e-5,
+ ) -> bool:
+     """Compare outputs from the ONNX, TFLite, and optionally Tract models."""
+     print("\n" + "=" * 80)
+     print("COMPARISON RESULTS")
+     print("=" * 80)
+
+     if len(onnx_outputs) != len(tflite_outputs):
+         print(
+             f"❌ Number of outputs differs: ONNX={len(onnx_outputs)}, TFLite={len(tflite_outputs)}"
+         )
+         return False
+
+     if tract_outputs and len(onnx_outputs) != len(tract_outputs):
+         print(
+             f"❌ Number of outputs differs: ONNX={len(onnx_outputs)}, Tract={len(tract_outputs)}"
+         )
+         return False
+
+     all_match = True
+     for i, (onnx_out, tflite_out) in enumerate(zip(onnx_outputs, tflite_outputs)):
+         tract_out = tract_outputs[i] if tract_outputs else None
+
+         print(f"\nOutput {i}:")
+         print(f"  ONNX Runtime shape: {onnx_out.shape}, dtype: {onnx_out.dtype}")
+         print(f"  TFLite shape: {tflite_out.shape}, dtype: {tflite_out.dtype}")
+         if tract_out is not None:
+             print(f"  Tract shape: {tract_out.shape}, dtype: {tract_out.dtype}")
+
+         if onnx_out.shape != tflite_out.shape:
+             print("  ❌ Shape mismatch between ONNX and TFLite!")
+             all_match = False
+             continue
+
+         if tract_out is not None and onnx_out.shape != tract_out.shape:
+             print("  ❌ Shape mismatch between ONNX and Tract!")
+             all_match = False
+             continue
+
+         # Convert to the same dtype for comparison
+         if onnx_out.dtype != tflite_out.dtype:
+             print("  ⚠️ Different dtypes, converting to float32 for comparison")
+             onnx_out = onnx_out.astype(np.float32)
+             tflite_out = tflite_out.astype(np.float32)
+
+         if tract_out is not None and onnx_out.dtype != tract_out.dtype:
+             tract_out = tract_out.astype(np.float32)
+
+         # Compute statistics - ONNX vs TFLite
+         print("\n  ONNX Runtime vs TFLite:")
+         diff = np.abs(onnx_out - tflite_out)
+         max_diff = np.max(diff)
+         mean_diff = np.mean(diff)
+         is_close = np.allclose(onnx_out, tflite_out, rtol=rtol, atol=atol)
+
+         print(f"    Max difference: {max_diff:.10f}")
+         print(f"    Mean difference: {mean_diff:.10f}")
+         print(f"    Relative tolerance: {rtol}")
+         print(f"    Absolute tolerance: {atol}")
+
+         if is_close:
+             print("    ✅ Outputs match within tolerance")
+         else:
+             print("    ❌ Outputs do NOT match within tolerance")
+             all_match = False
+
+         # Show some sample values
+         print("\n    Sample values (first 5 elements):")
+         flat_onnx = onnx_out.flatten()[:5]
+         flat_tflite = tflite_out.flatten()[:5]
+         for j, (o, t) in enumerate(zip(flat_onnx, flat_tflite)):
+             print(
+                 f"      [{j}] ONNX: {o:.10f}, TFLite: {t:.10f}, Diff: {abs(o - t):.10f}"
+             )
+
+         # Compute statistics - ONNX vs Tract
+         if tract_out is not None:
+             print("\n  ONNX Runtime vs Tract:")
+             diff_tract = np.abs(onnx_out - tract_out)
+             max_diff_tract = np.max(diff_tract)
+             mean_diff_tract = np.mean(diff_tract)
+             is_close_tract = np.allclose(onnx_out, tract_out, rtol=rtol, atol=atol)
+
+             print(f"    Max difference: {max_diff_tract:.10f}")
+             print(f"    Mean difference: {mean_diff_tract:.10f}")
+
+             if is_close_tract:
+                 print("    ✅ Outputs match within tolerance")
+             else:
+                 print("    ❌ Outputs do NOT match within tolerance")
+                 all_match = False
+
+             # Show some sample values
+             print("\n    Sample values (first 5 elements):")
+             flat_onnx_tract = onnx_out.flatten()[:5]
+             flat_tract = tract_out.flatten()[:5]
+             for j, (o, tr) in enumerate(zip(flat_onnx_tract, flat_tract)):
+                 print(
+                     f"      [{j}] ONNX: {o:.10f}, Tract: {tr:.10f}, Diff: {abs(o - tr):.10f}"
+                 )
+
+             # Compute statistics - TFLite vs Tract
+             print("\n  TFLite vs Tract:")
+             diff_tflite_tract = np.abs(tflite_out - tract_out)
+             max_diff_tflite_tract = np.max(diff_tflite_tract)
+             mean_diff_tflite_tract = np.mean(diff_tflite_tract)
+             is_close_tflite_tract = np.allclose(
+                 tflite_out, tract_out, rtol=rtol, atol=atol
+             )
+
+             print(f"    Max difference: {max_diff_tflite_tract:.10f}")
+             print(f"    Mean difference: {mean_diff_tflite_tract:.10f}")
+
+             if is_close_tflite_tract:
+                 print("    ✅ Outputs match within tolerance")
+             else:
+                 print("    ❌ Outputs do NOT match within tolerance")
+                 all_match = False
+
+     print("\n" + "=" * 80)
+     if all_match:
+         print("✅ ALL OUTPUTS MATCH!")
+     else:
+         print("❌ SOME OUTPUTS DO NOT MATCH")
+     print("=" * 80)
+
+     return all_match
+
+
+ def main():
+     parser = argparse.ArgumentParser(
+         description="Compare ONNX and TFLite model outputs",
+         formatter_class=argparse.RawDescriptionHelpFormatter,
+         epilog="""
+ Examples:
+   # Compare with random inputs
+   python compare_onnx_tflite.py --onnx model.onnx --tflite model.tflite
+
+   # Compare with custom inputs from a file
+   python compare_onnx_tflite.py --onnx model.onnx --tflite model.tflite --input input.npz
+
+   # Compare with custom tolerances
+   python compare_onnx_tflite.py --onnx model.onnx --tflite model.tflite --rtol 1e-3 --atol 1e-3
+
+   # Save outputs for inspection
+   python compare_onnx_tflite.py --onnx model.onnx --tflite model.tflite --save-outputs
+
+   # Benchmark execution speed
+   python compare_onnx_tflite.py --onnx model.onnx --tflite model.tflite --benchmark
+
+   # Benchmark with a custom number of runs
+   python compare_onnx_tflite.py --onnx model.onnx --tflite model.tflite --benchmark --num-runs 200 --warmup-runs 20
+
+   # Compare with the tract runtime as well
+   python compare_onnx_tflite.py --onnx model.onnx --tflite model.tflite --use-tract
+
+   # Benchmark all three runtimes
+   python compare_onnx_tflite.py --onnx model.onnx --tflite model.tflite --use-tract --benchmark
+ """,
+     )
+
+     parser.add_argument("--onnx", required=True, help="Path to ONNX model")
+     parser.add_argument("--tflite", required=True, help="Path to TFLite model")
+     parser.add_argument("--input", help="Path to input file (.npy or .npz)")
+     parser.add_argument(
+         "--rtol", type=float, default=1e-5, help="Relative tolerance (default: 1e-5)"
+     )
+     parser.add_argument(
+         "--atol", type=float, default=1e-5, help="Absolute tolerance (default: 1e-5)"
+     )
+     parser.add_argument(
+         "--seed",
+         type=int,
+         default=42,
+         help="Random seed for input generation (default: 42)",
+     )
+     parser.add_argument(
+         "--save-outputs", action="store_true", help="Save outputs to files"
+     )
+     parser.add_argument(
+         "--benchmark",
+         action="store_true",
+         help="Benchmark execution speed of both models",
+     )
+     parser.add_argument(
+         "--num-runs",
+         type=int,
+         default=100,
+         help="Number of benchmark runs (default: 100)",
+     )
+     parser.add_argument(
+         "--warmup-runs",
+         type=int,
+         default=10,
+         help="Number of warmup runs (default: 10)",
+     )
+     parser.add_argument(
+         "--use-tract", action="store_true", help="Also test with the tract ONNX runtime"
+     )
+
+     args = parser.parse_args()
+
+     # Load models
+     onnx_session = load_onnx_model(args.onnx)
+     tflite_interpreter = load_tflite_model(args.tflite)
+
+     # Load the tract model if requested
+     tract_model = None
+     if args.use_tract:
+         if not TRACT_AVAILABLE:
+             print(
+                 "\n⚠️ Warning: Tract is not installed. Install with: pip install tract"
+             )
+             print("Continuing without tract comparison...\n")
+         else:
+             tract_model = load_tract_model(args.onnx)
+
+     # Get model info
+     onnx_inputs, onnx_outputs = get_onnx_model_info(onnx_session)
+     tflite_input_details, tflite_output_details = get_tflite_model_info(
+         tflite_interpreter
+     )
+
+     # Prepare inputs
+     if args.input:
+         inputs = load_inputs_from_file(args.input)
+     else:
+         inputs = generate_random_inputs(onnx_inputs, seed=args.seed)
+
+     # Run inference
+     onnx_results = run_onnx_model(onnx_session, inputs)
+     tflite_results = run_tflite_model(tflite_interpreter, inputs, tflite_input_details)
+     tract_results = None
+     if tract_model:
+         tract_results = run_tract_model(tract_model, inputs)
+
+     # Save outputs if requested
+     if args.save_outputs:
+         print("\nSaving outputs...")
+         np.savez("onnx_outputs.npz", *onnx_results)
+         np.savez("tflite_outputs.npz", *tflite_results)
+         print("  - onnx_outputs.npz")
+         print("  - tflite_outputs.npz")
+         if tract_results:
+             np.savez("tract_outputs.npz", *tract_results)
+             print("  - tract_outputs.npz")
+
+     # Compare results
+     match = compare_outputs(
+         onnx_results, tflite_results, tract_results, rtol=args.rtol, atol=args.atol
+     )
+
+     # Benchmark if requested
+     if args.benchmark:
+         onnx_stats = benchmark_onnx_model(
+             onnx_session, inputs, args.num_runs, args.warmup_runs
+         )
+         tflite_stats = benchmark_tflite_model(
+             tflite_interpreter,
+             inputs,
+             tflite_input_details,
+             args.num_runs,
+             args.warmup_runs,
+         )
+         tract_stats = None
+         if tract_model:
+             tract_stats = benchmark_tract_model(
+                 tract_model, inputs, args.num_runs, args.warmup_runs
+             )
+         print_benchmark_results(onnx_stats, tflite_stats, tract_stats)
+
+     # Return exit code
+     return 0 if match else 1
+
+
+ if __name__ == "__main__":
+     exit(main())
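A note on the `--input` flag above: arrays in an `.npz` file are matched to ONNX inputs by name. A minimal sketch of preparing one for the optimized model, assuming the `input` name and 144000-sample length (3 s at 48 kHz) set by `scripts/optimize.py` below:

```python
# Sketch for building an input.npz usable with --input. The key must match the
# ONNX model's input name ("input" in birdnet.onnx). Random data stands in for
# a real 3-second, 48 kHz audio clip.
import numpy as np

audio = np.random.randn(1, 144000).astype(np.float32)
np.savez("input.npz", input=audio)
```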
scripts/optimize.py ADDED
@@ -0,0 +1,142 @@
+ import onnxscript
+ import onnx_ir as ir
+ import onnx_ir.passes.common
+ import numpy as np
+
+
+ class ReplaceDftWithMatMulRule(onnxscript.rewriter.RewriteRuleClassBase):
+     def pattern(self, op, x, dft_length):
+         x = op.Reshape(x, _allow_other_inputs=True)
+         dft = op.DFT(x, dft_length, _outputs=["dft_output"])
+         real_part = op.Slice(dft, [0], [1], [-1])
+         return op.Squeeze(real_part, [-1])
+
+     def rewrite(self, op, x: ir.Value, dft_length: ir.Value, dft_output: ir.Value):
+         # Get the DFT node attributes
+         dft_node = dft_output.producer()
+         assert dft_node is not None
+
+         dft_size = ir.convenience.get_const_tensor(dft_length).numpy().item()
+
+         # Create a one-sided DFT matrix (real part only, DC to Nyquist)
+         # The real part of the DFT is: Re(DFT[k]) = sum(x[n] * cos(2*pi*k*n/N))
+         # For a one-sided DFT we only need frequencies from 0 to Nyquist (dft_size//2 + 1)
+         num_freqs = dft_size // 2 + 1
+
+         # Vectorized creation of the DFT matrix
+         n = np.arange(dft_size, dtype=np.float32)[:, np.newaxis]  # Shape: (dft_size, 1)
+         k = np.arange(num_freqs, dtype=np.float32)[
+             np.newaxis, :
+         ]  # Shape: (1, num_freqs)
+         dft_matrix = np.cos(
+             2 * np.pi * k * n / dft_size
+         )  # Shape: (dft_size, num_freqs)
+
+         # Create a constant node for the DFT matrix
+         dft_matrix = op.initializer(ir.tensor(dft_matrix), name=f"{x.name}_dft_matrix")
+
+         # The DFT axis is already at the end, so a direct matrix multiplication works
+         result = op.MatMul(x, dft_matrix)
+
+         return result
+
+
+ class ReplaceSplit(onnxscript.rewriter.RewriteRuleClassBase):
+     def pattern(self, op, x):
+         return op.Split(
+             x, _allow_other_inputs=True, _outputs=["split_out_1", "split_out_2"]
+         )
+
+     def rewrite(self, op, x: ir.Value, **kwargs):
+         zero = op.initializer(ir.tensor(np.array([0], dtype=np.int64)), "zero")
+         batch_size = op.Gather(x, zero)
+         sample_size = op.initializer(
+             ir.tensor(np.array([144000], dtype=np.int32)), "sample_size"
+         )
+         return batch_size, sample_size
+
+
+ class RemoveCast(onnxscript.rewriter.RewriteRuleClassBase):
+     def pattern(self, op, x):
+         return op.Cast(x)
+
+     def rewrite(self, op, x: ir.Value, **kwargs):
+         return op.Identity(x)
+
+
+ model = ir.load("model.onnx")
+
+ # Set dynamic axes
+ model.graph.inputs[0].shape = ir.Shape(["batch", 144000])
+ model.graph.outputs[0].shape = ir.Shape(["batch", 6522])
+
+ onnxscript.rewriter.rewrite(
+     model,
+     [ReplaceDftWithMatMulRule().rule(), ReplaceSplit().rule(), RemoveCast().rule()],
+ )
+
+ # Change all int32 initializers to int64
+ initializers = list(model.graph.initializers.values())
+ for initializer in initializers:
+     if initializer.dtype == ir.DataType.INT32:
+         int32_array = initializer.const_value.numpy()
+         int64_array = int32_array.astype(np.int64)
+         new_initializer = ir.val(initializer.name, const_value=ir.tensor(int64_array))
+         model.graph.initializers.pop(initializer.name)
+         model.graph.initializers.add(new_initializer)
+         initializer.replace_all_uses_with(new_initializer)
+
+ onnxscript.optimizer.optimize(
+     model, input_size_limit=1024 * 1024 * 1024, output_size_limit=1024 * 1024 * 1024
+ )
+
+
+ # Remove Slice-Reshape
+ def remove_slice_reshape(model: ir.Model):
+     mul_node = model.graph.node("model/MEL_SPEC1/Mul")
+     first_reshape = model.graph.node("model/MEL_SPEC1/stft/frame/Reshape_1")
+     first_shape = ir.val(
+         "first_shape", const_value=ir.tensor([-1, 72000, 2], dtype=ir.DataType.INT64)
+     )
+     model.graph.initializers.add(first_shape)
+     second_reshape = model.graph.node("model/MEL_SPEC2/stft/frame/Reshape_1")
+     second_shape = ir.val(
+         "second_shape", const_value=ir.tensor([-1, 18000, 8], dtype=ir.DataType.INT64)
+     )
+     model.graph.initializers.add(second_shape)
+
+     third_reshape = model.graph.node("model/MEL_SPEC1/stft/frame/Reshape_4")
+     third_shape = ir.val(
+         "third_shape", const_value=ir.tensor([-1, 511, 2048], dtype=ir.DataType.INT64)
+     )
+     model.graph.initializers.add(third_shape)
+     fourth_reshape = model.graph.node("model/MEL_SPEC2/stft/frame/Reshape_4")
+     fourth_shape = ir.val(
+         "fourth_shape", const_value=ir.tensor([-1, 511, 1024], dtype=ir.DataType.INT64)
+     )
+     model.graph.initializers.add(fourth_shape)
+
+     # Replace with Mul-Reshape-Gather
+     first_reshape.replace_input_with(0, mul_node.outputs[0])
+     first_reshape.replace_input_with(1, first_shape)
+     second_reshape.replace_input_with(0, mul_node.outputs[0])
+     second_reshape.replace_input_with(1, second_shape)
+     third_reshape.replace_input_with(1, third_shape)
+     fourth_reshape.replace_input_with(1, fourth_shape)
+
+
+ remove_slice_reshape(model)
+ # Run DCE again
+ onnxscript.optimizer.optimize(
+     model, input_size_limit=1024 * 1024 * 1024, output_size_limit=1024 * 1024 * 1024
+ )
+
+
+ onnx_ir.passes.common.ClearMetadataAndDocStringPass()(model)
+ model.graph.inputs[0].name = "input"
+ model.graph.outputs[0].name = "output"
+ model.ir_version = 10
+ model.producer_name = "onnx-ir"
+ model.graph.name = "BirdNET-v2.4"
+
+ ir.save(model, "birdnet.onnx")
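The `ReplaceDftWithMatMulRule` above relies on the identity `Re(DFT[k]) = sum_n x[n] * cos(2*pi*k*n/N)`. A standalone sanity check of that one-sided cosine matrix against numpy's FFT (not part of this commit):

```python
# Sanity check: the one-sided cosine matrix built in ReplaceDftWithMatMulRule
# reproduces the real part of np.fft.rfft along the last axis.
import numpy as np

N = 2048  # matches the larger frame length used above
n = np.arange(N, dtype=np.float64)[:, np.newaxis]           # (N, 1)
k = np.arange(N // 2 + 1, dtype=np.float64)[np.newaxis, :]  # (1, N//2 + 1)
dft_matrix = np.cos(2 * np.pi * k * n / N)                  # (N, N//2 + 1)

x = np.random.randn(4, N)
assert np.allclose(x @ dft_matrix, np.fft.rfft(x, axis=-1).real)
```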
predict_audio.py → scripts/predict_audio.py RENAMED
File without changes
realtime_detection.py → scripts/realtime_detection.py RENAMED
File without changes