import { Benchmark } from "./types";
/**
 * Published benchmark results and list prices for Google (Gemini) models.
 *
 * Each entry carries the model name, provider, `inputPrice` / `outputPrice`,
 * a `benchmark` map from benchmark identifier to score, and the `source` URL
 * the figures were taken from. Not every model reports every benchmark, so
 * the set of keys inside `benchmark` varies from entry to entry.
 *
 * NOTE(review): prices are presumably USD per 1M tokens and scores are
 * presumably percentages — confirm against the `Benchmark` type in `./types`.
 */
export const googleBenchmarks: Benchmark[] = [
  {
    model: "Gemini Diffusion",
    provider: "Google",
    // NOTE(review): both prices are 0 here — presumably "no published pricing"
    // rather than "free"; confirm how consumers interpret a zero price.
    inputPrice: 0,
    outputPrice: 0,
    benchmark: {
      livecodebench_v6: 30.9,
      bigcodebench: 45.4,
      lbpp_v2: 56.8,
      swe_bench_verified: 22.9,
      humaneval: 89.6,
      mbpp: 76.0,
      gpqa_diamond: 40.4,
      aime_2025: 23.3,
      bigbench_extra_hard: 15.0,
      global_mmlu_lite: 69.1,
    },
    source: "https://deepmind.google/models/gemini-diffusion/",
  },
  {
    model: "Gemini 2.0 Flash-Lite",
    provider: "Google",
    inputPrice: 0.1,
    outputPrice: 0.4,
    benchmark: {
      livecodebench_v6: 28.5,
      bigcodebench: 45.8,
      lbpp_v2: 56.0,
      swe_bench_verified: 28.5,
      humaneval: 90.2,
      mbpp: 75.8,
      gpqa_diamond: 56.5,
      aime_2025: 20.0,
      bigbench_extra_hard: 21.0,
      global_mmlu_lite: 79.0,
    },
    // Same URL as the Gemini Diffusion entry; presumably these figures come
    // from that page's comparison table — verify.
    source: "https://deepmind.google/models/gemini-diffusion/",
  },
  {
    model: "Gemini 2.5 Flash Preview (05-20)",
    provider: "Google",
    inputPrice: 0.15,
    outputPrice: 3.5,
    source: "https://ai.google.dev/gemini-api/docs/thinking",
    benchmark: {
      aime_2025: 72.0,
      gpqa_diamond: 82.8,
      simpleqa: 26.9,
      global_mmlu_lite: 88.4,
      swe_bench_verified: 60.4,
      livecodebench_v6: 63.9,
      mmmu: 79.7,
      lbpp_v2: 61.9,
      bigcodebench: 56.7,
      facts_grounding: 85.3,
      humanitys_last_exam: 11.0,
      mrcr_v2_avg_128k: 74.0,
      mrcr_v2_pointwise_1m: 32.0,
    },
  },
  {
    model: "Gemini 2.5 Flash Preview (04-17) Thinking",
    provider: "Google",
    inputPrice: 0.15,
    outputPrice: 3.5,
    source: "https://ai.google.dev/gemini-api/docs/thinking",
    benchmark: {
      aime_2025: 78.0,
      gpqa_diamond: 78.3,
      simpleqa: 29.7,
      global_mmlu_lite: 88.4,
      livecodebench_v6: 63.5,
      lbpp_v2: 51.1,
      bigcodebench: 44.2,
      mmmu: 76.7,
      humanitys_last_exam: 12.1,
    },
  },
  {
    model: "Gemini 2.0 Flash",
    provider: "Google",
    inputPrice: 0.1,
    outputPrice: 0.4,
    source: "https://ai.google.dev/gemini-api/docs/thinking",
    benchmark: {
      aime_2025: 27.5,
      gpqa_diamond: 60.1,
      simpleqa: 29.9,
      global_mmlu_lite: 83.4,
      livecodebench_v6: 34.5,
      lbpp_v2: 22.2,
      mmmu: 71.7,
      facts_grounding: 84.6,
      humanitys_last_exam: 5.1,
      mrcr_v2_avg_128k: 36.0,
      mrcr_v2_pointwise_1m: 6.0,
    },
  },
  {
    model: "Gemini 2.5 Pro Preview (05-06)",
    provider: "Google",
    inputPrice: 2.5,
    outputPrice: 15.0,
    source: "https://blog.google/products/gemini/gemini-2-5-pro-updates/",
    benchmark: {
      humanitys_last_exam: 17.8,
      gpqa_diamond: 83.0,
      aime_2025: 83.0,
      livecodebench_v6: 75.6,
      lbpp_v2: 76.5,
      bigcodebench: 72.7,
      swe_bench_verified: 63.2,
      simpleqa: 50.8,
      mmmu: 79.6,
      video_mme: 84.8,
      mrcr_v2_avg_128k: 93.0,
      mrcr_v2_pointwise_1m: 82.9,
      global_mmlu_lite: 88.6,
    },
  },
  {
    model: "Gemini 2.5 Pro Experimental (03-25)",
    provider: "Google",
    inputPrice: 2.5,
    outputPrice: 15.0,
    source: "https://blog.google/technology/google-deepmind/gemini-model-thinking-updates-march-2025/",
    benchmark: {
      humanitys_last_exam: 18.8,
      gpqa_diamond: 84.0,
      aime_2025: 86.7,
      livecodebench_v6: 70.4,
      lbpp_v2: 74.0,
      bigcodebench: 68.6,
      swe_bench_verified: 63.8,
      simpleqa: 52.9,
      mmmu: 81.7,
      mrcr_v2_avg_128k: 94.5,
      mrcr_v2_pointwise_1m: 83.1,
      global_mmlu_lite: 89.8,
    },
  },
];