3913 lines
117 KiB
JSON
3913 lines
117 KiB
JSON
{
|
|
"id": "e08a7051-a856-4053-b34b-561d7ce5e8cf",
|
|
"title": "predator-a3b-ngl-ctx-2d-1 \u2014 qwen3 on predator",
|
|
"date": "2026-05-05",
|
|
"started_at": "2026-05-05T07:43:00Z",
|
|
"git_sha": "3ff8bd1808a1df00426d9b447559c774e4704c39",
|
|
"hardware": "predator",
|
|
"hardware_label": "Predator \u00b7 gaming laptop \u00b7 GTX 1060 6 GB \u00b7 28 GB RAM",
|
|
"engine": "llamacpp",
|
|
"harness": "predator-a3b-ngl-ctx-2d-1",
|
|
"model_family": "qwen3",
|
|
"model_sizes": [
|
|
"qwen3:30b-a3b-iq2m"
|
|
],
|
|
"task_kind": "param-sweep",
|
|
"tags": [
|
|
"a3b",
|
|
"ctx",
|
|
"iq2",
|
|
"ngl",
|
|
"predator",
|
|
"qwen3"
|
|
],
|
|
"headline": "90 calls across 6 cell(s); ~5.9 tok/s mean; p50 73.4s",
|
|
"janie_blurb_md": null,
|
|
"janie_blurb_status": "pending",
|
|
"caveat": null,
|
|
"caveat_severity": null,
|
|
"methodology_ref": "A3B_AND_CPU_OVERNIGHT_2026-05-05",
|
|
"methodology_deviations_md": null,
|
|
"results_table": [
|
|
{
|
|
"label": "qwen3:30b-a3b-iq2m-think500-ngl12-ctx64k",
|
|
"cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl12-ctx64k",
|
|
"n_calls": 15,
|
|
"n_errors": 0,
|
|
"duration_ms_mean": 85573.42,
|
|
"duration_ms_p50": 78940.0,
|
|
"duration_ms_p95": 159864.3,
|
|
"tokens_per_sec_mean": 5.48,
|
|
"tokens_per_sec_p50": 5.31,
|
|
"tokens_per_sec_p95": 6.79,
|
|
"tokens_per_sec_max": 6.8,
|
|
"memory_mb": null,
|
|
"watts_avg": null,
|
|
"notes": null
|
|
},
|
|
{
|
|
"label": "qwen3:30b-a3b-iq2m-think500-ngl12-ctx96k",
|
|
"cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl12-ctx96k",
|
|
"n_calls": 15,
|
|
"n_errors": 0,
|
|
"duration_ms_mean": 71805.83,
|
|
"duration_ms_p50": 58541.0,
|
|
"duration_ms_p95": 140580.5,
|
|
"tokens_per_sec_mean": 5.79,
|
|
"tokens_per_sec_p50": 5.75,
|
|
"tokens_per_sec_p95": 6.47,
|
|
"tokens_per_sec_max": 6.7,
|
|
"memory_mb": null,
|
|
"watts_avg": null,
|
|
"notes": null
|
|
},
|
|
{
|
|
"label": "qwen3:30b-a3b-iq2m-think500-ngl12-ctx131k",
|
|
"cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl12-ctx131k",
|
|
"n_calls": 15,
|
|
"n_errors": 0,
|
|
"duration_ms_mean": 96406.17,
|
|
"duration_ms_p50": 105842.0,
|
|
"duration_ms_p95": 154492.35,
|
|
"tokens_per_sec_mean": 5.01,
|
|
"tokens_per_sec_p50": 5.13,
|
|
"tokens_per_sec_p95": 5.97,
|
|
"tokens_per_sec_max": 6.29,
|
|
"memory_mb": null,
|
|
"watts_avg": null,
|
|
"notes": null
|
|
},
|
|
{
|
|
"label": "qwen3:30b-a3b-iq2m-think500-ngl24-ctx48k",
|
|
"cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl24-ctx48k",
|
|
"n_calls": 15,
|
|
"n_errors": 0,
|
|
"duration_ms_mean": 65539.5,
|
|
"duration_ms_p50": 64606.5,
|
|
"duration_ms_p95": 118069.75,
|
|
"tokens_per_sec_mean": 6.6,
|
|
"tokens_per_sec_p50": 6.59,
|
|
"tokens_per_sec_p95": 6.89,
|
|
"tokens_per_sec_max": 6.91,
|
|
"memory_mb": null,
|
|
"watts_avg": null,
|
|
"notes": null
|
|
},
|
|
{
|
|
"label": "qwen3:30b-a3b-iq2m-think500-ngl24-ctx64k",
|
|
"cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl24-ctx64k",
|
|
"n_calls": 15,
|
|
"n_errors": 0,
|
|
"duration_ms_mean": 71267.58,
|
|
"duration_ms_p50": 80265.5,
|
|
"duration_ms_p95": 128231.3,
|
|
"tokens_per_sec_mean": 6.41,
|
|
"tokens_per_sec_p50": 6.39,
|
|
"tokens_per_sec_p95": 6.88,
|
|
"tokens_per_sec_max": 6.9,
|
|
"memory_mb": null,
|
|
"watts_avg": null,
|
|
"notes": null
|
|
},
|
|
{
|
|
"label": "qwen3:30b-a3b-iq2m-think500-ngl24-ctx96k",
|
|
"cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl24-ctx96k",
|
|
"n_calls": 15,
|
|
"n_errors": 0,
|
|
"duration_ms_mean": 69299.5,
|
|
"duration_ms_p50": 68355.0,
|
|
"duration_ms_p95": 124990.95,
|
|
"tokens_per_sec_mean": 6.08,
|
|
"tokens_per_sec_p50": 6.44,
|
|
"tokens_per_sec_p95": 6.68,
|
|
"tokens_per_sec_max": 6.69,
|
|
"memory_mb": null,
|
|
"watts_avg": null,
|
|
"notes": null
|
|
}
|
|
],
|
|
"cells": [
|
|
{
|
|
"cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl12-ctx64k",
|
|
"n_calls": 15,
|
|
"n_errors": 0,
|
|
"duration_ms_mean": 85573.42,
|
|
"duration_ms_p50": 78940.0,
|
|
"duration_ms_p95": 159864.3,
|
|
"tokens_per_sec_mean": 5.48,
|
|
"tokens_per_sec_p50": 5.31,
|
|
"tokens_per_sec_p95": 6.79,
|
|
"tokens_per_sec_max": 6.8,
|
|
"cold_warm_split": {
|
|
"cold": {
|
|
"n_calls": 3,
|
|
"duration_ms_mean": 92828.67,
|
|
"duration_ms_p50": 110762.0,
|
|
"tokens_per_sec_mean": 5.4
|
|
},
|
|
"warm": {
|
|
"n_calls": 9,
|
|
"duration_ms_mean": 83155.0,
|
|
"duration_ms_p50": 76258.0,
|
|
"tokens_per_sec_mean": 5.51
|
|
}
|
|
},
|
|
"per_prompt": {
|
|
"unknown": {
|
|
"n_calls": 3,
|
|
"n_errors": 0,
|
|
"duration_ms_mean": null,
|
|
"duration_ms_p50": null,
|
|
"tokens_per_sec_mean": null,
|
|
"tokens_per_sec_p50": null
|
|
},
|
|
"hello": {
|
|
"n_calls": 4,
|
|
"n_errors": 0,
|
|
"duration_ms_mean": 18907.5,
|
|
"duration_ms_p50": 19953.0,
|
|
"tokens_per_sec_mean": 6.04,
|
|
"tokens_per_sec_p50": 6.21
|
|
},
|
|
"P-MEDIUM": {
|
|
"n_calls": 4,
|
|
"n_errors": 0,
|
|
"duration_ms_mean": 84750.25,
|
|
"duration_ms_p50": 78940.0,
|
|
"tokens_per_sec_mean": 5.3,
|
|
"tokens_per_sec_p50": 5.32
|
|
},
|
|
"P-HARD": {
|
|
"n_calls": 4,
|
|
"n_errors": 0,
|
|
"duration_ms_mean": 153062.5,
|
|
"duration_ms_p50": 153912.0,
|
|
"tokens_per_sec_mean": 5.11,
|
|
"tokens_per_sec_p50": 5.14
|
|
}
|
|
},
|
|
"chars_split": {
|
|
"has_thinking": true,
|
|
"reasoning_chars_mean": 1193.67,
|
|
"answer_chars_mean": 809.42
|
|
},
|
|
"timeline": [
|
|
{
|
|
"prompt_id": null,
|
|
"run_idx": null,
|
|
"phase": null,
|
|
"duration_ms": null,
|
|
"tokens_per_sec": null,
|
|
"error": null
|
|
},
|
|
{
|
|
"prompt_id": null,
|
|
"run_idx": null,
|
|
"phase": null,
|
|
"duration_ms": null,
|
|
"tokens_per_sec": null,
|
|
"error": null
|
|
},
|
|
{
|
|
"prompt_id": null,
|
|
"run_idx": null,
|
|
"phase": null,
|
|
"duration_ms": null,
|
|
"tokens_per_sec": null,
|
|
"error": null
|
|
},
|
|
{
|
|
"prompt_id": "hello",
|
|
"run_idx": 0,
|
|
"phase": "cold",
|
|
"duration_ms": 25689,
|
|
"tokens_per_sec": 5.644439254155475,
|
|
"error": null
|
|
},
|
|
{
|
|
"prompt_id": "hello",
|
|
"run_idx": 1,
|
|
"phase": "warm",
|
|
"duration_ms": 10035,
|
|
"tokens_per_sec": 6.776283009466866,
|
|
"error": null
|
|
},
|
|
{
|
|
"prompt_id": "hello",
|
|
"run_idx": 2,
|
|
"phase": "warm",
|
|
"duration_ms": 15585,
|
|
"tokens_per_sec": 6.801411613731151,
|
|
"error": null
|
|
},
|
|
{
|
|
"prompt_id": "hello",
|
|
"run_idx": 3,
|
|
"phase": "warm",
|
|
"duration_ms": 24321,
|
|
"tokens_per_sec": 4.934007647711853,
|
|
"error": null
|
|
},
|
|
{
|
|
"prompt_id": "P-MEDIUM",
|
|
"run_idx": 0,
|
|
"phase": "cold",
|
|
"duration_ms": 110762,
|
|
"tokens_per_sec": 5.29062313789928,
|
|
"error": null
|
|
},
|
|
{
|
|
"prompt_id": "P-MEDIUM",
|
|
"run_idx": 1,
|
|
"phase": "warm",
|
|
"duration_ms": 76258,
|
|
"tokens_per_sec": 5.101104146450209,
|
|
"error": null
|
|
},
|
|
{
|
|
"prompt_id": "P-MEDIUM",
|
|
"run_idx": 2,
|
|
"phase": "warm",
|
|
"duration_ms": 70359,
|
|
"tokens_per_sec": 5.344021376085505,
|
|
"error": null
|
|
},
|
|
{
|
|
"prompt_id": "P-MEDIUM",
|
|
"run_idx": 3,
|
|
"phase": "warm",
|
|
"duration_ms": 81622,
|
|
"tokens_per_sec": 5.451961480973267,
|
|
"error": null
|
|
},
|
|
{
|
|
"prompt_id": "P-HARD",
|
|
"run_idx": 0,
|
|
"phase": "cold",
|
|
"duration_ms": 142035,
|
|
"tokens_per_sec": 5.252226563875102,
|
|
"error": null
|
|
},
|
|
{
|
|
"prompt_id": "P-HARD",
|
|
"run_idx": 1,
|
|
"phase": "warm",
|
|
"duration_ms": 150027,
|
|
"tokens_per_sec": 5.339038972984863,
|
|
"error": null
|
|
},
|
|
{
|
|
"prompt_id": "P-HARD",
|
|
"run_idx": 2,
|
|
"phase": "warm",
|
|
"duration_ms": 162391,
|
|
"tokens_per_sec": 5.024909015893738,
|
|
"error": null
|
|
},
|
|
{
|
|
"prompt_id": "P-HARD",
|
|
"run_idx": 3,
|
|
"phase": "warm",
|
|
"duration_ms": 157797,
|
|
"tokens_per_sec": 4.835326400375165,
|
|
"error": null
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl12-ctx96k",
|
|
"n_calls": 15,
|
|
"n_errors": 0,
|
|
"duration_ms_mean": 71805.83,
|
|
"duration_ms_p50": 58541.0,
|
|
"duration_ms_p95": 140580.5,
|
|
"tokens_per_sec_mean": 5.79,
|
|
"tokens_per_sec_p50": 5.75,
|
|
"tokens_per_sec_p95": 6.47,
|
|
"tokens_per_sec_max": 6.7,
|
|
"cold_warm_split": {
|
|
"cold": {
|
|
"n_calls": 3,
|
|
"duration_ms_mean": 65655.0,
|
|
"duration_ms_p50": 45721.0,
|
|
"tokens_per_sec_mean": 5.92
|
|
},
|
|
"warm": {
|
|
"n_calls": 9,
|
|
"duration_ms_mean": 73856.11,
|
|
"duration_ms_p50": 59728.0,
|
|
"tokens_per_sec_mean": 5.75
|
|
}
|
|
},
|
|
"per_prompt": {
|
|
"unknown": {
|
|
"n_calls": 3,
|
|
"n_errors": 0,
|
|
"duration_ms_mean": null,
|
|
"duration_ms_p50": null,
|
|
"tokens_per_sec_mean": null,
|
|
"tokens_per_sec_p50": null
|
|
},
|
|
"hello": {
|
|
"n_calls": 4,
|
|
"n_errors": 0,
|
|
"duration_ms_mean": 20742.5,
|
|
"duration_ms_p50": 18111.0,
|
|
"tokens_per_sec_mean": 5.82,
|
|
"tokens_per_sec_p50": 5.83
|
|
},
|
|
"P-MEDIUM": {
|
|
"n_calls": 4,
|
|
"n_errors": 0,
|
|
"duration_ms_mean": 66764.5,
|
|
"duration_ms_p50": 58541.0,
|
|
"tokens_per_sec_mean": 5.84,
|
|
"tokens_per_sec_p50": 5.76
|
|
},
|
|
"P-HARD": {
|
|
"n_calls": 4,
|
|
"n_errors": 0,
|
|
"duration_ms_mean": 127910.5,
|
|
"duration_ms_p50": 132742.5,
|
|
"tokens_per_sec_mean": 5.71,
|
|
"tokens_per_sec_p50": 5.72
|
|
}
|
|
},
|
|
"chars_split": {
|
|
"has_thinking": true,
|
|
"reasoning_chars_mean": 1209.5,
|
|
"answer_chars_mean": 618.75
|
|
},
|
|
"timeline": [
|
|
{
|
|
"prompt_id": null,
|
|
"run_idx": null,
|
|
"phase": null,
|
|
"duration_ms": null,
|
|
"tokens_per_sec": null,
|
|
"error": null
|
|
},
|
|
{
|
|
"prompt_id": null,
|
|
"run_idx": null,
|
|
"phase": null,
|
|
"duration_ms": null,
|
|
"tokens_per_sec": null,
|
|
"error": null
|
|
},
|
|
{
|
|
"prompt_id": null,
|
|
"run_idx": null,
|
|
"phase": null,
|
|
"duration_ms": null,
|
|
"tokens_per_sec": null,
|
|
"error": null
|
|
},
|
|
{
|
|
"prompt_id": "hello",
|
|
"run_idx": 0,
|
|
"phase": "cold",
|
|
"duration_ms": 13701,
|
|
"tokens_per_sec": 5.766002481570688,
|
|
"error": null
|
|
},
|
|
{
|
|
"prompt_id": "hello",
|
|
"run_idx": 1,
|
|
"phase": "warm",
|
|
"duration_ms": 33047,
|
|
"tokens_per_sec": 4.932369050140709,
|
|
"error": null
|
|
},
|
|
{
|
|
"prompt_id": "hello",
|
|
"run_idx": 2,
|
|
"phase": "warm",
|
|
"duration_ms": 15830,
|
|
"tokens_per_sec": 6.69614655716993,
|
|
"error": null
|
|
},
|
|
{
|
|
"prompt_id": "hello",
|
|
"run_idx": 3,
|
|
"phase": "warm",
|
|
"duration_ms": 20392,
|
|
"tokens_per_sec": 5.884660651235779,
|
|
"error": null
|
|
},
|
|
{
|
|
"prompt_id": "P-MEDIUM",
|
|
"run_idx": 0,
|
|
"phase": "cold",
|
|
"duration_ms": 45721,
|
|
"tokens_per_sec": 6.277203035804117,
|
|
"error": null
|
|
},
|
|
{
|
|
"prompt_id": "P-MEDIUM",
|
|
"run_idx": 1,
|
|
"phase": "warm",
|
|
"duration_ms": 59728,
|
|
"tokens_per_sec": 5.575274578087329,
|
|
"error": null
|
|
},
|
|
{
|
|
"prompt_id": "P-MEDIUM",
|
|
"run_idx": 2,
|
|
"phase": "warm",
|
|
"duration_ms": 57354,
|
|
"tokens_per_sec": 5.666562053213377,
|
|
"error": null
|
|
},
|
|
{
|
|
"prompt_id": "P-MEDIUM",
|
|
"run_idx": 3,
|
|
"phase": "warm",
|
|
"duration_ms": 104255,
|
|
"tokens_per_sec": 5.85103831950506,
|
|
"error": null
|
|
},
|
|
{
|
|
"prompt_id": "P-HARD",
|
|
"run_idx": 0,
|
|
"phase": "cold",
|
|
"duration_ms": 137543,
|
|
"tokens_per_sec": 5.729117439637059,
|
|
"error": null
|
|
},
|
|
{
|
|
"prompt_id": "P-HARD",
|
|
"run_idx": 1,
|
|
"phase": "warm",
|
|
"duration_ms": 144293,
|
|
"tokens_per_sec": 5.516553124545196,
|
|
"error": null
|
|
},
|
|
{
|
|
"prompt_id": "P-HARD",
|
|
"run_idx": 2,
|
|
"phase": "warm",
|
|
"duration_ms": 101864,
|
|
"tokens_per_sec": 5.703683342495877,
|
|
"error": null
|
|
},
|
|
{
|
|
"prompt_id": "P-HARD",
|
|
"run_idx": 3,
|
|
"phase": "warm",
|
|
"duration_ms": 127942,
|
|
"tokens_per_sec": 5.893295399477889,
|
|
"error": null
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl12-ctx131k",
|
|
"n_calls": 15,
|
|
"n_errors": 0,
|
|
"duration_ms_mean": 96406.17,
|
|
"duration_ms_p50": 105842.0,
|
|
"duration_ms_p95": 154492.35,
|
|
"tokens_per_sec_mean": 5.01,
|
|
"tokens_per_sec_p50": 5.13,
|
|
"tokens_per_sec_p95": 5.97,
|
|
"tokens_per_sec_max": 6.29,
|
|
"cold_warm_split": {
|
|
"cold": {
|
|
"n_calls": 3,
|
|
"duration_ms_mean": 109523.67,
|
|
"duration_ms_p50": 118125.0,
|
|
"tokens_per_sec_mean": 4.11
|
|
},
|
|
"warm": {
|
|
"n_calls": 9,
|
|
"duration_ms_mean": 92033.67,
|
|
"duration_ms_p50": 95486.0,
|
|
"tokens_per_sec_mean": 5.31
|
|
}
|
|
},
|
|
"per_prompt": {
|
|
"unknown": {
|
|
"n_calls": 3,
|
|
"n_errors": 0,
|
|
"duration_ms_mean": null,
|
|
"duration_ms_p50": null,
|
|
"tokens_per_sec_mean": null,
|
|
"tokens_per_sec_p50": null
|
|
},
|
|
"hello": {
|
|
"n_calls": 4,
|
|
"n_errors": 0,
|
|
"duration_ms_mean": 33574.75,
|
|
"duration_ms_p50": 31872.5,
|
|
"tokens_per_sec_mean": 4.73,
|
|
"tokens_per_sec_p50": 5.19
|
|
},
|
|
"P-MEDIUM": {
|
|
"n_calls": 4,
|
|
"n_errors": 0,
|
|
"duration_ms_mean": 103544.75,
|
|
"duration_ms_p50": 105842.0,
|
|
"tokens_per_sec_mean": 5.19,
|
|
"tokens_per_sec_p50": 5.05
|
|
},
|
|
"P-HARD": {
|
|
"n_calls": 4,
|
|
"n_errors": 0,
|
|
"duration_ms_mean": 152099.0,
|
|
"duration_ms_p50": 150187.0,
|
|
"tokens_per_sec_mean": 5.11,
|
|
"tokens_per_sec_p50": 5.13
|
|
}
|
|
},
|
|
"chars_split": {
|
|
"has_thinking": true,
|
|
"reasoning_chars_mean": 1296.25,
|
|
"answer_chars_mean": 873.42
|
|
},
|
|
"timeline": [
|
|
{
|
|
"prompt_id": null,
|
|
"run_idx": null,
|
|
"phase": null,
|
|
"duration_ms": null,
|
|
"tokens_per_sec": null,
|
|
"error": null
|
|
},
|
|
{
|
|
"prompt_id": null,
|
|
"run_idx": null,
|
|
"phase": null,
|
|
"duration_ms": null,
|
|
"tokens_per_sec": null,
|
|
"error": null
|
|
},
|
|
{
|
|
"prompt_id": null,
|
|
"run_idx": null,
|
|
"phase": null,
|
|
"duration_ms": null,
|
|
"tokens_per_sec": null,
|
|
"error": null
|
|
},
|
|
{
|
|
"prompt_id": "hello",
|
|
"run_idx": 0,
|
|
"phase": "cold",
|
|
"duration_ms": 59744,
|
|
"tokens_per_sec": 2.259641135511516,
|
|
"error": null
|
|
},
|
|
{
|
|
"prompt_id": "hello",
|
|
"run_idx": 1,
|
|
"phase": "warm",
|
|
"duration_ms": 10810,
|
|
"tokens_per_sec": 6.290471785383904,
|
|
"error": null
|
|
},
|
|
{
|
|
"prompt_id": "hello",
|
|
"run_idx": 2,
|
|
"phase": "warm",
|
|
"duration_ms": 20002,
|
|
"tokens_per_sec": 5.049495050494951,
|
|
"error": null
|
|
},
|
|
{
|
|
"prompt_id": "hello",
|
|
"run_idx": 3,
|
|
"phase": "warm",
|
|
"duration_ms": 43743,
|
|
"tokens_per_sec": 5.326566536360103,
|
|
"error": null
|
|
},
|
|
{
|
|
"prompt_id": "P-MEDIUM",
|
|
"run_idx": 0,
|
|
"phase": "cold",
|
|
"duration_ms": 118125,
|
|
"tokens_per_sec": 4.960846560846561,
|
|
"error": null
|
|
},
|
|
{
|
|
"prompt_id": "P-MEDIUM",
|
|
"run_idx": 1,
|
|
"phase": "warm",
|
|
"duration_ms": 116198,
|
|
"tokens_per_sec": 5.146388061756657,
|
|
"error": null
|
|
},
|
|
{
|
|
"prompt_id": "P-MEDIUM",
|
|
"run_idx": 2,
|
|
"phase": "warm",
|
|
"duration_ms": 95486,
|
|
"tokens_per_sec": 4.932660285277422,
|
|
"error": null
|
|
},
|
|
{
|
|
"prompt_id": "P-MEDIUM",
|
|
"run_idx": 3,
|
|
"phase": "warm",
|
|
"duration_ms": 84370,
|
|
"tokens_per_sec": 5.71293113665995,
|
|
"error": null
|
|
},
|
|
{
|
|
"prompt_id": "P-HARD",
|
|
"run_idx": 0,
|
|
"phase": "cold",
|
|
"duration_ms": 150702,
|
|
"tokens_per_sec": 5.122692465926132,
|
|
"error": null
|
|
},
|
|
{
|
|
"prompt_id": "P-HARD",
|
|
"run_idx": 1,
|
|
"phase": "warm",
|
|
"duration_ms": 148897,
|
|
"tokens_per_sec": 5.131063755481978,
|
|
"error": null
|
|
},
|
|
{
|
|
"prompt_id": "P-HARD",
|
|
"run_idx": 2,
|
|
"phase": "warm",
|
|
"duration_ms": 159125,
|
|
"tokens_per_sec": 4.788688138256088,
|
|
"error": null
|
|
},
|
|
{
|
|
"prompt_id": "P-HARD",
|
|
"run_idx": 3,
|
|
"phase": "warm",
|
|
"duration_ms": 149672,
|
|
"tokens_per_sec": 5.378427494788604,
|
|
"error": null
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl24-ctx48k",
|
|
"n_calls": 15,
|
|
"n_errors": 0,
|
|
"duration_ms_mean": 65539.5,
|
|
"duration_ms_p50": 64606.5,
|
|
"duration_ms_p95": 118069.75,
|
|
"tokens_per_sec_mean": 6.6,
|
|
"tokens_per_sec_p50": 6.59,
|
|
"tokens_per_sec_p95": 6.89,
|
|
"tokens_per_sec_max": 6.91,
|
|
"cold_warm_split": {
|
|
"cold": {
|
|
"n_calls": 3,
|
|
"duration_ms_mean": 66671.33,
|
|
"duration_ms_p50": 58574.0,
|
|
"tokens_per_sec_mean": 6.36
|
|
},
|
|
"warm": {
|
|
"n_calls": 9,
|
|
"duration_ms_mean": 65162.22,
|
|
"duration_ms_p50": 69317.0,
|
|
"tokens_per_sec_mean": 6.68
|
|
}
|
|
},
|
|
"per_prompt": {
|
|
"unknown": {
|
|
"n_calls": 3,
|
|
"n_errors": 0,
|
|
"duration_ms_mean": null,
|
|
"duration_ms_p50": null,
|
|
"tokens_per_sec_mean": null,
|
|
"tokens_per_sec_p50": null
|
|
},
|
|
"hello": {
|
|
"n_calls": 4,
|
|
"n_errors": 0,
|
|
"duration_ms_mean": 16306.0,
|
|
"duration_ms_p50": 15313.0,
|
|
"tokens_per_sec_mean": 6.71,
|
|
"tokens_per_sec_p50": 6.86
|
|
},
|
|
"P-MEDIUM": {
|
|
"n_calls": 4,
|
|
"n_errors": 0,
|
|
"duration_ms_mean": 75223.75,
|
|
"duration_ms_p50": 74910.0,
|
|
"tokens_per_sec_mean": 6.61,
|
|
"tokens_per_sec_p50": 6.59
|
|
},
|
|
"P-HARD": {
|
|
"n_calls": 4,
|
|
"n_errors": 0,
|
|
"duration_ms_mean": 105088.75,
|
|
"duration_ms_p50": 114731.0,
|
|
"tokens_per_sec_mean": 6.49,
|
|
"tokens_per_sec_p50": 6.48
|
|
}
|
|
},
|
|
"chars_split": {
|
|
"has_thinking": true,
|
|
"reasoning_chars_mean": 1152.67,
|
|
"answer_chars_mean": 769.33
|
|
},
|
|
"timeline": [
|
|
{
|
|
"prompt_id": null,
|
|
"run_idx": null,
|
|
"phase": null,
|
|
"duration_ms": null,
|
|
"tokens_per_sec": null,
|
|
"error": null
|
|
},
|
|
{
|
|
"prompt_id": null,
|
|
"run_idx": null,
|
|
"phase": null,
|
|
"duration_ms": null,
|
|
"tokens_per_sec": null,
|
|
"error": null
|
|
},
|
|
{
|
|
"prompt_id": null,
|
|
"run_idx": null,
|
|
"phase": null,
|
|
"duration_ms": null,
|
|
"tokens_per_sec": null,
|
|
"error": null
|
|
},
|
|
{
|
|
"prompt_id": "hello",
|
|
"run_idx": 0,
|
|
"phase": "cold",
|
|
"duration_ms": 19864,
|
|
"tokens_per_sec": 6.192106322996375,
|
|
"error": null
|
|
},
|
|
{
|
|
"prompt_id": "hello",
|
|
"run_idx": 1,
|
|
"phase": "warm",
|
|
"duration_ms": 14734,
|
|
"tokens_per_sec": 6.854893443735578,
|
|
"error": null
|
|
},
|
|
{
|
|
"prompt_id": "hello",
|
|
"run_idx": 2,
|
|
"phase": "warm",
|
|
"duration_ms": 15636,
|
|
"tokens_per_sec": 6.907137375287798,
|
|
"error": null
|
|
},
|
|
{
|
|
"prompt_id": "hello",
|
|
"run_idx": 3,
|
|
"phase": "warm",
|
|
"duration_ms": 14990,
|
|
"tokens_per_sec": 6.871247498332221,
|
|
"error": null
|
|
},
|
|
{
|
|
"prompt_id": "P-MEDIUM",
|
|
"run_idx": 0,
|
|
"phase": "cold",
|
|
"duration_ms": 58574,
|
|
"tokens_per_sec": 6.572882166148803,
|
|
"error": null
|
|
},
|
|
{
|
|
"prompt_id": "P-MEDIUM",
|
|
"run_idx": 1,
|
|
"phase": "warm",
|
|
"duration_ms": 89924,
|
|
"tokens_per_sec": 6.60557804368133,
|
|
"error": null
|
|
},
|
|
{
|
|
"prompt_id": "P-MEDIUM",
|
|
"run_idx": 2,
|
|
"phase": "warm",
|
|
"duration_ms": 92501,
|
|
"tokens_per_sec": 6.551280526697008,
|
|
"error": null
|
|
},
|
|
{
|
|
"prompt_id": "P-MEDIUM",
|
|
"run_idx": 3,
|
|
"phase": "warm",
|
|
"duration_ms": 59896,
|
|
"tokens_per_sec": 6.728329103779885,
|
|
"error": null
|
|
},
|
|
{
|
|
"prompt_id": "P-HARD",
|
|
"run_idx": 0,
|
|
"phase": "cold",
|
|
"duration_ms": 121576,
|
|
"tokens_per_sec": 6.317036257156018,
|
|
"error": null
|
|
},
|
|
{
|
|
"prompt_id": "P-HARD",
|
|
"run_idx": 1,
|
|
"phase": "warm",
|
|
"duration_ms": 69317,
|
|
"tokens_per_sec": 6.6794581415814305,
|
|
"error": null
|
|
},
|
|
{
|
|
"prompt_id": "P-HARD",
|
|
"run_idx": 2,
|
|
"phase": "warm",
|
|
"duration_ms": 114261,
|
|
"tokens_per_sec": 6.485152414209573,
|
|
"error": null
|
|
},
|
|
{
|
|
"prompt_id": "P-HARD",
|
|
"run_idx": 3,
|
|
"phase": "warm",
|
|
"duration_ms": 115201,
|
|
"tokens_per_sec": 6.475638232307012,
|
|
"error": null
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl24-ctx64k",
|
|
"n_calls": 15,
|
|
"n_errors": 0,
|
|
"duration_ms_mean": 71267.58,
|
|
"duration_ms_p50": 80265.5,
|
|
"duration_ms_p95": 128231.3,
|
|
"tokens_per_sec_mean": 6.41,
|
|
"tokens_per_sec_p50": 6.39,
|
|
"tokens_per_sec_p95": 6.88,
|
|
"tokens_per_sec_max": 6.9,
|
|
"cold_warm_split": {
|
|
"cold": {
|
|
"n_calls": 3,
|
|
"duration_ms_mean": 80477.67,
|
|
"duration_ms_p50": 99471.0,
|
|
"tokens_per_sec_mean": 5.99
|
|
},
|
|
"warm": {
|
|
"n_calls": 9,
|
|
"duration_ms_mean": 68197.56,
|
|
"duration_ms_p50": 69604.0,
|
|
"tokens_per_sec_mean": 6.55
|
|
}
|
|
},
|
|
"per_prompt": {
|
|
"unknown": {
|
|
"n_calls": 3,
|
|
"n_errors": 0,
|
|
"duration_ms_mean": null,
|
|
"duration_ms_p50": null,
|
|
"tokens_per_sec_mean": null,
|
|
"tokens_per_sec_p50": null
|
|
},
|
|
"hello": {
|
|
"n_calls": 4,
|
|
"n_errors": 0,
|
|
"duration_ms_mean": 16514.0,
|
|
"duration_ms_p50": 16585.0,
|
|
"tokens_per_sec_mean": 6.53,
|
|
"tokens_per_sec_p50": 6.84
|
|
},
|
|
"P-MEDIUM": {
|
|
"n_calls": 4,
|
|
"n_errors": 0,
|
|
"duration_ms_mean": 79436.25,
|
|
"duration_ms_p50": 81054.0,
|
|
"tokens_per_sec_mean": 6.43,
|
|
"tokens_per_sec_p50": 6.44
|
|
},
|
|
"P-HARD": {
|
|
"n_calls": 4,
|
|
"n_errors": 0,
|
|
"duration_ms_mean": 117852.5,
|
|
"duration_ms_p50": 124494.0,
|
|
"tokens_per_sec_mean": 6.25,
|
|
"tokens_per_sec_p50": 6.25
|
|
}
|
|
},
|
|
"chars_split": {
|
|
"has_thinking": true,
|
|
"reasoning_chars_mean": 1063.5,
|
|
"answer_chars_mean": 944.25
|
|
},
|
|
"timeline": [
|
|
{
|
|
"prompt_id": null,
|
|
"run_idx": null,
|
|
"phase": null,
|
|
"duration_ms": null,
|
|
"tokens_per_sec": null,
|
|
"error": null
|
|
},
|
|
{
|
|
"prompt_id": null,
|
|
"run_idx": null,
|
|
"phase": null,
|
|
"duration_ms": null,
|
|
"tokens_per_sec": null,
|
|
"error": null
|
|
},
|
|
{
|
|
"prompt_id": null,
|
|
"run_idx": null,
|
|
"phase": null,
|
|
"duration_ms": null,
|
|
"tokens_per_sec": null,
|
|
"error": null
|
|
},
|
|
{
|
|
"prompt_id": "hello",
|
|
"run_idx": 0,
|
|
"phase": "cold",
|
|
"duration_ms": 18535,
|
|
"tokens_per_sec": 5.557054221742649,
|
|
"error": null
|
|
},
|
|
{
|
|
"prompt_id": "hello",
|
|
"run_idx": 1,
|
|
"phase": "warm",
|
|
"duration_ms": 18478,
|
|
"tokens_per_sec": 6.873038207598224,
|
|
"error": null
|
|
},
|
|
{
|
|
"prompt_id": "hello",
|
|
"run_idx": 2,
|
|
"phase": "warm",
|
|
"duration_ms": 14692,
|
|
"tokens_per_sec": 6.806425265450585,
|
|
"error": null
|
|
},
|
|
{
|
|
"prompt_id": "hello",
|
|
"run_idx": 3,
|
|
"phase": "warm",
|
|
"duration_ms": 14351,
|
|
"tokens_per_sec": 6.898473973939098,
|
|
"error": null
|
|
},
|
|
{
|
|
"prompt_id": "P-MEDIUM",
|
|
"run_idx": 0,
|
|
"phase": "cold",
|
|
"duration_ms": 99471,
|
|
"tokens_per_sec": 6.2430256054528455,
|
|
"error": null
|
|
},
|
|
{
|
|
"prompt_id": "P-MEDIUM",
|
|
"run_idx": 1,
|
|
"phase": "warm",
|
|
"duration_ms": 56166,
|
|
"tokens_per_sec": 6.445180358223837,
|
|
"error": null
|
|
},
|
|
{
|
|
"prompt_id": "P-MEDIUM",
|
|
"run_idx": 2,
|
|
"phase": "warm",
|
|
"duration_ms": 92504,
|
|
"tokens_per_sec": 6.442964628556602,
|
|
"error": null
|
|
},
|
|
{
|
|
"prompt_id": "P-MEDIUM",
|
|
"run_idx": 3,
|
|
"phase": "warm",
|
|
"duration_ms": 69604,
|
|
"tokens_per_sec": 6.5944485949083385,
|
|
"error": null
|
|
},
|
|
{
|
|
"prompt_id": "P-HARD",
|
|
"run_idx": 0,
|
|
"phase": "cold",
|
|
"duration_ms": 123427,
|
|
"tokens_per_sec": 6.165587756325601,
|
|
"error": null
|
|
},
|
|
{
|
|
"prompt_id": "P-HARD",
|
|
"run_idx": 1,
|
|
"phase": "warm",
|
|
"duration_ms": 90927,
|
|
"tokens_per_sec": 6.345749887272207,
|
|
"error": null
|
|
},
|
|
{
|
|
"prompt_id": "P-HARD",
|
|
"run_idx": 2,
|
|
"phase": "warm",
|
|
"duration_ms": 131495,
|
|
"tokens_per_sec": 6.319631925168257,
|
|
"error": null
|
|
},
|
|
{
|
|
"prompt_id": "P-HARD",
|
|
"run_idx": 3,
|
|
"phase": "warm",
|
|
"duration_ms": 125561,
|
|
"tokens_per_sec": 6.188227236164095,
|
|
"error": null
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl24-ctx96k",
|
|
"n_calls": 15,
|
|
"n_errors": 0,
|
|
"duration_ms_mean": 69299.5,
|
|
"duration_ms_p50": 68355.0,
|
|
"duration_ms_p95": 124990.95,
|
|
"tokens_per_sec_mean": 6.08,
|
|
"tokens_per_sec_p50": 6.44,
|
|
"tokens_per_sec_p95": 6.68,
|
|
"tokens_per_sec_max": 6.69,
|
|
"cold_warm_split": {
|
|
"cold": {
|
|
"n_calls": 3,
|
|
"duration_ms_mean": 78350.33,
|
|
"duration_ms_p50": 72290.0,
|
|
"tokens_per_sec_mean": 4.93
|
|
},
|
|
"warm": {
|
|
"n_calls": 9,
|
|
"duration_ms_mean": 66282.56,
|
|
"duration_ms_p50": 64420.0,
|
|
"tokens_per_sec_mean": 6.47
|
|
}
|
|
},
|
|
"per_prompt": {
|
|
"unknown": {
|
|
"n_calls": 3,
|
|
"n_errors": 0,
|
|
"duration_ms_mean": null,
|
|
"duration_ms_p50": null,
|
|
"tokens_per_sec_mean": null,
|
|
"tokens_per_sec_p50": null
|
|
},
|
|
"hello": {
|
|
"n_calls": 4,
|
|
"n_errors": 0,
|
|
"duration_ms_mean": 25171.0,
|
|
"duration_ms_p50": 25926.5,
|
|
"tokens_per_sec_mean": 5.49,
|
|
"tokens_per_sec_p50": 6.3
|
|
},
|
|
"P-MEDIUM": {
|
|
"n_calls": 4,
|
|
"n_errors": 0,
|
|
"duration_ms_mean": 73270.5,
|
|
"duration_ms_p50": 68355.0,
|
|
"tokens_per_sec_mean": 6.39,
|
|
"tokens_per_sec_p50": 6.46
|
|
},
|
|
"P-HARD": {
|
|
"n_calls": 4,
|
|
"n_errors": 0,
|
|
"duration_ms_mean": 109457.0,
|
|
"duration_ms_p50": 117967.0,
|
|
"tokens_per_sec_mean": 6.37,
|
|
"tokens_per_sec_p50": 6.4
|
|
}
|
|
},
|
|
"chars_split": {
|
|
"has_thinking": true,
|
|
"reasoning_chars_mean": 1185.42,
|
|
"answer_chars_mean": 741.92
|
|
},
|
|
"timeline": [
|
|
{
|
|
"prompt_id": null,
|
|
"run_idx": null,
|
|
"phase": null,
|
|
"duration_ms": null,
|
|
"tokens_per_sec": null,
|
|
"error": null
|
|
},
|
|
{
|
|
"prompt_id": null,
|
|
"run_idx": null,
|
|
"phase": null,
|
|
"duration_ms": null,
|
|
"tokens_per_sec": null,
|
|
"error": null
|
|
},
|
|
{
|
|
"prompt_id": null,
|
|
"run_idx": null,
|
|
"phase": null,
|
|
"duration_ms": null,
|
|
"tokens_per_sec": null,
|
|
"error": null
|
|
},
|
|
{
|
|
"prompt_id": "hello",
|
|
"run_idx": 0,
|
|
"phase": "cold",
|
|
"duration_ms": 35465,
|
|
"tokens_per_sec": 2.7068941209643307,
|
|
"error": null
|
|
},
|
|
{
|
|
"prompt_id": "hello",
|
|
"run_idx": 1,
|
|
"phase": "warm",
|
|
"duration_ms": 27111,
|
|
"tokens_per_sec": 6.1598613109070115,
|
|
"error": null
|
|
},
|
|
{
|
|
"prompt_id": "hello",
|
|
"run_idx": 2,
|
|
"phase": "warm",
|
|
"duration_ms": 24742,
|
|
"tokens_per_sec": 6.668822245574327,
|
|
"error": null
|
|
},
|
|
{
|
|
"prompt_id": "hello",
|
|
"run_idx": 3,
|
|
"phase": "warm",
|
|
"duration_ms": 13366,
|
|
"tokens_per_sec": 6.43423612150232,
|
|
"error": null
|
|
},
|
|
{
|
|
"prompt_id": "P-MEDIUM",
|
|
"run_idx": 0,
|
|
"phase": "cold",
|
|
"duration_ms": 72290,
|
|
"tokens_per_sec": 5.97593028081339,
|
|
"error": null
|
|
},
|
|
{
|
|
"prompt_id": "P-MEDIUM",
|
|
"run_idx": 1,
|
|
"phase": "warm",
|
|
"duration_ms": 92055,
|
|
"tokens_per_sec": 6.474390310140676,
|
|
"error": null
|
|
},
|
|
{
|
|
"prompt_id": "P-MEDIUM",
|
|
"run_idx": 2,
|
|
"phase": "warm",
|
|
"duration_ms": 64317,
|
|
"tokens_per_sec": 6.436867391202949,
|
|
"error": null
|
|
},
|
|
{
|
|
"prompt_id": "P-MEDIUM",
|
|
"run_idx": 3,
|
|
"phase": "warm",
|
|
"duration_ms": 64420,
|
|
"tokens_per_sec": 6.690468798509779,
|
|
"error": null
|
|
},
|
|
{
|
|
"prompt_id": "P-HARD",
|
|
"run_idx": 0,
|
|
"phase": "cold",
|
|
"duration_ms": 127296,
|
|
"tokens_per_sec": 6.1038838612368025,
|
|
"error": null
|
|
},
|
|
{
|
|
"prompt_id": "P-HARD",
|
|
"run_idx": 1,
|
|
"phase": "warm",
|
|
"duration_ms": 123105,
|
|
"tokens_per_sec": 6.344177734454328,
|
|
"error": null
|
|
},
|
|
{
|
|
"prompt_id": "P-HARD",
|
|
"run_idx": 2,
|
|
"phase": "warm",
|
|
"duration_ms": 74598,
|
|
"tokens_per_sec": 6.568540711547227,
|
|
"error": null
|
|
},
|
|
{
|
|
"prompt_id": "P-HARD",
|
|
"run_idx": 3,
|
|
"phase": "warm",
|
|
"duration_ms": 112829,
|
|
"tokens_per_sec": 6.45224188816705,
|
|
"error": null
|
|
}
|
|
]
|
|
}
|
|
],
|
|
"n_calls_total": 90,
|
|
"n_errors_total": 0,
|
|
"chart_spec": {
|
|
"kind": "line",
|
|
"x": "param_value",
|
|
"y": "tokens_per_sec_mean",
|
|
"series": [
|
|
"qwen3:30b-a3b-iq2m-think500-ngl12-ctx64k",
|
|
"qwen3:30b-a3b-iq2m-think500-ngl12-ctx96k",
|
|
"qwen3:30b-a3b-iq2m-think500-ngl12-ctx131k",
|
|
"qwen3:30b-a3b-iq2m-think500-ngl24-ctx48k",
|
|
"qwen3:30b-a3b-iq2m-think500-ngl24-ctx64k",
|
|
"qwen3:30b-a3b-iq2m-think500-ngl24-ctx96k"
|
|
],
|
|
"data_url": "/data/e08a7051-a856-4053-b34b-561d7ce5e8cf/run.jsonl"
|
|
},
|
|
"site_grade": "standard",
|
|
"site_grade_reason": null,
|
|
"raw_data_urls": {
|
|
"jsonl": "/data/e08a7051-a856-4053-b34b-561d7ce5e8cf/run.jsonl",
|
|
"log": "/data/e08a7051-a856-4053-b34b-561d7ce5e8cf/run.log",
|
|
"md": null,
|
|
"metadata": "/data/e08a7051-a856-4053-b34b-561d7ce5e8cf/metadata.json",
|
|
"gitea_dir": "https://git.weeyuga.com/slobodanmargetic988/weeyuga-benchmarks-public/src/branch/main/runs/e08a7051-a856-4053-b34b-561d7ce5e8cf/"
|
|
},
|
|
"synthesis_doc": "A3B_AND_CPU_OVERNIGHT_2026-05-05.md",
|
|
"synthesis_docs_all": [
|
|
"A3B_AND_CPU_OVERNIGHT_2026-05-05.md",
|
|
"SITE_DATA_AUDIT_AND_MIGRATION_PLAN_2026-05-06.md"
|
|
],
|
|
"related_ids": [],
|
|
"status": "complete",
|
|
"visibility": "draft",
|
|
"cells_full": [
|
|
{
|
|
"cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl12-ctx64k",
|
|
"n_calls": 15,
|
|
"n_errors": 0,
|
|
"duration_ms_mean": 85573.42,
|
|
"duration_ms_p50": 78940.0,
|
|
"duration_ms_p95": 159864.3,
|
|
"tokens_per_sec_mean": 5.48,
|
|
"tokens_per_sec_p50": 5.31,
|
|
"tokens_per_sec_p95": 6.79,
|
|
"tokens_per_sec_max": 6.8,
|
|
"cold_warm_split": {
|
|
"cold": {
|
|
"n_calls": 3,
|
|
"duration_ms_mean": 92828.67,
|
|
"duration_ms_p50": 110762.0,
|
|
"tokens_per_sec_mean": 5.4
|
|
},
|
|
"warm": {
|
|
"n_calls": 9,
|
|
"duration_ms_mean": 83155.0,
|
|
"duration_ms_p50": 76258.0,
|
|
"tokens_per_sec_mean": 5.51
|
|
}
|
|
},
|
|
"per_prompt": {
|
|
"unknown": {
|
|
"n_calls": 3,
|
|
"n_errors": 0,
|
|
"duration_ms_mean": null,
|
|
"duration_ms_p50": null,
|
|
"tokens_per_sec_mean": null,
|
|
"tokens_per_sec_p50": null
|
|
},
|
|
"hello": {
|
|
"n_calls": 4,
|
|
"n_errors": 0,
|
|
"duration_ms_mean": 18907.5,
|
|
"duration_ms_p50": 19953.0,
|
|
"tokens_per_sec_mean": 6.04,
|
|
"tokens_per_sec_p50": 6.21
|
|
},
|
|
"P-MEDIUM": {
|
|
"n_calls": 4,
|
|
"n_errors": 0,
|
|
"duration_ms_mean": 84750.25,
|
|
"duration_ms_p50": 78940.0,
|
|
"tokens_per_sec_mean": 5.3,
|
|
"tokens_per_sec_p50": 5.32
|
|
},
|
|
"P-HARD": {
|
|
"n_calls": 4,
|
|
"n_errors": 0,
|
|
"duration_ms_mean": 153062.5,
|
|
"duration_ms_p50": 153912.0,
|
|
"tokens_per_sec_mean": 5.11,
|
|
"tokens_per_sec_p50": 5.14
|
|
}
|
|
},
|
|
"chars_split": {
|
|
"has_thinking": true,
|
|
"reasoning_chars_mean": 1193.67,
|
|
"answer_chars_mean": 809.42
|
|
},
|
|
"timeline": [
|
|
{
|
|
"prompt_id": null,
|
|
"run_idx": null,
|
|
"phase": null,
|
|
"duration_ms": null,
|
|
"tokens_per_sec": null,
|
|
"error": null
|
|
},
|
|
{
|
|
"prompt_id": null,
|
|
"run_idx": null,
|
|
"phase": null,
|
|
"duration_ms": null,
|
|
"tokens_per_sec": null,
|
|
"error": null
|
|
},
|
|
{
|
|
"prompt_id": null,
|
|
"run_idx": null,
|
|
"phase": null,
|
|
"duration_ms": null,
|
|
"tokens_per_sec": null,
|
|
"error": null
|
|
},
|
|
{
|
|
"prompt_id": "hello",
|
|
"run_idx": 0,
|
|
"phase": "cold",
|
|
"duration_ms": 25689,
|
|
"tokens_per_sec": 5.644439254155475,
|
|
"error": null
|
|
},
|
|
{
|
|
"prompt_id": "hello",
|
|
"run_idx": 1,
|
|
"phase": "warm",
|
|
"duration_ms": 10035,
|
|
"tokens_per_sec": 6.776283009466866,
|
|
"error": null
|
|
},
|
|
{
|
|
"prompt_id": "hello",
|
|
"run_idx": 2,
|
|
"phase": "warm",
|
|
"duration_ms": 15585,
|
|
"tokens_per_sec": 6.801411613731151,
|
|
"error": null
|
|
},
|
|
{
|
|
"prompt_id": "hello",
|
|
"run_idx": 3,
|
|
"phase": "warm",
|
|
"duration_ms": 24321,
|
|
"tokens_per_sec": 4.934007647711853,
|
|
"error": null
|
|
},
|
|
{
|
|
"prompt_id": "P-MEDIUM",
|
|
"run_idx": 0,
|
|
"phase": "cold",
|
|
"duration_ms": 110762,
|
|
"tokens_per_sec": 5.29062313789928,
|
|
"error": null
|
|
},
|
|
{
|
|
"prompt_id": "P-MEDIUM",
|
|
"run_idx": 1,
|
|
"phase": "warm",
|
|
"duration_ms": 76258,
|
|
"tokens_per_sec": 5.101104146450209,
|
|
"error": null
|
|
},
|
|
{
|
|
"prompt_id": "P-MEDIUM",
|
|
"run_idx": 2,
|
|
"phase": "warm",
|
|
"duration_ms": 70359,
|
|
"tokens_per_sec": 5.344021376085505,
|
|
"error": null
|
|
},
|
|
{
|
|
"prompt_id": "P-MEDIUM",
|
|
"run_idx": 3,
|
|
"phase": "warm",
|
|
"duration_ms": 81622,
|
|
"tokens_per_sec": 5.451961480973267,
|
|
"error": null
|
|
},
|
|
{
|
|
"prompt_id": "P-HARD",
|
|
"run_idx": 0,
|
|
"phase": "cold",
|
|
"duration_ms": 142035,
|
|
"tokens_per_sec": 5.252226563875102,
|
|
"error": null
|
|
},
|
|
{
|
|
"prompt_id": "P-HARD",
|
|
"run_idx": 1,
|
|
"phase": "warm",
|
|
"duration_ms": 150027,
|
|
"tokens_per_sec": 5.339038972984863,
|
|
"error": null
|
|
},
|
|
{
|
|
"prompt_id": "P-HARD",
|
|
"run_idx": 2,
|
|
"phase": "warm",
|
|
"duration_ms": 162391,
|
|
"tokens_per_sec": 5.024909015893738,
|
|
"error": null
|
|
},
|
|
{
|
|
"prompt_id": "P-HARD",
|
|
"run_idx": 3,
|
|
"phase": "warm",
|
|
"duration_ms": 157797,
|
|
"tokens_per_sec": 4.835326400375165,
|
|
"error": null
|
|
}
|
|
],
|
|
"calls": [
|
|
{
|
|
"type": "vram_snapshot",
|
|
"model_key": "ngl12-ctx64k",
|
|
"cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl12-ctx64k",
|
|
"ngl": 12,
|
|
"ctx": 65536,
|
|
"raw": "5397, 633, 6144"
|
|
},
|
|
{
|
|
"type": "ram_snapshot",
|
|
"model_key": "ngl12-ctx64k",
|
|
"cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl12-ctx64k",
|
|
"ngl": 12,
|
|
"ctx": 65536,
|
|
"raw": "15020,13605,28625"
|
|
},
|
|
{
|
|
"type": "llama_bench",
|
|
"model_key": "ngl12-ctx64k",
|
|
"cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl12-ctx64k",
|
|
"ngl": 12,
|
|
"ctx": 65536,
|
|
"raw": "llama-bench.exe : ggml_cuda_init: found 1 CUDA devices (Total VRAM: 6143 MiB):\r\nAt line:1 char:1\r\n+ & 'D:\\WeeyugaTools\\llama.cpp\\llama-bench.exe' -m 'D:\\WeeyugaModels\\ll ...\r\n+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\r\n + CategoryInfo : NotSpecified: (ggml_cuda_init:...RAM: 6143 MiB)::String) [], RemoteException\r\n + FullyQualifiedErrorId : NativeCommandError\r\n \r\n Device 0: NVIDIA GeForce GTX 1060, compute capability 6.1, VMM: yes, VRAM: 6143 MiB\r\nload_backend: loaded CUDA backend from D:\\WeeyugaTools\\llama.cpp\\ggml-cuda.dll\r\nload_backend: loaded RPC backend from D:\\WeeyugaTools\\llama.cpp\\ggml-rpc.dll\r\nload_backend: loaded CPU backend from D:\\WeeyugaTools\\llama.cpp\\ggml-cpu-haswell.dll\r\n| model | size | params | backend | ngl | test | t/s |\r\n| ------------------------------ | ---------: | ---------: | ---------- | --: | --------------: | -------------------: |\r\n| qwen3moe 30B.A3B IQ2_M - 2.7 bpw | 10.11 GiB | 30.53 B | CUDA | 12 | pp256 | 126.21 \u00b1 2.67 |\r\n| qwen3moe 30B.A3B IQ2_M - 2.7 bpw | 10.11 GiB | 30.53 B | CUDA | 12 | tg64 | 6.32 \u00b1 0.94 |\r\n\r\nbuild: 665abc609 (8951)\r\n\r"
|
|
},
|
|
{
|
|
"type": "call",
|
|
"cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl12-ctx64k",
|
|
"model_key": "ngl12-ctx64k",
|
|
"ngl": 12,
|
|
"ctx": 65536,
|
|
"prompt_id": "hello",
|
|
"run_idx": 0,
|
|
"phase": "cold",
|
|
"total_duration_ms": 25689,
|
|
"prompt_tokens": 9,
|
|
"completion_tokens": 145,
|
|
"tokens_per_sec": 5.644439254155475,
|
|
"finish_reason": "stop",
|
|
"reasoning_chars": 569,
|
|
"content_chars": 35,
|
|
"error": null
|
|
},
|
|
{
|
|
"type": "call",
|
|
"cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl12-ctx64k",
|
|
"model_key": "ngl12-ctx64k",
|
|
"ngl": 12,
|
|
"ctx": 65536,
|
|
"prompt_id": "hello",
|
|
"run_idx": 1,
|
|
"phase": "warm",
|
|
"total_duration_ms": 10035,
|
|
"prompt_tokens": 9,
|
|
"completion_tokens": 68,
|
|
"tokens_per_sec": 6.776283009466866,
|
|
"finish_reason": "stop",
|
|
"reasoning_chars": 223,
|
|
"content_chars": 37,
|
|
"error": null
|
|
},
|
|
{
|
|
"type": "call",
|
|
"cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl12-ctx64k",
|
|
"model_key": "ngl12-ctx64k",
|
|
"ngl": 12,
|
|
"ctx": 65536,
|
|
"prompt_id": "hello",
|
|
"run_idx": 2,
|
|
"phase": "warm",
|
|
"total_duration_ms": 15585,
|
|
"prompt_tokens": 9,
|
|
"completion_tokens": 106,
|
|
"tokens_per_sec": 6.801411613731151,
|
|
"finish_reason": "stop",
|
|
"reasoning_chars": 390,
|
|
"content_chars": 37,
|
|
"error": null
|
|
},
|
|
{
|
|
"type": "call",
|
|
"cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl12-ctx64k",
|
|
"model_key": "ngl12-ctx64k",
|
|
"ngl": 12,
|
|
"ctx": 65536,
|
|
"prompt_id": "hello",
|
|
"run_idx": 3,
|
|
"phase": "warm",
|
|
"total_duration_ms": 24321,
|
|
"prompt_tokens": 9,
|
|
"completion_tokens": 120,
|
|
"tokens_per_sec": 4.934007647711853,
|
|
"finish_reason": "stop",
|
|
"reasoning_chars": 474,
|
|
"content_chars": 37,
|
|
"error": null
|
|
},
|
|
{
|
|
"type": "call",
|
|
"cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl12-ctx64k",
|
|
"model_key": "ngl12-ctx64k",
|
|
"ngl": 12,
|
|
"ctx": 65536,
|
|
"prompt_id": "P-MEDIUM",
|
|
"run_idx": 0,
|
|
"phase": "cold",
|
|
"total_duration_ms": 110762,
|
|
"prompt_tokens": 22,
|
|
"completion_tokens": 586,
|
|
"tokens_per_sec": 5.29062313789928,
|
|
"finish_reason": "stop",
|
|
"reasoning_chars": 2148,
|
|
"content_chars": 748,
|
|
"error": null
|
|
},
|
|
{
|
|
"type": "call",
|
|
"cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl12-ctx64k",
|
|
"model_key": "ngl12-ctx64k",
|
|
"ngl": 12,
|
|
"ctx": 65536,
|
|
"prompt_id": "P-MEDIUM",
|
|
"run_idx": 1,
|
|
"phase": "warm",
|
|
"total_duration_ms": 76258,
|
|
"prompt_tokens": 22,
|
|
"completion_tokens": 389,
|
|
"tokens_per_sec": 5.101104146450209,
|
|
"finish_reason": "stop",
|
|
"reasoning_chars": 1507,
|
|
"content_chars": 477,
|
|
"error": null
|
|
},
|
|
{
|
|
"type": "call",
|
|
"cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl12-ctx64k",
|
|
"model_key": "ngl12-ctx64k",
|
|
"ngl": 12,
|
|
"ctx": 65536,
|
|
"prompt_id": "P-MEDIUM",
|
|
"run_idx": 2,
|
|
"phase": "warm",
|
|
"total_duration_ms": 70359,
|
|
"prompt_tokens": 22,
|
|
"completion_tokens": 376,
|
|
"tokens_per_sec": 5.344021376085505,
|
|
"finish_reason": "stop",
|
|
"reasoning_chars": 1406,
|
|
"content_chars": 422,
|
|
"error": null
|
|
},
|
|
{
|
|
"type": "call",
|
|
"cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl12-ctx64k",
|
|
"model_key": "ngl12-ctx64k",
|
|
"ngl": 12,
|
|
"ctx": 65536,
|
|
"prompt_id": "P-MEDIUM",
|
|
"run_idx": 3,
|
|
"phase": "warm",
|
|
"total_duration_ms": 81622,
|
|
"prompt_tokens": 22,
|
|
"completion_tokens": 445,
|
|
"tokens_per_sec": 5.451961480973267,
|
|
"finish_reason": "stop",
|
|
"reasoning_chars": 1685,
|
|
"content_chars": 499,
|
|
"error": null
|
|
},
|
|
{
|
|
"type": "call",
|
|
"cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl12-ctx64k",
|
|
"model_key": "ngl12-ctx64k",
|
|
"ngl": 12,
|
|
"ctx": 65536,
|
|
"prompt_id": "P-HARD",
|
|
"run_idx": 0,
|
|
"phase": "cold",
|
|
"total_duration_ms": 142035,
|
|
"prompt_tokens": 30,
|
|
"completion_tokens": 746,
|
|
"tokens_per_sec": 5.252226563875102,
|
|
"finish_reason": "stop",
|
|
"reasoning_chars": 1459,
|
|
"content_chars": 1651,
|
|
"error": null
|
|
},
|
|
{
|
|
"type": "call",
|
|
"cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl12-ctx64k",
|
|
"model_key": "ngl12-ctx64k",
|
|
"ngl": 12,
|
|
"ctx": 65536,
|
|
"prompt_id": "P-HARD",
|
|
"run_idx": 1,
|
|
"phase": "warm",
|
|
"total_duration_ms": 150027,
|
|
"prompt_tokens": 30,
|
|
"completion_tokens": 801,
|
|
"tokens_per_sec": 5.339038972984863,
|
|
"finish_reason": "stop",
|
|
"reasoning_chars": 1018,
|
|
"content_chars": 2354,
|
|
"error": null
|
|
},
|
|
{
|
|
"type": "call",
|
|
"cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl12-ctx64k",
|
|
"model_key": "ngl12-ctx64k",
|
|
"ngl": 12,
|
|
"ctx": 65536,
|
|
"prompt_id": "P-HARD",
|
|
"run_idx": 2,
|
|
"phase": "warm",
|
|
"total_duration_ms": 162391,
|
|
"prompt_tokens": 30,
|
|
"completion_tokens": 816,
|
|
"tokens_per_sec": 5.024909015893738,
|
|
"finish_reason": "stop",
|
|
"reasoning_chars": 1663,
|
|
"content_chars": 1826,
|
|
"error": null
|
|
},
|
|
{
|
|
"type": "call",
|
|
"cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl12-ctx64k",
|
|
"model_key": "ngl12-ctx64k",
|
|
"ngl": 12,
|
|
"ctx": 65536,
|
|
"prompt_id": "P-HARD",
|
|
"run_idx": 3,
|
|
"phase": "warm",
|
|
"total_duration_ms": 157797,
|
|
"prompt_tokens": 30,
|
|
"completion_tokens": 763,
|
|
"tokens_per_sec": 4.835326400375165,
|
|
"finish_reason": "stop",
|
|
"reasoning_chars": 1782,
|
|
"content_chars": 1590,
|
|
"error": null
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl12-ctx96k",
|
|
"n_calls": 15,
|
|
"n_errors": 0,
|
|
"duration_ms_mean": 71805.83,
|
|
"duration_ms_p50": 58541.0,
|
|
"duration_ms_p95": 140580.5,
|
|
"tokens_per_sec_mean": 5.79,
|
|
"tokens_per_sec_p50": 5.75,
|
|
"tokens_per_sec_p95": 6.47,
|
|
"tokens_per_sec_max": 6.7,
|
|
"cold_warm_split": {
|
|
"cold": {
|
|
"n_calls": 3,
|
|
"duration_ms_mean": 65655.0,
|
|
"duration_ms_p50": 45721.0,
|
|
"tokens_per_sec_mean": 5.92
|
|
},
|
|
"warm": {
|
|
"n_calls": 9,
|
|
"duration_ms_mean": 73856.11,
|
|
"duration_ms_p50": 59728.0,
|
|
"tokens_per_sec_mean": 5.75
|
|
}
|
|
},
|
|
"per_prompt": {
|
|
"unknown": {
|
|
"n_calls": 3,
|
|
"n_errors": 0,
|
|
"duration_ms_mean": null,
|
|
"duration_ms_p50": null,
|
|
"tokens_per_sec_mean": null,
|
|
"tokens_per_sec_p50": null
|
|
},
|
|
"hello": {
|
|
"n_calls": 4,
|
|
"n_errors": 0,
|
|
"duration_ms_mean": 20742.5,
|
|
"duration_ms_p50": 18111.0,
|
|
"tokens_per_sec_mean": 5.82,
|
|
"tokens_per_sec_p50": 5.83
|
|
},
|
|
"P-MEDIUM": {
|
|
"n_calls": 4,
|
|
"n_errors": 0,
|
|
"duration_ms_mean": 66764.5,
|
|
"duration_ms_p50": 58541.0,
|
|
"tokens_per_sec_mean": 5.84,
|
|
"tokens_per_sec_p50": 5.76
|
|
},
|
|
"P-HARD": {
|
|
"n_calls": 4,
|
|
"n_errors": 0,
|
|
"duration_ms_mean": 127910.5,
|
|
"duration_ms_p50": 132742.5,
|
|
"tokens_per_sec_mean": 5.71,
|
|
"tokens_per_sec_p50": 5.72
|
|
}
|
|
},
|
|
"chars_split": {
|
|
"has_thinking": true,
|
|
"reasoning_chars_mean": 1209.5,
|
|
"answer_chars_mean": 618.75
|
|
},
|
|
"timeline": [
|
|
{
|
|
"prompt_id": null,
|
|
"run_idx": null,
|
|
"phase": null,
|
|
"duration_ms": null,
|
|
"tokens_per_sec": null,
|
|
"error": null
|
|
},
|
|
{
|
|
"prompt_id": null,
|
|
"run_idx": null,
|
|
"phase": null,
|
|
"duration_ms": null,
|
|
"tokens_per_sec": null,
|
|
"error": null
|
|
},
|
|
{
|
|
"prompt_id": null,
|
|
"run_idx": null,
|
|
"phase": null,
|
|
"duration_ms": null,
|
|
"tokens_per_sec": null,
|
|
"error": null
|
|
},
|
|
{
|
|
"prompt_id": "hello",
|
|
"run_idx": 0,
|
|
"phase": "cold",
|
|
"duration_ms": 13701,
|
|
"tokens_per_sec": 5.766002481570688,
|
|
"error": null
|
|
},
|
|
{
|
|
"prompt_id": "hello",
|
|
"run_idx": 1,
|
|
"phase": "warm",
|
|
"duration_ms": 33047,
|
|
"tokens_per_sec": 4.932369050140709,
|
|
"error": null
|
|
},
|
|
{
|
|
"prompt_id": "hello",
|
|
"run_idx": 2,
|
|
"phase": "warm",
|
|
"duration_ms": 15830,
|
|
"tokens_per_sec": 6.69614655716993,
|
|
"error": null
|
|
},
|
|
{
|
|
"prompt_id": "hello",
|
|
"run_idx": 3,
|
|
"phase": "warm",
|
|
"duration_ms": 20392,
|
|
"tokens_per_sec": 5.884660651235779,
|
|
"error": null
|
|
},
|
|
{
|
|
"prompt_id": "P-MEDIUM",
|
|
"run_idx": 0,
|
|
"phase": "cold",
|
|
"duration_ms": 45721,
|
|
"tokens_per_sec": 6.277203035804117,
|
|
"error": null
|
|
},
|
|
{
|
|
"prompt_id": "P-MEDIUM",
|
|
"run_idx": 1,
|
|
"phase": "warm",
|
|
"duration_ms": 59728,
|
|
"tokens_per_sec": 5.575274578087329,
|
|
"error": null
|
|
},
|
|
{
|
|
"prompt_id": "P-MEDIUM",
|
|
"run_idx": 2,
|
|
"phase": "warm",
|
|
"duration_ms": 57354,
|
|
"tokens_per_sec": 5.666562053213377,
|
|
"error": null
|
|
},
|
|
{
|
|
"prompt_id": "P-MEDIUM",
|
|
"run_idx": 3,
|
|
"phase": "warm",
|
|
"duration_ms": 104255,
|
|
"tokens_per_sec": 5.85103831950506,
|
|
"error": null
|
|
},
|
|
{
|
|
"prompt_id": "P-HARD",
|
|
"run_idx": 0,
|
|
"phase": "cold",
|
|
"duration_ms": 137543,
|
|
"tokens_per_sec": 5.729117439637059,
|
|
"error": null
|
|
},
|
|
{
|
|
"prompt_id": "P-HARD",
|
|
"run_idx": 1,
|
|
"phase": "warm",
|
|
"duration_ms": 144293,
|
|
"tokens_per_sec": 5.516553124545196,
|
|
"error": null
|
|
},
|
|
{
|
|
"prompt_id": "P-HARD",
|
|
"run_idx": 2,
|
|
"phase": "warm",
|
|
"duration_ms": 101864,
|
|
"tokens_per_sec": 5.703683342495877,
|
|
"error": null
|
|
},
|
|
{
|
|
"prompt_id": "P-HARD",
|
|
"run_idx": 3,
|
|
"phase": "warm",
|
|
"duration_ms": 127942,
|
|
"tokens_per_sec": 5.893295399477889,
|
|
"error": null
|
|
}
|
|
],
|
|
"calls": [
|
|
{
|
|
"type": "vram_snapshot",
|
|
"model_key": "ngl12-ctx96k",
|
|
"cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl12-ctx96k",
|
|
"ngl": 12,
|
|
"ctx": 98304,
|
|
"raw": "5870, 160, 6144"
|
|
},
|
|
{
|
|
"type": "ram_snapshot",
|
|
"model_key": "ngl12-ctx96k",
|
|
"cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl12-ctx96k",
|
|
"ngl": 12,
|
|
"ctx": 98304,
|
|
"raw": "17590,11035,28625"
|
|
},
|
|
{
|
|
"type": "llama_bench",
|
|
"model_key": "ngl12-ctx96k",
|
|
"cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl12-ctx96k",
|
|
"ngl": 12,
|
|
"ctx": 98304,
|
|
"raw": "llama-bench.exe : ggml_cuda_init: found 1 CUDA devices (Total VRAM: 6143 MiB):\r\nAt line:1 char:1\r\n+ & 'D:\\WeeyugaTools\\llama.cpp\\llama-bench.exe' -m 'D:\\WeeyugaModels\\ll ...\r\n+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\r\n + CategoryInfo : NotSpecified: (ggml_cuda_init:...RAM: 6143 MiB)::String) [], RemoteException\r\n + FullyQualifiedErrorId : NativeCommandError\r\n \r\n Device 0: NVIDIA GeForce GTX 1060, compute capability 6.1, VMM: yes, VRAM: 6143 MiB\r\nload_backend: loaded CUDA backend from D:\\WeeyugaTools\\llama.cpp\\ggml-cuda.dll\r\nload_backend: loaded RPC backend from D:\\WeeyugaTools\\llama.cpp\\ggml-rpc.dll\r\nload_backend: loaded CPU backend from D:\\WeeyugaTools\\llama.cpp\\ggml-cpu-haswell.dll\r\n| model | size | params | backend | ngl | test | t/s |\r\n| ------------------------------ | ---------: | ---------: | ---------- | --: | --------------: | -------------------: |\r\n| qwen3moe 30B.A3B IQ2_M - 2.7 bpw | 10.11 GiB | 30.53 B | CUDA | 12 | pp256 | 126.69 \u00b1 4.44 |\r\n| qwen3moe 30B.A3B IQ2_M - 2.7 bpw | 10.11 GiB | 30.53 B | CUDA | 12 | tg64 | 6.01 \u00b1 0.94 |\r\n\r\nbuild: 665abc609 (8951)\r\n\r"
|
|
},
|
|
{
|
|
"type": "call",
|
|
"cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl12-ctx96k",
|
|
"model_key": "ngl12-ctx96k",
|
|
"ngl": 12,
|
|
"ctx": 98304,
|
|
"prompt_id": "hello",
|
|
"run_idx": 0,
|
|
"phase": "cold",
|
|
"total_duration_ms": 13701,
|
|
"prompt_tokens": 9,
|
|
"completion_tokens": 79,
|
|
"tokens_per_sec": 5.766002481570688,
|
|
"finish_reason": "stop",
|
|
"reasoning_chars": 280,
|
|
"content_chars": 37,
|
|
"error": null
|
|
},
|
|
{
|
|
"type": "call",
|
|
"cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl12-ctx96k",
|
|
"model_key": "ngl12-ctx96k",
|
|
"ngl": 12,
|
|
"ctx": 98304,
|
|
"prompt_id": "hello",
|
|
"run_idx": 1,
|
|
"phase": "warm",
|
|
"total_duration_ms": 33047,
|
|
"prompt_tokens": 9,
|
|
"completion_tokens": 163,
|
|
"tokens_per_sec": 4.932369050140709,
|
|
"finish_reason": "stop",
|
|
"reasoning_chars": 618,
|
|
"content_chars": 37,
|
|
"error": null
|
|
},
|
|
{
|
|
"type": "call",
|
|
"cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl12-ctx96k",
|
|
"model_key": "ngl12-ctx96k",
|
|
"ngl": 12,
|
|
"ctx": 98304,
|
|
"prompt_id": "hello",
|
|
"run_idx": 2,
|
|
"phase": "warm",
|
|
"total_duration_ms": 15830,
|
|
"prompt_tokens": 9,
|
|
"completion_tokens": 106,
|
|
"tokens_per_sec": 6.69614655716993,
|
|
"finish_reason": "stop",
|
|
"reasoning_chars": 400,
|
|
"content_chars": 37,
|
|
"error": null
|
|
},
|
|
{
|
|
"type": "call",
|
|
"cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl12-ctx96k",
|
|
"model_key": "ngl12-ctx96k",
|
|
"ngl": 12,
|
|
"ctx": 98304,
|
|
"prompt_id": "hello",
|
|
"run_idx": 3,
|
|
"phase": "warm",
|
|
"total_duration_ms": 20392,
|
|
"prompt_tokens": 9,
|
|
"completion_tokens": 120,
|
|
"tokens_per_sec": 5.884660651235779,
|
|
"finish_reason": "stop",
|
|
"reasoning_chars": 436,
|
|
"content_chars": 37,
|
|
"error": null
|
|
},
|
|
{
|
|
"type": "call",
|
|
"cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl12-ctx96k",
|
|
"model_key": "ngl12-ctx96k",
|
|
"ngl": 12,
|
|
"ctx": 98304,
|
|
"prompt_id": "P-MEDIUM",
|
|
"run_idx": 0,
|
|
"phase": "cold",
|
|
"total_duration_ms": 45721,
|
|
"prompt_tokens": 22,
|
|
"completion_tokens": 287,
|
|
"tokens_per_sec": 6.277203035804117,
|
|
"finish_reason": "stop",
|
|
"reasoning_chars": 1047,
|
|
"content_chars": 398,
|
|
"error": null
|
|
},
|
|
{
|
|
"type": "call",
|
|
"cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl12-ctx96k",
|
|
"model_key": "ngl12-ctx96k",
|
|
"ngl": 12,
|
|
"ctx": 98304,
|
|
"prompt_id": "P-MEDIUM",
|
|
"run_idx": 1,
|
|
"phase": "warm",
|
|
"total_duration_ms": 59728,
|
|
"prompt_tokens": 22,
|
|
"completion_tokens": 333,
|
|
"tokens_per_sec": 5.575274578087329,
|
|
"finish_reason": "stop",
|
|
"reasoning_chars": 1152,
|
|
"content_chars": 503,
|
|
"error": null
|
|
},
|
|
{
|
|
"type": "call",
|
|
"cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl12-ctx96k",
|
|
"model_key": "ngl12-ctx96k",
|
|
"ngl": 12,
|
|
"ctx": 98304,
|
|
"prompt_id": "P-MEDIUM",
|
|
"run_idx": 2,
|
|
"phase": "warm",
|
|
"total_duration_ms": 57354,
|
|
"prompt_tokens": 22,
|
|
"completion_tokens": 325,
|
|
"tokens_per_sec": 5.666562053213377,
|
|
"finish_reason": "stop",
|
|
"reasoning_chars": 1139,
|
|
"content_chars": 429,
|
|
"error": null
|
|
},
|
|
{
|
|
"type": "call",
|
|
"cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl12-ctx96k",
|
|
"model_key": "ngl12-ctx96k",
|
|
"ngl": 12,
|
|
"ctx": 98304,
|
|
"prompt_id": "P-MEDIUM",
|
|
"run_idx": 3,
|
|
"phase": "warm",
|
|
"total_duration_ms": 104255,
|
|
"prompt_tokens": 22,
|
|
"completion_tokens": 610,
|
|
"tokens_per_sec": 5.85103831950506,
|
|
"finish_reason": "stop",
|
|
"reasoning_chars": 2408,
|
|
"content_chars": 561,
|
|
"error": null
|
|
},
|
|
{
|
|
"type": "call",
|
|
"cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl12-ctx96k",
|
|
"model_key": "ngl12-ctx96k",
|
|
"ngl": 12,
|
|
"ctx": 98304,
|
|
"prompt_id": "P-HARD",
|
|
"run_idx": 0,
|
|
"phase": "cold",
|
|
"total_duration_ms": 137543,
|
|
"prompt_tokens": 30,
|
|
"completion_tokens": 788,
|
|
"tokens_per_sec": 5.729117439637059,
|
|
"finish_reason": "stop",
|
|
"reasoning_chars": 1966,
|
|
"content_chars": 1357,
|
|
"error": null
|
|
},
|
|
{
|
|
"type": "call",
|
|
"cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl12-ctx96k",
|
|
"model_key": "ngl12-ctx96k",
|
|
"ngl": 12,
|
|
"ctx": 98304,
|
|
"prompt_id": "P-HARD",
|
|
"run_idx": 1,
|
|
"phase": "warm",
|
|
"total_duration_ms": 144293,
|
|
"prompt_tokens": 30,
|
|
"completion_tokens": 796,
|
|
"tokens_per_sec": 5.516553124545196,
|
|
"finish_reason": "stop",
|
|
"reasoning_chars": 1825,
|
|
"content_chars": 1536,
|
|
"error": null
|
|
},
|
|
{
|
|
"type": "call",
|
|
"cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl12-ctx96k",
|
|
"model_key": "ngl12-ctx96k",
|
|
"ngl": 12,
|
|
"ctx": 98304,
|
|
"prompt_id": "P-HARD",
|
|
"run_idx": 2,
|
|
"phase": "warm",
|
|
"total_duration_ms": 101864,
|
|
"prompt_tokens": 30,
|
|
"completion_tokens": 581,
|
|
"tokens_per_sec": 5.703683342495877,
|
|
"finish_reason": "stop",
|
|
"reasoning_chars": 1282,
|
|
"content_chars": 1232,
|
|
"error": null
|
|
},
|
|
{
|
|
"type": "call",
|
|
"cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl12-ctx96k",
|
|
"model_key": "ngl12-ctx96k",
|
|
"ngl": 12,
|
|
"ctx": 98304,
|
|
"prompt_id": "P-HARD",
|
|
"run_idx": 3,
|
|
"phase": "warm",
|
|
"total_duration_ms": 127942,
|
|
"prompt_tokens": 30,
|
|
"completion_tokens": 754,
|
|
"tokens_per_sec": 5.893295399477889,
|
|
"finish_reason": "stop",
|
|
"reasoning_chars": 1961,
|
|
"content_chars": 1261,
|
|
"error": null
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl12-ctx131k",
|
|
"n_calls": 15,
|
|
"n_errors": 0,
|
|
"duration_ms_mean": 96406.17,
|
|
"duration_ms_p50": 105842.0,
|
|
"duration_ms_p95": 154492.35,
|
|
"tokens_per_sec_mean": 5.01,
|
|
"tokens_per_sec_p50": 5.13,
|
|
"tokens_per_sec_p95": 5.97,
|
|
"tokens_per_sec_max": 6.29,
|
|
"cold_warm_split": {
|
|
"cold": {
|
|
"n_calls": 3,
|
|
"duration_ms_mean": 109523.67,
|
|
"duration_ms_p50": 118125.0,
|
|
"tokens_per_sec_mean": 4.11
|
|
},
|
|
"warm": {
|
|
"n_calls": 9,
|
|
"duration_ms_mean": 92033.67,
|
|
"duration_ms_p50": 95486.0,
|
|
"tokens_per_sec_mean": 5.31
|
|
}
|
|
},
|
|
"per_prompt": {
|
|
"unknown": {
|
|
"n_calls": 3,
|
|
"n_errors": 0,
|
|
"duration_ms_mean": null,
|
|
"duration_ms_p50": null,
|
|
"tokens_per_sec_mean": null,
|
|
"tokens_per_sec_p50": null
|
|
},
|
|
"hello": {
|
|
"n_calls": 4,
|
|
"n_errors": 0,
|
|
"duration_ms_mean": 33574.75,
|
|
"duration_ms_p50": 31872.5,
|
|
"tokens_per_sec_mean": 4.73,
|
|
"tokens_per_sec_p50": 5.19
|
|
},
|
|
"P-MEDIUM": {
|
|
"n_calls": 4,
|
|
"n_errors": 0,
|
|
"duration_ms_mean": 103544.75,
|
|
"duration_ms_p50": 105842.0,
|
|
"tokens_per_sec_mean": 5.19,
|
|
"tokens_per_sec_p50": 5.05
|
|
},
|
|
"P-HARD": {
|
|
"n_calls": 4,
|
|
"n_errors": 0,
|
|
"duration_ms_mean": 152099.0,
|
|
"duration_ms_p50": 150187.0,
|
|
"tokens_per_sec_mean": 5.11,
|
|
"tokens_per_sec_p50": 5.13
|
|
}
|
|
},
|
|
"chars_split": {
|
|
"has_thinking": true,
|
|
"reasoning_chars_mean": 1296.25,
|
|
"answer_chars_mean": 873.42
|
|
},
|
|
"timeline": [
|
|
{
|
|
"prompt_id": null,
|
|
"run_idx": null,
|
|
"phase": null,
|
|
"duration_ms": null,
|
|
"tokens_per_sec": null,
|
|
"error": null
|
|
},
|
|
{
|
|
"prompt_id": null,
|
|
"run_idx": null,
|
|
"phase": null,
|
|
"duration_ms": null,
|
|
"tokens_per_sec": null,
|
|
"error": null
|
|
},
|
|
{
|
|
"prompt_id": null,
|
|
"run_idx": null,
|
|
"phase": null,
|
|
"duration_ms": null,
|
|
"tokens_per_sec": null,
|
|
"error": null
|
|
},
|
|
{
|
|
"prompt_id": "hello",
|
|
"run_idx": 0,
|
|
"phase": "cold",
|
|
"duration_ms": 59744,
|
|
"tokens_per_sec": 2.259641135511516,
|
|
"error": null
|
|
},
|
|
{
|
|
"prompt_id": "hello",
|
|
"run_idx": 1,
|
|
"phase": "warm",
|
|
"duration_ms": 10810,
|
|
"tokens_per_sec": 6.290471785383904,
|
|
"error": null
|
|
},
|
|
{
|
|
"prompt_id": "hello",
|
|
"run_idx": 2,
|
|
"phase": "warm",
|
|
"duration_ms": 20002,
|
|
"tokens_per_sec": 5.049495050494951,
|
|
"error": null
|
|
},
|
|
{
|
|
"prompt_id": "hello",
|
|
"run_idx": 3,
|
|
"phase": "warm",
|
|
"duration_ms": 43743,
|
|
"tokens_per_sec": 5.326566536360103,
|
|
"error": null
|
|
},
|
|
{
|
|
"prompt_id": "P-MEDIUM",
|
|
"run_idx": 0,
|
|
"phase": "cold",
|
|
"duration_ms": 118125,
|
|
"tokens_per_sec": 4.960846560846561,
|
|
"error": null
|
|
},
|
|
{
|
|
"prompt_id": "P-MEDIUM",
|
|
"run_idx": 1,
|
|
"phase": "warm",
|
|
"duration_ms": 116198,
|
|
"tokens_per_sec": 5.146388061756657,
|
|
"error": null
|
|
},
|
|
{
|
|
"prompt_id": "P-MEDIUM",
|
|
"run_idx": 2,
|
|
"phase": "warm",
|
|
"duration_ms": 95486,
|
|
"tokens_per_sec": 4.932660285277422,
|
|
"error": null
|
|
},
|
|
{
|
|
"prompt_id": "P-MEDIUM",
|
|
"run_idx": 3,
|
|
"phase": "warm",
|
|
"duration_ms": 84370,
|
|
"tokens_per_sec": 5.71293113665995,
|
|
"error": null
|
|
},
|
|
{
|
|
"prompt_id": "P-HARD",
|
|
"run_idx": 0,
|
|
"phase": "cold",
|
|
"duration_ms": 150702,
|
|
"tokens_per_sec": 5.122692465926132,
|
|
"error": null
|
|
},
|
|
{
|
|
"prompt_id": "P-HARD",
|
|
"run_idx": 1,
|
|
"phase": "warm",
|
|
"duration_ms": 148897,
|
|
"tokens_per_sec": 5.131063755481978,
|
|
"error": null
|
|
},
|
|
{
|
|
"prompt_id": "P-HARD",
|
|
"run_idx": 2,
|
|
"phase": "warm",
|
|
"duration_ms": 159125,
|
|
"tokens_per_sec": 4.788688138256088,
|
|
"error": null
|
|
},
|
|
{
|
|
"prompt_id": "P-HARD",
|
|
"run_idx": 3,
|
|
"phase": "warm",
|
|
"duration_ms": 149672,
|
|
"tokens_per_sec": 5.378427494788604,
|
|
"error": null
|
|
}
|
|
],
|
|
"calls": [
|
|
{
|
|
"type": "vram_snapshot",
|
|
"model_key": "ngl12-ctx131k",
|
|
"cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl12-ctx131k",
|
|
"ngl": 12,
|
|
"ctx": 131072,
|
|
"raw": "5814, 216, 6144"
|
|
},
|
|
{
|
|
"type": "ram_snapshot",
|
|
"model_key": "ngl12-ctx131k",
|
|
"cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl12-ctx131k",
|
|
"ngl": 12,
|
|
"ctx": 131072,
|
|
"raw": "20818,7806,28625"
|
|
},
|
|
{
|
|
"type": "llama_bench",
|
|
"model_key": "ngl12-ctx131k",
|
|
"cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl12-ctx131k",
|
|
"ngl": 12,
|
|
"ctx": 131072,
|
|
"raw": "llama-bench.exe : ggml_cuda_init: found 1 CUDA devices (Total VRAM: 6143 MiB):\r\nAt line:1 char:1\r\n+ & 'D:\\WeeyugaTools\\llama.cpp\\llama-bench.exe' -m 'D:\\WeeyugaModels\\ll ...\r\n+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\r\n + CategoryInfo : NotSpecified: (ggml_cuda_init:...RAM: 6143 MiB)::String) [], RemoteException\r\n + FullyQualifiedErrorId : NativeCommandError\r\n \r\n Device 0: NVIDIA GeForce GTX 1060, compute capability 6.1, VMM: yes, VRAM: 6143 MiB\r\nload_backend: loaded CUDA backend from D:\\WeeyugaTools\\llama.cpp\\ggml-cuda.dll\r\nload_backend: loaded RPC backend from D:\\WeeyugaTools\\llama.cpp\\ggml-rpc.dll\r\nload_backend: loaded CPU backend from D:\\WeeyugaTools\\llama.cpp\\ggml-cpu-haswell.dll\r\n| model | size | params | backend | ngl | test | t/s |\r\n| ------------------------------ | ---------: | ---------: | ---------- | --: | --------------: | -------------------: |\r\n| qwen3moe 30B.A3B IQ2_M - 2.7 bpw | 10.11 GiB | 30.53 B | CUDA | 12 | pp256 | 26.49 \u00b1 11.80 |\r\n| qwen3moe 30B.A3B IQ2_M - 2.7 bpw | 10.11 GiB | 30.53 B | CUDA | 12 | tg64 | 4.94 \u00b1 0.78 |\r\n\r\nbuild: 665abc609 (8951)\r\n\r"
|
|
},
|
|
{
|
|
"type": "call",
|
|
"cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl12-ctx131k",
|
|
"model_key": "ngl12-ctx131k",
|
|
"ngl": 12,
|
|
"ctx": 131072,
|
|
"prompt_id": "hello",
|
|
"run_idx": 0,
|
|
"phase": "cold",
|
|
"total_duration_ms": 59744,
|
|
"prompt_tokens": 9,
|
|
"completion_tokens": 135,
|
|
"tokens_per_sec": 2.259641135511516,
|
|
"finish_reason": "stop",
|
|
"reasoning_chars": 504,
|
|
"content_chars": 37,
|
|
"error": null
|
|
},
|
|
{
|
|
"type": "call",
|
|
"cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl12-ctx131k",
|
|
"model_key": "ngl12-ctx131k",
|
|
"ngl": 12,
|
|
"ctx": 131072,
|
|
"prompt_id": "hello",
|
|
"run_idx": 1,
|
|
"phase": "warm",
|
|
"total_duration_ms": 10810,
|
|
"prompt_tokens": 9,
|
|
"completion_tokens": 68,
|
|
"tokens_per_sec": 6.290471785383904,
|
|
"finish_reason": "stop",
|
|
"reasoning_chars": 224,
|
|
"content_chars": 37,
|
|
"error": null
|
|
},
|
|
{
|
|
"type": "call",
|
|
"cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl12-ctx131k",
|
|
"model_key": "ngl12-ctx131k",
|
|
"ngl": 12,
|
|
"ctx": 131072,
|
|
"prompt_id": "hello",
|
|
"run_idx": 2,
|
|
"phase": "warm",
|
|
"total_duration_ms": 20002,
|
|
"prompt_tokens": 9,
|
|
"completion_tokens": 101,
|
|
"tokens_per_sec": 5.049495050494951,
|
|
"finish_reason": "stop",
|
|
"reasoning_chars": 384,
|
|
"content_chars": 37,
|
|
"error": null
|
|
},
|
|
{
|
|
"type": "call",
|
|
"cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl12-ctx131k",
|
|
"model_key": "ngl12-ctx131k",
|
|
"ngl": 12,
|
|
"ctx": 131072,
|
|
"prompt_id": "hello",
|
|
"run_idx": 3,
|
|
"phase": "warm",
|
|
"total_duration_ms": 43743,
|
|
"prompt_tokens": 9,
|
|
"completion_tokens": 233,
|
|
"tokens_per_sec": 5.326566536360103,
|
|
"finish_reason": "stop",
|
|
"reasoning_chars": 910,
|
|
"content_chars": 37,
|
|
"error": null
|
|
},
|
|
{
|
|
"type": "call",
|
|
"cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl12-ctx131k",
|
|
"model_key": "ngl12-ctx131k",
|
|
"ngl": 12,
|
|
"ctx": 131072,
|
|
"prompt_id": "P-MEDIUM",
|
|
"run_idx": 0,
|
|
"phase": "cold",
|
|
"total_duration_ms": 118125,
|
|
"prompt_tokens": 22,
|
|
"completion_tokens": 586,
|
|
"tokens_per_sec": 4.960846560846561,
|
|
"finish_reason": "stop",
|
|
"reasoning_chars": 2415,
|
|
"content_chars": 435,
|
|
"error": null
|
|
},
|
|
{
|
|
"type": "call",
|
|
"cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl12-ctx131k",
|
|
"model_key": "ngl12-ctx131k",
|
|
"ngl": 12,
|
|
"ctx": 131072,
|
|
"prompt_id": "P-MEDIUM",
|
|
"run_idx": 1,
|
|
"phase": "warm",
|
|
"total_duration_ms": 116198,
|
|
"prompt_tokens": 22,
|
|
"completion_tokens": 598,
|
|
"tokens_per_sec": 5.146388061756657,
|
|
"finish_reason": "stop",
|
|
"reasoning_chars": 2059,
|
|
"content_chars": 865,
|
|
"error": null
|
|
},
|
|
{
|
|
"type": "call",
|
|
"cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl12-ctx131k",
|
|
"model_key": "ngl12-ctx131k",
|
|
"ngl": 12,
|
|
"ctx": 131072,
|
|
"prompt_id": "P-MEDIUM",
|
|
"run_idx": 2,
|
|
"phase": "warm",
|
|
"total_duration_ms": 95486,
|
|
"prompt_tokens": 22,
|
|
"completion_tokens": 471,
|
|
"tokens_per_sec": 4.932660285277422,
|
|
"finish_reason": "stop",
|
|
"reasoning_chars": 1844,
|
|
"content_chars": 502,
|
|
"error": null
|
|
},
|
|
{
|
|
"type": "call",
|
|
"cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl12-ctx131k",
|
|
"model_key": "ngl12-ctx131k",
|
|
"ngl": 12,
|
|
"ctx": 131072,
|
|
"prompt_id": "P-MEDIUM",
|
|
"run_idx": 3,
|
|
"phase": "warm",
|
|
"total_duration_ms": 84370,
|
|
"prompt_tokens": 22,
|
|
"completion_tokens": 482,
|
|
"tokens_per_sec": 5.71293113665995,
|
|
"finish_reason": "stop",
|
|
"reasoning_chars": 1858,
|
|
"content_chars": 488,
|
|
"error": null
|
|
},
|
|
{
|
|
"type": "call",
|
|
"cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl12-ctx131k",
|
|
"model_key": "ngl12-ctx131k",
|
|
"ngl": 12,
|
|
"ctx": 131072,
|
|
"prompt_id": "P-HARD",
|
|
"run_idx": 0,
|
|
"phase": "cold",
|
|
"total_duration_ms": 150702,
|
|
"prompt_tokens": 30,
|
|
"completion_tokens": 772,
|
|
"tokens_per_sec": 5.122692465926132,
|
|
"finish_reason": "stop",
|
|
"reasoning_chars": 1228,
|
|
"content_chars": 2007,
|
|
"error": null
|
|
},
|
|
{
|
|
"type": "call",
|
|
"cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl12-ctx131k",
|
|
"model_key": "ngl12-ctx131k",
|
|
"ngl": 12,
|
|
"ctx": 131072,
|
|
"prompt_id": "P-HARD",
|
|
"run_idx": 1,
|
|
"phase": "warm",
|
|
"total_duration_ms": 148897,
|
|
"prompt_tokens": 30,
|
|
"completion_tokens": 764,
|
|
"tokens_per_sec": 5.131063755481978,
|
|
"finish_reason": "stop",
|
|
"reasoning_chars": 1083,
|
|
"content_chars": 2199,
|
|
"error": null
|
|
},
|
|
{
|
|
"type": "call",
|
|
"cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl12-ctx131k",
|
|
"model_key": "ngl12-ctx131k",
|
|
"ngl": 12,
|
|
"ctx": 131072,
|
|
"prompt_id": "P-HARD",
|
|
"run_idx": 2,
|
|
"phase": "warm",
|
|
"total_duration_ms": 159125,
|
|
"prompt_tokens": 30,
|
|
"completion_tokens": 762,
|
|
"tokens_per_sec": 4.788688138256088,
|
|
"finish_reason": "stop",
|
|
"reasoning_chars": 1767,
|
|
"content_chars": 1578,
|
|
"error": null
|
|
},
|
|
{
|
|
"type": "call",
|
|
"cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl12-ctx131k",
|
|
"model_key": "ngl12-ctx131k",
|
|
"ngl": 12,
|
|
"ctx": 131072,
|
|
"prompt_id": "P-HARD",
|
|
"run_idx": 3,
|
|
"phase": "warm",
|
|
"total_duration_ms": 149672,
|
|
"prompt_tokens": 30,
|
|
"completion_tokens": 805,
|
|
"tokens_per_sec": 5.378427494788604,
|
|
"finish_reason": "stop",
|
|
"reasoning_chars": 1279,
|
|
"content_chars": 2259,
|
|
"error": null
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl24-ctx48k",
|
|
"n_calls": 15,
|
|
"n_errors": 0,
|
|
"duration_ms_mean": 65539.5,
|
|
"duration_ms_p50": 64606.5,
|
|
"duration_ms_p95": 118069.75,
|
|
"tokens_per_sec_mean": 6.6,
|
|
"tokens_per_sec_p50": 6.59,
|
|
"tokens_per_sec_p95": 6.89,
|
|
"tokens_per_sec_max": 6.91,
|
|
"cold_warm_split": {
|
|
"cold": {
|
|
"n_calls": 3,
|
|
"duration_ms_mean": 66671.33,
|
|
"duration_ms_p50": 58574.0,
|
|
"tokens_per_sec_mean": 6.36
|
|
},
|
|
"warm": {
|
|
"n_calls": 9,
|
|
"duration_ms_mean": 65162.22,
|
|
"duration_ms_p50": 69317.0,
|
|
"tokens_per_sec_mean": 6.68
|
|
}
|
|
},
|
|
"per_prompt": {
|
|
"unknown": {
|
|
"n_calls": 3,
|
|
"n_errors": 0,
|
|
"duration_ms_mean": null,
|
|
"duration_ms_p50": null,
|
|
"tokens_per_sec_mean": null,
|
|
"tokens_per_sec_p50": null
|
|
},
|
|
"hello": {
|
|
"n_calls": 4,
|
|
"n_errors": 0,
|
|
"duration_ms_mean": 16306.0,
|
|
"duration_ms_p50": 15313.0,
|
|
"tokens_per_sec_mean": 6.71,
|
|
"tokens_per_sec_p50": 6.86
|
|
},
|
|
"P-MEDIUM": {
|
|
"n_calls": 4,
|
|
"n_errors": 0,
|
|
"duration_ms_mean": 75223.75,
|
|
"duration_ms_p50": 74910.0,
|
|
"tokens_per_sec_mean": 6.61,
|
|
"tokens_per_sec_p50": 6.59
|
|
},
|
|
"P-HARD": {
|
|
"n_calls": 4,
|
|
"n_errors": 0,
|
|
"duration_ms_mean": 105088.75,
|
|
"duration_ms_p50": 114731.0,
|
|
"tokens_per_sec_mean": 6.49,
|
|
"tokens_per_sec_p50": 6.48
|
|
}
|
|
},
|
|
"chars_split": {
|
|
"has_thinking": true,
|
|
"reasoning_chars_mean": 1152.67,
|
|
"answer_chars_mean": 769.33
|
|
},
|
|
"timeline": [
|
|
{
|
|
"prompt_id": null,
|
|
"run_idx": null,
|
|
"phase": null,
|
|
"duration_ms": null,
|
|
"tokens_per_sec": null,
|
|
"error": null
|
|
},
|
|
{
|
|
"prompt_id": null,
|
|
"run_idx": null,
|
|
"phase": null,
|
|
"duration_ms": null,
|
|
"tokens_per_sec": null,
|
|
"error": null
|
|
},
|
|
{
|
|
"prompt_id": null,
|
|
"run_idx": null,
|
|
"phase": null,
|
|
"duration_ms": null,
|
|
"tokens_per_sec": null,
|
|
"error": null
|
|
},
|
|
{
|
|
"prompt_id": "hello",
|
|
"run_idx": 0,
|
|
"phase": "cold",
|
|
"duration_ms": 19864,
|
|
"tokens_per_sec": 6.192106322996375,
|
|
"error": null
|
|
},
|
|
{
|
|
"prompt_id": "hello",
|
|
"run_idx": 1,
|
|
"phase": "warm",
|
|
"duration_ms": 14734,
|
|
"tokens_per_sec": 6.854893443735578,
|
|
"error": null
|
|
},
|
|
{
|
|
"prompt_id": "hello",
|
|
"run_idx": 2,
|
|
"phase": "warm",
|
|
"duration_ms": 15636,
|
|
"tokens_per_sec": 6.907137375287798,
|
|
"error": null
|
|
},
|
|
{
|
|
"prompt_id": "hello",
|
|
"run_idx": 3,
|
|
"phase": "warm",
|
|
"duration_ms": 14990,
|
|
"tokens_per_sec": 6.871247498332221,
|
|
"error": null
|
|
},
|
|
{
|
|
"prompt_id": "P-MEDIUM",
|
|
"run_idx": 0,
|
|
"phase": "cold",
|
|
"duration_ms": 58574,
|
|
"tokens_per_sec": 6.572882166148803,
|
|
"error": null
|
|
},
|
|
{
|
|
"prompt_id": "P-MEDIUM",
|
|
"run_idx": 1,
|
|
"phase": "warm",
|
|
"duration_ms": 89924,
|
|
"tokens_per_sec": 6.60557804368133,
|
|
"error": null
|
|
},
|
|
{
|
|
"prompt_id": "P-MEDIUM",
|
|
"run_idx": 2,
|
|
"phase": "warm",
|
|
"duration_ms": 92501,
|
|
"tokens_per_sec": 6.551280526697008,
|
|
"error": null
|
|
},
|
|
{
|
|
"prompt_id": "P-MEDIUM",
|
|
"run_idx": 3,
|
|
"phase": "warm",
|
|
"duration_ms": 59896,
|
|
"tokens_per_sec": 6.728329103779885,
|
|
"error": null
|
|
},
|
|
{
|
|
"prompt_id": "P-HARD",
|
|
"run_idx": 0,
|
|
"phase": "cold",
|
|
"duration_ms": 121576,
|
|
"tokens_per_sec": 6.317036257156018,
|
|
"error": null
|
|
},
|
|
{
|
|
"prompt_id": "P-HARD",
|
|
"run_idx": 1,
|
|
"phase": "warm",
|
|
"duration_ms": 69317,
|
|
"tokens_per_sec": 6.6794581415814305,
|
|
"error": null
|
|
},
|
|
{
|
|
"prompt_id": "P-HARD",
|
|
"run_idx": 2,
|
|
"phase": "warm",
|
|
"duration_ms": 114261,
|
|
"tokens_per_sec": 6.485152414209573,
|
|
"error": null
|
|
},
|
|
{
|
|
"prompt_id": "P-HARD",
|
|
"run_idx": 3,
|
|
"phase": "warm",
|
|
"duration_ms": 115201,
|
|
"tokens_per_sec": 6.475638232307012,
|
|
"error": null
|
|
}
|
|
],
|
|
"calls": [
|
|
{
|
|
"type": "vram_snapshot",
|
|
"model_key": "ngl24-ctx48k",
|
|
"cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl24-ctx48k",
|
|
"ngl": 24,
|
|
"ctx": 49152,
|
|
"raw": "5826, 204, 6144"
|
|
},
|
|
{
|
|
"type": "ram_snapshot",
|
|
"model_key": "ngl24-ctx48k",
|
|
"cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl24-ctx48k",
|
|
"ngl": 24,
|
|
"ctx": 49152,
|
|
"raw": "17056,11569,28625"
|
|
},
|
|
{
|
|
"type": "llama_bench",
|
|
"model_key": "ngl24-ctx48k",
|
|
"cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl24-ctx48k",
|
|
"ngl": 24,
|
|
"ctx": 49152,
|
|
"raw": "llama-bench.exe : ggml_cuda_init: found 1 CUDA devices (Total VRAM: 6143 MiB):\r\nAt line:1 char:1\r\n+ & 'D:\\WeeyugaTools\\llama.cpp\\llama-bench.exe' -m 'D:\\WeeyugaModels\\ll ...\r\n+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\r\n + CategoryInfo : NotSpecified: (ggml_cuda_init:...RAM: 6143 MiB)::String) [], RemoteException\r\n + FullyQualifiedErrorId : NativeCommandError\r\n \r\n Device 0: NVIDIA GeForce GTX 1060, compute capability 6.1, VMM: yes, VRAM: 6143 MiB\r\nload_backend: loaded CUDA backend from D:\\WeeyugaTools\\llama.cpp\\ggml-cuda.dll\r\nload_backend: loaded RPC backend from D:\\WeeyugaTools\\llama.cpp\\ggml-rpc.dll\r\nload_backend: loaded CPU backend from D:\\WeeyugaTools\\llama.cpp\\ggml-cpu-haswell.dll\r\n| model | size | params | backend | ngl | test | t/s |\r\n| ------------------------------ | ---------: | ---------: | ---------- | --: | --------------: | -------------------: |\r\n| qwen3moe 30B.A3B IQ2_M - 2.7 bpw | 10.11 GiB | 30.53 B | CUDA | 24 | pp256 | 48.01 \u00b1 0.46 |\r\n| qwen3moe 30B.A3B IQ2_M - 2.7 bpw | 10.11 GiB | 30.53 B | CUDA | 24 | tg64 | 6.50 \u00b1 0.03 |\r\n\r\nbuild: 665abc609 (8951)\r\n\r"
|
|
},
|
|
{
|
|
"type": "call",
|
|
"cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl24-ctx48k",
|
|
"model_key": "ngl24-ctx48k",
|
|
"ngl": 24,
|
|
"ctx": 49152,
|
|
"prompt_id": "hello",
|
|
"run_idx": 0,
|
|
"phase": "cold",
|
|
"total_duration_ms": 19864,
|
|
"prompt_tokens": 9,
|
|
"completion_tokens": 123,
|
|
"tokens_per_sec": 6.192106322996375,
|
|
"finish_reason": "stop",
|
|
"reasoning_chars": 467,
|
|
"content_chars": 37,
|
|
"error": null
|
|
},
|
|
{
|
|
"type": "call",
|
|
"cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl24-ctx48k",
|
|
"model_key": "ngl24-ctx48k",
|
|
"ngl": 24,
|
|
"ctx": 49152,
|
|
"prompt_id": "hello",
|
|
"run_idx": 1,
|
|
"phase": "warm",
|
|
"total_duration_ms": 14734,
|
|
"prompt_tokens": 9,
|
|
"completion_tokens": 101,
|
|
"tokens_per_sec": 6.854893443735578,
|
|
"finish_reason": "stop",
|
|
"reasoning_chars": 370,
|
|
"content_chars": 37,
|
|
"error": null
|
|
},
|
|
{
|
|
"type": "call",
|
|
"cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl24-ctx48k",
|
|
"model_key": "ngl24-ctx48k",
|
|
"ngl": 24,
|
|
"ctx": 49152,
|
|
"prompt_id": "hello",
|
|
"run_idx": 2,
|
|
"phase": "warm",
|
|
"total_duration_ms": 15636,
|
|
"prompt_tokens": 9,
|
|
"completion_tokens": 108,
|
|
"tokens_per_sec": 6.907137375287798,
|
|
"finish_reason": "stop",
|
|
"reasoning_chars": 399,
|
|
"content_chars": 37,
|
|
"error": null
|
|
},
|
|
{
|
|
"type": "call",
|
|
"cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl24-ctx48k",
|
|
"model_key": "ngl24-ctx48k",
|
|
"ngl": 24,
|
|
"ctx": 49152,
|
|
"prompt_id": "hello",
|
|
"run_idx": 3,
|
|
"phase": "warm",
|
|
"total_duration_ms": 14990,
|
|
"prompt_tokens": 9,
|
|
"completion_tokens": 103,
|
|
"tokens_per_sec": 6.871247498332221,
|
|
"finish_reason": "stop",
|
|
"reasoning_chars": 380,
|
|
"content_chars": 37,
|
|
"error": null
|
|
},
|
|
{
|
|
"type": "call",
|
|
"cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl24-ctx48k",
|
|
"model_key": "ngl24-ctx48k",
|
|
"ngl": 24,
|
|
"ctx": 49152,
|
|
"prompt_id": "P-MEDIUM",
|
|
"run_idx": 0,
|
|
"phase": "cold",
|
|
"total_duration_ms": 58574,
|
|
"prompt_tokens": 22,
|
|
"completion_tokens": 385,
|
|
"tokens_per_sec": 6.572882166148803,
|
|
"finish_reason": "stop",
|
|
"reasoning_chars": 1365,
|
|
"content_chars": 480,
|
|
"error": null
|
|
},
|
|
{
|
|
"type": "call",
|
|
"cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl24-ctx48k",
|
|
"model_key": "ngl24-ctx48k",
|
|
"ngl": 24,
|
|
"ctx": 49152,
|
|
"prompt_id": "P-MEDIUM",
|
|
"run_idx": 1,
|
|
"phase": "warm",
|
|
"total_duration_ms": 89924,
|
|
"prompt_tokens": 22,
|
|
"completion_tokens": 594,
|
|
"tokens_per_sec": 6.60557804368133,
|
|
"finish_reason": "stop",
|
|
"reasoning_chars": 2406,
|
|
"content_chars": 484,
|
|
"error": null
|
|
},
|
|
{
|
|
"type": "call",
|
|
"cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl24-ctx48k",
|
|
"model_key": "ngl24-ctx48k",
|
|
"ngl": 24,
|
|
"ctx": 49152,
|
|
"prompt_id": "P-MEDIUM",
|
|
"run_idx": 2,
|
|
"phase": "warm",
|
|
"total_duration_ms": 92501,
|
|
"prompt_tokens": 22,
|
|
"completion_tokens": 606,
|
|
"tokens_per_sec": 6.551280526697008,
|
|
"finish_reason": "stop",
|
|
"reasoning_chars": 2308,
|
|
"content_chars": 671,
|
|
"error": null
|
|
},
|
|
{
|
|
"type": "call",
|
|
"cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl24-ctx48k",
|
|
"model_key": "ngl24-ctx48k",
|
|
"ngl": 24,
|
|
"ctx": 49152,
|
|
"prompt_id": "P-MEDIUM",
|
|
"run_idx": 3,
|
|
"phase": "warm",
|
|
"total_duration_ms": 59896,
|
|
"prompt_tokens": 22,
|
|
"completion_tokens": 403,
|
|
"tokens_per_sec": 6.728329103779885,
|
|
"finish_reason": "stop",
|
|
"reasoning_chars": 1446,
|
|
"content_chars": 537,
|
|
"error": null
|
|
},
|
|
{
|
|
"type": "call",
|
|
"cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl24-ctx48k",
|
|
"model_key": "ngl24-ctx48k",
|
|
"ngl": 24,
|
|
"ctx": 49152,
|
|
"prompt_id": "P-HARD",
|
|
"run_idx": 0,
|
|
"phase": "cold",
|
|
"total_duration_ms": 121576,
|
|
"prompt_tokens": 30,
|
|
"completion_tokens": 768,
|
|
"tokens_per_sec": 6.317036257156018,
|
|
"finish_reason": "stop",
|
|
"reasoning_chars": 1226,
|
|
"content_chars": 1991,
|
|
"error": null
|
|
},
|
|
{
|
|
"type": "call",
|
|
"cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl24-ctx48k",
|
|
"model_key": "ngl24-ctx48k",
|
|
"ngl": 24,
|
|
"ctx": 49152,
|
|
"prompt_id": "P-HARD",
|
|
"run_idx": 1,
|
|
"phase": "warm",
|
|
"total_duration_ms": 69317,
|
|
"prompt_tokens": 30,
|
|
"completion_tokens": 463,
|
|
"tokens_per_sec": 6.6794581415814305,
|
|
"finish_reason": "stop",
|
|
"reasoning_chars": 1048,
|
|
"content_chars": 963,
|
|
"error": null
|
|
},
|
|
{
|
|
"type": "call",
|
|
"cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl24-ctx48k",
|
|
"model_key": "ngl24-ctx48k",
|
|
"ngl": 24,
|
|
"ctx": 49152,
|
|
"prompt_id": "P-HARD",
|
|
"run_idx": 2,
|
|
"phase": "warm",
|
|
"total_duration_ms": 114261,
|
|
"prompt_tokens": 30,
|
|
"completion_tokens": 741,
|
|
"tokens_per_sec": 6.485152414209573,
|
|
"finish_reason": "stop",
|
|
"reasoning_chars": 1246,
|
|
"content_chars": 1964,
|
|
"error": null
|
|
},
|
|
{
|
|
"type": "call",
|
|
"cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl24-ctx48k",
|
|
"model_key": "ngl24-ctx48k",
|
|
"ngl": 24,
|
|
"ctx": 49152,
|
|
"prompt_id": "P-HARD",
|
|
"run_idx": 3,
|
|
"phase": "warm",
|
|
"total_duration_ms": 115201,
|
|
"prompt_tokens": 30,
|
|
"completion_tokens": 746,
|
|
"tokens_per_sec": 6.475638232307012,
|
|
"finish_reason": "stop",
|
|
"reasoning_chars": 1171,
|
|
"content_chars": 1994,
|
|
"error": null
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl24-ctx64k",
|
|
"n_calls": 15,
|
|
"n_errors": 0,
|
|
"duration_ms_mean": 71267.58,
|
|
"duration_ms_p50": 80265.5,
|
|
"duration_ms_p95": 128231.3,
|
|
"tokens_per_sec_mean": 6.41,
|
|
"tokens_per_sec_p50": 6.39,
|
|
"tokens_per_sec_p95": 6.88,
|
|
"tokens_per_sec_max": 6.9,
|
|
"cold_warm_split": {
|
|
"cold": {
|
|
"n_calls": 3,
|
|
"duration_ms_mean": 80477.67,
|
|
"duration_ms_p50": 99471.0,
|
|
"tokens_per_sec_mean": 5.99
|
|
},
|
|
"warm": {
|
|
"n_calls": 9,
|
|
"duration_ms_mean": 68197.56,
|
|
"duration_ms_p50": 69604.0,
|
|
"tokens_per_sec_mean": 6.55
|
|
}
|
|
},
|
|
"per_prompt": {
|
|
"unknown": {
|
|
"n_calls": 3,
|
|
"n_errors": 0,
|
|
"duration_ms_mean": null,
|
|
"duration_ms_p50": null,
|
|
"tokens_per_sec_mean": null,
|
|
"tokens_per_sec_p50": null
|
|
},
|
|
"hello": {
|
|
"n_calls": 4,
|
|
"n_errors": 0,
|
|
"duration_ms_mean": 16514.0,
|
|
"duration_ms_p50": 16585.0,
|
|
"tokens_per_sec_mean": 6.53,
|
|
"tokens_per_sec_p50": 6.84
|
|
},
|
|
"P-MEDIUM": {
|
|
"n_calls": 4,
|
|
"n_errors": 0,
|
|
"duration_ms_mean": 79436.25,
|
|
"duration_ms_p50": 81054.0,
|
|
"tokens_per_sec_mean": 6.43,
|
|
"tokens_per_sec_p50": 6.44
|
|
},
|
|
"P-HARD": {
|
|
"n_calls": 4,
|
|
"n_errors": 0,
|
|
"duration_ms_mean": 117852.5,
|
|
"duration_ms_p50": 124494.0,
|
|
"tokens_per_sec_mean": 6.25,
|
|
"tokens_per_sec_p50": 6.25
|
|
}
|
|
},
|
|
"chars_split": {
|
|
"has_thinking": true,
|
|
"reasoning_chars_mean": 1063.5,
|
|
"answer_chars_mean": 944.25
|
|
},
|
|
"timeline": [
|
|
{
|
|
"prompt_id": null,
|
|
"run_idx": null,
|
|
"phase": null,
|
|
"duration_ms": null,
|
|
"tokens_per_sec": null,
|
|
"error": null
|
|
},
|
|
{
|
|
"prompt_id": null,
|
|
"run_idx": null,
|
|
"phase": null,
|
|
"duration_ms": null,
|
|
"tokens_per_sec": null,
|
|
"error": null
|
|
},
|
|
{
|
|
"prompt_id": null,
|
|
"run_idx": null,
|
|
"phase": null,
|
|
"duration_ms": null,
|
|
"tokens_per_sec": null,
|
|
"error": null
|
|
},
|
|
{
|
|
"prompt_id": "hello",
|
|
"run_idx": 0,
|
|
"phase": "cold",
|
|
"duration_ms": 18535,
|
|
"tokens_per_sec": 5.557054221742649,
|
|
"error": null
|
|
},
|
|
{
|
|
"prompt_id": "hello",
|
|
"run_idx": 1,
|
|
"phase": "warm",
|
|
"duration_ms": 18478,
|
|
"tokens_per_sec": 6.873038207598224,
|
|
"error": null
|
|
},
|
|
{
|
|
"prompt_id": "hello",
|
|
"run_idx": 2,
|
|
"phase": "warm",
|
|
"duration_ms": 14692,
|
|
"tokens_per_sec": 6.806425265450585,
|
|
"error": null
|
|
},
|
|
{
|
|
"prompt_id": "hello",
|
|
"run_idx": 3,
|
|
"phase": "warm",
|
|
"duration_ms": 14351,
|
|
"tokens_per_sec": 6.898473973939098,
|
|
"error": null
|
|
},
|
|
{
|
|
"prompt_id": "P-MEDIUM",
|
|
"run_idx": 0,
|
|
"phase": "cold",
|
|
"duration_ms": 99471,
|
|
"tokens_per_sec": 6.2430256054528455,
|
|
"error": null
|
|
},
|
|
{
|
|
"prompt_id": "P-MEDIUM",
|
|
"run_idx": 1,
|
|
"phase": "warm",
|
|
"duration_ms": 56166,
|
|
"tokens_per_sec": 6.445180358223837,
|
|
"error": null
|
|
},
|
|
{
|
|
"prompt_id": "P-MEDIUM",
|
|
"run_idx": 2,
|
|
"phase": "warm",
|
|
"duration_ms": 92504,
|
|
"tokens_per_sec": 6.442964628556602,
|
|
"error": null
|
|
},
|
|
{
|
|
"prompt_id": "P-MEDIUM",
|
|
"run_idx": 3,
|
|
"phase": "warm",
|
|
"duration_ms": 69604,
|
|
"tokens_per_sec": 6.5944485949083385,
|
|
"error": null
|
|
},
|
|
{
|
|
"prompt_id": "P-HARD",
|
|
"run_idx": 0,
|
|
"phase": "cold",
|
|
"duration_ms": 123427,
|
|
"tokens_per_sec": 6.165587756325601,
|
|
"error": null
|
|
},
|
|
{
|
|
"prompt_id": "P-HARD",
|
|
"run_idx": 1,
|
|
"phase": "warm",
|
|
"duration_ms": 90927,
|
|
"tokens_per_sec": 6.345749887272207,
|
|
"error": null
|
|
},
|
|
{
|
|
"prompt_id": "P-HARD",
|
|
"run_idx": 2,
|
|
"phase": "warm",
|
|
"duration_ms": 131495,
|
|
"tokens_per_sec": 6.319631925168257,
|
|
"error": null
|
|
},
|
|
{
|
|
"prompt_id": "P-HARD",
|
|
"run_idx": 3,
|
|
"phase": "warm",
|
|
"duration_ms": 125561,
|
|
"tokens_per_sec": 6.188227236164095,
|
|
"error": null
|
|
}
|
|
],
|
|
"calls": [
|
|
{
|
|
"type": "vram_snapshot",
|
|
"model_key": "ngl24-ctx64k",
|
|
"cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl24-ctx64k",
|
|
"ngl": 24,
|
|
"ctx": 65536,
|
|
"raw": "5780, 250, 6144"
|
|
},
|
|
{
|
|
"type": "ram_snapshot",
|
|
"model_key": "ngl24-ctx64k",
|
|
"cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl24-ctx64k",
|
|
"ngl": 24,
|
|
"ctx": 65536,
|
|
"raw": "18513,10111,28625"
|
|
},
|
|
{
|
|
"type": "llama_bench",
|
|
"model_key": "ngl24-ctx64k",
|
|
"cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl24-ctx64k",
|
|
"ngl": 24,
|
|
"ctx": 65536,
|
|
"raw": "llama-bench.exe : ggml_cuda_init: found 1 CUDA devices (Total VRAM: 6143 MiB):\r\nAt line:1 char:1\r\n+ & 'D:\\WeeyugaTools\\llama.cpp\\llama-bench.exe' -m 'D:\\WeeyugaModels\\ll ...\r\n+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\r\n + CategoryInfo : NotSpecified: (ggml_cuda_init:...RAM: 6143 MiB)::String) [], RemoteException\r\n + FullyQualifiedErrorId : NativeCommandError\r\n \r\n Device 0: NVIDIA GeForce GTX 1060, compute capability 6.1, VMM: yes, VRAM: 6143 MiB\r\nload_backend: loaded CUDA backend from D:\\WeeyugaTools\\llama.cpp\\ggml-cuda.dll\r\nload_backend: loaded RPC backend from D:\\WeeyugaTools\\llama.cpp\\ggml-rpc.dll\r\nload_backend: loaded CPU backend from D:\\WeeyugaTools\\llama.cpp\\ggml-cpu-haswell.dll\r\n| model | size | params | backend | ngl | test | t/s |\r\n| ------------------------------ | ---------: | ---------: | ---------- | --: | --------------: | -------------------: |\r\n| qwen3moe 30B.A3B IQ2_M - 2.7 bpw | 10.11 GiB | 30.53 B | CUDA | 24 | pp256 | 27.92 \u00b1 3.61 |\r\n| qwen3moe 30B.A3B IQ2_M - 2.7 bpw | 10.11 GiB | 30.53 B | CUDA | 24 | tg64 | 5.63 \u00b1 0.39 |\r\n\r\nbuild: 665abc609 (8951)\r\n\r"
|
|
},
|
|
{
|
|
"type": "call",
|
|
"cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl24-ctx64k",
|
|
"model_key": "ngl24-ctx64k",
|
|
"ngl": 24,
|
|
"ctx": 65536,
|
|
"prompt_id": "hello",
|
|
"run_idx": 0,
|
|
"phase": "cold",
|
|
"total_duration_ms": 18535,
|
|
"prompt_tokens": 9,
|
|
"completion_tokens": 103,
|
|
"tokens_per_sec": 5.557054221742649,
|
|
"finish_reason": "stop",
|
|
"reasoning_chars": 380,
|
|
"content_chars": 37,
|
|
"error": null
|
|
},
|
|
{
|
|
"type": "call",
|
|
"cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl24-ctx64k",
|
|
"model_key": "ngl24-ctx64k",
|
|
"ngl": 24,
|
|
"ctx": 65536,
|
|
"prompt_id": "hello",
|
|
"run_idx": 1,
|
|
"phase": "warm",
|
|
"total_duration_ms": 18478,
|
|
"prompt_tokens": 9,
|
|
"completion_tokens": 127,
|
|
"tokens_per_sec": 6.873038207598224,
|
|
"finish_reason": "stop",
|
|
"reasoning_chars": 495,
|
|
"content_chars": 37,
|
|
"error": null
|
|
},
|
|
{
|
|
"type": "call",
|
|
"cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl24-ctx64k",
|
|
"model_key": "ngl24-ctx64k",
|
|
"ngl": 24,
|
|
"ctx": 65536,
|
|
"prompt_id": "hello",
|
|
"run_idx": 2,
|
|
"phase": "warm",
|
|
"total_duration_ms": 14692,
|
|
"prompt_tokens": 9,
|
|
"completion_tokens": 100,
|
|
"tokens_per_sec": 6.806425265450585,
|
|
"finish_reason": "stop",
|
|
"reasoning_chars": 361,
|
|
"content_chars": 37,
|
|
"error": null
|
|
},
|
|
{
|
|
"type": "call",
|
|
"cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl24-ctx64k",
|
|
"model_key": "ngl24-ctx64k",
|
|
"ngl": 24,
|
|
"ctx": 65536,
|
|
"prompt_id": "hello",
|
|
"run_idx": 3,
|
|
"phase": "warm",
|
|
"total_duration_ms": 14351,
|
|
"prompt_tokens": 9,
|
|
"completion_tokens": 99,
|
|
"tokens_per_sec": 6.898473973939098,
|
|
"finish_reason": "stop",
|
|
"reasoning_chars": 377,
|
|
"content_chars": 37,
|
|
"error": null
|
|
},
|
|
{
|
|
"type": "call",
|
|
"cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl24-ctx64k",
|
|
"model_key": "ngl24-ctx64k",
|
|
"ngl": 24,
|
|
"ctx": 65536,
|
|
"prompt_id": "P-MEDIUM",
|
|
"run_idx": 0,
|
|
"phase": "cold",
|
|
"total_duration_ms": 99471,
|
|
"prompt_tokens": 22,
|
|
"completion_tokens": 621,
|
|
"tokens_per_sec": 6.2430256054528455,
|
|
"finish_reason": "stop",
|
|
"reasoning_chars": 0,
|
|
"content_chars": 2991,
|
|
"error": null
|
|
},
|
|
{
|
|
"type": "call",
|
|
"cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl24-ctx64k",
|
|
"model_key": "ngl24-ctx64k",
|
|
"ngl": 24,
|
|
"ctx": 65536,
|
|
"prompt_id": "P-MEDIUM",
|
|
"run_idx": 1,
|
|
"phase": "warm",
|
|
"total_duration_ms": 56166,
|
|
"prompt_tokens": 22,
|
|
"completion_tokens": 362,
|
|
"tokens_per_sec": 6.445180358223837,
|
|
"finish_reason": "stop",
|
|
"reasoning_chars": 1274,
|
|
"content_chars": 513,
|
|
"error": null
|
|
},
|
|
{
|
|
"type": "call",
|
|
"cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl24-ctx64k",
|
|
"model_key": "ngl24-ctx64k",
|
|
"ngl": 24,
|
|
"ctx": 65536,
|
|
"prompt_id": "P-MEDIUM",
|
|
"run_idx": 2,
|
|
"phase": "warm",
|
|
"total_duration_ms": 92504,
|
|
"prompt_tokens": 22,
|
|
"completion_tokens": 596,
|
|
"tokens_per_sec": 6.442964628556602,
|
|
"finish_reason": "stop",
|
|
"reasoning_chars": 2393,
|
|
"content_chars": 481,
|
|
"error": null
|
|
},
|
|
{
|
|
"type": "call",
|
|
"cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl24-ctx64k",
|
|
"model_key": "ngl24-ctx64k",
|
|
"ngl": 24,
|
|
"ctx": 65536,
|
|
"prompt_id": "P-MEDIUM",
|
|
"run_idx": 3,
|
|
"phase": "warm",
|
|
"total_duration_ms": 69604,
|
|
"prompt_tokens": 22,
|
|
"completion_tokens": 459,
|
|
"tokens_per_sec": 6.5944485949083385,
|
|
"finish_reason": "stop",
|
|
"reasoning_chars": 1810,
|
|
"content_chars": 507,
|
|
"error": null
|
|
},
|
|
{
|
|
"type": "call",
|
|
"cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl24-ctx64k",
|
|
"model_key": "ngl24-ctx64k",
|
|
"ngl": 24,
|
|
"ctx": 65536,
|
|
"prompt_id": "P-HARD",
|
|
"run_idx": 0,
|
|
"phase": "cold",
|
|
"total_duration_ms": 123427,
|
|
"prompt_tokens": 30,
|
|
"completion_tokens": 761,
|
|
"tokens_per_sec": 6.165587756325601,
|
|
"finish_reason": "stop",
|
|
"reasoning_chars": 1221,
|
|
"content_chars": 1919,
|
|
"error": null
|
|
},
|
|
{
|
|
"type": "call",
|
|
"cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl24-ctx64k",
|
|
"model_key": "ngl24-ctx64k",
|
|
"ngl": 24,
|
|
"ctx": 65536,
|
|
"prompt_id": "P-HARD",
|
|
"run_idx": 1,
|
|
"phase": "warm",
|
|
"total_duration_ms": 90927,
|
|
"prompt_tokens": 30,
|
|
"completion_tokens": 577,
|
|
"tokens_per_sec": 6.345749887272207,
|
|
"finish_reason": "stop",
|
|
"reasoning_chars": 1191,
|
|
"content_chars": 1282,
|
|
"error": null
|
|
},
|
|
{
|
|
"type": "call",
|
|
"cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl24-ctx64k",
|
|
"model_key": "ngl24-ctx64k",
|
|
"ngl": 24,
|
|
"ctx": 65536,
|
|
"prompt_id": "P-HARD",
|
|
"run_idx": 2,
|
|
"phase": "warm",
|
|
"total_duration_ms": 131495,
|
|
"prompt_tokens": 30,
|
|
"completion_tokens": 831,
|
|
"tokens_per_sec": 6.319631925168257,
|
|
"finish_reason": "stop",
|
|
"reasoning_chars": 2262,
|
|
"content_chars": 1303,
|
|
"error": null
|
|
},
|
|
{
|
|
"type": "call",
|
|
"cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl24-ctx64k",
|
|
"model_key": "ngl24-ctx64k",
|
|
"ngl": 24,
|
|
"ctx": 65536,
|
|
"prompt_id": "P-HARD",
|
|
"run_idx": 3,
|
|
"phase": "warm",
|
|
"total_duration_ms": 125561,
|
|
"prompt_tokens": 30,
|
|
"completion_tokens": 777,
|
|
"tokens_per_sec": 6.188227236164095,
|
|
"finish_reason": "stop",
|
|
"reasoning_chars": 998,
|
|
"content_chars": 2187,
|
|
"error": null
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl24-ctx96k",
|
|
"n_calls": 15,
|
|
"n_errors": 0,
|
|
"duration_ms_mean": 69299.5,
|
|
"duration_ms_p50": 68355.0,
|
|
"duration_ms_p95": 124990.95,
|
|
"tokens_per_sec_mean": 6.08,
|
|
"tokens_per_sec_p50": 6.44,
|
|
"tokens_per_sec_p95": 6.68,
|
|
"tokens_per_sec_max": 6.69,
|
|
"cold_warm_split": {
|
|
"cold": {
|
|
"n_calls": 3,
|
|
"duration_ms_mean": 78350.33,
|
|
"duration_ms_p50": 72290.0,
|
|
"tokens_per_sec_mean": 4.93
|
|
},
|
|
"warm": {
|
|
"n_calls": 9,
|
|
"duration_ms_mean": 66282.56,
|
|
"duration_ms_p50": 64420.0,
|
|
"tokens_per_sec_mean": 6.47
|
|
}
|
|
},
|
|
"per_prompt": {
|
|
"unknown": {
|
|
"n_calls": 3,
|
|
"n_errors": 0,
|
|
"duration_ms_mean": null,
|
|
"duration_ms_p50": null,
|
|
"tokens_per_sec_mean": null,
|
|
"tokens_per_sec_p50": null
|
|
},
|
|
"hello": {
|
|
"n_calls": 4,
|
|
"n_errors": 0,
|
|
"duration_ms_mean": 25171.0,
|
|
"duration_ms_p50": 25926.5,
|
|
"tokens_per_sec_mean": 5.49,
|
|
"tokens_per_sec_p50": 6.3
|
|
},
|
|
"P-MEDIUM": {
|
|
"n_calls": 4,
|
|
"n_errors": 0,
|
|
"duration_ms_mean": 73270.5,
|
|
"duration_ms_p50": 68355.0,
|
|
"tokens_per_sec_mean": 6.39,
|
|
"tokens_per_sec_p50": 6.46
|
|
},
|
|
"P-HARD": {
|
|
"n_calls": 4,
|
|
"n_errors": 0,
|
|
"duration_ms_mean": 109457.0,
|
|
"duration_ms_p50": 117967.0,
|
|
"tokens_per_sec_mean": 6.37,
|
|
"tokens_per_sec_p50": 6.4
|
|
}
|
|
},
|
|
"chars_split": {
|
|
"has_thinking": true,
|
|
"reasoning_chars_mean": 1185.42,
|
|
"answer_chars_mean": 741.92
|
|
},
|
|
"timeline": [
|
|
{
|
|
"prompt_id": null,
|
|
"run_idx": null,
|
|
"phase": null,
|
|
"duration_ms": null,
|
|
"tokens_per_sec": null,
|
|
"error": null
|
|
},
|
|
{
|
|
"prompt_id": null,
|
|
"run_idx": null,
|
|
"phase": null,
|
|
"duration_ms": null,
|
|
"tokens_per_sec": null,
|
|
"error": null
|
|
},
|
|
{
|
|
"prompt_id": null,
|
|
"run_idx": null,
|
|
"phase": null,
|
|
"duration_ms": null,
|
|
"tokens_per_sec": null,
|
|
"error": null
|
|
},
|
|
{
|
|
"prompt_id": "hello",
|
|
"run_idx": 0,
|
|
"phase": "cold",
|
|
"duration_ms": 35465,
|
|
"tokens_per_sec": 2.7068941209643307,
|
|
"error": null
|
|
},
|
|
{
|
|
"prompt_id": "hello",
|
|
"run_idx": 1,
|
|
"phase": "warm",
|
|
"duration_ms": 27111,
|
|
"tokens_per_sec": 6.1598613109070115,
|
|
"error": null
|
|
},
|
|
{
|
|
"prompt_id": "hello",
|
|
"run_idx": 2,
|
|
"phase": "warm",
|
|
"duration_ms": 24742,
|
|
"tokens_per_sec": 6.668822245574327,
|
|
"error": null
|
|
},
|
|
{
|
|
"prompt_id": "hello",
|
|
"run_idx": 3,
|
|
"phase": "warm",
|
|
"duration_ms": 13366,
|
|
"tokens_per_sec": 6.43423612150232,
|
|
"error": null
|
|
},
|
|
{
|
|
"prompt_id": "P-MEDIUM",
|
|
"run_idx": 0,
|
|
"phase": "cold",
|
|
"duration_ms": 72290,
|
|
"tokens_per_sec": 5.97593028081339,
|
|
"error": null
|
|
},
|
|
{
|
|
"prompt_id": "P-MEDIUM",
|
|
"run_idx": 1,
|
|
"phase": "warm",
|
|
"duration_ms": 92055,
|
|
"tokens_per_sec": 6.474390310140676,
|
|
"error": null
|
|
},
|
|
{
|
|
"prompt_id": "P-MEDIUM",
|
|
"run_idx": 2,
|
|
"phase": "warm",
|
|
"duration_ms": 64317,
|
|
"tokens_per_sec": 6.436867391202949,
|
|
"error": null
|
|
},
|
|
{
|
|
"prompt_id": "P-MEDIUM",
|
|
"run_idx": 3,
|
|
"phase": "warm",
|
|
"duration_ms": 64420,
|
|
"tokens_per_sec": 6.690468798509779,
|
|
"error": null
|
|
},
|
|
{
|
|
"prompt_id": "P-HARD",
|
|
"run_idx": 0,
|
|
"phase": "cold",
|
|
"duration_ms": 127296,
|
|
"tokens_per_sec": 6.1038838612368025,
|
|
"error": null
|
|
},
|
|
{
|
|
"prompt_id": "P-HARD",
|
|
"run_idx": 1,
|
|
"phase": "warm",
|
|
"duration_ms": 123105,
|
|
"tokens_per_sec": 6.344177734454328,
|
|
"error": null
|
|
},
|
|
{
|
|
"prompt_id": "P-HARD",
|
|
"run_idx": 2,
|
|
"phase": "warm",
|
|
"duration_ms": 74598,
|
|
"tokens_per_sec": 6.568540711547227,
|
|
"error": null
|
|
},
|
|
{
|
|
"prompt_id": "P-HARD",
|
|
"run_idx": 3,
|
|
"phase": "warm",
|
|
"duration_ms": 112829,
|
|
"tokens_per_sec": 6.45224188816705,
|
|
"error": null
|
|
}
|
|
],
|
|
"calls": [
|
|
{
|
|
"type": "vram_snapshot",
|
|
"model_key": "ngl24-ctx96k",
|
|
"cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl24-ctx96k",
|
|
"ngl": 24,
|
|
"ctx": 98304,
|
|
"raw": "5750, 280, 6144"
|
|
},
|
|
{
|
|
"type": "ram_snapshot",
|
|
"model_key": "ngl24-ctx96k",
|
|
"cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl24-ctx96k",
|
|
"ngl": 24,
|
|
"ctx": 98304,
|
|
"raw": "21927,6698,28625"
|
|
},
|
|
{
|
|
"type": "llama_bench",
|
|
"model_key": "ngl24-ctx96k",
|
|
"cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl24-ctx96k",
|
|
"ngl": 24,
|
|
"ctx": 98304,
|
|
"raw": "llama-bench.exe : ggml_cuda_init: found 1 CUDA devices (Total VRAM: 6143 MiB):\r\nAt line:1 char:1\r\n+ & 'D:\\WeeyugaTools\\llama.cpp\\llama-bench.exe' -m 'D:\\WeeyugaModels\\ll ...\r\n+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\r\n + CategoryInfo : NotSpecified: (ggml_cuda_init:...RAM: 6143 MiB)::String) [], RemoteException\r\n + FullyQualifiedErrorId : NativeCommandError\r\n \r\n Device 0: NVIDIA GeForce GTX 1060, compute capability 6.1, VMM: yes, VRAM: 6143 MiB\r\nload_backend: loaded CUDA backend from D:\\WeeyugaTools\\llama.cpp\\ggml-cuda.dll\r\nload_backend: loaded RPC backend from D:\\WeeyugaTools\\llama.cpp\\ggml-rpc.dll\r\nload_backend: loaded CPU backend from D:\\WeeyugaTools\\llama.cpp\\ggml-cpu-haswell.dll\r\n| model | size | params | backend | ngl | test | t/s |\r\n| ------------------------------ | ---------: | ---------: | ---------- | --: | --------------: | -------------------: |\r\n| qwen3moe 30B.A3B IQ2_M - 2.7 bpw | 10.11 GiB | 30.53 B | CUDA | 24 | pp256 | 15.72 \u00b1 3.11 |\r\n| qwen3moe 30B.A3B IQ2_M - 2.7 bpw | 10.11 GiB | 30.53 B | CUDA | 24 | tg64 | 5.22 \u00b1 0.53 |\r\n\r\nbuild: 665abc609 (8951)\r\n\r"
|
|
},
|
|
{
|
|
"type": "call",
|
|
"cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl24-ctx96k",
|
|
"model_key": "ngl24-ctx96k",
|
|
"ngl": 24,
|
|
"ctx": 98304,
|
|
"prompt_id": "hello",
|
|
"run_idx": 0,
|
|
"phase": "cold",
|
|
"total_duration_ms": 35465,
|
|
"prompt_tokens": 9,
|
|
"completion_tokens": 96,
|
|
"tokens_per_sec": 2.7068941209643307,
|
|
"finish_reason": "stop",
|
|
"reasoning_chars": 365,
|
|
"content_chars": 37,
|
|
"error": null
|
|
},
|
|
{
|
|
"type": "call",
|
|
"cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl24-ctx96k",
|
|
"model_key": "ngl24-ctx96k",
|
|
"ngl": 24,
|
|
"ctx": 98304,
|
|
"prompt_id": "hello",
|
|
"run_idx": 1,
|
|
"phase": "warm",
|
|
"total_duration_ms": 27111,
|
|
"prompt_tokens": 9,
|
|
"completion_tokens": 167,
|
|
"tokens_per_sec": 6.1598613109070115,
|
|
"finish_reason": "stop",
|
|
"reasoning_chars": 547,
|
|
"content_chars": 152,
|
|
"error": null
|
|
},
|
|
{
|
|
"type": "call",
|
|
"cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl24-ctx96k",
|
|
"model_key": "ngl24-ctx96k",
|
|
"ngl": 24,
|
|
"ctx": 98304,
|
|
"prompt_id": "hello",
|
|
"run_idx": 2,
|
|
"phase": "warm",
|
|
"total_duration_ms": 24742,
|
|
"prompt_tokens": 9,
|
|
"completion_tokens": 165,
|
|
"tokens_per_sec": 6.668822245574327,
|
|
"finish_reason": "stop",
|
|
"reasoning_chars": 571,
|
|
"content_chars": 116,
|
|
"error": null
|
|
},
|
|
{
|
|
"type": "call",
|
|
"cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl24-ctx96k",
|
|
"model_key": "ngl24-ctx96k",
|
|
"ngl": 24,
|
|
"ctx": 98304,
|
|
"prompt_id": "hello",
|
|
"run_idx": 3,
|
|
"phase": "warm",
|
|
"total_duration_ms": 13366,
|
|
"prompt_tokens": 9,
|
|
"completion_tokens": 86,
|
|
"tokens_per_sec": 6.43423612150232,
|
|
"finish_reason": "stop",
|
|
"reasoning_chars": 290,
|
|
"content_chars": 37,
|
|
"error": null
|
|
},
|
|
{
|
|
"type": "call",
|
|
"cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl24-ctx96k",
|
|
"model_key": "ngl24-ctx96k",
|
|
"ngl": 24,
|
|
"ctx": 98304,
|
|
"prompt_id": "P-MEDIUM",
|
|
"run_idx": 0,
|
|
"phase": "cold",
|
|
"total_duration_ms": 72290,
|
|
"prompt_tokens": 22,
|
|
"completion_tokens": 432,
|
|
"tokens_per_sec": 5.97593028081339,
|
|
"finish_reason": "stop",
|
|
"reasoning_chars": 1712,
|
|
"content_chars": 427,
|
|
"error": null
|
|
},
|
|
{
|
|
"type": "call",
|
|
"cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl24-ctx96k",
|
|
"model_key": "ngl24-ctx96k",
|
|
"ngl": 24,
|
|
"ctx": 98304,
|
|
"prompt_id": "P-MEDIUM",
|
|
"run_idx": 1,
|
|
"phase": "warm",
|
|
"total_duration_ms": 92055,
|
|
"prompt_tokens": 22,
|
|
"completion_tokens": 596,
|
|
"tokens_per_sec": 6.474390310140676,
|
|
"finish_reason": "stop",
|
|
"reasoning_chars": 2005,
|
|
"content_chars": 919,
|
|
"error": null
|
|
},
|
|
{
|
|
"type": "call",
|
|
"cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl24-ctx96k",
|
|
"model_key": "ngl24-ctx96k",
|
|
"ngl": 24,
|
|
"ctx": 98304,
|
|
"prompt_id": "P-MEDIUM",
|
|
"run_idx": 2,
|
|
"phase": "warm",
|
|
"total_duration_ms": 64317,
|
|
"prompt_tokens": 22,
|
|
"completion_tokens": 414,
|
|
"tokens_per_sec": 6.436867391202949,
|
|
"finish_reason": "stop",
|
|
"reasoning_chars": 1583,
|
|
"content_chars": 467,
|
|
"error": null
|
|
},
|
|
{
|
|
"type": "call",
|
|
"cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl24-ctx96k",
|
|
"model_key": "ngl24-ctx96k",
|
|
"ngl": 24,
|
|
"ctx": 98304,
|
|
"prompt_id": "P-MEDIUM",
|
|
"run_idx": 3,
|
|
"phase": "warm",
|
|
"total_duration_ms": 64420,
|
|
"prompt_tokens": 22,
|
|
"completion_tokens": 431,
|
|
"tokens_per_sec": 6.690468798509779,
|
|
"finish_reason": "stop",
|
|
"reasoning_chars": 1652,
|
|
"content_chars": 492,
|
|
"error": null
|
|
},
|
|
{
|
|
"type": "call",
|
|
"cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl24-ctx96k",
|
|
"model_key": "ngl24-ctx96k",
|
|
"ngl": 24,
|
|
"ctx": 98304,
|
|
"prompt_id": "P-HARD",
|
|
"run_idx": 0,
|
|
"phase": "cold",
|
|
"total_duration_ms": 127296,
|
|
"prompt_tokens": 30,
|
|
"completion_tokens": 777,
|
|
"tokens_per_sec": 6.1038838612368025,
|
|
"finish_reason": "stop",
|
|
"reasoning_chars": 1080,
|
|
"content_chars": 2215,
|
|
"error": null
|
|
},
|
|
{
|
|
"type": "call",
|
|
"cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl24-ctx96k",
|
|
"model_key": "ngl24-ctx96k",
|
|
"ngl": 24,
|
|
"ctx": 98304,
|
|
"prompt_id": "P-HARD",
|
|
"run_idx": 1,
|
|
"phase": "warm",
|
|
"total_duration_ms": 123105,
|
|
"prompt_tokens": 30,
|
|
"completion_tokens": 781,
|
|
"tokens_per_sec": 6.344177734454328,
|
|
"finish_reason": "stop",
|
|
"reasoning_chars": 1163,
|
|
"content_chars": 2172,
|
|
"error": null
|
|
},
|
|
{
|
|
"type": "call",
|
|
"cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl24-ctx96k",
|
|
"model_key": "ngl24-ctx96k",
|
|
"ngl": 24,
|
|
"ctx": 98304,
|
|
"prompt_id": "P-HARD",
|
|
"run_idx": 2,
|
|
"phase": "warm",
|
|
"total_duration_ms": 74598,
|
|
"prompt_tokens": 30,
|
|
"completion_tokens": 490,
|
|
"tokens_per_sec": 6.568540711547227,
|
|
"finish_reason": "stop",
|
|
"reasoning_chars": 1140,
|
|
"content_chars": 974,
|
|
"error": null
|
|
},
|
|
{
|
|
"type": "call",
|
|
"cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl24-ctx96k",
|
|
"model_key": "ngl24-ctx96k",
|
|
"ngl": 24,
|
|
"ctx": 98304,
|
|
"prompt_id": "P-HARD",
|
|
"run_idx": 3,
|
|
"phase": "warm",
|
|
"total_duration_ms": 112829,
|
|
"prompt_tokens": 30,
|
|
"completion_tokens": 728,
|
|
"tokens_per_sec": 6.45224188816705,
|
|
"finish_reason": "stop",
|
|
"reasoning_chars": 2117,
|
|
"content_chars": 895,
|
|
"error": null
|
|
}
|
|
]
|
|
}
|
|
],
|
|
"meta_record": {
|
|
"type": "meta",
|
|
"benchmark_run_id": "e08a7051-a856-4053-b34b-561d7ce5e8cf",
|
|
"harness_version": "predator-a3b-ngl-ctx-2d-1",
|
|
"started_at_utc": "2026-05-05T07:43:00Z",
|
|
"host": "Slobodans-MacBook-Air.local",
|
|
"node": "predator",
|
|
"engine": "llamacpp",
|
|
"purpose": "Sloba 2026-05-05: NGL=12 and NGL=24 \u00d7 ctx sweep up to 131k. Find the actual context ceiling at the two NGL sweet spots from the prior bench.",
|
|
"cells": [
|
|
{
|
|
"key": "ngl12-ctx64k",
|
|
"ngl": 12,
|
|
"ctx": 65536
|
|
},
|
|
{
|
|
"key": "ngl12-ctx96k",
|
|
"ngl": 12,
|
|
"ctx": 98304
|
|
},
|
|
{
|
|
"key": "ngl12-ctx131k",
|
|
"ngl": 12,
|
|
"ctx": 131072
|
|
},
|
|
{
|
|
"key": "ngl24-ctx48k",
|
|
"ngl": 24,
|
|
"ctx": 49152
|
|
},
|
|
{
|
|
"key": "ngl24-ctx64k",
|
|
"ngl": 24,
|
|
"ctx": 65536
|
|
},
|
|
{
|
|
"key": "ngl24-ctx96k",
|
|
"ngl": 24,
|
|
"ctx": 98304
|
|
}
|
|
],
|
|
"gguf": "Qwen3-30B-A3B-UD-IQ2_M.gguf",
|
|
"max_tokens_per_prompt": {
|
|
"hello": 512,
|
|
"P-MEDIUM": 1024,
|
|
"P-HARD": 2048
|
|
},
|
|
"methodology_notes": "Sloba 2026-05-05: 'max tokens should be bigger than reasoning budget'. Prior bench had hello max_tokens=256 < reasoning_budget=500 \u2192 content_chars=0. Bumped to 512/1024/2048 so reasoning + answer both fit."
|
|
},
|
|
"packaged_at": "2026-05-06T12:28:24Z"
|
|
}
|