Files
weeyuga-benchmarks-public/runs/e08a7051-a856-4053-b34b-561d7ce5e8cf/metadata.json

1812 lines
64 KiB
JSON

{
"id": "e08a7051-a856-4053-b34b-561d7ce5e8cf",
"title": "predator-a3b-ngl-ctx-2d-1 \u2014 qwen3 on predator",
"date": "2026-05-05",
"started_at": "2026-05-05T07:43:00Z",
"git_sha": "f06800254dfd046e154c94f663fb7f9f66c79ae3",
"hardware": "predator",
"hardware_label": "Predator \u00b7 gaming laptop \u00b7 GTX 1060 6 GB \u00b7 28 GB RAM",
"engine": "llamacpp",
"harness": "predator-a3b-ngl-ctx-2d-1",
"model_family": "qwen3",
"model_sizes": [
"qwen3:30b-a3b-iq2m"
],
"task_kind": "param-sweep",
"tags": [
"a3b",
"ctx",
"iq2",
"ngl",
"predator",
"qwen3"
],
"headline": "90 calls across 6 cell(s); ~5.9 tok/s mean; p50 73.4s",
"janie_blurb_md": null,
"janie_blurb_status": "pending",
"caveat": null,
"caveat_severity": null,
"methodology_ref": "A3B_AND_CPU_OVERNIGHT_2026-05-05",
"methodology_deviations_md": null,
"results_table": [
{
"label": "qwen3:30b-a3b-iq2m-think500-ngl12-ctx64k",
"cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl12-ctx64k",
"n_calls": 15,
"n_errors": 0,
"duration_ms_mean": 85573.42,
"duration_ms_p50": 78940.0,
"duration_ms_p95": 159864.3,
"tokens_per_sec_mean": 5.48,
"tokens_per_sec_p50": 5.31,
"tokens_per_sec_p95": 6.79,
"tokens_per_sec_max": 6.8,
"memory_mb": null,
"watts_avg": null,
"notes": null
},
{
"label": "qwen3:30b-a3b-iq2m-think500-ngl12-ctx96k",
"cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl12-ctx96k",
"n_calls": 15,
"n_errors": 0,
"duration_ms_mean": 71805.83,
"duration_ms_p50": 58541.0,
"duration_ms_p95": 140580.5,
"tokens_per_sec_mean": 5.79,
"tokens_per_sec_p50": 5.75,
"tokens_per_sec_p95": 6.47,
"tokens_per_sec_max": 6.7,
"memory_mb": null,
"watts_avg": null,
"notes": null
},
{
"label": "qwen3:30b-a3b-iq2m-think500-ngl12-ctx131k",
"cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl12-ctx131k",
"n_calls": 15,
"n_errors": 0,
"duration_ms_mean": 96406.17,
"duration_ms_p50": 105842.0,
"duration_ms_p95": 154492.35,
"tokens_per_sec_mean": 5.01,
"tokens_per_sec_p50": 5.13,
"tokens_per_sec_p95": 5.97,
"tokens_per_sec_max": 6.29,
"memory_mb": null,
"watts_avg": null,
"notes": null
},
{
"label": "qwen3:30b-a3b-iq2m-think500-ngl24-ctx48k",
"cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl24-ctx48k",
"n_calls": 15,
"n_errors": 0,
"duration_ms_mean": 65539.5,
"duration_ms_p50": 64606.5,
"duration_ms_p95": 118069.75,
"tokens_per_sec_mean": 6.6,
"tokens_per_sec_p50": 6.59,
"tokens_per_sec_p95": 6.89,
"tokens_per_sec_max": 6.91,
"memory_mb": null,
"watts_avg": null,
"notes": null
},
{
"label": "qwen3:30b-a3b-iq2m-think500-ngl24-ctx64k",
"cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl24-ctx64k",
"n_calls": 15,
"n_errors": 0,
"duration_ms_mean": 71267.58,
"duration_ms_p50": 80265.5,
"duration_ms_p95": 128231.3,
"tokens_per_sec_mean": 6.41,
"tokens_per_sec_p50": 6.39,
"tokens_per_sec_p95": 6.88,
"tokens_per_sec_max": 6.9,
"memory_mb": null,
"watts_avg": null,
"notes": null
},
{
"label": "qwen3:30b-a3b-iq2m-think500-ngl24-ctx96k",
"cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl24-ctx96k",
"n_calls": 15,
"n_errors": 0,
"duration_ms_mean": 69299.5,
"duration_ms_p50": 68355.0,
"duration_ms_p95": 124990.95,
"tokens_per_sec_mean": 6.08,
"tokens_per_sec_p50": 6.44,
"tokens_per_sec_p95": 6.68,
"tokens_per_sec_max": 6.69,
"memory_mb": null,
"watts_avg": null,
"notes": null
}
],
"cells": [
{
"cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl12-ctx64k",
"n_calls": 15,
"n_errors": 0,
"duration_ms_mean": 85573.42,
"duration_ms_p50": 78940.0,
"duration_ms_p95": 159864.3,
"tokens_per_sec_mean": 5.48,
"tokens_per_sec_p50": 5.31,
"tokens_per_sec_p95": 6.79,
"tokens_per_sec_max": 6.8
},
{
"cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl12-ctx96k",
"n_calls": 15,
"n_errors": 0,
"duration_ms_mean": 71805.83,
"duration_ms_p50": 58541.0,
"duration_ms_p95": 140580.5,
"tokens_per_sec_mean": 5.79,
"tokens_per_sec_p50": 5.75,
"tokens_per_sec_p95": 6.47,
"tokens_per_sec_max": 6.7
},
{
"cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl12-ctx131k",
"n_calls": 15,
"n_errors": 0,
"duration_ms_mean": 96406.17,
"duration_ms_p50": 105842.0,
"duration_ms_p95": 154492.35,
"tokens_per_sec_mean": 5.01,
"tokens_per_sec_p50": 5.13,
"tokens_per_sec_p95": 5.97,
"tokens_per_sec_max": 6.29
},
{
"cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl24-ctx48k",
"n_calls": 15,
"n_errors": 0,
"duration_ms_mean": 65539.5,
"duration_ms_p50": 64606.5,
"duration_ms_p95": 118069.75,
"tokens_per_sec_mean": 6.6,
"tokens_per_sec_p50": 6.59,
"tokens_per_sec_p95": 6.89,
"tokens_per_sec_max": 6.91
},
{
"cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl24-ctx64k",
"n_calls": 15,
"n_errors": 0,
"duration_ms_mean": 71267.58,
"duration_ms_p50": 80265.5,
"duration_ms_p95": 128231.3,
"tokens_per_sec_mean": 6.41,
"tokens_per_sec_p50": 6.39,
"tokens_per_sec_p95": 6.88,
"tokens_per_sec_max": 6.9
},
{
"cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl24-ctx96k",
"n_calls": 15,
"n_errors": 0,
"duration_ms_mean": 69299.5,
"duration_ms_p50": 68355.0,
"duration_ms_p95": 124990.95,
"tokens_per_sec_mean": 6.08,
"tokens_per_sec_p50": 6.44,
"tokens_per_sec_p95": 6.68,
"tokens_per_sec_max": 6.69
}
],
"n_calls_total": 90,
"n_errors_total": 0,
"chart_spec": {
"kind": "line",
"x": "param_value",
"y": "tokens_per_sec_mean",
"series": [
"qwen3:30b-a3b-iq2m-think500-ngl12-ctx64k",
"qwen3:30b-a3b-iq2m-think500-ngl12-ctx96k",
"qwen3:30b-a3b-iq2m-think500-ngl12-ctx131k",
"qwen3:30b-a3b-iq2m-think500-ngl24-ctx48k",
"qwen3:30b-a3b-iq2m-think500-ngl24-ctx64k",
"qwen3:30b-a3b-iq2m-think500-ngl24-ctx96k"
],
"data_url": "/data/e08a7051-a856-4053-b34b-561d7ce5e8cf/run.jsonl"
},
"site_grade": "standard",
"site_grade_reason": null,
"raw_data_urls": {
"jsonl": "/data/e08a7051-a856-4053-b34b-561d7ce5e8cf/run.jsonl",
"log": "/data/e08a7051-a856-4053-b34b-561d7ce5e8cf/run.log",
"md": null,
"metadata": "/data/e08a7051-a856-4053-b34b-561d7ce5e8cf/metadata.json",
"gitea_dir": "https://git.weeyuga.com/slobodanmargetic988/weeyuga-benchmarks-public/src/branch/main/runs/e08a7051-a856-4053-b34b-561d7ce5e8cf/"
},
"synthesis_doc": "A3B_AND_CPU_OVERNIGHT_2026-05-05.md",
"synthesis_docs_all": [
"A3B_AND_CPU_OVERNIGHT_2026-05-05.md"
],
"related_ids": [],
"status": "complete",
"visibility": "draft",
"cells_full": [
{
"cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl12-ctx64k",
"n_calls": 15,
"n_errors": 0,
"duration_ms_mean": 85573.42,
"duration_ms_p50": 78940.0,
"duration_ms_p95": 159864.3,
"tokens_per_sec_mean": 5.48,
"tokens_per_sec_p50": 5.31,
"tokens_per_sec_p95": 6.79,
"tokens_per_sec_max": 6.8,
"calls": [
{
"type": "vram_snapshot",
"model_key": "ngl12-ctx64k",
"cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl12-ctx64k",
"ngl": 12,
"ctx": 65536,
"raw": "5397, 633, 6144"
},
{
"type": "ram_snapshot",
"model_key": "ngl12-ctx64k",
"cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl12-ctx64k",
"ngl": 12,
"ctx": 65536,
"raw": "15020,13605,28625"
},
{
"type": "llama_bench",
"model_key": "ngl12-ctx64k",
"cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl12-ctx64k",
"ngl": 12,
"ctx": 65536,
"raw": "llama-bench.exe : ggml_cuda_init: found 1 CUDA devices (Total VRAM: 6143 MiB):\r\nAt line:1 char:1\r\n+ & 'D:\\WeeyugaTools\\llama.cpp\\llama-bench.exe' -m 'D:\\WeeyugaModels\\ll ...\r\n+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\r\n + CategoryInfo : NotSpecified: (ggml_cuda_init:...RAM: 6143 MiB)::String) [], RemoteException\r\n + FullyQualifiedErrorId : NativeCommandError\r\n \r\n Device 0: NVIDIA GeForce GTX 1060, compute capability 6.1, VMM: yes, VRAM: 6143 MiB\r\nload_backend: loaded CUDA backend from D:\\WeeyugaTools\\llama.cpp\\ggml-cuda.dll\r\nload_backend: loaded RPC backend from D:\\WeeyugaTools\\llama.cpp\\ggml-rpc.dll\r\nload_backend: loaded CPU backend from D:\\WeeyugaTools\\llama.cpp\\ggml-cpu-haswell.dll\r\n| model | size | params | backend | ngl | test | t/s |\r\n| ------------------------------ | ---------: | ---------: | ---------- | --: | --------------: | -------------------: |\r\n| qwen3moe 30B.A3B IQ2_M - 2.7 bpw | 10.11 GiB | 30.53 B | CUDA | 12 | pp256 | 126.21 \u00b1 2.67 |\r\n| qwen3moe 30B.A3B IQ2_M - 2.7 bpw | 10.11 GiB | 30.53 B | CUDA | 12 | tg64 | 6.32 \u00b1 0.94 |\r\n\r\nbuild: 665abc609 (8951)\r\n\r"
},
{
"type": "call",
"cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl12-ctx64k",
"model_key": "ngl12-ctx64k",
"ngl": 12,
"ctx": 65536,
"prompt_id": "hello",
"run_idx": 0,
"phase": "cold",
"total_duration_ms": 25689,
"prompt_tokens": 9,
"completion_tokens": 145,
"tokens_per_sec": 5.644439254155475,
"finish_reason": "stop",
"reasoning_chars": 569,
"content_chars": 35,
"error": null
},
{
"type": "call",
"cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl12-ctx64k",
"model_key": "ngl12-ctx64k",
"ngl": 12,
"ctx": 65536,
"prompt_id": "hello",
"run_idx": 1,
"phase": "warm",
"total_duration_ms": 10035,
"prompt_tokens": 9,
"completion_tokens": 68,
"tokens_per_sec": 6.776283009466866,
"finish_reason": "stop",
"reasoning_chars": 223,
"content_chars": 37,
"error": null
},
{
"type": "call",
"cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl12-ctx64k",
"model_key": "ngl12-ctx64k",
"ngl": 12,
"ctx": 65536,
"prompt_id": "hello",
"run_idx": 2,
"phase": "warm",
"total_duration_ms": 15585,
"prompt_tokens": 9,
"completion_tokens": 106,
"tokens_per_sec": 6.801411613731151,
"finish_reason": "stop",
"reasoning_chars": 390,
"content_chars": 37,
"error": null
},
{
"type": "call",
"cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl12-ctx64k",
"model_key": "ngl12-ctx64k",
"ngl": 12,
"ctx": 65536,
"prompt_id": "hello",
"run_idx": 3,
"phase": "warm",
"total_duration_ms": 24321,
"prompt_tokens": 9,
"completion_tokens": 120,
"tokens_per_sec": 4.934007647711853,
"finish_reason": "stop",
"reasoning_chars": 474,
"content_chars": 37,
"error": null
},
{
"type": "call",
"cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl12-ctx64k",
"model_key": "ngl12-ctx64k",
"ngl": 12,
"ctx": 65536,
"prompt_id": "P-MEDIUM",
"run_idx": 0,
"phase": "cold",
"total_duration_ms": 110762,
"prompt_tokens": 22,
"completion_tokens": 586,
"tokens_per_sec": 5.29062313789928,
"finish_reason": "stop",
"reasoning_chars": 2148,
"content_chars": 748,
"error": null
},
{
"type": "call",
"cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl12-ctx64k",
"model_key": "ngl12-ctx64k",
"ngl": 12,
"ctx": 65536,
"prompt_id": "P-MEDIUM",
"run_idx": 1,
"phase": "warm",
"total_duration_ms": 76258,
"prompt_tokens": 22,
"completion_tokens": 389,
"tokens_per_sec": 5.101104146450209,
"finish_reason": "stop",
"reasoning_chars": 1507,
"content_chars": 477,
"error": null
},
{
"type": "call",
"cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl12-ctx64k",
"model_key": "ngl12-ctx64k",
"ngl": 12,
"ctx": 65536,
"prompt_id": "P-MEDIUM",
"run_idx": 2,
"phase": "warm",
"total_duration_ms": 70359,
"prompt_tokens": 22,
"completion_tokens": 376,
"tokens_per_sec": 5.344021376085505,
"finish_reason": "stop",
"reasoning_chars": 1406,
"content_chars": 422,
"error": null
},
{
"type": "call",
"cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl12-ctx64k",
"model_key": "ngl12-ctx64k",
"ngl": 12,
"ctx": 65536,
"prompt_id": "P-MEDIUM",
"run_idx": 3,
"phase": "warm",
"total_duration_ms": 81622,
"prompt_tokens": 22,
"completion_tokens": 445,
"tokens_per_sec": 5.451961480973267,
"finish_reason": "stop",
"reasoning_chars": 1685,
"content_chars": 499,
"error": null
},
{
"type": "call",
"cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl12-ctx64k",
"model_key": "ngl12-ctx64k",
"ngl": 12,
"ctx": 65536,
"prompt_id": "P-HARD",
"run_idx": 0,
"phase": "cold",
"total_duration_ms": 142035,
"prompt_tokens": 30,
"completion_tokens": 746,
"tokens_per_sec": 5.252226563875102,
"finish_reason": "stop",
"reasoning_chars": 1459,
"content_chars": 1651,
"error": null
},
{
"type": "call",
"cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl12-ctx64k",
"model_key": "ngl12-ctx64k",
"ngl": 12,
"ctx": 65536,
"prompt_id": "P-HARD",
"run_idx": 1,
"phase": "warm",
"total_duration_ms": 150027,
"prompt_tokens": 30,
"completion_tokens": 801,
"tokens_per_sec": 5.339038972984863,
"finish_reason": "stop",
"reasoning_chars": 1018,
"content_chars": 2354,
"error": null
},
{
"type": "call",
"cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl12-ctx64k",
"model_key": "ngl12-ctx64k",
"ngl": 12,
"ctx": 65536,
"prompt_id": "P-HARD",
"run_idx": 2,
"phase": "warm",
"total_duration_ms": 162391,
"prompt_tokens": 30,
"completion_tokens": 816,
"tokens_per_sec": 5.024909015893738,
"finish_reason": "stop",
"reasoning_chars": 1663,
"content_chars": 1826,
"error": null
},
{
"type": "call",
"cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl12-ctx64k",
"model_key": "ngl12-ctx64k",
"ngl": 12,
"ctx": 65536,
"prompt_id": "P-HARD",
"run_idx": 3,
"phase": "warm",
"total_duration_ms": 157797,
"prompt_tokens": 30,
"completion_tokens": 763,
"tokens_per_sec": 4.835326400375165,
"finish_reason": "stop",
"reasoning_chars": 1782,
"content_chars": 1590,
"error": null
}
]
},
{
"cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl12-ctx96k",
"n_calls": 15,
"n_errors": 0,
"duration_ms_mean": 71805.83,
"duration_ms_p50": 58541.0,
"duration_ms_p95": 140580.5,
"tokens_per_sec_mean": 5.79,
"tokens_per_sec_p50": 5.75,
"tokens_per_sec_p95": 6.47,
"tokens_per_sec_max": 6.7,
"calls": [
{
"type": "vram_snapshot",
"model_key": "ngl12-ctx96k",
"cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl12-ctx96k",
"ngl": 12,
"ctx": 98304,
"raw": "5870, 160, 6144"
},
{
"type": "ram_snapshot",
"model_key": "ngl12-ctx96k",
"cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl12-ctx96k",
"ngl": 12,
"ctx": 98304,
"raw": "17590,11035,28625"
},
{
"type": "llama_bench",
"model_key": "ngl12-ctx96k",
"cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl12-ctx96k",
"ngl": 12,
"ctx": 98304,
"raw": "llama-bench.exe : ggml_cuda_init: found 1 CUDA devices (Total VRAM: 6143 MiB):\r\nAt line:1 char:1\r\n+ & 'D:\\WeeyugaTools\\llama.cpp\\llama-bench.exe' -m 'D:\\WeeyugaModels\\ll ...\r\n+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\r\n + CategoryInfo : NotSpecified: (ggml_cuda_init:...RAM: 6143 MiB)::String) [], RemoteException\r\n + FullyQualifiedErrorId : NativeCommandError\r\n \r\n Device 0: NVIDIA GeForce GTX 1060, compute capability 6.1, VMM: yes, VRAM: 6143 MiB\r\nload_backend: loaded CUDA backend from D:\\WeeyugaTools\\llama.cpp\\ggml-cuda.dll\r\nload_backend: loaded RPC backend from D:\\WeeyugaTools\\llama.cpp\\ggml-rpc.dll\r\nload_backend: loaded CPU backend from D:\\WeeyugaTools\\llama.cpp\\ggml-cpu-haswell.dll\r\n| model | size | params | backend | ngl | test | t/s |\r\n| ------------------------------ | ---------: | ---------: | ---------- | --: | --------------: | -------------------: |\r\n| qwen3moe 30B.A3B IQ2_M - 2.7 bpw | 10.11 GiB | 30.53 B | CUDA | 12 | pp256 | 126.69 \u00b1 4.44 |\r\n| qwen3moe 30B.A3B IQ2_M - 2.7 bpw | 10.11 GiB | 30.53 B | CUDA | 12 | tg64 | 6.01 \u00b1 0.94 |\r\n\r\nbuild: 665abc609 (8951)\r\n\r"
},
{
"type": "call",
"cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl12-ctx96k",
"model_key": "ngl12-ctx96k",
"ngl": 12,
"ctx": 98304,
"prompt_id": "hello",
"run_idx": 0,
"phase": "cold",
"total_duration_ms": 13701,
"prompt_tokens": 9,
"completion_tokens": 79,
"tokens_per_sec": 5.766002481570688,
"finish_reason": "stop",
"reasoning_chars": 280,
"content_chars": 37,
"error": null
},
{
"type": "call",
"cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl12-ctx96k",
"model_key": "ngl12-ctx96k",
"ngl": 12,
"ctx": 98304,
"prompt_id": "hello",
"run_idx": 1,
"phase": "warm",
"total_duration_ms": 33047,
"prompt_tokens": 9,
"completion_tokens": 163,
"tokens_per_sec": 4.932369050140709,
"finish_reason": "stop",
"reasoning_chars": 618,
"content_chars": 37,
"error": null
},
{
"type": "call",
"cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl12-ctx96k",
"model_key": "ngl12-ctx96k",
"ngl": 12,
"ctx": 98304,
"prompt_id": "hello",
"run_idx": 2,
"phase": "warm",
"total_duration_ms": 15830,
"prompt_tokens": 9,
"completion_tokens": 106,
"tokens_per_sec": 6.69614655716993,
"finish_reason": "stop",
"reasoning_chars": 400,
"content_chars": 37,
"error": null
},
{
"type": "call",
"cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl12-ctx96k",
"model_key": "ngl12-ctx96k",
"ngl": 12,
"ctx": 98304,
"prompt_id": "hello",
"run_idx": 3,
"phase": "warm",
"total_duration_ms": 20392,
"prompt_tokens": 9,
"completion_tokens": 120,
"tokens_per_sec": 5.884660651235779,
"finish_reason": "stop",
"reasoning_chars": 436,
"content_chars": 37,
"error": null
},
{
"type": "call",
"cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl12-ctx96k",
"model_key": "ngl12-ctx96k",
"ngl": 12,
"ctx": 98304,
"prompt_id": "P-MEDIUM",
"run_idx": 0,
"phase": "cold",
"total_duration_ms": 45721,
"prompt_tokens": 22,
"completion_tokens": 287,
"tokens_per_sec": 6.277203035804117,
"finish_reason": "stop",
"reasoning_chars": 1047,
"content_chars": 398,
"error": null
},
{
"type": "call",
"cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl12-ctx96k",
"model_key": "ngl12-ctx96k",
"ngl": 12,
"ctx": 98304,
"prompt_id": "P-MEDIUM",
"run_idx": 1,
"phase": "warm",
"total_duration_ms": 59728,
"prompt_tokens": 22,
"completion_tokens": 333,
"tokens_per_sec": 5.575274578087329,
"finish_reason": "stop",
"reasoning_chars": 1152,
"content_chars": 503,
"error": null
},
{
"type": "call",
"cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl12-ctx96k",
"model_key": "ngl12-ctx96k",
"ngl": 12,
"ctx": 98304,
"prompt_id": "P-MEDIUM",
"run_idx": 2,
"phase": "warm",
"total_duration_ms": 57354,
"prompt_tokens": 22,
"completion_tokens": 325,
"tokens_per_sec": 5.666562053213377,
"finish_reason": "stop",
"reasoning_chars": 1139,
"content_chars": 429,
"error": null
},
{
"type": "call",
"cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl12-ctx96k",
"model_key": "ngl12-ctx96k",
"ngl": 12,
"ctx": 98304,
"prompt_id": "P-MEDIUM",
"run_idx": 3,
"phase": "warm",
"total_duration_ms": 104255,
"prompt_tokens": 22,
"completion_tokens": 610,
"tokens_per_sec": 5.85103831950506,
"finish_reason": "stop",
"reasoning_chars": 2408,
"content_chars": 561,
"error": null
},
{
"type": "call",
"cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl12-ctx96k",
"model_key": "ngl12-ctx96k",
"ngl": 12,
"ctx": 98304,
"prompt_id": "P-HARD",
"run_idx": 0,
"phase": "cold",
"total_duration_ms": 137543,
"prompt_tokens": 30,
"completion_tokens": 788,
"tokens_per_sec": 5.729117439637059,
"finish_reason": "stop",
"reasoning_chars": 1966,
"content_chars": 1357,
"error": null
},
{
"type": "call",
"cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl12-ctx96k",
"model_key": "ngl12-ctx96k",
"ngl": 12,
"ctx": 98304,
"prompt_id": "P-HARD",
"run_idx": 1,
"phase": "warm",
"total_duration_ms": 144293,
"prompt_tokens": 30,
"completion_tokens": 796,
"tokens_per_sec": 5.516553124545196,
"finish_reason": "stop",
"reasoning_chars": 1825,
"content_chars": 1536,
"error": null
},
{
"type": "call",
"cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl12-ctx96k",
"model_key": "ngl12-ctx96k",
"ngl": 12,
"ctx": 98304,
"prompt_id": "P-HARD",
"run_idx": 2,
"phase": "warm",
"total_duration_ms": 101864,
"prompt_tokens": 30,
"completion_tokens": 581,
"tokens_per_sec": 5.703683342495877,
"finish_reason": "stop",
"reasoning_chars": 1282,
"content_chars": 1232,
"error": null
},
{
"type": "call",
"cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl12-ctx96k",
"model_key": "ngl12-ctx96k",
"ngl": 12,
"ctx": 98304,
"prompt_id": "P-HARD",
"run_idx": 3,
"phase": "warm",
"total_duration_ms": 127942,
"prompt_tokens": 30,
"completion_tokens": 754,
"tokens_per_sec": 5.893295399477889,
"finish_reason": "stop",
"reasoning_chars": 1961,
"content_chars": 1261,
"error": null
}
]
},
{
"cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl12-ctx131k",
"n_calls": 15,
"n_errors": 0,
"duration_ms_mean": 96406.17,
"duration_ms_p50": 105842.0,
"duration_ms_p95": 154492.35,
"tokens_per_sec_mean": 5.01,
"tokens_per_sec_p50": 5.13,
"tokens_per_sec_p95": 5.97,
"tokens_per_sec_max": 6.29,
"calls": [
{
"type": "vram_snapshot",
"model_key": "ngl12-ctx131k",
"cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl12-ctx131k",
"ngl": 12,
"ctx": 131072,
"raw": "5814, 216, 6144"
},
{
"type": "ram_snapshot",
"model_key": "ngl12-ctx131k",
"cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl12-ctx131k",
"ngl": 12,
"ctx": 131072,
"raw": "20818,7806,28625"
},
{
"type": "llama_bench",
"model_key": "ngl12-ctx131k",
"cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl12-ctx131k",
"ngl": 12,
"ctx": 131072,
"raw": "llama-bench.exe : ggml_cuda_init: found 1 CUDA devices (Total VRAM: 6143 MiB):\r\nAt line:1 char:1\r\n+ & 'D:\\WeeyugaTools\\llama.cpp\\llama-bench.exe' -m 'D:\\WeeyugaModels\\ll ...\r\n+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\r\n + CategoryInfo : NotSpecified: (ggml_cuda_init:...RAM: 6143 MiB)::String) [], RemoteException\r\n + FullyQualifiedErrorId : NativeCommandError\r\n \r\n Device 0: NVIDIA GeForce GTX 1060, compute capability 6.1, VMM: yes, VRAM: 6143 MiB\r\nload_backend: loaded CUDA backend from D:\\WeeyugaTools\\llama.cpp\\ggml-cuda.dll\r\nload_backend: loaded RPC backend from D:\\WeeyugaTools\\llama.cpp\\ggml-rpc.dll\r\nload_backend: loaded CPU backend from D:\\WeeyugaTools\\llama.cpp\\ggml-cpu-haswell.dll\r\n| model | size | params | backend | ngl | test | t/s |\r\n| ------------------------------ | ---------: | ---------: | ---------- | --: | --------------: | -------------------: |\r\n| qwen3moe 30B.A3B IQ2_M - 2.7 bpw | 10.11 GiB | 30.53 B | CUDA | 12 | pp256 | 26.49 \u00b1 11.80 |\r\n| qwen3moe 30B.A3B IQ2_M - 2.7 bpw | 10.11 GiB | 30.53 B | CUDA | 12 | tg64 | 4.94 \u00b1 0.78 |\r\n\r\nbuild: 665abc609 (8951)\r\n\r"
},
{
"type": "call",
"cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl12-ctx131k",
"model_key": "ngl12-ctx131k",
"ngl": 12,
"ctx": 131072,
"prompt_id": "hello",
"run_idx": 0,
"phase": "cold",
"total_duration_ms": 59744,
"prompt_tokens": 9,
"completion_tokens": 135,
"tokens_per_sec": 2.259641135511516,
"finish_reason": "stop",
"reasoning_chars": 504,
"content_chars": 37,
"error": null
},
{
"type": "call",
"cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl12-ctx131k",
"model_key": "ngl12-ctx131k",
"ngl": 12,
"ctx": 131072,
"prompt_id": "hello",
"run_idx": 1,
"phase": "warm",
"total_duration_ms": 10810,
"prompt_tokens": 9,
"completion_tokens": 68,
"tokens_per_sec": 6.290471785383904,
"finish_reason": "stop",
"reasoning_chars": 224,
"content_chars": 37,
"error": null
},
{
"type": "call",
"cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl12-ctx131k",
"model_key": "ngl12-ctx131k",
"ngl": 12,
"ctx": 131072,
"prompt_id": "hello",
"run_idx": 2,
"phase": "warm",
"total_duration_ms": 20002,
"prompt_tokens": 9,
"completion_tokens": 101,
"tokens_per_sec": 5.049495050494951,
"finish_reason": "stop",
"reasoning_chars": 384,
"content_chars": 37,
"error": null
},
{
"type": "call",
"cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl12-ctx131k",
"model_key": "ngl12-ctx131k",
"ngl": 12,
"ctx": 131072,
"prompt_id": "hello",
"run_idx": 3,
"phase": "warm",
"total_duration_ms": 43743,
"prompt_tokens": 9,
"completion_tokens": 233,
"tokens_per_sec": 5.326566536360103,
"finish_reason": "stop",
"reasoning_chars": 910,
"content_chars": 37,
"error": null
},
{
"type": "call",
"cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl12-ctx131k",
"model_key": "ngl12-ctx131k",
"ngl": 12,
"ctx": 131072,
"prompt_id": "P-MEDIUM",
"run_idx": 0,
"phase": "cold",
"total_duration_ms": 118125,
"prompt_tokens": 22,
"completion_tokens": 586,
"tokens_per_sec": 4.960846560846561,
"finish_reason": "stop",
"reasoning_chars": 2415,
"content_chars": 435,
"error": null
},
{
"type": "call",
"cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl12-ctx131k",
"model_key": "ngl12-ctx131k",
"ngl": 12,
"ctx": 131072,
"prompt_id": "P-MEDIUM",
"run_idx": 1,
"phase": "warm",
"total_duration_ms": 116198,
"prompt_tokens": 22,
"completion_tokens": 598,
"tokens_per_sec": 5.146388061756657,
"finish_reason": "stop",
"reasoning_chars": 2059,
"content_chars": 865,
"error": null
},
{
"type": "call",
"cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl12-ctx131k",
"model_key": "ngl12-ctx131k",
"ngl": 12,
"ctx": 131072,
"prompt_id": "P-MEDIUM",
"run_idx": 2,
"phase": "warm",
"total_duration_ms": 95486,
"prompt_tokens": 22,
"completion_tokens": 471,
"tokens_per_sec": 4.932660285277422,
"finish_reason": "stop",
"reasoning_chars": 1844,
"content_chars": 502,
"error": null
},
{
"type": "call",
"cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl12-ctx131k",
"model_key": "ngl12-ctx131k",
"ngl": 12,
"ctx": 131072,
"prompt_id": "P-MEDIUM",
"run_idx": 3,
"phase": "warm",
"total_duration_ms": 84370,
"prompt_tokens": 22,
"completion_tokens": 482,
"tokens_per_sec": 5.71293113665995,
"finish_reason": "stop",
"reasoning_chars": 1858,
"content_chars": 488,
"error": null
},
{
"type": "call",
"cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl12-ctx131k",
"model_key": "ngl12-ctx131k",
"ngl": 12,
"ctx": 131072,
"prompt_id": "P-HARD",
"run_idx": 0,
"phase": "cold",
"total_duration_ms": 150702,
"prompt_tokens": 30,
"completion_tokens": 772,
"tokens_per_sec": 5.122692465926132,
"finish_reason": "stop",
"reasoning_chars": 1228,
"content_chars": 2007,
"error": null
},
{
"type": "call",
"cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl12-ctx131k",
"model_key": "ngl12-ctx131k",
"ngl": 12,
"ctx": 131072,
"prompt_id": "P-HARD",
"run_idx": 1,
"phase": "warm",
"total_duration_ms": 148897,
"prompt_tokens": 30,
"completion_tokens": 764,
"tokens_per_sec": 5.131063755481978,
"finish_reason": "stop",
"reasoning_chars": 1083,
"content_chars": 2199,
"error": null
},
{
"type": "call",
"cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl12-ctx131k",
"model_key": "ngl12-ctx131k",
"ngl": 12,
"ctx": 131072,
"prompt_id": "P-HARD",
"run_idx": 2,
"phase": "warm",
"total_duration_ms": 159125,
"prompt_tokens": 30,
"completion_tokens": 762,
"tokens_per_sec": 4.788688138256088,
"finish_reason": "stop",
"reasoning_chars": 1767,
"content_chars": 1578,
"error": null
},
{
"type": "call",
"cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl12-ctx131k",
"model_key": "ngl12-ctx131k",
"ngl": 12,
"ctx": 131072,
"prompt_id": "P-HARD",
"run_idx": 3,
"phase": "warm",
"total_duration_ms": 149672,
"prompt_tokens": 30,
"completion_tokens": 805,
"tokens_per_sec": 5.378427494788604,
"finish_reason": "stop",
"reasoning_chars": 1279,
"content_chars": 2259,
"error": null
}
]
},
{
"cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl24-ctx48k",
"n_calls": 15,
"n_errors": 0,
"duration_ms_mean": 65539.5,
"duration_ms_p50": 64606.5,
"duration_ms_p95": 118069.75,
"tokens_per_sec_mean": 6.6,
"tokens_per_sec_p50": 6.59,
"tokens_per_sec_p95": 6.89,
"tokens_per_sec_max": 6.91,
"calls": [
{
"type": "vram_snapshot",
"model_key": "ngl24-ctx48k",
"cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl24-ctx48k",
"ngl": 24,
"ctx": 49152,
"raw": "5826, 204, 6144"
},
{
"type": "ram_snapshot",
"model_key": "ngl24-ctx48k",
"cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl24-ctx48k",
"ngl": 24,
"ctx": 49152,
"raw": "17056,11569,28625"
},
{
"type": "llama_bench",
"model_key": "ngl24-ctx48k",
"cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl24-ctx48k",
"ngl": 24,
"ctx": 49152,
"raw": "llama-bench.exe : ggml_cuda_init: found 1 CUDA devices (Total VRAM: 6143 MiB):\r\nAt line:1 char:1\r\n+ & 'D:\\WeeyugaTools\\llama.cpp\\llama-bench.exe' -m 'D:\\WeeyugaModels\\ll ...\r\n+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\r\n + CategoryInfo : NotSpecified: (ggml_cuda_init:...RAM: 6143 MiB)::String) [], RemoteException\r\n + FullyQualifiedErrorId : NativeCommandError\r\n \r\n Device 0: NVIDIA GeForce GTX 1060, compute capability 6.1, VMM: yes, VRAM: 6143 MiB\r\nload_backend: loaded CUDA backend from D:\\WeeyugaTools\\llama.cpp\\ggml-cuda.dll\r\nload_backend: loaded RPC backend from D:\\WeeyugaTools\\llama.cpp\\ggml-rpc.dll\r\nload_backend: loaded CPU backend from D:\\WeeyugaTools\\llama.cpp\\ggml-cpu-haswell.dll\r\n| model | size | params | backend | ngl | test | t/s |\r\n| ------------------------------ | ---------: | ---------: | ---------- | --: | --------------: | -------------------: |\r\n| qwen3moe 30B.A3B IQ2_M - 2.7 bpw | 10.11 GiB | 30.53 B | CUDA | 24 | pp256 | 48.01 \u00b1 0.46 |\r\n| qwen3moe 30B.A3B IQ2_M - 2.7 bpw | 10.11 GiB | 30.53 B | CUDA | 24 | tg64 | 6.50 \u00b1 0.03 |\r\n\r\nbuild: 665abc609 (8951)\r\n\r"
},
{
"type": "call",
"cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl24-ctx48k",
"model_key": "ngl24-ctx48k",
"ngl": 24,
"ctx": 49152,
"prompt_id": "hello",
"run_idx": 0,
"phase": "cold",
"total_duration_ms": 19864,
"prompt_tokens": 9,
"completion_tokens": 123,
"tokens_per_sec": 6.192106322996375,
"finish_reason": "stop",
"reasoning_chars": 467,
"content_chars": 37,
"error": null
},
{
"type": "call",
"cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl24-ctx48k",
"model_key": "ngl24-ctx48k",
"ngl": 24,
"ctx": 49152,
"prompt_id": "hello",
"run_idx": 1,
"phase": "warm",
"total_duration_ms": 14734,
"prompt_tokens": 9,
"completion_tokens": 101,
"tokens_per_sec": 6.854893443735578,
"finish_reason": "stop",
"reasoning_chars": 370,
"content_chars": 37,
"error": null
},
{
"type": "call",
"cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl24-ctx48k",
"model_key": "ngl24-ctx48k",
"ngl": 24,
"ctx": 49152,
"prompt_id": "hello",
"run_idx": 2,
"phase": "warm",
"total_duration_ms": 15636,
"prompt_tokens": 9,
"completion_tokens": 108,
"tokens_per_sec": 6.907137375287798,
"finish_reason": "stop",
"reasoning_chars": 399,
"content_chars": 37,
"error": null
},
{
"type": "call",
"cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl24-ctx48k",
"model_key": "ngl24-ctx48k",
"ngl": 24,
"ctx": 49152,
"prompt_id": "hello",
"run_idx": 3,
"phase": "warm",
"total_duration_ms": 14990,
"prompt_tokens": 9,
"completion_tokens": 103,
"tokens_per_sec": 6.871247498332221,
"finish_reason": "stop",
"reasoning_chars": 380,
"content_chars": 37,
"error": null
},
{
"type": "call",
"cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl24-ctx48k",
"model_key": "ngl24-ctx48k",
"ngl": 24,
"ctx": 49152,
"prompt_id": "P-MEDIUM",
"run_idx": 0,
"phase": "cold",
"total_duration_ms": 58574,
"prompt_tokens": 22,
"completion_tokens": 385,
"tokens_per_sec": 6.572882166148803,
"finish_reason": "stop",
"reasoning_chars": 1365,
"content_chars": 480,
"error": null
},
{
"type": "call",
"cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl24-ctx48k",
"model_key": "ngl24-ctx48k",
"ngl": 24,
"ctx": 49152,
"prompt_id": "P-MEDIUM",
"run_idx": 1,
"phase": "warm",
"total_duration_ms": 89924,
"prompt_tokens": 22,
"completion_tokens": 594,
"tokens_per_sec": 6.60557804368133,
"finish_reason": "stop",
"reasoning_chars": 2406,
"content_chars": 484,
"error": null
},
{
"type": "call",
"cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl24-ctx48k",
"model_key": "ngl24-ctx48k",
"ngl": 24,
"ctx": 49152,
"prompt_id": "P-MEDIUM",
"run_idx": 2,
"phase": "warm",
"total_duration_ms": 92501,
"prompt_tokens": 22,
"completion_tokens": 606,
"tokens_per_sec": 6.551280526697008,
"finish_reason": "stop",
"reasoning_chars": 2308,
"content_chars": 671,
"error": null
},
{
"type": "call",
"cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl24-ctx48k",
"model_key": "ngl24-ctx48k",
"ngl": 24,
"ctx": 49152,
"prompt_id": "P-MEDIUM",
"run_idx": 3,
"phase": "warm",
"total_duration_ms": 59896,
"prompt_tokens": 22,
"completion_tokens": 403,
"tokens_per_sec": 6.728329103779885,
"finish_reason": "stop",
"reasoning_chars": 1446,
"content_chars": 537,
"error": null
},
{
"type": "call",
"cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl24-ctx48k",
"model_key": "ngl24-ctx48k",
"ngl": 24,
"ctx": 49152,
"prompt_id": "P-HARD",
"run_idx": 0,
"phase": "cold",
"total_duration_ms": 121576,
"prompt_tokens": 30,
"completion_tokens": 768,
"tokens_per_sec": 6.317036257156018,
"finish_reason": "stop",
"reasoning_chars": 1226,
"content_chars": 1991,
"error": null
},
{
"type": "call",
"cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl24-ctx48k",
"model_key": "ngl24-ctx48k",
"ngl": 24,
"ctx": 49152,
"prompt_id": "P-HARD",
"run_idx": 1,
"phase": "warm",
"total_duration_ms": 69317,
"prompt_tokens": 30,
"completion_tokens": 463,
"tokens_per_sec": 6.6794581415814305,
"finish_reason": "stop",
"reasoning_chars": 1048,
"content_chars": 963,
"error": null
},
{
"type": "call",
"cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl24-ctx48k",
"model_key": "ngl24-ctx48k",
"ngl": 24,
"ctx": 49152,
"prompt_id": "P-HARD",
"run_idx": 2,
"phase": "warm",
"total_duration_ms": 114261,
"prompt_tokens": 30,
"completion_tokens": 741,
"tokens_per_sec": 6.485152414209573,
"finish_reason": "stop",
"reasoning_chars": 1246,
"content_chars": 1964,
"error": null
},
{
"type": "call",
"cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl24-ctx48k",
"model_key": "ngl24-ctx48k",
"ngl": 24,
"ctx": 49152,
"prompt_id": "P-HARD",
"run_idx": 3,
"phase": "warm",
"total_duration_ms": 115201,
"prompt_tokens": 30,
"completion_tokens": 746,
"tokens_per_sec": 6.475638232307012,
"finish_reason": "stop",
"reasoning_chars": 1171,
"content_chars": 1994,
"error": null
}
]
},
{
"cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl24-ctx64k",
"n_calls": 15,
"n_errors": 0,
"duration_ms_mean": 71267.58,
"duration_ms_p50": 80265.5,
"duration_ms_p95": 128231.3,
"tokens_per_sec_mean": 6.41,
"tokens_per_sec_p50": 6.39,
"tokens_per_sec_p95": 6.88,
"tokens_per_sec_max": 6.9,
"calls": [
{
"type": "vram_snapshot",
"model_key": "ngl24-ctx64k",
"cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl24-ctx64k",
"ngl": 24,
"ctx": 65536,
"raw": "5780, 250, 6144"
},
{
"type": "ram_snapshot",
"model_key": "ngl24-ctx64k",
"cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl24-ctx64k",
"ngl": 24,
"ctx": 65536,
"raw": "18513,10111,28625"
},
{
"type": "llama_bench",
"model_key": "ngl24-ctx64k",
"cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl24-ctx64k",
"ngl": 24,
"ctx": 65536,
"raw": "llama-bench.exe : ggml_cuda_init: found 1 CUDA devices (Total VRAM: 6143 MiB):\r\nAt line:1 char:1\r\n+ & 'D:\\WeeyugaTools\\llama.cpp\\llama-bench.exe' -m 'D:\\WeeyugaModels\\ll ...\r\n+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\r\n + CategoryInfo : NotSpecified: (ggml_cuda_init:...RAM: 6143 MiB)::String) [], RemoteException\r\n + FullyQualifiedErrorId : NativeCommandError\r\n \r\n Device 0: NVIDIA GeForce GTX 1060, compute capability 6.1, VMM: yes, VRAM: 6143 MiB\r\nload_backend: loaded CUDA backend from D:\\WeeyugaTools\\llama.cpp\\ggml-cuda.dll\r\nload_backend: loaded RPC backend from D:\\WeeyugaTools\\llama.cpp\\ggml-rpc.dll\r\nload_backend: loaded CPU backend from D:\\WeeyugaTools\\llama.cpp\\ggml-cpu-haswell.dll\r\n| model | size | params | backend | ngl | test | t/s |\r\n| ------------------------------ | ---------: | ---------: | ---------- | --: | --------------: | -------------------: |\r\n| qwen3moe 30B.A3B IQ2_M - 2.7 bpw | 10.11 GiB | 30.53 B | CUDA | 24 | pp256 | 27.92 \u00b1 3.61 |\r\n| qwen3moe 30B.A3B IQ2_M - 2.7 bpw | 10.11 GiB | 30.53 B | CUDA | 24 | tg64 | 5.63 \u00b1 0.39 |\r\n\r\nbuild: 665abc609 (8951)\r\n\r"
},
{
"type": "call",
"cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl24-ctx64k",
"model_key": "ngl24-ctx64k",
"ngl": 24,
"ctx": 65536,
"prompt_id": "hello",
"run_idx": 0,
"phase": "cold",
"total_duration_ms": 18535,
"prompt_tokens": 9,
"completion_tokens": 103,
"tokens_per_sec": 5.557054221742649,
"finish_reason": "stop",
"reasoning_chars": 380,
"content_chars": 37,
"error": null
},
{
"type": "call",
"cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl24-ctx64k",
"model_key": "ngl24-ctx64k",
"ngl": 24,
"ctx": 65536,
"prompt_id": "hello",
"run_idx": 1,
"phase": "warm",
"total_duration_ms": 18478,
"prompt_tokens": 9,
"completion_tokens": 127,
"tokens_per_sec": 6.873038207598224,
"finish_reason": "stop",
"reasoning_chars": 495,
"content_chars": 37,
"error": null
},
{
"type": "call",
"cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl24-ctx64k",
"model_key": "ngl24-ctx64k",
"ngl": 24,
"ctx": 65536,
"prompt_id": "hello",
"run_idx": 2,
"phase": "warm",
"total_duration_ms": 14692,
"prompt_tokens": 9,
"completion_tokens": 100,
"tokens_per_sec": 6.806425265450585,
"finish_reason": "stop",
"reasoning_chars": 361,
"content_chars": 37,
"error": null
},
{
"type": "call",
"cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl24-ctx64k",
"model_key": "ngl24-ctx64k",
"ngl": 24,
"ctx": 65536,
"prompt_id": "hello",
"run_idx": 3,
"phase": "warm",
"total_duration_ms": 14351,
"prompt_tokens": 9,
"completion_tokens": 99,
"tokens_per_sec": 6.898473973939098,
"finish_reason": "stop",
"reasoning_chars": 377,
"content_chars": 37,
"error": null
},
{
"type": "call",
"cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl24-ctx64k",
"model_key": "ngl24-ctx64k",
"ngl": 24,
"ctx": 65536,
"prompt_id": "P-MEDIUM",
"run_idx": 0,
"phase": "cold",
"total_duration_ms": 99471,
"prompt_tokens": 22,
"completion_tokens": 621,
"tokens_per_sec": 6.2430256054528455,
"finish_reason": "stop",
"reasoning_chars": 0,
"content_chars": 2991,
"error": null
},
{
"type": "call",
"cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl24-ctx64k",
"model_key": "ngl24-ctx64k",
"ngl": 24,
"ctx": 65536,
"prompt_id": "P-MEDIUM",
"run_idx": 1,
"phase": "warm",
"total_duration_ms": 56166,
"prompt_tokens": 22,
"completion_tokens": 362,
"tokens_per_sec": 6.445180358223837,
"finish_reason": "stop",
"reasoning_chars": 1274,
"content_chars": 513,
"error": null
},
{
"type": "call",
"cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl24-ctx64k",
"model_key": "ngl24-ctx64k",
"ngl": 24,
"ctx": 65536,
"prompt_id": "P-MEDIUM",
"run_idx": 2,
"phase": "warm",
"total_duration_ms": 92504,
"prompt_tokens": 22,
"completion_tokens": 596,
"tokens_per_sec": 6.442964628556602,
"finish_reason": "stop",
"reasoning_chars": 2393,
"content_chars": 481,
"error": null
},
{
"type": "call",
"cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl24-ctx64k",
"model_key": "ngl24-ctx64k",
"ngl": 24,
"ctx": 65536,
"prompt_id": "P-MEDIUM",
"run_idx": 3,
"phase": "warm",
"total_duration_ms": 69604,
"prompt_tokens": 22,
"completion_tokens": 459,
"tokens_per_sec": 6.5944485949083385,
"finish_reason": "stop",
"reasoning_chars": 1810,
"content_chars": 507,
"error": null
},
{
"type": "call",
"cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl24-ctx64k",
"model_key": "ngl24-ctx64k",
"ngl": 24,
"ctx": 65536,
"prompt_id": "P-HARD",
"run_idx": 0,
"phase": "cold",
"total_duration_ms": 123427,
"prompt_tokens": 30,
"completion_tokens": 761,
"tokens_per_sec": 6.165587756325601,
"finish_reason": "stop",
"reasoning_chars": 1221,
"content_chars": 1919,
"error": null
},
{
"type": "call",
"cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl24-ctx64k",
"model_key": "ngl24-ctx64k",
"ngl": 24,
"ctx": 65536,
"prompt_id": "P-HARD",
"run_idx": 1,
"phase": "warm",
"total_duration_ms": 90927,
"prompt_tokens": 30,
"completion_tokens": 577,
"tokens_per_sec": 6.345749887272207,
"finish_reason": "stop",
"reasoning_chars": 1191,
"content_chars": 1282,
"error": null
},
{
"type": "call",
"cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl24-ctx64k",
"model_key": "ngl24-ctx64k",
"ngl": 24,
"ctx": 65536,
"prompt_id": "P-HARD",
"run_idx": 2,
"phase": "warm",
"total_duration_ms": 131495,
"prompt_tokens": 30,
"completion_tokens": 831,
"tokens_per_sec": 6.319631925168257,
"finish_reason": "stop",
"reasoning_chars": 2262,
"content_chars": 1303,
"error": null
},
{
"type": "call",
"cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl24-ctx64k",
"model_key": "ngl24-ctx64k",
"ngl": 24,
"ctx": 65536,
"prompt_id": "P-HARD",
"run_idx": 3,
"phase": "warm",
"total_duration_ms": 125561,
"prompt_tokens": 30,
"completion_tokens": 777,
"tokens_per_sec": 6.188227236164095,
"finish_reason": "stop",
"reasoning_chars": 998,
"content_chars": 2187,
"error": null
}
]
},
{
"cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl24-ctx96k",
"n_calls": 15,
"n_errors": 0,
"duration_ms_mean": 69299.5,
"duration_ms_p50": 68355.0,
"duration_ms_p95": 124990.95,
"tokens_per_sec_mean": 6.08,
"tokens_per_sec_p50": 6.44,
"tokens_per_sec_p95": 6.68,
"tokens_per_sec_max": 6.69,
"calls": [
{
"type": "vram_snapshot",
"model_key": "ngl24-ctx96k",
"cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl24-ctx96k",
"ngl": 24,
"ctx": 98304,
"raw": "5750, 280, 6144"
},
{
"type": "ram_snapshot",
"model_key": "ngl24-ctx96k",
"cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl24-ctx96k",
"ngl": 24,
"ctx": 98304,
"raw": "21927,6698,28625"
},
{
"type": "llama_bench",
"model_key": "ngl24-ctx96k",
"cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl24-ctx96k",
"ngl": 24,
"ctx": 98304,
"raw": "llama-bench.exe : ggml_cuda_init: found 1 CUDA devices (Total VRAM: 6143 MiB):\r\nAt line:1 char:1\r\n+ & 'D:\\WeeyugaTools\\llama.cpp\\llama-bench.exe' -m 'D:\\WeeyugaModels\\ll ...\r\n+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\r\n + CategoryInfo : NotSpecified: (ggml_cuda_init:...RAM: 6143 MiB)::String) [], RemoteException\r\n + FullyQualifiedErrorId : NativeCommandError\r\n \r\n Device 0: NVIDIA GeForce GTX 1060, compute capability 6.1, VMM: yes, VRAM: 6143 MiB\r\nload_backend: loaded CUDA backend from D:\\WeeyugaTools\\llama.cpp\\ggml-cuda.dll\r\nload_backend: loaded RPC backend from D:\\WeeyugaTools\\llama.cpp\\ggml-rpc.dll\r\nload_backend: loaded CPU backend from D:\\WeeyugaTools\\llama.cpp\\ggml-cpu-haswell.dll\r\n| model | size | params | backend | ngl | test | t/s |\r\n| ------------------------------ | ---------: | ---------: | ---------- | --: | --------------: | -------------------: |\r\n| qwen3moe 30B.A3B IQ2_M - 2.7 bpw | 10.11 GiB | 30.53 B | CUDA | 24 | pp256 | 15.72 \u00b1 3.11 |\r\n| qwen3moe 30B.A3B IQ2_M - 2.7 bpw | 10.11 GiB | 30.53 B | CUDA | 24 | tg64 | 5.22 \u00b1 0.53 |\r\n\r\nbuild: 665abc609 (8951)\r\n\r"
},
{
"type": "call",
"cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl24-ctx96k",
"model_key": "ngl24-ctx96k",
"ngl": 24,
"ctx": 98304,
"prompt_id": "hello",
"run_idx": 0,
"phase": "cold",
"total_duration_ms": 35465,
"prompt_tokens": 9,
"completion_tokens": 96,
"tokens_per_sec": 2.7068941209643307,
"finish_reason": "stop",
"reasoning_chars": 365,
"content_chars": 37,
"error": null
},
{
"type": "call",
"cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl24-ctx96k",
"model_key": "ngl24-ctx96k",
"ngl": 24,
"ctx": 98304,
"prompt_id": "hello",
"run_idx": 1,
"phase": "warm",
"total_duration_ms": 27111,
"prompt_tokens": 9,
"completion_tokens": 167,
"tokens_per_sec": 6.1598613109070115,
"finish_reason": "stop",
"reasoning_chars": 547,
"content_chars": 152,
"error": null
},
{
"type": "call",
"cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl24-ctx96k",
"model_key": "ngl24-ctx96k",
"ngl": 24,
"ctx": 98304,
"prompt_id": "hello",
"run_idx": 2,
"phase": "warm",
"total_duration_ms": 24742,
"prompt_tokens": 9,
"completion_tokens": 165,
"tokens_per_sec": 6.668822245574327,
"finish_reason": "stop",
"reasoning_chars": 571,
"content_chars": 116,
"error": null
},
{
"type": "call",
"cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl24-ctx96k",
"model_key": "ngl24-ctx96k",
"ngl": 24,
"ctx": 98304,
"prompt_id": "hello",
"run_idx": 3,
"phase": "warm",
"total_duration_ms": 13366,
"prompt_tokens": 9,
"completion_tokens": 86,
"tokens_per_sec": 6.43423612150232,
"finish_reason": "stop",
"reasoning_chars": 290,
"content_chars": 37,
"error": null
},
{
"type": "call",
"cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl24-ctx96k",
"model_key": "ngl24-ctx96k",
"ngl": 24,
"ctx": 98304,
"prompt_id": "P-MEDIUM",
"run_idx": 0,
"phase": "cold",
"total_duration_ms": 72290,
"prompt_tokens": 22,
"completion_tokens": 432,
"tokens_per_sec": 5.97593028081339,
"finish_reason": "stop",
"reasoning_chars": 1712,
"content_chars": 427,
"error": null
},
{
"type": "call",
"cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl24-ctx96k",
"model_key": "ngl24-ctx96k",
"ngl": 24,
"ctx": 98304,
"prompt_id": "P-MEDIUM",
"run_idx": 1,
"phase": "warm",
"total_duration_ms": 92055,
"prompt_tokens": 22,
"completion_tokens": 596,
"tokens_per_sec": 6.474390310140676,
"finish_reason": "stop",
"reasoning_chars": 2005,
"content_chars": 919,
"error": null
},
{
"type": "call",
"cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl24-ctx96k",
"model_key": "ngl24-ctx96k",
"ngl": 24,
"ctx": 98304,
"prompt_id": "P-MEDIUM",
"run_idx": 2,
"phase": "warm",
"total_duration_ms": 64317,
"prompt_tokens": 22,
"completion_tokens": 414,
"tokens_per_sec": 6.436867391202949,
"finish_reason": "stop",
"reasoning_chars": 1583,
"content_chars": 467,
"error": null
},
{
"type": "call",
"cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl24-ctx96k",
"model_key": "ngl24-ctx96k",
"ngl": 24,
"ctx": 98304,
"prompt_id": "P-MEDIUM",
"run_idx": 3,
"phase": "warm",
"total_duration_ms": 64420,
"prompt_tokens": 22,
"completion_tokens": 431,
"tokens_per_sec": 6.690468798509779,
"finish_reason": "stop",
"reasoning_chars": 1652,
"content_chars": 492,
"error": null
},
{
"type": "call",
"cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl24-ctx96k",
"model_key": "ngl24-ctx96k",
"ngl": 24,
"ctx": 98304,
"prompt_id": "P-HARD",
"run_idx": 0,
"phase": "cold",
"total_duration_ms": 127296,
"prompt_tokens": 30,
"completion_tokens": 777,
"tokens_per_sec": 6.1038838612368025,
"finish_reason": "stop",
"reasoning_chars": 1080,
"content_chars": 2215,
"error": null
},
{
"type": "call",
"cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl24-ctx96k",
"model_key": "ngl24-ctx96k",
"ngl": 24,
"ctx": 98304,
"prompt_id": "P-HARD",
"run_idx": 1,
"phase": "warm",
"total_duration_ms": 123105,
"prompt_tokens": 30,
"completion_tokens": 781,
"tokens_per_sec": 6.344177734454328,
"finish_reason": "stop",
"reasoning_chars": 1163,
"content_chars": 2172,
"error": null
},
{
"type": "call",
"cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl24-ctx96k",
"model_key": "ngl24-ctx96k",
"ngl": 24,
"ctx": 98304,
"prompt_id": "P-HARD",
"run_idx": 2,
"phase": "warm",
"total_duration_ms": 74598,
"prompt_tokens": 30,
"completion_tokens": 490,
"tokens_per_sec": 6.568540711547227,
"finish_reason": "stop",
"reasoning_chars": 1140,
"content_chars": 974,
"error": null
},
{
"type": "call",
"cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl24-ctx96k",
"model_key": "ngl24-ctx96k",
"ngl": 24,
"ctx": 98304,
"prompt_id": "P-HARD",
"run_idx": 3,
"phase": "warm",
"total_duration_ms": 112829,
"prompt_tokens": 30,
"completion_tokens": 728,
"tokens_per_sec": 6.45224188816705,
"finish_reason": "stop",
"reasoning_chars": 2117,
"content_chars": 895,
"error": null
}
]
}
],
"meta_record": {
"type": "meta",
"benchmark_run_id": "e08a7051-a856-4053-b34b-561d7ce5e8cf",
"harness_version": "predator-a3b-ngl-ctx-2d-1",
"started_at_utc": "2026-05-05T07:43:00Z",
"host": "Slobodans-MacBook-Air.local",
"node": "predator",
"engine": "llamacpp",
"purpose": "Sloba 2026-05-05: NGL=12 and NGL=24 \u00d7 ctx sweep up to 131k. Find the actual context ceiling at the two NGL sweet spots from the prior bench.",
"cells": [
{
"key": "ngl12-ctx64k",
"ngl": 12,
"ctx": 65536
},
{
"key": "ngl12-ctx96k",
"ngl": 12,
"ctx": 98304
},
{
"key": "ngl12-ctx131k",
"ngl": 12,
"ctx": 131072
},
{
"key": "ngl24-ctx48k",
"ngl": 24,
"ctx": 49152
},
{
"key": "ngl24-ctx64k",
"ngl": 24,
"ctx": 65536
},
{
"key": "ngl24-ctx96k",
"ngl": 24,
"ctx": 98304
}
],
"gguf": "Qwen3-30B-A3B-UD-IQ2_M.gguf",
"max_tokens_per_prompt": {
"hello": 512,
"P-MEDIUM": 1024,
"P-HARD": 2048
},
"methodology_notes": "Sloba 2026-05-05: 'max tokens should be bigger than reasoning budget'. Prior bench had hello max_tokens=256 < reasoning_budget=500 \u2192 content_chars=0. Bumped to 512/1024/2048 so reasoning + answer both fit."
},
"packaged_at": "2026-05-06T08:04:27Z"
}