{ "id": "e08a7051-a856-4053-b34b-561d7ce5e8cf", "title": "predator-a3b-ngl-ctx-2d-1 \u2014 qwen3 on predator", "date": "2026-05-05", "started_at": "2026-05-05T07:43:00Z", "git_sha": "f06800254dfd046e154c94f663fb7f9f66c79ae3", "hardware": "predator", "hardware_label": "Predator \u00b7 gaming laptop \u00b7 GTX 1060 6 GB \u00b7 28 GB RAM", "engine": "llamacpp", "harness": "predator-a3b-ngl-ctx-2d-1", "model_family": "qwen3", "model_sizes": [ "qwen3:30b-a3b-iq2m" ], "task_kind": "param-sweep", "tags": [ "a3b", "ctx", "iq2", "ngl", "predator", "qwen3" ], "headline": "90 calls across 6 cell(s); ~5.9 tok/s mean; p50 73.4s", "janie_blurb_md": null, "janie_blurb_status": "pending", "caveat": null, "caveat_severity": null, "methodology_ref": "A3B_AND_CPU_OVERNIGHT_2026-05-05", "methodology_deviations_md": null, "results_table": [ { "label": "qwen3:30b-a3b-iq2m-think500-ngl12-ctx64k", "cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl12-ctx64k", "n_calls": 15, "n_errors": 0, "duration_ms_mean": 85573.42, "duration_ms_p50": 78940.0, "duration_ms_p95": 159864.3, "tokens_per_sec_mean": 5.48, "tokens_per_sec_p50": 5.31, "tokens_per_sec_p95": 6.79, "tokens_per_sec_max": 6.8, "memory_mb": null, "watts_avg": null, "notes": null }, { "label": "qwen3:30b-a3b-iq2m-think500-ngl12-ctx96k", "cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl12-ctx96k", "n_calls": 15, "n_errors": 0, "duration_ms_mean": 71805.83, "duration_ms_p50": 58541.0, "duration_ms_p95": 140580.5, "tokens_per_sec_mean": 5.79, "tokens_per_sec_p50": 5.75, "tokens_per_sec_p95": 6.47, "tokens_per_sec_max": 6.7, "memory_mb": null, "watts_avg": null, "notes": null }, { "label": "qwen3:30b-a3b-iq2m-think500-ngl12-ctx131k", "cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl12-ctx131k", "n_calls": 15, "n_errors": 0, "duration_ms_mean": 96406.17, "duration_ms_p50": 105842.0, "duration_ms_p95": 154492.35, "tokens_per_sec_mean": 5.01, "tokens_per_sec_p50": 5.13, "tokens_per_sec_p95": 5.97, "tokens_per_sec_max": 6.29, "memory_mb": null, "watts_avg": null, "notes": null }, { "label": "qwen3:30b-a3b-iq2m-think500-ngl24-ctx48k", "cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl24-ctx48k", "n_calls": 15, "n_errors": 0, "duration_ms_mean": 65539.5, "duration_ms_p50": 64606.5, "duration_ms_p95": 118069.75, "tokens_per_sec_mean": 6.6, "tokens_per_sec_p50": 6.59, "tokens_per_sec_p95": 6.89, "tokens_per_sec_max": 6.91, "memory_mb": null, "watts_avg": null, "notes": null }, { "label": "qwen3:30b-a3b-iq2m-think500-ngl24-ctx64k", "cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl24-ctx64k", "n_calls": 15, "n_errors": 0, "duration_ms_mean": 71267.58, "duration_ms_p50": 80265.5, "duration_ms_p95": 128231.3, "tokens_per_sec_mean": 6.41, "tokens_per_sec_p50": 6.39, "tokens_per_sec_p95": 6.88, "tokens_per_sec_max": 6.9, "memory_mb": null, "watts_avg": null, "notes": null }, { "label": "qwen3:30b-a3b-iq2m-think500-ngl24-ctx96k", "cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl24-ctx96k", "n_calls": 15, "n_errors": 0, "duration_ms_mean": 69299.5, "duration_ms_p50": 68355.0, "duration_ms_p95": 124990.95, "tokens_per_sec_mean": 6.08, "tokens_per_sec_p50": 6.44, "tokens_per_sec_p95": 6.68, "tokens_per_sec_max": 6.69, "memory_mb": null, "watts_avg": null, "notes": null } ], "cells": [ { "cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl12-ctx64k", "n_calls": 15, "n_errors": 0, "duration_ms_mean": 85573.42, "duration_ms_p50": 78940.0, "duration_ms_p95": 159864.3, "tokens_per_sec_mean": 5.48, "tokens_per_sec_p50": 5.31, "tokens_per_sec_p95": 6.79, "tokens_per_sec_max": 6.8 }, { "cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl12-ctx96k", "n_calls": 15, "n_errors": 0, "duration_ms_mean": 71805.83, "duration_ms_p50": 58541.0, "duration_ms_p95": 140580.5, "tokens_per_sec_mean": 5.79, "tokens_per_sec_p50": 5.75, "tokens_per_sec_p95": 6.47, "tokens_per_sec_max": 6.7 }, { "cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl12-ctx131k", "n_calls": 15, "n_errors": 0, "duration_ms_mean": 96406.17, "duration_ms_p50": 105842.0, "duration_ms_p95": 154492.35, "tokens_per_sec_mean": 5.01, "tokens_per_sec_p50": 5.13, "tokens_per_sec_p95": 5.97, "tokens_per_sec_max": 6.29 }, { "cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl24-ctx48k", "n_calls": 15, "n_errors": 0, "duration_ms_mean": 65539.5, "duration_ms_p50": 64606.5, "duration_ms_p95": 118069.75, "tokens_per_sec_mean": 6.6, "tokens_per_sec_p50": 6.59, "tokens_per_sec_p95": 6.89, "tokens_per_sec_max": 6.91 }, { "cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl24-ctx64k", "n_calls": 15, "n_errors": 0, "duration_ms_mean": 71267.58, "duration_ms_p50": 80265.5, "duration_ms_p95": 128231.3, "tokens_per_sec_mean": 6.41, "tokens_per_sec_p50": 6.39, "tokens_per_sec_p95": 6.88, "tokens_per_sec_max": 6.9 }, { "cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl24-ctx96k", "n_calls": 15, "n_errors": 0, "duration_ms_mean": 69299.5, "duration_ms_p50": 68355.0, "duration_ms_p95": 124990.95, "tokens_per_sec_mean": 6.08, "tokens_per_sec_p50": 6.44, "tokens_per_sec_p95": 6.68, "tokens_per_sec_max": 6.69 } ], "n_calls_total": 90, "n_errors_total": 0, "chart_spec": { "kind": "line", "x": "param_value", "y": "tokens_per_sec_mean", "series": [ "qwen3:30b-a3b-iq2m-think500-ngl12-ctx64k", "qwen3:30b-a3b-iq2m-think500-ngl12-ctx96k", "qwen3:30b-a3b-iq2m-think500-ngl12-ctx131k", "qwen3:30b-a3b-iq2m-think500-ngl24-ctx48k", "qwen3:30b-a3b-iq2m-think500-ngl24-ctx64k", "qwen3:30b-a3b-iq2m-think500-ngl24-ctx96k" ], "data_url": "/data/e08a7051-a856-4053-b34b-561d7ce5e8cf/run.jsonl" }, "site_grade": "standard", "site_grade_reason": null, "raw_data_urls": { "jsonl": "/data/e08a7051-a856-4053-b34b-561d7ce5e8cf/run.jsonl", "log": "/data/e08a7051-a856-4053-b34b-561d7ce5e8cf/run.log", "md": null, "metadata": "/data/e08a7051-a856-4053-b34b-561d7ce5e8cf/metadata.json", "gitea_dir": "https://git.weeyuga.com/slobodanmargetic988/weeyuga-benchmarks-public/src/branch/main/runs/e08a7051-a856-4053-b34b-561d7ce5e8cf/" }, "synthesis_doc": "A3B_AND_CPU_OVERNIGHT_2026-05-05.md", "synthesis_docs_all": [ "A3B_AND_CPU_OVERNIGHT_2026-05-05.md" ], "related_ids": [], "status": "complete", "visibility": "draft", "cells_full": [ { "cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl12-ctx64k", "n_calls": 15, "n_errors": 0, "duration_ms_mean": 85573.42, "duration_ms_p50": 78940.0, "duration_ms_p95": 159864.3, "tokens_per_sec_mean": 5.48, "tokens_per_sec_p50": 5.31, "tokens_per_sec_p95": 6.79, "tokens_per_sec_max": 6.8, "calls": [ { "type": "vram_snapshot", "model_key": "ngl12-ctx64k", "cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl12-ctx64k", "ngl": 12, "ctx": 65536, "raw": "5397, 633, 6144" }, { "type": "ram_snapshot", "model_key": "ngl12-ctx64k", "cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl12-ctx64k", "ngl": 12, "ctx": 65536, "raw": "15020,13605,28625" }, { "type": "llama_bench", "model_key": "ngl12-ctx64k", "cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl12-ctx64k", "ngl": 12, "ctx": 65536, "raw": "llama-bench.exe : ggml_cuda_init: found 1 CUDA devices (Total VRAM: 6143 MiB):\r\nAt line:1 char:1\r\n+ & 'D:\\WeeyugaTools\\llama.cpp\\llama-bench.exe' -m 'D:\\WeeyugaModels\\ll ...\r\n+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\r\n + CategoryInfo : NotSpecified: (ggml_cuda_init:...RAM: 6143 MiB)::String) [], RemoteException\r\n + FullyQualifiedErrorId : NativeCommandError\r\n \r\n Device 0: NVIDIA GeForce GTX 1060, compute capability 6.1, VMM: yes, VRAM: 6143 MiB\r\nload_backend: loaded CUDA backend from D:\\WeeyugaTools\\llama.cpp\\ggml-cuda.dll\r\nload_backend: loaded RPC backend from D:\\WeeyugaTools\\llama.cpp\\ggml-rpc.dll\r\nload_backend: loaded CPU backend from D:\\WeeyugaTools\\llama.cpp\\ggml-cpu-haswell.dll\r\n| model | size | params | backend | ngl | test | t/s |\r\n| ------------------------------ | ---------: | ---------: | ---------- | --: | --------------: | -------------------: |\r\n| qwen3moe 30B.A3B IQ2_M - 2.7 bpw | 10.11 GiB | 30.53 B | CUDA | 12 | pp256 | 126.21 \u00b1 2.67 |\r\n| qwen3moe 30B.A3B IQ2_M - 2.7 bpw | 10.11 GiB | 30.53 B | CUDA | 12 | tg64 | 6.32 \u00b1 0.94 |\r\n\r\nbuild: 665abc609 (8951)\r\n\r" }, { "type": "call", "cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl12-ctx64k", "model_key": "ngl12-ctx64k", "ngl": 12, "ctx": 65536, "prompt_id": "hello", "run_idx": 0, "phase": "cold", "total_duration_ms": 25689, "prompt_tokens": 9, "completion_tokens": 145, "tokens_per_sec": 5.644439254155475, "finish_reason": "stop", "reasoning_chars": 569, "content_chars": 35, "error": null }, { "type": "call", "cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl12-ctx64k", "model_key": "ngl12-ctx64k", "ngl": 12, "ctx": 65536, "prompt_id": "hello", "run_idx": 1, "phase": "warm", "total_duration_ms": 10035, "prompt_tokens": 9, "completion_tokens": 68, "tokens_per_sec": 6.776283009466866, "finish_reason": "stop", "reasoning_chars": 223, "content_chars": 37, "error": null }, { "type": "call", "cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl12-ctx64k", "model_key": "ngl12-ctx64k", "ngl": 12, "ctx": 65536, "prompt_id": "hello", "run_idx": 2, "phase": "warm", "total_duration_ms": 15585, "prompt_tokens": 9, "completion_tokens": 106, "tokens_per_sec": 6.801411613731151, "finish_reason": "stop", "reasoning_chars": 390, "content_chars": 37, "error": null }, { "type": "call", "cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl12-ctx64k", "model_key": "ngl12-ctx64k", "ngl": 12, "ctx": 65536, "prompt_id": "hello", "run_idx": 3, "phase": "warm", "total_duration_ms": 24321, "prompt_tokens": 9, "completion_tokens": 120, "tokens_per_sec": 4.934007647711853, "finish_reason": "stop", "reasoning_chars": 474, "content_chars": 37, "error": null }, { "type": "call", "cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl12-ctx64k", "model_key": "ngl12-ctx64k", "ngl": 12, "ctx": 65536, "prompt_id": "P-MEDIUM", "run_idx": 0, "phase": "cold", "total_duration_ms": 110762, "prompt_tokens": 22, "completion_tokens": 586, "tokens_per_sec": 5.29062313789928, "finish_reason": "stop", "reasoning_chars": 2148, "content_chars": 748, "error": null }, { "type": "call", "cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl12-ctx64k", "model_key": "ngl12-ctx64k", "ngl": 12, "ctx": 65536, "prompt_id": "P-MEDIUM", "run_idx": 1, "phase": "warm", "total_duration_ms": 76258, "prompt_tokens": 22, "completion_tokens": 389, "tokens_per_sec": 5.101104146450209, "finish_reason": "stop", "reasoning_chars": 1507, "content_chars": 477, "error": null }, { "type": "call", "cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl12-ctx64k", "model_key": "ngl12-ctx64k", "ngl": 12, "ctx": 65536, "prompt_id": "P-MEDIUM", "run_idx": 2, "phase": "warm", "total_duration_ms": 70359, "prompt_tokens": 22, "completion_tokens": 376, "tokens_per_sec": 5.344021376085505, "finish_reason": "stop", "reasoning_chars": 1406, "content_chars": 422, "error": null }, { "type": "call", "cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl12-ctx64k", "model_key": "ngl12-ctx64k", "ngl": 12, "ctx": 65536, "prompt_id": "P-MEDIUM", "run_idx": 3, "phase": "warm", "total_duration_ms": 81622, "prompt_tokens": 22, "completion_tokens": 445, "tokens_per_sec": 5.451961480973267, "finish_reason": "stop", "reasoning_chars": 1685, "content_chars": 499, "error": null }, { "type": "call", "cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl12-ctx64k", "model_key": "ngl12-ctx64k", "ngl": 12, "ctx": 65536, "prompt_id": "P-HARD", "run_idx": 0, "phase": "cold", "total_duration_ms": 142035, "prompt_tokens": 30, "completion_tokens": 746, "tokens_per_sec": 5.252226563875102, "finish_reason": "stop", "reasoning_chars": 1459, "content_chars": 1651, "error": null }, { "type": "call", "cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl12-ctx64k", "model_key": "ngl12-ctx64k", "ngl": 12, "ctx": 65536, "prompt_id": "P-HARD", "run_idx": 1, "phase": "warm", "total_duration_ms": 150027, "prompt_tokens": 30, "completion_tokens": 801, "tokens_per_sec": 5.339038972984863, "finish_reason": "stop", "reasoning_chars": 1018, "content_chars": 2354, "error": null }, { "type": "call", "cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl12-ctx64k", "model_key": "ngl12-ctx64k", "ngl": 12, "ctx": 65536, "prompt_id": "P-HARD", "run_idx": 2, "phase": "warm", "total_duration_ms": 162391, "prompt_tokens": 30, "completion_tokens": 816, "tokens_per_sec": 5.024909015893738, "finish_reason": "stop", "reasoning_chars": 1663, "content_chars": 1826, "error": null }, { "type": "call", "cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl12-ctx64k", "model_key": "ngl12-ctx64k", "ngl": 12, "ctx": 65536, "prompt_id": "P-HARD", "run_idx": 3, "phase": "warm", "total_duration_ms": 157797, "prompt_tokens": 30, "completion_tokens": 763, "tokens_per_sec": 4.835326400375165, "finish_reason": "stop", "reasoning_chars": 1782, "content_chars": 1590, "error": null } ] }, { "cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl12-ctx96k", "n_calls": 15, "n_errors": 0, "duration_ms_mean": 71805.83, "duration_ms_p50": 58541.0, "duration_ms_p95": 140580.5, "tokens_per_sec_mean": 5.79, "tokens_per_sec_p50": 5.75, "tokens_per_sec_p95": 6.47, "tokens_per_sec_max": 6.7, "calls": [ { "type": "vram_snapshot", "model_key": "ngl12-ctx96k", "cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl12-ctx96k", "ngl": 12, "ctx": 98304, "raw": "5870, 160, 6144" }, { "type": "ram_snapshot", "model_key": "ngl12-ctx96k", "cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl12-ctx96k", "ngl": 12, "ctx": 98304, "raw": "17590,11035,28625" }, { "type": "llama_bench", "model_key": "ngl12-ctx96k", "cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl12-ctx96k", "ngl": 12, "ctx": 98304, "raw": "llama-bench.exe : ggml_cuda_init: found 1 CUDA devices (Total VRAM: 6143 MiB):\r\nAt line:1 char:1\r\n+ & 'D:\\WeeyugaTools\\llama.cpp\\llama-bench.exe' -m 'D:\\WeeyugaModels\\ll ...\r\n+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\r\n + CategoryInfo : NotSpecified: (ggml_cuda_init:...RAM: 6143 MiB)::String) [], RemoteException\r\n + FullyQualifiedErrorId : NativeCommandError\r\n \r\n Device 0: NVIDIA GeForce GTX 1060, compute capability 6.1, VMM: yes, VRAM: 6143 MiB\r\nload_backend: loaded CUDA backend from D:\\WeeyugaTools\\llama.cpp\\ggml-cuda.dll\r\nload_backend: loaded RPC backend from D:\\WeeyugaTools\\llama.cpp\\ggml-rpc.dll\r\nload_backend: loaded CPU backend from D:\\WeeyugaTools\\llama.cpp\\ggml-cpu-haswell.dll\r\n| model | size | params | backend | ngl | test | t/s |\r\n| ------------------------------ | ---------: | ---------: | ---------- | --: | --------------: | -------------------: |\r\n| qwen3moe 30B.A3B IQ2_M - 2.7 bpw | 10.11 GiB | 30.53 B | CUDA | 12 | pp256 | 126.69 \u00b1 4.44 |\r\n| qwen3moe 30B.A3B IQ2_M - 2.7 bpw | 10.11 GiB | 30.53 B | CUDA | 12 | tg64 | 6.01 \u00b1 0.94 |\r\n\r\nbuild: 665abc609 (8951)\r\n\r" }, { "type": "call", "cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl12-ctx96k", "model_key": "ngl12-ctx96k", "ngl": 12, "ctx": 98304, "prompt_id": "hello", "run_idx": 0, "phase": "cold", "total_duration_ms": 13701, "prompt_tokens": 9, "completion_tokens": 79, "tokens_per_sec": 5.766002481570688, "finish_reason": "stop", "reasoning_chars": 280, "content_chars": 37, "error": null }, { "type": "call", "cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl12-ctx96k", "model_key": "ngl12-ctx96k", "ngl": 12, "ctx": 98304, "prompt_id": "hello", "run_idx": 1, "phase": "warm", "total_duration_ms": 33047, "prompt_tokens": 9, "completion_tokens": 163, "tokens_per_sec": 4.932369050140709, "finish_reason": "stop", "reasoning_chars": 618, "content_chars": 37, "error": null }, { "type": "call", "cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl12-ctx96k", "model_key": "ngl12-ctx96k", "ngl": 12, "ctx": 98304, "prompt_id": "hello", "run_idx": 2, "phase": "warm", "total_duration_ms": 15830, "prompt_tokens": 9, "completion_tokens": 106, "tokens_per_sec": 6.69614655716993, "finish_reason": "stop", "reasoning_chars": 400, "content_chars": 37, "error": null }, { "type": "call", "cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl12-ctx96k", "model_key": "ngl12-ctx96k", "ngl": 12, "ctx": 98304, "prompt_id": "hello", "run_idx": 3, "phase": "warm", "total_duration_ms": 20392, "prompt_tokens": 9, "completion_tokens": 120, "tokens_per_sec": 5.884660651235779, "finish_reason": "stop", "reasoning_chars": 436, "content_chars": 37, "error": null }, { "type": "call", "cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl12-ctx96k", "model_key": "ngl12-ctx96k", "ngl": 12, "ctx": 98304, "prompt_id": "P-MEDIUM", "run_idx": 0, "phase": "cold", "total_duration_ms": 45721, "prompt_tokens": 22, "completion_tokens": 287, "tokens_per_sec": 6.277203035804117, "finish_reason": "stop", "reasoning_chars": 1047, "content_chars": 398, "error": null }, { "type": "call", "cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl12-ctx96k", "model_key": "ngl12-ctx96k", "ngl": 12, "ctx": 98304, "prompt_id": "P-MEDIUM", "run_idx": 1, "phase": "warm", "total_duration_ms": 59728, "prompt_tokens": 22, "completion_tokens": 333, "tokens_per_sec": 5.575274578087329, "finish_reason": "stop", "reasoning_chars": 1152, "content_chars": 503, "error": null }, { "type": "call", "cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl12-ctx96k", "model_key": "ngl12-ctx96k", "ngl": 12, "ctx": 98304, "prompt_id": "P-MEDIUM", "run_idx": 2, "phase": "warm", "total_duration_ms": 57354, "prompt_tokens": 22, "completion_tokens": 325, "tokens_per_sec": 5.666562053213377, "finish_reason": "stop", "reasoning_chars": 1139, "content_chars": 429, "error": null }, { "type": "call", "cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl12-ctx96k", "model_key": "ngl12-ctx96k", "ngl": 12, "ctx": 98304, "prompt_id": "P-MEDIUM", "run_idx": 3, "phase": "warm", "total_duration_ms": 104255, "prompt_tokens": 22, "completion_tokens": 610, "tokens_per_sec": 5.85103831950506, "finish_reason": "stop", "reasoning_chars": 2408, "content_chars": 561, "error": null }, { "type": "call", "cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl12-ctx96k", "model_key": "ngl12-ctx96k", "ngl": 12, "ctx": 98304, "prompt_id": "P-HARD", "run_idx": 0, "phase": "cold", "total_duration_ms": 137543, "prompt_tokens": 30, "completion_tokens": 788, "tokens_per_sec": 5.729117439637059, "finish_reason": "stop", "reasoning_chars": 1966, "content_chars": 1357, "error": null }, { "type": "call", "cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl12-ctx96k", "model_key": "ngl12-ctx96k", "ngl": 12, "ctx": 98304, "prompt_id": "P-HARD", "run_idx": 1, "phase": "warm", "total_duration_ms": 144293, "prompt_tokens": 30, "completion_tokens": 796, "tokens_per_sec": 5.516553124545196, "finish_reason": "stop", "reasoning_chars": 1825, "content_chars": 1536, "error": null }, { "type": "call", "cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl12-ctx96k", "model_key": "ngl12-ctx96k", "ngl": 12, "ctx": 98304, "prompt_id": "P-HARD", "run_idx": 2, "phase": "warm", "total_duration_ms": 101864, "prompt_tokens": 30, "completion_tokens": 581, "tokens_per_sec": 5.703683342495877, "finish_reason": "stop", "reasoning_chars": 1282, "content_chars": 1232, "error": null }, { "type": "call", "cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl12-ctx96k", "model_key": "ngl12-ctx96k", "ngl": 12, "ctx": 98304, "prompt_id": "P-HARD", "run_idx": 3, "phase": "warm", "total_duration_ms": 127942, "prompt_tokens": 30, "completion_tokens": 754, "tokens_per_sec": 5.893295399477889, "finish_reason": "stop", "reasoning_chars": 1961, "content_chars": 1261, "error": null } ] }, { "cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl12-ctx131k", "n_calls": 15, "n_errors": 0, "duration_ms_mean": 96406.17, "duration_ms_p50": 105842.0, "duration_ms_p95": 154492.35, "tokens_per_sec_mean": 5.01, "tokens_per_sec_p50": 5.13, "tokens_per_sec_p95": 5.97, "tokens_per_sec_max": 6.29, "calls": [ { "type": "vram_snapshot", "model_key": "ngl12-ctx131k", "cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl12-ctx131k", "ngl": 12, "ctx": 131072, "raw": "5814, 216, 6144" }, { "type": "ram_snapshot", "model_key": "ngl12-ctx131k", "cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl12-ctx131k", "ngl": 12, "ctx": 131072, "raw": "20818,7806,28625" }, { "type": "llama_bench", "model_key": "ngl12-ctx131k", "cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl12-ctx131k", "ngl": 12, "ctx": 131072, "raw": "llama-bench.exe : ggml_cuda_init: found 1 CUDA devices (Total VRAM: 6143 MiB):\r\nAt line:1 char:1\r\n+ & 'D:\\WeeyugaTools\\llama.cpp\\llama-bench.exe' -m 'D:\\WeeyugaModels\\ll ...\r\n+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\r\n + CategoryInfo : NotSpecified: (ggml_cuda_init:...RAM: 6143 MiB)::String) [], RemoteException\r\n + FullyQualifiedErrorId : NativeCommandError\r\n \r\n Device 0: NVIDIA GeForce GTX 1060, compute capability 6.1, VMM: yes, VRAM: 6143 MiB\r\nload_backend: loaded CUDA backend from D:\\WeeyugaTools\\llama.cpp\\ggml-cuda.dll\r\nload_backend: loaded RPC backend from D:\\WeeyugaTools\\llama.cpp\\ggml-rpc.dll\r\nload_backend: loaded CPU backend from D:\\WeeyugaTools\\llama.cpp\\ggml-cpu-haswell.dll\r\n| model | size | params | backend | ngl | test | t/s |\r\n| ------------------------------ | ---------: | ---------: | ---------- | --: | --------------: | -------------------: |\r\n| qwen3moe 30B.A3B IQ2_M - 2.7 bpw | 10.11 GiB | 30.53 B | CUDA | 12 | pp256 | 26.49 \u00b1 11.80 |\r\n| qwen3moe 30B.A3B IQ2_M - 2.7 bpw | 10.11 GiB | 30.53 B | CUDA | 12 | tg64 | 4.94 \u00b1 0.78 |\r\n\r\nbuild: 665abc609 (8951)\r\n\r" }, { "type": "call", "cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl12-ctx131k", "model_key": "ngl12-ctx131k", "ngl": 12, "ctx": 131072, "prompt_id": "hello", "run_idx": 0, "phase": "cold", "total_duration_ms": 59744, "prompt_tokens": 9, "completion_tokens": 135, "tokens_per_sec": 2.259641135511516, "finish_reason": "stop", "reasoning_chars": 504, "content_chars": 37, "error": null }, { "type": "call", "cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl12-ctx131k", "model_key": "ngl12-ctx131k", "ngl": 12, "ctx": 131072, "prompt_id": "hello", "run_idx": 1, "phase": "warm", "total_duration_ms": 10810, "prompt_tokens": 9, "completion_tokens": 68, "tokens_per_sec": 6.290471785383904, "finish_reason": "stop", "reasoning_chars": 224, "content_chars": 37, "error": null }, { "type": "call", "cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl12-ctx131k", "model_key": "ngl12-ctx131k", "ngl": 12, "ctx": 131072, "prompt_id": "hello", "run_idx": 2, "phase": "warm", "total_duration_ms": 20002, "prompt_tokens": 9, "completion_tokens": 101, "tokens_per_sec": 5.049495050494951, "finish_reason": "stop", "reasoning_chars": 384, "content_chars": 37, "error": null }, { "type": "call", "cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl12-ctx131k", "model_key": "ngl12-ctx131k", "ngl": 12, "ctx": 131072, "prompt_id": "hello", "run_idx": 3, "phase": "warm", "total_duration_ms": 43743, "prompt_tokens": 9, "completion_tokens": 233, "tokens_per_sec": 5.326566536360103, "finish_reason": "stop", "reasoning_chars": 910, "content_chars": 37, "error": null }, { "type": "call", "cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl12-ctx131k", "model_key": "ngl12-ctx131k", "ngl": 12, "ctx": 131072, "prompt_id": "P-MEDIUM", "run_idx": 0, "phase": "cold", "total_duration_ms": 118125, "prompt_tokens": 22, "completion_tokens": 586, "tokens_per_sec": 4.960846560846561, "finish_reason": "stop", "reasoning_chars": 2415, "content_chars": 435, "error": null }, { "type": "call", "cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl12-ctx131k", "model_key": "ngl12-ctx131k", "ngl": 12, "ctx": 131072, "prompt_id": "P-MEDIUM", "run_idx": 1, "phase": "warm", "total_duration_ms": 116198, "prompt_tokens": 22, "completion_tokens": 598, "tokens_per_sec": 5.146388061756657, "finish_reason": "stop", "reasoning_chars": 2059, "content_chars": 865, "error": null }, { "type": "call", "cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl12-ctx131k", "model_key": "ngl12-ctx131k", "ngl": 12, "ctx": 131072, "prompt_id": "P-MEDIUM", "run_idx": 2, "phase": "warm", "total_duration_ms": 95486, "prompt_tokens": 22, "completion_tokens": 471, "tokens_per_sec": 4.932660285277422, "finish_reason": "stop", "reasoning_chars": 1844, "content_chars": 502, "error": null }, { "type": "call", "cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl12-ctx131k", "model_key": "ngl12-ctx131k", "ngl": 12, "ctx": 131072, "prompt_id": "P-MEDIUM", "run_idx": 3, "phase": "warm", "total_duration_ms": 84370, "prompt_tokens": 22, "completion_tokens": 482, "tokens_per_sec": 5.71293113665995, "finish_reason": "stop", "reasoning_chars": 1858, "content_chars": 488, "error": null }, { "type": "call", "cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl12-ctx131k", "model_key": "ngl12-ctx131k", "ngl": 12, "ctx": 131072, "prompt_id": "P-HARD", "run_idx": 0, "phase": "cold", "total_duration_ms": 150702, "prompt_tokens": 30, "completion_tokens": 772, "tokens_per_sec": 5.122692465926132, "finish_reason": "stop", "reasoning_chars": 1228, "content_chars": 2007, "error": null }, { "type": "call", "cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl12-ctx131k", "model_key": "ngl12-ctx131k", "ngl": 12, "ctx": 131072, "prompt_id": "P-HARD", "run_idx": 1, "phase": "warm", "total_duration_ms": 148897, "prompt_tokens": 30, "completion_tokens": 764, "tokens_per_sec": 5.131063755481978, "finish_reason": "stop", "reasoning_chars": 1083, "content_chars": 2199, "error": null }, { "type": "call", "cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl12-ctx131k", "model_key": "ngl12-ctx131k", "ngl": 12, "ctx": 131072, "prompt_id": "P-HARD", "run_idx": 2, "phase": "warm", "total_duration_ms": 159125, "prompt_tokens": 30, "completion_tokens": 762, "tokens_per_sec": 4.788688138256088, "finish_reason": "stop", "reasoning_chars": 1767, "content_chars": 1578, "error": null }, { "type": "call", "cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl12-ctx131k", "model_key": "ngl12-ctx131k", "ngl": 12, "ctx": 131072, "prompt_id": "P-HARD", "run_idx": 3, "phase": "warm", "total_duration_ms": 149672, "prompt_tokens": 30, "completion_tokens": 805, "tokens_per_sec": 5.378427494788604, "finish_reason": "stop", "reasoning_chars": 1279, "content_chars": 2259, "error": null } ] }, { "cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl24-ctx48k", "n_calls": 15, "n_errors": 0, "duration_ms_mean": 65539.5, "duration_ms_p50": 64606.5, "duration_ms_p95": 118069.75, "tokens_per_sec_mean": 6.6, "tokens_per_sec_p50": 6.59, "tokens_per_sec_p95": 6.89, "tokens_per_sec_max": 6.91, "calls": [ { "type": "vram_snapshot", "model_key": "ngl24-ctx48k", "cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl24-ctx48k", "ngl": 24, "ctx": 49152, "raw": "5826, 204, 6144" }, { "type": "ram_snapshot", "model_key": "ngl24-ctx48k", "cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl24-ctx48k", "ngl": 24, "ctx": 49152, "raw": "17056,11569,28625" }, { "type": "llama_bench", "model_key": "ngl24-ctx48k", "cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl24-ctx48k", "ngl": 24, "ctx": 49152, "raw": "llama-bench.exe : ggml_cuda_init: found 1 CUDA devices (Total VRAM: 6143 MiB):\r\nAt line:1 char:1\r\n+ & 'D:\\WeeyugaTools\\llama.cpp\\llama-bench.exe' -m 'D:\\WeeyugaModels\\ll ...\r\n+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\r\n + CategoryInfo : NotSpecified: (ggml_cuda_init:...RAM: 6143 MiB)::String) [], RemoteException\r\n + FullyQualifiedErrorId : NativeCommandError\r\n \r\n Device 0: NVIDIA GeForce GTX 1060, compute capability 6.1, VMM: yes, VRAM: 6143 MiB\r\nload_backend: loaded CUDA backend from D:\\WeeyugaTools\\llama.cpp\\ggml-cuda.dll\r\nload_backend: loaded RPC backend from D:\\WeeyugaTools\\llama.cpp\\ggml-rpc.dll\r\nload_backend: loaded CPU backend from D:\\WeeyugaTools\\llama.cpp\\ggml-cpu-haswell.dll\r\n| model | size | params | backend | ngl | test | t/s |\r\n| ------------------------------ | ---------: | ---------: | ---------- | --: | --------------: | -------------------: |\r\n| qwen3moe 30B.A3B IQ2_M - 2.7 bpw | 10.11 GiB | 30.53 B | CUDA | 24 | pp256 | 48.01 \u00b1 0.46 |\r\n| qwen3moe 30B.A3B IQ2_M - 2.7 bpw | 10.11 GiB | 30.53 B | CUDA | 24 | tg64 | 6.50 \u00b1 0.03 |\r\n\r\nbuild: 665abc609 (8951)\r\n\r" }, { "type": "call", "cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl24-ctx48k", "model_key": "ngl24-ctx48k", "ngl": 24, "ctx": 49152, "prompt_id": "hello", "run_idx": 0, "phase": "cold", "total_duration_ms": 19864, "prompt_tokens": 9, "completion_tokens": 123, "tokens_per_sec": 6.192106322996375, "finish_reason": "stop", "reasoning_chars": 467, "content_chars": 37, "error": null }, { "type": "call", "cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl24-ctx48k", "model_key": "ngl24-ctx48k", "ngl": 24, "ctx": 49152, "prompt_id": "hello", "run_idx": 1, "phase": "warm", "total_duration_ms": 14734, "prompt_tokens": 9, "completion_tokens": 101, "tokens_per_sec": 6.854893443735578, "finish_reason": "stop", "reasoning_chars": 370, "content_chars": 37, "error": null }, { "type": "call", "cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl24-ctx48k", "model_key": "ngl24-ctx48k", "ngl": 24, "ctx": 49152, "prompt_id": "hello", "run_idx": 2, "phase": "warm", "total_duration_ms": 15636, "prompt_tokens": 9, "completion_tokens": 108, "tokens_per_sec": 6.907137375287798, "finish_reason": "stop", "reasoning_chars": 399, "content_chars": 37, "error": null }, { "type": "call", "cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl24-ctx48k", "model_key": "ngl24-ctx48k", "ngl": 24, "ctx": 49152, "prompt_id": "hello", "run_idx": 3, "phase": "warm", "total_duration_ms": 14990, "prompt_tokens": 9, "completion_tokens": 103, "tokens_per_sec": 6.871247498332221, "finish_reason": "stop", "reasoning_chars": 380, "content_chars": 37, "error": null }, { "type": "call", "cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl24-ctx48k", "model_key": "ngl24-ctx48k", "ngl": 24, "ctx": 49152, "prompt_id": "P-MEDIUM", "run_idx": 0, "phase": "cold", "total_duration_ms": 58574, "prompt_tokens": 22, "completion_tokens": 385, "tokens_per_sec": 6.572882166148803, "finish_reason": "stop", "reasoning_chars": 1365, "content_chars": 480, "error": null }, { "type": "call", "cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl24-ctx48k", "model_key": "ngl24-ctx48k", "ngl": 24, "ctx": 49152, "prompt_id": "P-MEDIUM", "run_idx": 1, "phase": "warm", "total_duration_ms": 89924, "prompt_tokens": 22, "completion_tokens": 594, "tokens_per_sec": 6.60557804368133, "finish_reason": "stop", "reasoning_chars": 2406, "content_chars": 484, "error": null }, { "type": "call", "cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl24-ctx48k", "model_key": "ngl24-ctx48k", "ngl": 24, "ctx": 49152, "prompt_id": "P-MEDIUM", "run_idx": 2, "phase": "warm", "total_duration_ms": 92501, "prompt_tokens": 22, "completion_tokens": 606, "tokens_per_sec": 6.551280526697008, "finish_reason": "stop", "reasoning_chars": 2308, "content_chars": 671, "error": null }, { "type": "call", "cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl24-ctx48k", "model_key": "ngl24-ctx48k", "ngl": 24, "ctx": 49152, "prompt_id": "P-MEDIUM", "run_idx": 3, "phase": "warm", "total_duration_ms": 59896, "prompt_tokens": 22, "completion_tokens": 403, "tokens_per_sec": 6.728329103779885, "finish_reason": "stop", "reasoning_chars": 1446, "content_chars": 537, "error": null }, { "type": "call", "cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl24-ctx48k", "model_key": "ngl24-ctx48k", "ngl": 24, "ctx": 49152, "prompt_id": "P-HARD", "run_idx": 0, "phase": "cold", "total_duration_ms": 121576, "prompt_tokens": 30, "completion_tokens": 768, "tokens_per_sec": 6.317036257156018, "finish_reason": "stop", "reasoning_chars": 1226, "content_chars": 1991, "error": null }, { "type": "call", "cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl24-ctx48k", "model_key": "ngl24-ctx48k", "ngl": 24, "ctx": 49152, "prompt_id": "P-HARD", "run_idx": 1, "phase": "warm", "total_duration_ms": 69317, "prompt_tokens": 30, "completion_tokens": 463, "tokens_per_sec": 6.6794581415814305, "finish_reason": "stop", "reasoning_chars": 1048, "content_chars": 963, "error": null }, { "type": "call", "cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl24-ctx48k", "model_key": "ngl24-ctx48k", "ngl": 24, "ctx": 49152, "prompt_id": "P-HARD", "run_idx": 2, "phase": "warm", "total_duration_ms": 114261, "prompt_tokens": 30, "completion_tokens": 741, "tokens_per_sec": 6.485152414209573, "finish_reason": "stop", "reasoning_chars": 1246, "content_chars": 1964, "error": null }, { "type": "call", "cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl24-ctx48k", "model_key": "ngl24-ctx48k", "ngl": 24, "ctx": 49152, "prompt_id": "P-HARD", "run_idx": 3, "phase": "warm", "total_duration_ms": 115201, "prompt_tokens": 30, "completion_tokens": 746, "tokens_per_sec": 6.475638232307012, "finish_reason": "stop", "reasoning_chars": 1171, "content_chars": 1994, "error": null } ] }, { "cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl24-ctx64k", "n_calls": 15, "n_errors": 0, "duration_ms_mean": 71267.58, "duration_ms_p50": 80265.5, "duration_ms_p95": 128231.3, "tokens_per_sec_mean": 6.41, "tokens_per_sec_p50": 6.39, "tokens_per_sec_p95": 6.88, "tokens_per_sec_max": 6.9, "calls": [ { "type": "vram_snapshot", "model_key": "ngl24-ctx64k", "cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl24-ctx64k", "ngl": 24, "ctx": 65536, "raw": "5780, 250, 6144" }, { "type": "ram_snapshot", "model_key": "ngl24-ctx64k", "cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl24-ctx64k", "ngl": 24, "ctx": 65536, "raw": "18513,10111,28625" }, { "type": "llama_bench", "model_key": "ngl24-ctx64k", "cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl24-ctx64k", "ngl": 24, "ctx": 65536, "raw": "llama-bench.exe : ggml_cuda_init: found 1 CUDA devices (Total VRAM: 6143 MiB):\r\nAt line:1 char:1\r\n+ & 'D:\\WeeyugaTools\\llama.cpp\\llama-bench.exe' -m 'D:\\WeeyugaModels\\ll ...\r\n+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\r\n + CategoryInfo : NotSpecified: (ggml_cuda_init:...RAM: 6143 MiB)::String) [], RemoteException\r\n + FullyQualifiedErrorId : NativeCommandError\r\n \r\n Device 0: NVIDIA GeForce GTX 1060, compute capability 6.1, VMM: yes, VRAM: 6143 MiB\r\nload_backend: loaded CUDA backend from D:\\WeeyugaTools\\llama.cpp\\ggml-cuda.dll\r\nload_backend: loaded RPC backend from D:\\WeeyugaTools\\llama.cpp\\ggml-rpc.dll\r\nload_backend: loaded CPU backend from D:\\WeeyugaTools\\llama.cpp\\ggml-cpu-haswell.dll\r\n| model | size | params | backend | ngl | test | t/s |\r\n| ------------------------------ | ---------: | ---------: | ---------- | --: | --------------: | -------------------: |\r\n| qwen3moe 30B.A3B IQ2_M - 2.7 bpw | 10.11 GiB | 30.53 B | CUDA | 24 | pp256 | 27.92 \u00b1 3.61 |\r\n| qwen3moe 30B.A3B IQ2_M - 2.7 bpw | 10.11 GiB | 30.53 B | CUDA | 24 | tg64 | 5.63 \u00b1 0.39 |\r\n\r\nbuild: 665abc609 (8951)\r\n\r" }, { "type": "call", "cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl24-ctx64k", "model_key": "ngl24-ctx64k", "ngl": 24, "ctx": 65536, "prompt_id": "hello", "run_idx": 0, "phase": "cold", "total_duration_ms": 18535, "prompt_tokens": 9, "completion_tokens": 103, "tokens_per_sec": 5.557054221742649, "finish_reason": "stop", "reasoning_chars": 380, "content_chars": 37, "error": null }, { "type": "call", "cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl24-ctx64k", "model_key": "ngl24-ctx64k", "ngl": 24, "ctx": 65536, "prompt_id": "hello", "run_idx": 1, "phase": "warm", "total_duration_ms": 18478, "prompt_tokens": 9, "completion_tokens": 127, "tokens_per_sec": 6.873038207598224, "finish_reason": "stop", "reasoning_chars": 495, "content_chars": 37, "error": null }, { "type": "call", "cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl24-ctx64k", "model_key": "ngl24-ctx64k", "ngl": 24, "ctx": 65536, "prompt_id": "hello", "run_idx": 2, "phase": "warm", "total_duration_ms": 14692, "prompt_tokens": 9, "completion_tokens": 100, "tokens_per_sec": 6.806425265450585, "finish_reason": "stop", "reasoning_chars": 361, "content_chars": 37, "error": null }, { "type": "call", "cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl24-ctx64k", "model_key": "ngl24-ctx64k", "ngl": 24, "ctx": 65536, "prompt_id": "hello", "run_idx": 3, "phase": "warm", "total_duration_ms": 14351, "prompt_tokens": 9, "completion_tokens": 99, "tokens_per_sec": 6.898473973939098, "finish_reason": "stop", "reasoning_chars": 377, "content_chars": 37, "error": null }, { "type": "call", "cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl24-ctx64k", "model_key": "ngl24-ctx64k", "ngl": 24, "ctx": 65536, "prompt_id": "P-MEDIUM", "run_idx": 0, "phase": "cold", "total_duration_ms": 99471, "prompt_tokens": 22, "completion_tokens": 621, "tokens_per_sec": 6.2430256054528455, "finish_reason": "stop", "reasoning_chars": 0, "content_chars": 2991, "error": null }, { "type": "call", "cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl24-ctx64k", "model_key": "ngl24-ctx64k", "ngl": 24, "ctx": 65536, "prompt_id": "P-MEDIUM", "run_idx": 1, "phase": "warm", "total_duration_ms": 56166, "prompt_tokens": 22, "completion_tokens": 362, "tokens_per_sec": 6.445180358223837, "finish_reason": "stop", "reasoning_chars": 1274, "content_chars": 513, "error": null }, { "type": "call", "cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl24-ctx64k", "model_key": "ngl24-ctx64k", "ngl": 24, "ctx": 65536, "prompt_id": "P-MEDIUM", "run_idx": 2, "phase": "warm", "total_duration_ms": 92504, "prompt_tokens": 22, "completion_tokens": 596, "tokens_per_sec": 6.442964628556602, "finish_reason": "stop", "reasoning_chars": 2393, "content_chars": 481, "error": null }, { "type": "call", "cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl24-ctx64k", "model_key": "ngl24-ctx64k", "ngl": 24, "ctx": 65536, "prompt_id": "P-MEDIUM", "run_idx": 3, "phase": "warm", "total_duration_ms": 69604, "prompt_tokens": 22, "completion_tokens": 459, "tokens_per_sec": 6.5944485949083385, "finish_reason": "stop", "reasoning_chars": 1810, "content_chars": 507, "error": null }, { "type": "call", "cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl24-ctx64k", "model_key": "ngl24-ctx64k", "ngl": 24, "ctx": 65536, "prompt_id": "P-HARD", "run_idx": 0, "phase": "cold", "total_duration_ms": 123427, "prompt_tokens": 30, "completion_tokens": 761, "tokens_per_sec": 6.165587756325601, "finish_reason": "stop", "reasoning_chars": 1221, "content_chars": 1919, "error": null }, { "type": "call", "cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl24-ctx64k", "model_key": "ngl24-ctx64k", "ngl": 24, "ctx": 65536, "prompt_id": "P-HARD", "run_idx": 1, "phase": "warm", "total_duration_ms": 90927, "prompt_tokens": 30, "completion_tokens": 577, "tokens_per_sec": 6.345749887272207, "finish_reason": "stop", "reasoning_chars": 1191, "content_chars": 1282, "error": null }, { "type": "call", "cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl24-ctx64k", "model_key": "ngl24-ctx64k", "ngl": 24, "ctx": 65536, "prompt_id": "P-HARD", "run_idx": 2, "phase": "warm", "total_duration_ms": 131495, "prompt_tokens": 30, "completion_tokens": 831, "tokens_per_sec": 6.319631925168257, "finish_reason": "stop", "reasoning_chars": 2262, "content_chars": 1303, "error": null }, { "type": "call", "cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl24-ctx64k", "model_key": "ngl24-ctx64k", "ngl": 24, "ctx": 65536, "prompt_id": "P-HARD", "run_idx": 3, "phase": "warm", "total_duration_ms": 125561, "prompt_tokens": 30, "completion_tokens": 777, "tokens_per_sec": 6.188227236164095, "finish_reason": "stop", "reasoning_chars": 998, "content_chars": 2187, "error": null } ] }, { "cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl24-ctx96k", "n_calls": 15, "n_errors": 0, "duration_ms_mean": 69299.5, "duration_ms_p50": 68355.0, "duration_ms_p95": 124990.95, "tokens_per_sec_mean": 6.08, "tokens_per_sec_p50": 6.44, "tokens_per_sec_p95": 6.68, "tokens_per_sec_max": 6.69, "calls": [ { "type": "vram_snapshot", "model_key": "ngl24-ctx96k", "cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl24-ctx96k", "ngl": 24, "ctx": 98304, "raw": "5750, 280, 6144" }, { "type": "ram_snapshot", "model_key": "ngl24-ctx96k", "cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl24-ctx96k", "ngl": 24, "ctx": 98304, "raw": "21927,6698,28625" }, { "type": "llama_bench", "model_key": "ngl24-ctx96k", "cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl24-ctx96k", "ngl": 24, "ctx": 98304, "raw": "llama-bench.exe : ggml_cuda_init: found 1 CUDA devices (Total VRAM: 6143 MiB):\r\nAt line:1 char:1\r\n+ & 'D:\\WeeyugaTools\\llama.cpp\\llama-bench.exe' -m 'D:\\WeeyugaModels\\ll ...\r\n+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\r\n + CategoryInfo : NotSpecified: (ggml_cuda_init:...RAM: 6143 MiB)::String) [], RemoteException\r\n + FullyQualifiedErrorId : NativeCommandError\r\n \r\n Device 0: NVIDIA GeForce GTX 1060, compute capability 6.1, VMM: yes, VRAM: 6143 MiB\r\nload_backend: loaded CUDA backend from D:\\WeeyugaTools\\llama.cpp\\ggml-cuda.dll\r\nload_backend: loaded RPC backend from D:\\WeeyugaTools\\llama.cpp\\ggml-rpc.dll\r\nload_backend: loaded CPU backend from D:\\WeeyugaTools\\llama.cpp\\ggml-cpu-haswell.dll\r\n| model | size | params | backend | ngl | test | t/s |\r\n| ------------------------------ | ---------: | ---------: | ---------- | --: | --------------: | -------------------: |\r\n| qwen3moe 30B.A3B IQ2_M - 2.7 bpw | 10.11 GiB | 30.53 B | CUDA | 24 | pp256 | 15.72 \u00b1 3.11 |\r\n| qwen3moe 30B.A3B IQ2_M - 2.7 bpw | 10.11 GiB | 30.53 B | CUDA | 24 | tg64 | 5.22 \u00b1 0.53 |\r\n\r\nbuild: 665abc609 (8951)\r\n\r" }, { "type": "call", "cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl24-ctx96k", "model_key": "ngl24-ctx96k", "ngl": 24, "ctx": 98304, "prompt_id": "hello", "run_idx": 0, "phase": "cold", "total_duration_ms": 35465, "prompt_tokens": 9, "completion_tokens": 96, "tokens_per_sec": 2.7068941209643307, "finish_reason": "stop", "reasoning_chars": 365, "content_chars": 37, "error": null }, { "type": "call", "cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl24-ctx96k", "model_key": "ngl24-ctx96k", "ngl": 24, "ctx": 98304, "prompt_id": "hello", "run_idx": 1, "phase": "warm", "total_duration_ms": 27111, "prompt_tokens": 9, "completion_tokens": 167, "tokens_per_sec": 6.1598613109070115, "finish_reason": "stop", "reasoning_chars": 547, "content_chars": 152, "error": null }, { "type": "call", "cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl24-ctx96k", "model_key": "ngl24-ctx96k", "ngl": 24, "ctx": 98304, "prompt_id": "hello", "run_idx": 2, "phase": "warm", "total_duration_ms": 24742, "prompt_tokens": 9, "completion_tokens": 165, "tokens_per_sec": 6.668822245574327, "finish_reason": "stop", "reasoning_chars": 571, "content_chars": 116, "error": null }, { "type": "call", "cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl24-ctx96k", "model_key": "ngl24-ctx96k", "ngl": 24, "ctx": 98304, "prompt_id": "hello", "run_idx": 3, "phase": "warm", "total_duration_ms": 13366, "prompt_tokens": 9, "completion_tokens": 86, "tokens_per_sec": 6.43423612150232, "finish_reason": "stop", "reasoning_chars": 290, "content_chars": 37, "error": null }, { "type": "call", "cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl24-ctx96k", "model_key": "ngl24-ctx96k", "ngl": 24, "ctx": 98304, "prompt_id": "P-MEDIUM", "run_idx": 0, "phase": "cold", "total_duration_ms": 72290, "prompt_tokens": 22, "completion_tokens": 432, "tokens_per_sec": 5.97593028081339, "finish_reason": "stop", "reasoning_chars": 1712, "content_chars": 427, "error": null }, { "type": "call", "cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl24-ctx96k", "model_key": "ngl24-ctx96k", "ngl": 24, "ctx": 98304, "prompt_id": "P-MEDIUM", "run_idx": 1, "phase": "warm", "total_duration_ms": 92055, "prompt_tokens": 22, "completion_tokens": 596, "tokens_per_sec": 6.474390310140676, "finish_reason": "stop", "reasoning_chars": 2005, "content_chars": 919, "error": null }, { "type": "call", "cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl24-ctx96k", "model_key": "ngl24-ctx96k", "ngl": 24, "ctx": 98304, "prompt_id": "P-MEDIUM", "run_idx": 2, "phase": "warm", "total_duration_ms": 64317, "prompt_tokens": 22, "completion_tokens": 414, "tokens_per_sec": 6.436867391202949, "finish_reason": "stop", "reasoning_chars": 1583, "content_chars": 467, "error": null }, { "type": "call", "cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl24-ctx96k", "model_key": "ngl24-ctx96k", "ngl": 24, "ctx": 98304, "prompt_id": "P-MEDIUM", "run_idx": 3, "phase": "warm", "total_duration_ms": 64420, "prompt_tokens": 22, "completion_tokens": 431, "tokens_per_sec": 6.690468798509779, "finish_reason": "stop", "reasoning_chars": 1652, "content_chars": 492, "error": null }, { "type": "call", "cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl24-ctx96k", "model_key": "ngl24-ctx96k", "ngl": 24, "ctx": 98304, "prompt_id": "P-HARD", "run_idx": 0, "phase": "cold", "total_duration_ms": 127296, "prompt_tokens": 30, "completion_tokens": 777, "tokens_per_sec": 6.1038838612368025, "finish_reason": "stop", "reasoning_chars": 1080, "content_chars": 2215, "error": null }, { "type": "call", "cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl24-ctx96k", "model_key": "ngl24-ctx96k", "ngl": 24, "ctx": 98304, "prompt_id": "P-HARD", "run_idx": 1, "phase": "warm", "total_duration_ms": 123105, "prompt_tokens": 30, "completion_tokens": 781, "tokens_per_sec": 6.344177734454328, "finish_reason": "stop", "reasoning_chars": 1163, "content_chars": 2172, "error": null }, { "type": "call", "cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl24-ctx96k", "model_key": "ngl24-ctx96k", "ngl": 24, "ctx": 98304, "prompt_id": "P-HARD", "run_idx": 2, "phase": "warm", "total_duration_ms": 74598, "prompt_tokens": 30, "completion_tokens": 490, "tokens_per_sec": 6.568540711547227, "finish_reason": "stop", "reasoning_chars": 1140, "content_chars": 974, "error": null }, { "type": "call", "cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl24-ctx96k", "model_key": "ngl24-ctx96k", "ngl": 24, "ctx": 98304, "prompt_id": "P-HARD", "run_idx": 3, "phase": "warm", "total_duration_ms": 112829, "prompt_tokens": 30, "completion_tokens": 728, "tokens_per_sec": 6.45224188816705, "finish_reason": "stop", "reasoning_chars": 2117, "content_chars": 895, "error": null } ] } ], "meta_record": { "type": "meta", "benchmark_run_id": "e08a7051-a856-4053-b34b-561d7ce5e8cf", "harness_version": "predator-a3b-ngl-ctx-2d-1", "started_at_utc": "2026-05-05T07:43:00Z", "host": "Slobodans-MacBook-Air.local", "node": "predator", "engine": "llamacpp", "purpose": "Sloba 2026-05-05: NGL=12 and NGL=24 \u00d7 ctx sweep up to 131k. Find the actual context ceiling at the two NGL sweet spots from the prior bench.", "cells": [ { "key": "ngl12-ctx64k", "ngl": 12, "ctx": 65536 }, { "key": "ngl12-ctx96k", "ngl": 12, "ctx": 98304 }, { "key": "ngl12-ctx131k", "ngl": 12, "ctx": 131072 }, { "key": "ngl24-ctx48k", "ngl": 24, "ctx": 49152 }, { "key": "ngl24-ctx64k", "ngl": 24, "ctx": 65536 }, { "key": "ngl24-ctx96k", "ngl": 24, "ctx": 98304 } ], "gguf": "Qwen3-30B-A3B-UD-IQ2_M.gguf", "max_tokens_per_prompt": { "hello": 512, "P-MEDIUM": 1024, "P-HARD": 2048 }, "methodology_notes": "Sloba 2026-05-05: 'max tokens should be bigger than reasoning budget'. Prior bench had hello max_tokens=256 < reasoning_budget=500 \u2192 content_chars=0. Bumped to 512/1024/2048 so reasoning + answer both fit." }, "packaged_at": "2026-05-06T08:04:27Z" }