{ "id": "79afe366-1055-4e45-adf6-593864a530e8", "title": "predator-a3b-ngl6-ctx-1 \u2014 qwen3 on predator", "date": "2026-05-05", "started_at": "2026-05-05T09:44:34Z", "git_sha": "3ff8bd1808a1df00426d9b447559c774e4704c39", "hardware": "predator", "hardware_label": "Predator \u00b7 gaming laptop \u00b7 GTX 1060 6 GB \u00b7 28 GB RAM", "engine": "llamacpp", "harness": "predator-a3b-ngl6-ctx-1", "model_family": "qwen3", "model_sizes": [ "qwen3:30b-a3b-iq2m" ], "task_kind": "param-sweep", "tags": [ "a3b", "ctx", "iq2", "ngl", "predator", "qwen3" ], "headline": "45 calls across 3 cell(s); ~5.2 tok/s mean; p50 84.8s", "janie_blurb_md": null, "janie_blurb_status": "pending", "caveat": null, "caveat_severity": null, "methodology_ref": "A3B_AND_CPU_OVERNIGHT_2026-05-05", "methodology_deviations_md": null, "results_table": [ { "label": "qwen3:30b-a3b-iq2m-think500-ngl6-ctx64k", "cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl6-ctx64k", "n_calls": 15, "n_errors": 0, "duration_ms_mean": 81326.17, "duration_ms_p50": 73215.0, "duration_ms_p95": 152048.1, "tokens_per_sec_mean": 5.42, "tokens_per_sec_p50": 5.35, "tokens_per_sec_p95": 5.91, "tokens_per_sec_max": 5.93, "memory_mb": null, "watts_avg": null, "notes": null }, { "label": "qwen3:30b-a3b-iq2m-think500-ngl6-ctx96k", "cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl6-ctx96k", "n_calls": 15, "n_errors": 0, "duration_ms_mean": 84532.92, "duration_ms_p50": 99578.5, "duration_ms_p95": 152059.4, "tokens_per_sec_mean": 5.43, "tokens_per_sec_p50": 5.38, "tokens_per_sec_p95": 5.78, "tokens_per_sec_max": 5.94, "memory_mb": null, "watts_avg": null, "notes": null }, { "label": "qwen3:30b-a3b-iq2m-think500-ngl6-ctx131k", "cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl6-ctx131k", "n_calls": 15, "n_errors": 0, "duration_ms_mean": 91748.5, "duration_ms_p50": 98184.0, "duration_ms_p95": 161421.6, "tokens_per_sec_mean": 4.79, "tokens_per_sec_p50": 4.8, "tokens_per_sec_p95": 4.93, "tokens_per_sec_max": 
4.93, "memory_mb": null, "watts_avg": null, "notes": null } ], "cells": [ { "cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl6-ctx64k", "n_calls": 15, "n_errors": 0, "duration_ms_mean": 81326.17, "duration_ms_p50": 73215.0, "duration_ms_p95": 152048.1, "tokens_per_sec_mean": 5.42, "tokens_per_sec_p50": 5.35, "tokens_per_sec_p95": 5.91, "tokens_per_sec_max": 5.93, "cold_warm_split": { "cold": { "n_calls": 3, "duration_ms_mean": 88941.0, "duration_ms_p50": 81140.0, "tokens_per_sec_mean": 5.21 }, "warm": { "n_calls": 9, "duration_ms_mean": 78787.89, "duration_ms_p50": 70839.0, "tokens_per_sec_mean": 5.49 } }, "per_prompt": { "unknown": { "n_calls": 3, "n_errors": 0, "duration_ms_mean": null, "duration_ms_p50": null, "tokens_per_sec_mean": null, "tokens_per_sec_p50": null }, "hello": { "n_calls": 4, "n_errors": 0, "duration_ms_mean": 22599.5, "duration_ms_p50": 19202.0, "tokens_per_sec_mean": 5.54, "tokens_per_sec_p50": 5.54 }, "P-MEDIUM": { "n_calls": 4, "n_errors": 0, "duration_ms_mean": 71981.0, "duration_ms_p50": 73215.0, "tokens_per_sec_mean": 5.49, "tokens_per_sec_p50": 5.52 }, "P-HARD": { "n_calls": 4, "n_errors": 0, "duration_ms_mean": 149398.0, "duration_ms_p50": 146891.5, "tokens_per_sec_mean": 5.23, "tokens_per_sec_p50": 5.26 } }, "chars_split": { "has_thinking": true, "reasoning_chars_mean": 1030.83, "answer_chars_mean": 891.17 }, "timeline": [ { "prompt_id": null, "run_idx": null, "phase": null, "duration_ms": null, "tokens_per_sec": null, "error": null }, { "prompt_id": null, "run_idx": null, "phase": null, "duration_ms": null, "tokens_per_sec": null, "error": null }, { "prompt_id": null, "run_idx": null, "phase": null, "duration_ms": null, "tokens_per_sec": null, "error": null }, { "prompt_id": "hello", "run_idx": 0, "phase": "cold", "duration_ms": 38170, "tokens_per_sec": 5.187319884726224, "error": null }, { "prompt_id": "hello", "run_idx": 1, "phase": "warm", "duration_ms": 15973, "tokens_per_sec": 5.884930820760032, "error": null }, { 
"prompt_id": "hello", "run_idx": 2, "phase": "warm", "duration_ms": 13824, "tokens_per_sec": 5.931712962962963, "error": null }, { "prompt_id": "hello", "run_idx": 3, "phase": "warm", "duration_ms": 22431, "tokens_per_sec": 5.171414560206856, "error": null }, { "prompt_id": "P-MEDIUM", "run_idx": 0, "phase": "cold", "duration_ms": 81140, "tokens_per_sec": 5.385753019472516, "error": null }, { "prompt_id": "P-MEDIUM", "run_idx": 1, "phase": "warm", "duration_ms": 70839, "tokens_per_sec": 5.533674953062579, "error": null }, { "prompt_id": "P-MEDIUM", "run_idx": 2, "phase": "warm", "duration_ms": 60354, "tokens_per_sec": 5.550584882526428, "error": null }, { "prompt_id": "P-MEDIUM", "run_idx": 3, "phase": "warm", "duration_ms": 75591, "tokens_per_sec": 5.5033006574856795, "error": null }, { "prompt_id": "P-HARD", "run_idx": 0, "phase": "cold", "duration_ms": 147513, "tokens_per_sec": 5.063960464501434, "error": null }, { "prompt_id": "P-HARD", "run_idx": 1, "phase": "warm", "duration_ms": 157591, "tokens_per_sec": 5.254107150789068, "error": null }, { "prompt_id": "P-HARD", "run_idx": 2, "phase": "warm", "duration_ms": 146270, "tokens_per_sec": 5.271074041156765, "error": null }, { "prompt_id": "P-HARD", "run_idx": 3, "phase": "warm", "duration_ms": 146218, "tokens_per_sec": 5.313983230518815, "error": null } ] }, { "cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl6-ctx96k", "n_calls": 15, "n_errors": 0, "duration_ms_mean": 84532.92, "duration_ms_p50": 99578.5, "duration_ms_p95": 152059.4, "tokens_per_sec_mean": 5.43, "tokens_per_sec_p50": 5.38, "tokens_per_sec_p95": 5.78, "tokens_per_sec_max": 5.94, "cold_warm_split": { "cold": { "n_calls": 3, "duration_ms_mean": 92845.33, "duration_ms_p50": 110870.0, "tokens_per_sec_mean": 5.26 }, "warm": { "n_calls": 9, "duration_ms_mean": 81762.11, "duration_ms_p50": 88383.0, "tokens_per_sec_mean": 5.49 } }, "per_prompt": { "unknown": { "n_calls": 3, "n_errors": 0, "duration_ms_mean": null, "duration_ms_p50": null, 
"tokens_per_sec_mean": null, "tokens_per_sec_p50": null }, "hello": { "n_calls": 4, "n_errors": 0, "duration_ms_mean": 17637.5, "duration_ms_p50": 17538.5, "tokens_per_sec_mean": 5.59, "tokens_per_sec_p50": 5.55 }, "P-MEDIUM": { "n_calls": 4, "n_errors": 0, "duration_ms_mean": 99966.0, "duration_ms_p50": 110822.0, "tokens_per_sec_mean": 5.4, "tokens_per_sec_p50": 5.38 }, "P-HARD": { "n_calls": 4, "n_errors": 0, "duration_ms_mean": 135995.25, "duration_ms_p50": 151381.0, "tokens_per_sec_mean": 5.3, "tokens_per_sec_p50": 5.23 } }, "chars_split": { "has_thinking": true, "reasoning_chars_mean": 1265.92, "answer_chars_mean": 766.25 }, "timeline": [ { "prompt_id": null, "run_idx": null, "phase": null, "duration_ms": null, "tokens_per_sec": null, "error": null }, { "prompt_id": null, "run_idx": null, "phase": null, "duration_ms": null, "tokens_per_sec": null, "error": null }, { "prompt_id": null, "run_idx": null, "phase": null, "duration_ms": null, "tokens_per_sec": null, "error": null }, { "prompt_id": "hello", "run_idx": 0, "phase": "cold", "duration_ms": 16242, "tokens_per_sec": 5.294914419406477, "error": null }, { "prompt_id": "hello", "run_idx": 1, "phase": "warm", "duration_ms": 18234, "tokens_per_sec": 5.6487879785017, "error": null }, { "prompt_id": "hello", "run_idx": 2, "phase": "warm", "duration_ms": 19231, "tokens_per_sec": 5.45993448078623, "error": null }, { "prompt_id": "hello", "run_idx": 3, "phase": "warm", "duration_ms": 16843, "tokens_per_sec": 5.937184587068812, "error": null }, { "prompt_id": "P-MEDIUM", "run_idx": 0, "phase": "cold", "duration_ms": 110870, "tokens_per_sec": 5.276449896274916, "error": null }, { "prompt_id": "P-MEDIUM", "run_idx": 1, "phase": "warm", "duration_ms": 110774, "tokens_per_sec": 5.3712965136223305, "error": null }, { "prompt_id": "P-MEDIUM", "run_idx": 2, "phase": "warm", "duration_ms": 112101, "tokens_per_sec": 5.379077795916182, "error": null }, { "prompt_id": "P-MEDIUM", "run_idx": 3, "phase": "warm", "duration_ms": 
66119, "tokens_per_sec": 5.580846655273068, "error": null }, { "prompt_id": "P-HARD", "run_idx": 0, "phase": "cold", "duration_ms": 151424, "tokens_per_sec": 5.197326711749788, "error": null }, { "prompt_id": "P-HARD", "run_idx": 1, "phase": "warm", "duration_ms": 152836, "tokens_per_sec": 5.227825904891517, "error": null }, { "prompt_id": "P-HARD", "run_idx": 2, "phase": "warm", "duration_ms": 88383, "tokens_per_sec": 5.555366982338232, "error": null }, { "prompt_id": "P-HARD", "run_idx": 3, "phase": "warm", "duration_ms": 151338, "tokens_per_sec": 5.226711070583726, "error": null } ] }, { "cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl6-ctx131k", "n_calls": 15, "n_errors": 0, "duration_ms_mean": 91748.5, "duration_ms_p50": 98184.0, "duration_ms_p95": 161421.6, "tokens_per_sec_mean": 4.79, "tokens_per_sec_p50": 4.8, "tokens_per_sec_p95": 4.93, "tokens_per_sec_max": 4.93, "cold_warm_split": { "cold": { "n_calls": 3, "duration_ms_mean": 107527.33, "duration_ms_p50": 120915.0, "tokens_per_sec_mean": 4.69 }, "warm": { "n_calls": 9, "duration_ms_mean": 86488.89, "duration_ms_p50": 96577.0, "tokens_per_sec_mean": 4.82 } }, "per_prompt": { "unknown": { "n_calls": 3, "n_errors": 0, "duration_ms_mean": null, "duration_ms_p50": null, "tokens_per_sec_mean": null, "tokens_per_sec_p50": null }, "hello": { "n_calls": 4, "n_errors": 0, "duration_ms_mean": 34153.25, "duration_ms_p50": 37396.5, "tokens_per_sec_mean": 4.73, "tokens_per_sec_p50": 4.79 }, "P-MEDIUM": { "n_calls": 4, "n_errors": 0, "duration_ms_mean": 98701.0, "duration_ms_p50": 100326.5, "tokens_per_sec_mean": 4.82, "tokens_per_sec_p50": 4.8 }, "P-HARD": { "n_calls": 4, "n_errors": 0, "duration_ms_mean": 142391.25, "duration_ms_p50": 151248.0, "tokens_per_sec_mean": 4.8, "tokens_per_sec_p50": 4.79 } }, "chars_split": { "has_thinking": true, "reasoning_chars_mean": 1250.42, "answer_chars_mean": 714.92 }, "timeline": [ { "prompt_id": null, "run_idx": null, "phase": null, "duration_ms": null, 
"tokens_per_sec": null, "error": null }, { "prompt_id": null, "run_idx": null, "phase": null, "duration_ms": null, "tokens_per_sec": null, "error": null }, { "prompt_id": null, "run_idx": null, "phase": null, "duration_ms": null, "tokens_per_sec": null, "error": null }, { "prompt_id": "hello", "run_idx": 0, "phase": "cold", "duration_ms": 34389, "tokens_per_sec": 4.449097095001308, "error": null }, { "prompt_id": "hello", "run_idx": 1, "phase": "warm", "duration_ms": 18091, "tokens_per_sec": 4.698468851915317, "error": null }, { "prompt_id": "hello", "run_idx": 2, "phase": "warm", "duration_ms": 40404, "tokens_per_sec": 4.875754875754875, "error": null }, { "prompt_id": "hello", "run_idx": 3, "phase": "warm", "duration_ms": 43729, "tokens_per_sec": 4.9166457042237415, "error": null }, { "prompt_id": "P-MEDIUM", "run_idx": 0, "phase": "cold", "duration_ms": 120915, "tokens_per_sec": 4.920812140760038, "error": null }, { "prompt_id": "P-MEDIUM", "run_idx": 1, "phase": "warm", "duration_ms": 73236, "tokens_per_sec": 4.765415915669889, "error": null }, { "prompt_id": "P-MEDIUM", "run_idx": 2, "phase": "warm", "duration_ms": 104076, "tokens_per_sec": 4.82339828586802, "error": null }, { "prompt_id": "P-MEDIUM", "run_idx": 3, "phase": "warm", "duration_ms": 96577, "tokens_per_sec": 4.783747683195792, "error": null }, { "prompt_id": "P-HARD", "run_idx": 0, "phase": "cold", "duration_ms": 167278, "tokens_per_sec": 4.698764930235895, "error": null }, { "prompt_id": "P-HARD", "run_idx": 1, "phase": "warm", "duration_ms": 145866, "tokens_per_sec": 4.744080183181824, "error": null }, { "prompt_id": "P-HARD", "run_idx": 2, "phase": "warm", "duration_ms": 99791, "tokens_per_sec": 4.93030433606237, "error": null }, { "prompt_id": "P-HARD", "run_idx": 3, "phase": "warm", "duration_ms": 156630, "tokens_per_sec": 4.839430505011811, "error": null } ] } ], "n_calls_total": 45, "n_errors_total": 0, "chart_spec": { "kind": "line", "x": "param_value", "y": "tokens_per_sec_mean", 
"series": [ "qwen3:30b-a3b-iq2m-think500-ngl6-ctx64k", "qwen3:30b-a3b-iq2m-think500-ngl6-ctx96k", "qwen3:30b-a3b-iq2m-think500-ngl6-ctx131k" ], "data_url": "/data/79afe366-1055-4e45-adf6-593864a530e8/run.jsonl" }, "site_grade": "standard", "site_grade_reason": null, "raw_data_urls": { "jsonl": "/data/79afe366-1055-4e45-adf6-593864a530e8/run.jsonl", "log": "/data/79afe366-1055-4e45-adf6-593864a530e8/run.log", "md": null, "metadata": "/data/79afe366-1055-4e45-adf6-593864a530e8/metadata.json", "gitea_dir": "https://git.weeyuga.com/slobodanmargetic988/weeyuga-benchmarks-public/src/branch/main/runs/79afe366-1055-4e45-adf6-593864a530e8/" }, "synthesis_doc": "A3B_AND_CPU_OVERNIGHT_2026-05-05.md", "synthesis_docs_all": [ "A3B_AND_CPU_OVERNIGHT_2026-05-05.md", "SITE_DATA_AUDIT_AND_MIGRATION_PLAN_2026-05-06.md" ], "related_ids": [], "status": "complete", "visibility": "draft", "cells_full": [ { "cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl6-ctx64k", "n_calls": 15, "n_errors": 0, "duration_ms_mean": 81326.17, "duration_ms_p50": 73215.0, "duration_ms_p95": 152048.1, "tokens_per_sec_mean": 5.42, "tokens_per_sec_p50": 5.35, "tokens_per_sec_p95": 5.91, "tokens_per_sec_max": 5.93, "cold_warm_split": { "cold": { "n_calls": 3, "duration_ms_mean": 88941.0, "duration_ms_p50": 81140.0, "tokens_per_sec_mean": 5.21 }, "warm": { "n_calls": 9, "duration_ms_mean": 78787.89, "duration_ms_p50": 70839.0, "tokens_per_sec_mean": 5.49 } }, "per_prompt": { "unknown": { "n_calls": 3, "n_errors": 0, "duration_ms_mean": null, "duration_ms_p50": null, "tokens_per_sec_mean": null, "tokens_per_sec_p50": null }, "hello": { "n_calls": 4, "n_errors": 0, "duration_ms_mean": 22599.5, "duration_ms_p50": 19202.0, "tokens_per_sec_mean": 5.54, "tokens_per_sec_p50": 5.54 }, "P-MEDIUM": { "n_calls": 4, "n_errors": 0, "duration_ms_mean": 71981.0, "duration_ms_p50": 73215.0, "tokens_per_sec_mean": 5.49, "tokens_per_sec_p50": 5.52 }, "P-HARD": { "n_calls": 4, "n_errors": 0, "duration_ms_mean": 
149398.0, "duration_ms_p50": 146891.5, "tokens_per_sec_mean": 5.23, "tokens_per_sec_p50": 5.26 } }, "chars_split": { "has_thinking": true, "reasoning_chars_mean": 1030.83, "answer_chars_mean": 891.17 }, "timeline": [ { "prompt_id": null, "run_idx": null, "phase": null, "duration_ms": null, "tokens_per_sec": null, "error": null }, { "prompt_id": null, "run_idx": null, "phase": null, "duration_ms": null, "tokens_per_sec": null, "error": null }, { "prompt_id": null, "run_idx": null, "phase": null, "duration_ms": null, "tokens_per_sec": null, "error": null }, { "prompt_id": "hello", "run_idx": 0, "phase": "cold", "duration_ms": 38170, "tokens_per_sec": 5.187319884726224, "error": null }, { "prompt_id": "hello", "run_idx": 1, "phase": "warm", "duration_ms": 15973, "tokens_per_sec": 5.884930820760032, "error": null }, { "prompt_id": "hello", "run_idx": 2, "phase": "warm", "duration_ms": 13824, "tokens_per_sec": 5.931712962962963, "error": null }, { "prompt_id": "hello", "run_idx": 3, "phase": "warm", "duration_ms": 22431, "tokens_per_sec": 5.171414560206856, "error": null }, { "prompt_id": "P-MEDIUM", "run_idx": 0, "phase": "cold", "duration_ms": 81140, "tokens_per_sec": 5.385753019472516, "error": null }, { "prompt_id": "P-MEDIUM", "run_idx": 1, "phase": "warm", "duration_ms": 70839, "tokens_per_sec": 5.533674953062579, "error": null }, { "prompt_id": "P-MEDIUM", "run_idx": 2, "phase": "warm", "duration_ms": 60354, "tokens_per_sec": 5.550584882526428, "error": null }, { "prompt_id": "P-MEDIUM", "run_idx": 3, "phase": "warm", "duration_ms": 75591, "tokens_per_sec": 5.5033006574856795, "error": null }, { "prompt_id": "P-HARD", "run_idx": 0, "phase": "cold", "duration_ms": 147513, "tokens_per_sec": 5.063960464501434, "error": null }, { "prompt_id": "P-HARD", "run_idx": 1, "phase": "warm", "duration_ms": 157591, "tokens_per_sec": 5.254107150789068, "error": null }, { "prompt_id": "P-HARD", "run_idx": 2, "phase": "warm", "duration_ms": 146270, "tokens_per_sec": 
5.271074041156765, "error": null }, { "prompt_id": "P-HARD", "run_idx": 3, "phase": "warm", "duration_ms": 146218, "tokens_per_sec": 5.313983230518815, "error": null } ], "calls": [ { "type": "vram_snapshot", "model_key": "ngl6-ctx64k", "cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl6-ctx64k", "ngl": 6, "ctx": 65536, "raw": "3039, 2991, 6144" }, { "type": "ram_snapshot", "model_key": "ngl6-ctx64k", "cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl6-ctx64k", "ngl": 6, "ctx": 65536, "raw": "14325,14300,28625" }, { "type": "llama_bench", "model_key": "ngl6-ctx64k", "cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl6-ctx64k", "ngl": 6, "ctx": 65536, "raw": "llama-bench.exe : ggml_cuda_init: found 1 CUDA devices (Total VRAM: 6143 MiB):\r\nAt line:1 char:1\r\n+ & 'D:\\WeeyugaTools\\llama.cpp\\llama-bench.exe' -m 'D:\\WeeyugaModels\\ll ...\r\n+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\r\n + CategoryInfo : NotSpecified: (ggml_cuda_init:...RAM: 6143 MiB)::String) [], RemoteException\r\n + FullyQualifiedErrorId : NativeCommandError\r\n \r\n Device 0: NVIDIA GeForce GTX 1060, compute capability 6.1, VMM: yes, VRAM: 6143 MiB\r\nload_backend: loaded CUDA backend from D:\\WeeyugaTools\\llama.cpp\\ggml-cuda.dll\r\nload_backend: loaded RPC backend from D:\\WeeyugaTools\\llama.cpp\\ggml-rpc.dll\r\nload_backend: loaded CPU backend from D:\\WeeyugaTools\\llama.cpp\\ggml-cpu-haswell.dll\r\n| model | size | params | backend | ngl | test | t/s |\r\n| ------------------------------ | ---------: | ---------: | ---------- | --: | --------------: | -------------------: |\r\n| qwen3moe 30B.A3B IQ2_M - 2.7 bpw | 10.11 GiB | 30.53 B | CUDA | 6 | pp256 | 114.59 \u00b1 4.05 |\r\n| qwen3moe 30B.A3B IQ2_M - 2.7 bpw | 10.11 GiB | 30.53 B | CUDA | 6 | tg64 | 6.01 \u00b1 0.25 |\r\n\r\nbuild: 665abc609 (8951)\r\n\r" }, { "type": "call", "cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl6-ctx64k", "model_key": "ngl6-ctx64k", 
"ngl": 6, "ctx": 65536, "prompt_id": "hello", "run_idx": 0, "phase": "cold", "total_duration_ms": 38170, "prompt_tokens": 9, "completion_tokens": 198, "tokens_per_sec": 5.187319884726224, "finish_reason": "stop", "reasoning_chars": 834, "content_chars": 37, "error": null }, { "type": "call", "cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl6-ctx64k", "model_key": "ngl6-ctx64k", "ngl": 6, "ctx": 65536, "prompt_id": "hello", "run_idx": 1, "phase": "warm", "total_duration_ms": 15973, "prompt_tokens": 9, "completion_tokens": 94, "tokens_per_sec": 5.884930820760032, "finish_reason": "stop", "reasoning_chars": 346, "content_chars": 37, "error": null }, { "type": "call", "cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl6-ctx64k", "model_key": "ngl6-ctx64k", "ngl": 6, "ctx": 65536, "prompt_id": "hello", "run_idx": 2, "phase": "warm", "total_duration_ms": 13824, "prompt_tokens": 9, "completion_tokens": 82, "tokens_per_sec": 5.931712962962963, "finish_reason": "stop", "reasoning_chars": 300, "content_chars": 37, "error": null }, { "type": "call", "cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl6-ctx64k", "model_key": "ngl6-ctx64k", "ngl": 6, "ctx": 65536, "prompt_id": "hello", "run_idx": 3, "phase": "warm", "total_duration_ms": 22431, "prompt_tokens": 9, "completion_tokens": 116, "tokens_per_sec": 5.171414560206856, "finish_reason": "stop", "reasoning_chars": 449, "content_chars": 37, "error": null }, { "type": "call", "cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl6-ctx64k", "model_key": "ngl6-ctx64k", "ngl": 6, "ctx": 65536, "prompt_id": "P-MEDIUM", "run_idx": 0, "phase": "cold", "total_duration_ms": 81140, "prompt_tokens": 22, "completion_tokens": 437, "tokens_per_sec": 5.385753019472516, "finish_reason": "stop", "reasoning_chars": 1693, "content_chars": 498, "error": null }, { "type": "call", "cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl6-ctx64k", "model_key": "ngl6-ctx64k", "ngl": 6, "ctx": 65536, 
"prompt_id": "P-MEDIUM", "run_idx": 1, "phase": "warm", "total_duration_ms": 70839, "prompt_tokens": 22, "completion_tokens": 392, "tokens_per_sec": 5.533674953062579, "finish_reason": "stop", "reasoning_chars": 1555, "content_chars": 385, "error": null }, { "type": "call", "cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl6-ctx64k", "model_key": "ngl6-ctx64k", "ngl": 6, "ctx": 65536, "prompt_id": "P-MEDIUM", "run_idx": 2, "phase": "warm", "total_duration_ms": 60354, "prompt_tokens": 22, "completion_tokens": 335, "tokens_per_sec": 5.550584882526428, "finish_reason": "stop", "reasoning_chars": 1201, "content_chars": 455, "error": null }, { "type": "call", "cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl6-ctx64k", "model_key": "ngl6-ctx64k", "ngl": 6, "ctx": 65536, "prompt_id": "P-MEDIUM", "run_idx": 3, "phase": "warm", "total_duration_ms": 75591, "prompt_tokens": 22, "completion_tokens": 416, "tokens_per_sec": 5.5033006574856795, "finish_reason": "stop", "reasoning_chars": 1486, "content_chars": 473, "error": null }, { "type": "call", "cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl6-ctx64k", "model_key": "ngl6-ctx64k", "ngl": 6, "ctx": 65536, "prompt_id": "P-HARD", "run_idx": 0, "phase": "cold", "total_duration_ms": 147513, "prompt_tokens": 30, "completion_tokens": 747, "tokens_per_sec": 5.063960464501434, "finish_reason": "stop", "reasoning_chars": 1206, "content_chars": 2019, "error": null }, { "type": "call", "cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl6-ctx64k", "model_key": "ngl6-ctx64k", "ngl": 6, "ctx": 65536, "prompt_id": "P-HARD", "run_idx": 1, "phase": "warm", "total_duration_ms": 157591, "prompt_tokens": 30, "completion_tokens": 828, "tokens_per_sec": 5.254107150789068, "finish_reason": "stop", "reasoning_chars": 1237, "content_chars": 2268, "error": null }, { "type": "call", "cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl6-ctx64k", "model_key": "ngl6-ctx64k", "ngl": 6, "ctx": 65536, 
"prompt_id": "P-HARD", "run_idx": 2, "phase": "warm", "total_duration_ms": 146270, "prompt_tokens": 30, "completion_tokens": 771, "tokens_per_sec": 5.271074041156765, "finish_reason": "stop", "reasoning_chars": 1032, "content_chars": 2223, "error": null }, { "type": "call", "cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl6-ctx64k", "model_key": "ngl6-ctx64k", "ngl": 6, "ctx": 65536, "prompt_id": "P-HARD", "run_idx": 3, "phase": "warm", "total_duration_ms": 146218, "prompt_tokens": 30, "completion_tokens": 777, "tokens_per_sec": 5.313983230518815, "finish_reason": "stop", "reasoning_chars": 1031, "content_chars": 2225, "error": null } ] }, { "cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl6-ctx96k", "n_calls": 15, "n_errors": 0, "duration_ms_mean": 84532.92, "duration_ms_p50": 99578.5, "duration_ms_p95": 152059.4, "tokens_per_sec_mean": 5.43, "tokens_per_sec_p50": 5.38, "tokens_per_sec_p95": 5.78, "tokens_per_sec_max": 5.94, "cold_warm_split": { "cold": { "n_calls": 3, "duration_ms_mean": 92845.33, "duration_ms_p50": 110870.0, "tokens_per_sec_mean": 5.26 }, "warm": { "n_calls": 9, "duration_ms_mean": 81762.11, "duration_ms_p50": 88383.0, "tokens_per_sec_mean": 5.49 } }, "per_prompt": { "unknown": { "n_calls": 3, "n_errors": 0, "duration_ms_mean": null, "duration_ms_p50": null, "tokens_per_sec_mean": null, "tokens_per_sec_p50": null }, "hello": { "n_calls": 4, "n_errors": 0, "duration_ms_mean": 17637.5, "duration_ms_p50": 17538.5, "tokens_per_sec_mean": 5.59, "tokens_per_sec_p50": 5.55 }, "P-MEDIUM": { "n_calls": 4, "n_errors": 0, "duration_ms_mean": 99966.0, "duration_ms_p50": 110822.0, "tokens_per_sec_mean": 5.4, "tokens_per_sec_p50": 5.38 }, "P-HARD": { "n_calls": 4, "n_errors": 0, "duration_ms_mean": 135995.25, "duration_ms_p50": 151381.0, "tokens_per_sec_mean": 5.3, "tokens_per_sec_p50": 5.23 } }, "chars_split": { "has_thinking": true, "reasoning_chars_mean": 1265.92, "answer_chars_mean": 766.25 }, "timeline": [ { "prompt_id": null, 
"run_idx": null, "phase": null, "duration_ms": null, "tokens_per_sec": null, "error": null }, { "prompt_id": null, "run_idx": null, "phase": null, "duration_ms": null, "tokens_per_sec": null, "error": null }, { "prompt_id": null, "run_idx": null, "phase": null, "duration_ms": null, "tokens_per_sec": null, "error": null }, { "prompt_id": "hello", "run_idx": 0, "phase": "cold", "duration_ms": 16242, "tokens_per_sec": 5.294914419406477, "error": null }, { "prompt_id": "hello", "run_idx": 1, "phase": "warm", "duration_ms": 18234, "tokens_per_sec": 5.6487879785017, "error": null }, { "prompt_id": "hello", "run_idx": 2, "phase": "warm", "duration_ms": 19231, "tokens_per_sec": 5.45993448078623, "error": null }, { "prompt_id": "hello", "run_idx": 3, "phase": "warm", "duration_ms": 16843, "tokens_per_sec": 5.937184587068812, "error": null }, { "prompt_id": "P-MEDIUM", "run_idx": 0, "phase": "cold", "duration_ms": 110870, "tokens_per_sec": 5.276449896274916, "error": null }, { "prompt_id": "P-MEDIUM", "run_idx": 1, "phase": "warm", "duration_ms": 110774, "tokens_per_sec": 5.3712965136223305, "error": null }, { "prompt_id": "P-MEDIUM", "run_idx": 2, "phase": "warm", "duration_ms": 112101, "tokens_per_sec": 5.379077795916182, "error": null }, { "prompt_id": "P-MEDIUM", "run_idx": 3, "phase": "warm", "duration_ms": 66119, "tokens_per_sec": 5.580846655273068, "error": null }, { "prompt_id": "P-HARD", "run_idx": 0, "phase": "cold", "duration_ms": 151424, "tokens_per_sec": 5.197326711749788, "error": null }, { "prompt_id": "P-HARD", "run_idx": 1, "phase": "warm", "duration_ms": 152836, "tokens_per_sec": 5.227825904891517, "error": null }, { "prompt_id": "P-HARD", "run_idx": 2, "phase": "warm", "duration_ms": 88383, "tokens_per_sec": 5.555366982338232, "error": null }, { "prompt_id": "P-HARD", "run_idx": 3, "phase": "warm", "duration_ms": 151338, "tokens_per_sec": 5.226711070583726, "error": null } ], "calls": [ { "type": "vram_snapshot", "model_key": "ngl6-ctx96k", "cell_id": 
"predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl6-ctx96k", "ngl": 6, "ctx": 98304, "raw": "3526, 2504, 6144" }, { "type": "ram_snapshot", "model_key": "ngl6-ctx96k", "cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl6-ctx96k", "ngl": 6, "ctx": 98304, "raw": "17037,11588,28625" }, { "type": "llama_bench", "model_key": "ngl6-ctx96k", "cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl6-ctx96k", "ngl": 6, "ctx": 98304, "raw": "llama-bench.exe : ggml_cuda_init: found 1 CUDA devices (Total VRAM: 6143 MiB):\r\nAt line:1 char:1\r\n+ & 'D:\\WeeyugaTools\\llama.cpp\\llama-bench.exe' -m 'D:\\WeeyugaModels\\ll ...\r\n+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\r\n + CategoryInfo : NotSpecified: (ggml_cuda_init:...RAM: 6143 MiB)::String) [], RemoteException\r\n + FullyQualifiedErrorId : NativeCommandError\r\n \r\n Device 0: NVIDIA GeForce GTX 1060, compute capability 6.1, VMM: yes, VRAM: 6143 MiB\r\nload_backend: loaded CUDA backend from D:\\WeeyugaTools\\llama.cpp\\ggml-cuda.dll\r\nload_backend: loaded RPC backend from D:\\WeeyugaTools\\llama.cpp\\ggml-rpc.dll\r\nload_backend: loaded CPU backend from D:\\WeeyugaTools\\llama.cpp\\ggml-cpu-haswell.dll\r\n| model | size | params | backend | ngl | test | t/s |\r\n| ------------------------------ | ---------: | ---------: | ---------- | --: | --------------: | -------------------: |\r\n| qwen3moe 30B.A3B IQ2_M - 2.7 bpw | 10.11 GiB | 30.53 B | CUDA | 6 | pp256 | 115.02 \u00b1 3.48 |\r\n| qwen3moe 30B.A3B IQ2_M - 2.7 bpw | 10.11 GiB | 30.53 B | CUDA | 6 | tg64 | 6.00 \u00b1 0.26 |\r\n\r\nbuild: 665abc609 (8951)\r\n\r" }, { "type": "call", "cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl6-ctx96k", "model_key": "ngl6-ctx96k", "ngl": 6, "ctx": 98304, "prompt_id": "hello", "run_idx": 0, "phase": "cold", "total_duration_ms": 16242, "prompt_tokens": 9, "completion_tokens": 86, "tokens_per_sec": 5.294914419406477, "finish_reason": "stop", "reasoning_chars": 290, 
"content_chars": 37, "error": null }, { "type": "call", "cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl6-ctx96k", "model_key": "ngl6-ctx96k", "ngl": 6, "ctx": 98304, "prompt_id": "hello", "run_idx": 1, "phase": "warm", "total_duration_ms": 18234, "prompt_tokens": 9, "completion_tokens": 103, "tokens_per_sec": 5.6487879785017, "finish_reason": "stop", "reasoning_chars": 377, "content_chars": 37, "error": null }, { "type": "call", "cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl6-ctx96k", "model_key": "ngl6-ctx96k", "ngl": 6, "ctx": 98304, "prompt_id": "hello", "run_idx": 2, "phase": "warm", "total_duration_ms": 19231, "prompt_tokens": 9, "completion_tokens": 105, "tokens_per_sec": 5.45993448078623, "finish_reason": "stop", "reasoning_chars": 384, "content_chars": 37, "error": null }, { "type": "call", "cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl6-ctx96k", "model_key": "ngl6-ctx96k", "ngl": 6, "ctx": 98304, "prompt_id": "hello", "run_idx": 3, "phase": "warm", "total_duration_ms": 16843, "prompt_tokens": 9, "completion_tokens": 100, "tokens_per_sec": 5.937184587068812, "finish_reason": "stop", "reasoning_chars": 367, "content_chars": 37, "error": null }, { "type": "call", "cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl6-ctx96k", "model_key": "ngl6-ctx96k", "ngl": 6, "ctx": 98304, "prompt_id": "P-MEDIUM", "run_idx": 0, "phase": "cold", "total_duration_ms": 110870, "prompt_tokens": 22, "completion_tokens": 585, "tokens_per_sec": 5.276449896274916, "finish_reason": "stop", "reasoning_chars": 2434, "content_chars": 447, "error": null }, { "type": "call", "cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl6-ctx96k", "model_key": "ngl6-ctx96k", "ngl": 6, "ctx": 98304, "prompt_id": "P-MEDIUM", "run_idx": 1, "phase": "warm", "total_duration_ms": 110774, "prompt_tokens": 22, "completion_tokens": 595, "tokens_per_sec": 5.3712965136223305, "finish_reason": "stop", "reasoning_chars": 2056, "content_chars": 871, 
"error": null }, { "type": "call", "cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl6-ctx96k", "model_key": "ngl6-ctx96k", "ngl": 6, "ctx": 98304, "prompt_id": "P-MEDIUM", "run_idx": 2, "phase": "warm", "total_duration_ms": 112101, "prompt_tokens": 22, "completion_tokens": 603, "tokens_per_sec": 5.379077795916182, "finish_reason": "stop", "reasoning_chars": 2125, "content_chars": 810, "error": null }, { "type": "call", "cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl6-ctx96k", "model_key": "ngl6-ctx96k", "ngl": 6, "ctx": 98304, "prompt_id": "P-MEDIUM", "run_idx": 3, "phase": "warm", "total_duration_ms": 66119, "prompt_tokens": 22, "completion_tokens": 369, "tokens_per_sec": 5.580846655273068, "finish_reason": "stop", "reasoning_chars": 1364, "content_chars": 421, "error": null }, { "type": "call", "cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl6-ctx96k", "model_key": "ngl6-ctx96k", "ngl": 6, "ctx": 98304, "prompt_id": "P-HARD", "run_idx": 0, "phase": "cold", "total_duration_ms": 151424, "prompt_tokens": 30, "completion_tokens": 787, "tokens_per_sec": 5.197326711749788, "finish_reason": "stop", "reasoning_chars": 2143, "content_chars": 1193, "error": null }, { "type": "call", "cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl6-ctx96k", "model_key": "ngl6-ctx96k", "ngl": 6, "ctx": 98304, "prompt_id": "P-HARD", "run_idx": 1, "phase": "warm", "total_duration_ms": 152836, "prompt_tokens": 30, "completion_tokens": 799, "tokens_per_sec": 5.227825904891517, "finish_reason": "stop", "reasoning_chars": 1256, "content_chars": 2176, "error": null }, { "type": "call", "cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl6-ctx96k", "model_key": "ngl6-ctx96k", "ngl": 6, "ctx": 98304, "prompt_id": "P-HARD", "run_idx": 2, "phase": "warm", "total_duration_ms": 88383, "prompt_tokens": 30, "completion_tokens": 491, "tokens_per_sec": 5.555366982338232, "finish_reason": "stop", "reasoning_chars": 1180, "content_chars": 1027, 
"error": null }, { "type": "call", "cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl6-ctx96k", "model_key": "ngl6-ctx96k", "ngl": 6, "ctx": 98304, "prompt_id": "P-HARD", "run_idx": 3, "phase": "warm", "total_duration_ms": 151338, "prompt_tokens": 30, "completion_tokens": 791, "tokens_per_sec": 5.226711070583726, "finish_reason": "stop", "reasoning_chars": 1215, "content_chars": 2102, "error": null } ] }, { "cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl6-ctx131k", "n_calls": 15, "n_errors": 0, "duration_ms_mean": 91748.5, "duration_ms_p50": 98184.0, "duration_ms_p95": 161421.6, "tokens_per_sec_mean": 4.79, "tokens_per_sec_p50": 4.8, "tokens_per_sec_p95": 4.93, "tokens_per_sec_max": 4.93, "cold_warm_split": { "cold": { "n_calls": 3, "duration_ms_mean": 107527.33, "duration_ms_p50": 120915.0, "tokens_per_sec_mean": 4.69 }, "warm": { "n_calls": 9, "duration_ms_mean": 86488.89, "duration_ms_p50": 96577.0, "tokens_per_sec_mean": 4.82 } }, "per_prompt": { "unknown": { "n_calls": 3, "n_errors": 0, "duration_ms_mean": null, "duration_ms_p50": null, "tokens_per_sec_mean": null, "tokens_per_sec_p50": null }, "hello": { "n_calls": 4, "n_errors": 0, "duration_ms_mean": 34153.25, "duration_ms_p50": 37396.5, "tokens_per_sec_mean": 4.73, "tokens_per_sec_p50": 4.79 }, "P-MEDIUM": { "n_calls": 4, "n_errors": 0, "duration_ms_mean": 98701.0, "duration_ms_p50": 100326.5, "tokens_per_sec_mean": 4.82, "tokens_per_sec_p50": 4.8 }, "P-HARD": { "n_calls": 4, "n_errors": 0, "duration_ms_mean": 142391.25, "duration_ms_p50": 151248.0, "tokens_per_sec_mean": 4.8, "tokens_per_sec_p50": 4.79 } }, "chars_split": { "has_thinking": true, "reasoning_chars_mean": 1250.42, "answer_chars_mean": 714.92 }, "timeline": [ { "prompt_id": null, "run_idx": null, "phase": null, "duration_ms": null, "tokens_per_sec": null, "error": null }, { "prompt_id": null, "run_idx": null, "phase": null, "duration_ms": null, "tokens_per_sec": null, "error": null }, { "prompt_id": null, "run_idx": 
null, "phase": null, "duration_ms": null, "tokens_per_sec": null, "error": null }, { "prompt_id": "hello", "run_idx": 0, "phase": "cold", "duration_ms": 34389, "tokens_per_sec": 4.449097095001308, "error": null }, { "prompt_id": "hello", "run_idx": 1, "phase": "warm", "duration_ms": 18091, "tokens_per_sec": 4.698468851915317, "error": null }, { "prompt_id": "hello", "run_idx": 2, "phase": "warm", "duration_ms": 40404, "tokens_per_sec": 4.875754875754875, "error": null }, { "prompt_id": "hello", "run_idx": 3, "phase": "warm", "duration_ms": 43729, "tokens_per_sec": 4.9166457042237415, "error": null }, { "prompt_id": "P-MEDIUM", "run_idx": 0, "phase": "cold", "duration_ms": 120915, "tokens_per_sec": 4.920812140760038, "error": null }, { "prompt_id": "P-MEDIUM", "run_idx": 1, "phase": "warm", "duration_ms": 73236, "tokens_per_sec": 4.765415915669889, "error": null }, { "prompt_id": "P-MEDIUM", "run_idx": 2, "phase": "warm", "duration_ms": 104076, "tokens_per_sec": 4.82339828586802, "error": null }, { "prompt_id": "P-MEDIUM", "run_idx": 3, "phase": "warm", "duration_ms": 96577, "tokens_per_sec": 4.783747683195792, "error": null }, { "prompt_id": "P-HARD", "run_idx": 0, "phase": "cold", "duration_ms": 167278, "tokens_per_sec": 4.698764930235895, "error": null }, { "prompt_id": "P-HARD", "run_idx": 1, "phase": "warm", "duration_ms": 145866, "tokens_per_sec": 4.744080183181824, "error": null }, { "prompt_id": "P-HARD", "run_idx": 2, "phase": "warm", "duration_ms": 99791, "tokens_per_sec": 4.93030433606237, "error": null }, { "prompt_id": "P-HARD", "run_idx": 3, "phase": "warm", "duration_ms": 156630, "tokens_per_sec": 4.839430505011811, "error": null } ], "calls": [ { "type": "vram_snapshot", "model_key": "ngl6-ctx131k", "cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl6-ctx131k", "ngl": 6, "ctx": 131072, "raw": "3997, 2033, 6144" }, { "type": "ram_snapshot", "model_key": "ngl6-ctx131k", "cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl6-ctx131k", 
"ngl": 6, "ctx": 131072, "raw": "19867,8758,28625" }, { "type": "llama_bench", "model_key": "ngl6-ctx131k", "cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl6-ctx131k", "ngl": 6, "ctx": 131072, "raw": "llama-bench.exe : ggml_cuda_init: found 1 CUDA devices (Total VRAM: 6143 MiB):\r\nAt line:1 char:1\r\n+ & 'D:\\WeeyugaTools\\llama.cpp\\llama-bench.exe' -m 'D:\\WeeyugaModels\\ll ...\r\n+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\r\n + CategoryInfo : NotSpecified: (ggml_cuda_init:...RAM: 6143 MiB)::String) [], RemoteException\r\n + FullyQualifiedErrorId : NativeCommandError\r\n \r\n Device 0: NVIDIA GeForce GTX 1060, compute capability 6.1, VMM: yes, VRAM: 6143 MiB\r\nload_backend: loaded CUDA backend from D:\\WeeyugaTools\\llama.cpp\\ggml-cuda.dll\r\nload_backend: loaded RPC backend from D:\\WeeyugaTools\\llama.cpp\\ggml-rpc.dll\r\nload_backend: loaded CPU backend from D:\\WeeyugaTools\\llama.cpp\\ggml-cpu-haswell.dll\r\n| model | size | params | backend | ngl | test | t/s |\r\n| ------------------------------ | ---------: | ---------: | ---------- | --: | --------------: | -------------------: |\r\n| qwen3moe 30B.A3B IQ2_M - 2.7 bpw | 10.11 GiB | 30.53 B | CUDA | 6 | pp256 | 113.80 \u00b1 3.44 |\r\n| qwen3moe 30B.A3B IQ2_M - 2.7 bpw | 10.11 GiB | 30.53 B | CUDA | 6 | tg64 | 4.96 \u00b1 0.46 |\r\n\r\nbuild: 665abc609 (8951)\r\n\r" }, { "type": "call", "cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl6-ctx131k", "model_key": "ngl6-ctx131k", "ngl": 6, "ctx": 131072, "prompt_id": "hello", "run_idx": 0, "phase": "cold", "total_duration_ms": 34389, "prompt_tokens": 9, "completion_tokens": 153, "tokens_per_sec": 4.449097095001308, "finish_reason": "stop", "reasoning_chars": 595, "content_chars": 37, "error": null }, { "type": "call", "cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl6-ctx131k", "model_key": "ngl6-ctx131k", "ngl": 6, "ctx": 131072, "prompt_id": "hello", "run_idx": 1, "phase": "warm", 
"total_duration_ms": 18091, "prompt_tokens": 9, "completion_tokens": 85, "tokens_per_sec": 4.698468851915317, "finish_reason": "stop", "reasoning_chars": 235, "content_chars": 113, "error": null }, { "type": "call", "cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl6-ctx131k", "model_key": "ngl6-ctx131k", "ngl": 6, "ctx": 131072, "prompt_id": "hello", "run_idx": 2, "phase": "warm", "total_duration_ms": 40404, "prompt_tokens": 9, "completion_tokens": 197, "tokens_per_sec": 4.875754875754875, "finish_reason": "stop", "reasoning_chars": 796, "content_chars": 37, "error": null }, { "type": "call", "cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl6-ctx131k", "model_key": "ngl6-ctx131k", "ngl": 6, "ctx": 131072, "prompt_id": "hello", "run_idx": 3, "phase": "warm", "total_duration_ms": 43729, "prompt_tokens": 9, "completion_tokens": 215, "tokens_per_sec": 4.9166457042237415, "finish_reason": "stop", "reasoning_chars": 859, "content_chars": 37, "error": null }, { "type": "call", "cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl6-ctx131k", "model_key": "ngl6-ctx131k", "ngl": 6, "ctx": 131072, "prompt_id": "P-MEDIUM", "run_idx": 0, "phase": "cold", "total_duration_ms": 120915, "prompt_tokens": 22, "completion_tokens": 595, "tokens_per_sec": 4.920812140760038, "finish_reason": "stop", "reasoning_chars": 1981, "content_chars": 893, "error": null }, { "type": "call", "cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl6-ctx131k", "model_key": "ngl6-ctx131k", "ngl": 6, "ctx": 131072, "prompt_id": "P-MEDIUM", "run_idx": 1, "phase": "warm", "total_duration_ms": 73236, "prompt_tokens": 22, "completion_tokens": 349, "tokens_per_sec": 4.765415915669889, "finish_reason": "stop", "reasoning_chars": 1233, "content_chars": 463, "error": null }, { "type": "call", "cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl6-ctx131k", "model_key": "ngl6-ctx131k", "ngl": 6, "ctx": 131072, "prompt_id": "P-MEDIUM", "run_idx": 2, "phase": "warm", 
"total_duration_ms": 104076, "prompt_tokens": 22, "completion_tokens": 502, "tokens_per_sec": 4.82339828586802, "finish_reason": "stop", "reasoning_chars": 1965, "content_chars": 455, "error": null }, { "type": "call", "cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl6-ctx131k", "model_key": "ngl6-ctx131k", "ngl": 6, "ctx": 131072, "prompt_id": "P-MEDIUM", "run_idx": 3, "phase": "warm", "total_duration_ms": 96577, "prompt_tokens": 22, "completion_tokens": 462, "tokens_per_sec": 4.783747683195792, "finish_reason": "stop", "reasoning_chars": 1771, "content_chars": 513, "error": null }, { "type": "call", "cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl6-ctx131k", "model_key": "ngl6-ctx131k", "ngl": 6, "ctx": 131072, "prompt_id": "P-HARD", "run_idx": 0, "phase": "cold", "total_duration_ms": 167278, "prompt_tokens": 30, "completion_tokens": 786, "tokens_per_sec": 4.698764930235895, "finish_reason": "stop", "reasoning_chars": 1176, "content_chars": 2193, "error": null }, { "type": "call", "cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl6-ctx131k", "model_key": "ngl6-ctx131k", "ngl": 6, "ctx": 131072, "prompt_id": "P-HARD", "run_idx": 1, "phase": "warm", "total_duration_ms": 145866, "prompt_tokens": 30, "completion_tokens": 692, "tokens_per_sec": 4.744080183181824, "finish_reason": "stop", "reasoning_chars": 2123, "content_chars": 790, "error": null }, { "type": "call", "cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl6-ctx131k", "model_key": "ngl6-ctx131k", "ngl": 6, "ctx": 131072, "prompt_id": "P-HARD", "run_idx": 2, "phase": "warm", "total_duration_ms": 99791, "prompt_tokens": 30, "completion_tokens": 492, "tokens_per_sec": 4.93030433606237, "finish_reason": "stop", "reasoning_chars": 1133, "content_chars": 946, "error": null }, { "type": "call", "cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl6-ctx131k", "model_key": "ngl6-ctx131k", "ngl": 6, "ctx": 131072, "prompt_id": "P-HARD", "run_idx": 3, "phase": 
"warm", "total_duration_ms": 156630, "prompt_tokens": 30, "completion_tokens": 758, "tokens_per_sec": 4.839430505011811, "finish_reason": "stop", "reasoning_chars": 1138, "content_chars": 2102, "error": null } ] } ], "meta_record": { "type": "meta", "benchmark_run_id": "79afe366-1055-4e45-adf6-593864a530e8", "harness_version": "predator-a3b-ngl6-ctx-1", "started_at_utc": "2026-05-05T09:44:34Z", "host": "Slobodans-MacBook-Air.local", "node": "predator", "engine": "llamacpp", "purpose": "Sloba 2026-05-05: NGL=6 large-context rerun with new max_tokens methodology. 'no need to run small context just do 64 96 131'.", "cells": [ { "key": "ngl6-ctx64k", "ngl": 6, "ctx": 65536 }, { "key": "ngl6-ctx96k", "ngl": 6, "ctx": 98304 }, { "key": "ngl6-ctx131k", "ngl": 6, "ctx": 131072 } ], "gguf": "Qwen3-30B-A3B-UD-IQ2_M.gguf", "max_tokens_per_prompt": { "hello": 512, "P-MEDIUM": 1024, "P-HARD": 2048 } }, "packaged_at": "2026-05-06T12:28:24Z" }