{ "id": "5fb2913d-6500-4ecf-9e97-d43f7dd61145", "title": "Predator Qwen rerun", "date": "2026-05-04", "started_at": "2026-05-04T22:02:17Z", "git_sha": "3ff8bd1808a1df00426d9b447559c774e4704c39", "hardware": "predator", "hardware_label": "Predator \u00b7 gaming laptop \u00b7 GTX 1060 6 GB \u00b7 28 GB RAM", "engine": "llamacpp", "harness": "predator-a3b-1", "model_family": "qwen3", "model_sizes": [ "qwen3:30b-a3b-iq2m" ], "task_kind": "chat", "tags": [ "a3b", "iq2", "predator", "qwen3" ], "headline": "14 calls across 1 cell(s); ~3.9 tok/s mean; p50 101.9s", "janie_blurb_md": null, "janie_blurb_status": "pending", "caveat": null, "caveat_severity": null, "methodology_ref": "A3B_AND_CPU_OVERNIGHT_2026-05-05", "methodology_deviations_md": null, "results_table": [ { "label": "qwen3:30b-a3b-iq2m-think500", "cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500", "n_calls": 14, "n_errors": 0, "duration_ms_mean": 107312.08, "duration_ms_p50": 101920.0, "duration_ms_p95": 211459.2, "tokens_per_sec_mean": 3.87, "tokens_per_sec_p50": 3.99, "tokens_per_sec_p95": 4.06, "tokens_per_sec_max": 4.06, "memory_mb": null, "watts_avg": null, "notes": null } ], "cells": [ { "cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500", "n_calls": 14, "n_errors": 0, "duration_ms_mean": 107312.08, "duration_ms_p50": 101920.0, "duration_ms_p95": 211459.2, "tokens_per_sec_mean": 3.87, "tokens_per_sec_p50": 3.99, "tokens_per_sec_p95": 4.06, "tokens_per_sec_max": 4.06, "cold_warm_split": { "cold": { "n_calls": 3, "duration_ms_mean": 109544.33, "duration_ms_p50": 97117.0, "tokens_per_sec_mean": 3.47 }, "warm": { "n_calls": 9, "duration_ms_mean": 106568.0, "duration_ms_p50": 106723.0, "tokens_per_sec_mean": 4.01 } }, "per_prompt": { "unknown": { "n_calls": 2, "n_errors": 0, "duration_ms_mean": null, "duration_ms_p50": null, "tokens_per_sec_mean": null, "tokens_per_sec_p50": null }, "hello": { "n_calls": 4, "n_errors": 0, "duration_ms_mean": 18483.25, "duration_ms_p50": 16067.0, "tokens_per_sec_mean": 3.61, "tokens_per_sec_p50": 3.95 }, "P-MEDIUM": { "n_calls": 4, "n_errors": 0, "duration_ms_mean": 104743.5, "duration_ms_p50": 101920.0, "tokens_per_sec_mean": 4.02, "tokens_per_sec_p50": 4.02 }, "P-HARD": { "n_calls": 4, "n_errors": 0, "duration_ms_mean": 198709.5, "duration_ms_p50": 196861.0, "tokens_per_sec_mean": 3.99, "tokens_per_sec_p50": 3.99 } }, "chars_split": { "has_thinking": true, "reasoning_chars_mean": 1081.42, "answer_chars_mean": 835.08 }, "timeline": [ { "prompt_id": null, "run_idx": null, "phase": null, "duration_ms": null, "tokens_per_sec": null, "error": null }, { "prompt_id": null, "run_idx": null, "phase": null, "duration_ms": null, "tokens_per_sec": null, "error": null }, { "prompt_id": "hello", "run_idx": 0, "phase": "cold", "duration_ms": 26022, "tokens_per_sec": 2.459457382215049, "error": null }, { "prompt_id": "hello", "run_idx": 1, "phase": "warm", "duration_ms": 15870, "tokens_per_sec": 3.9697542533081287, "error": null }, { "prompt_id": "hello", "run_idx": 2, "phase": "warm", "duration_ms": 15777, "tokens_per_sec": 4.056537998352032, "error": null }, { "prompt_id": "hello", "run_idx": 3, "phase": "warm", "duration_ms": 16264, "tokens_per_sec": 3.935071323167733, "error": null }, { "prompt_id": "P-MEDIUM", "run_idx": 0, "phase": "cold", "duration_ms": 97117, "tokens_per_sec": 3.995181070255465, "error": null }, { "prompt_id": "P-MEDIUM", "run_idx": 1, "phase": "warm", "duration_ms": 86427, "tokens_per_sec": 4.061230865354577, "error": null }, { "prompt_id": "P-MEDIUM", "run_idx": 2, "phase": "warm", "duration_ms": 106723, "tokens_per_sec": 4.047862222763603, "error": null }, { "prompt_id": "P-MEDIUM", "run_idx": 3, "phase": "warm", "duration_ms": 128707, "tokens_per_sec": 3.9780276131057364, "error": null }, { "prompt_id": "P-HARD", "run_idx": 0, "phase": "cold", "duration_ms": 205494, "tokens_per_sec": 3.9514535704205476, "error": null }, { "prompt_id": "P-HARD", "run_idx": 1, "phase": "warm", "duration_ms": 188228, "tokens_per_sec": 4.037656459187794, "error": null }, { "prompt_id": "P-HARD", "run_idx": 2, "phase": "warm", "duration_ms": 218750, "tokens_per_sec": 4.027428571428572, "error": null }, { "prompt_id": "P-HARD", "run_idx": 3, "phase": "warm", "duration_ms": 182366, "tokens_per_sec": 3.9590713181185087, "error": null } ] } ], "n_calls_total": 14, "n_errors_total": 0, "chart_spec": { "kind": "bar", "x": "cell", "y": "tokens_per_sec_mean", "series": [ "qwen3:30b-a3b-iq2m-think500" ], "data_url": "/data/5fb2913d-6500-4ecf-9e97-d43f7dd61145/run.jsonl" }, "site_grade": "flagship", "site_grade_reason": null, "raw_data_urls": { "jsonl": "/data/5fb2913d-6500-4ecf-9e97-d43f7dd61145/run.jsonl", "log": "/data/5fb2913d-6500-4ecf-9e97-d43f7dd61145/run.log", "md": "/data/5fb2913d-6500-4ecf-9e97-d43f7dd61145/run.md", "metadata": "/data/5fb2913d-6500-4ecf-9e97-d43f7dd61145/metadata.json", "gitea_dir": "https://git.weeyuga.com/slobodanmargetic988/weeyuga-benchmarks-public/src/branch/main/runs/5fb2913d-6500-4ecf-9e97-d43f7dd61145/" }, "synthesis_doc": "A3B_AND_CPU_OVERNIGHT_2026-05-05.md", "synthesis_docs_all": [ "A3B_AND_CPU_OVERNIGHT_2026-05-05.md", "A3B_CROSS_MACHINE_2026-05-05.md", "A3B_NGL_RETUNE_2026-05-05.md", "SITE_DATA_AUDIT_AND_MIGRATION_PLAN_2026-05-06.md" ], "related_ids": [], "status": "complete", "visibility": "draft", "cells_full": [ { "cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500", "n_calls": 14, "n_errors": 0, "duration_ms_mean": 107312.08, "duration_ms_p50": 101920.0, "duration_ms_p95": 211459.2, "tokens_per_sec_mean": 3.87, "tokens_per_sec_p50": 3.99, "tokens_per_sec_p95": 4.06, "tokens_per_sec_max": 4.06, "cold_warm_split": { "cold": { "n_calls": 3, "duration_ms_mean": 109544.33, "duration_ms_p50": 97117.0, "tokens_per_sec_mean": 3.47 }, "warm": { "n_calls": 9, "duration_ms_mean": 106568.0, "duration_ms_p50": 106723.0, "tokens_per_sec_mean": 4.01 } }, "per_prompt": { "unknown": { "n_calls": 2, "n_errors": 0, "duration_ms_mean": null, "duration_ms_p50": null, "tokens_per_sec_mean": null, "tokens_per_sec_p50": null }, "hello": { "n_calls": 4, "n_errors": 0, "duration_ms_mean": 18483.25, "duration_ms_p50": 16067.0, "tokens_per_sec_mean": 3.61, "tokens_per_sec_p50": 3.95 }, "P-MEDIUM": { "n_calls": 4, "n_errors": 0, "duration_ms_mean": 104743.5, "duration_ms_p50": 101920.0, "tokens_per_sec_mean": 4.02, "tokens_per_sec_p50": 4.02 }, "P-HARD": { "n_calls": 4, "n_errors": 0, "duration_ms_mean": 198709.5, "duration_ms_p50": 196861.0, "tokens_per_sec_mean": 3.99, "tokens_per_sec_p50": 3.99 } }, "chars_split": { "has_thinking": true, "reasoning_chars_mean": 1081.42, "answer_chars_mean": 835.08 }, "timeline": [ { "prompt_id": null, "run_idx": null, "phase": null, "duration_ms": null, "tokens_per_sec": null, "error": null }, { "prompt_id": null, "run_idx": null, "phase": null, "duration_ms": null, "tokens_per_sec": null, "error": null }, { "prompt_id": "hello", "run_idx": 0, "phase": "cold", "duration_ms": 26022, "tokens_per_sec": 2.459457382215049, "error": null }, { "prompt_id": "hello", "run_idx": 1, "phase": "warm", "duration_ms": 15870, "tokens_per_sec": 3.9697542533081287, "error": null }, { "prompt_id": "hello", "run_idx": 2, "phase": "warm", "duration_ms": 15777, "tokens_per_sec": 4.056537998352032, "error": null }, { "prompt_id": "hello", "run_idx": 3, "phase": "warm", "duration_ms": 16264, "tokens_per_sec": 3.935071323167733, "error": null }, { "prompt_id": "P-MEDIUM", "run_idx": 0, "phase": "cold", "duration_ms": 97117, "tokens_per_sec": 3.995181070255465, "error": null }, { "prompt_id": "P-MEDIUM", "run_idx": 1, "phase": "warm", "duration_ms": 86427, "tokens_per_sec": 4.061230865354577, "error": null }, { "prompt_id": "P-MEDIUM", "run_idx": 2, "phase": "warm", "duration_ms": 106723, "tokens_per_sec": 4.047862222763603, "error": null }, { "prompt_id": "P-MEDIUM", "run_idx": 3, "phase": "warm", "duration_ms": 128707, "tokens_per_sec": 3.9780276131057364, "error": null }, { "prompt_id": "P-HARD", "run_idx": 0, "phase": "cold", "duration_ms": 205494, "tokens_per_sec": 3.9514535704205476, "error": null }, { "prompt_id": "P-HARD", "run_idx": 1, "phase": "warm", "duration_ms": 188228, "tokens_per_sec": 4.037656459187794, "error": null }, { "prompt_id": "P-HARD", "run_idx": 2, "phase": "warm", "duration_ms": 218750, "tokens_per_sec": 4.027428571428572, "error": null }, { "prompt_id": "P-HARD", "run_idx": 3, "phase": "warm", "duration_ms": 182366, "tokens_per_sec": 3.9590713181185087, "error": null } ], "calls": [ { "type": "vram_snapshot", "model_key": "qwen3-30b-a3b", "cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500", "raw": "5849, 181, 6144" }, { "type": "llama_bench", "model_key": "qwen3-30b-a3b", "cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500", "raw": "llama-bench.exe : ggml_cuda_init: found 1 CUDA devices (Total VRAM: 6143 MiB):\r\nAt line:1 char:1\r\n+ & 'D:\\WeeyugaTools\\llama.cpp\\llama-bench.exe' -m 'D:\\WeeyugaModels\\ll ...\r\n+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\r\n + CategoryInfo : NotSpecified: (ggml_cuda_init:...RAM: 6143 MiB)::String) [], RemoteException\r\n + FullyQualifiedErrorId : NativeCommandError\r\n \r\n Device 0: NVIDIA GeForce GTX 1060, compute capability 6.1, VMM: yes, VRAM: 6143 MiB\r\nload_backend: loaded CUDA backend from D:\\WeeyugaTools\\llama.cpp\\ggml-cuda.dll\r\nload_backend: loaded RPC backend from D:\\WeeyugaTools\\llama.cpp\\ggml-rpc.dll\r\nload_backend: loaded CPU backend from D:\\WeeyugaTools\\llama.cpp\\ggml-cpu-haswell.dll\r\n| model | size | params | backend | ngl | test | t/s |\r\n| ------------------------------ | ---------: | ---------: | ---------- | --: | --------------: | -------------------: |\r\n| qwen3moe 30B.A3B IQ2_M - 2.7 bpw | 10.11 GiB | 30.53 B | CUDA | 99 | pp256 | 39.77 \u00b1 2.45 |\r\n| qwen3moe 30B.A3B IQ2_M - 2.7 bpw | 10.11 GiB | 30.53 B | CUDA | 99 | tg64 | 3.94 \u00b1 0.05 |\r\n\r\nbuild: 665abc609 (8951)\r\n\r" }, { "type": "call", "cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500", "model_key": "qwen3-30b-a3b", "prompt_id": "hello", "run_idx": 0, "phase": "cold", "total_duration_ms": 26022, "prompt_tokens": 9, "completion_tokens": 64, "tokens_per_sec": 2.459457382215049, "finish_reason": "length", "reasoning_chars": 273, "content_chars": 0, "error": null }, { "type": "call", "cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500", "model_key": "qwen3-30b-a3b", "prompt_id": "hello", "run_idx": 1, "phase": "warm", "total_duration_ms": 15870, "prompt_tokens": 9, "completion_tokens": 63, "tokens_per_sec": 3.9697542533081287, "finish_reason": "stop", "reasoning_chars": 193, "content_chars": 37, "error": null }, { "type": "call", "cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500", "model_key": "qwen3-30b-a3b", "prompt_id": "hello", "run_idx": 2, "phase": "warm", "total_duration_ms": 15777, "prompt_tokens": 9, "completion_tokens": 64, "tokens_per_sec": 4.056537998352032, "finish_reason": "length", "reasoning_chars": 276, "content_chars": 0, "error": null }, { "type": "call", "cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500", "model_key": "qwen3-30b-a3b", "prompt_id": "hello", "run_idx": 3, "phase": "warm", "total_duration_ms": 16264, "prompt_tokens": 9, "completion_tokens": 64, "tokens_per_sec": 3.935071323167733, "finish_reason": "length", "reasoning_chars": 287, "content_chars": 0, "error": null }, { "type": "call", "cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500", "model_key": "qwen3-30b-a3b", "prompt_id": "P-MEDIUM", "run_idx": 0, "phase": "cold", "total_duration_ms": 97117, "prompt_tokens": 22, "completion_tokens": 388, "tokens_per_sec": 3.995181070255465, "finish_reason": "stop", "reasoning_chars": 1474, "content_chars": 483, "error": null }, { "type": "call", "cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500", "model_key": "qwen3-30b-a3b", "prompt_id": "P-MEDIUM", "run_idx": 1, "phase": "warm", "total_duration_ms": 86427, "prompt_tokens": 22, "completion_tokens": 351, "tokens_per_sec": 4.061230865354577, "finish_reason": "stop", "reasoning_chars": 1315, "content_chars": 434, "error": null }, { "type": "call", "cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500", "model_key": "qwen3-30b-a3b", "prompt_id": "P-MEDIUM", "run_idx": 2, "phase": "warm", "total_duration_ms": 106723, "prompt_tokens": 22, "completion_tokens": 432, "tokens_per_sec": 4.047862222763603, "finish_reason": "stop", "reasoning_chars": 1654, "content_chars": 464, "error": null }, { "type": "call", "cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500", "model_key": "qwen3-30b-a3b", "prompt_id": "P-MEDIUM", "run_idx": 3, "phase": "warm", "total_duration_ms": 128707, "prompt_tokens": 22, "completion_tokens": 512, "tokens_per_sec": 3.9780276131057364, "finish_reason": "length", "reasoning_chars": 2388, "content_chars": 32, "error": null }, { "type": "call", "cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500", "model_key": "qwen3-30b-a3b", "prompt_id": "P-HARD", "run_idx": 0, "phase": "cold", "total_duration_ms": 205494, "prompt_tokens": 30, "completion_tokens": 812, "tokens_per_sec": 3.9514535704205476, "finish_reason": "stop", "reasoning_chars": 1108, "content_chars": 2418, "error": null }, { "type": "call", "cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500", "model_key": "qwen3-30b-a3b", "prompt_id": "P-HARD", "run_idx": 1, "phase": "warm", "total_duration_ms": 188228, "prompt_tokens": 30, "completion_tokens": 760, "tokens_per_sec": 4.037656459187794, "finish_reason": "stop", "reasoning_chars": 1081, "content_chars": 2229, "error": null }, { "type": "call", "cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500", "model_key": "qwen3-30b-a3b", "prompt_id": "P-HARD", "run_idx": 2, "phase": "warm", "total_duration_ms": 218750, "prompt_tokens": 30, "completion_tokens": 881, "tokens_per_sec": 4.027428571428572, "finish_reason": "stop", "reasoning_chars": 1191, "content_chars": 2555, "error": null }, { "type": "call", "cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500", "model_key": "qwen3-30b-a3b", "prompt_id": "P-HARD", "run_idx": 3, "phase": "warm", "total_duration_ms": 182366, "prompt_tokens": 30, "completion_tokens": 722, "tokens_per_sec": 3.9590713181185087, "finish_reason": "stop", "reasoning_chars": 1737, "content_chars": 1369, "error": null } ] } ], "meta_record": { "type": "meta", "benchmark_run_id": "5fb2913d-6500-4ecf-9e97-d43f7dd61145", "harness_version": "predator-a3b-1", "started_at_utc": "2026-05-04T22:02:17Z", "host": "Slobodans-MacBook-Air.local", "node": "predator", "engine": "llamacpp", "predator_target_url": "http://10.8.0.7:11436", "purpose": "Sloba 2026-05-04: Qwen3-30B-A3B (MoE, 3B active) on GTX 1060 6GB w/ partial offload + reasoning_budget=500", "models": { "qwen3-30b-a3b": { "cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500", "gguf": "Qwen3-30B-A3B-UD-IQ2_M.gguf", "reasoning_budget": 500, "note": "30B total / 3B active MoE; 17.7GB Q4_K_M; partial CPU offload via mmap" } } }, "packaged_at": "2026-05-06T12:28:24Z" }