{ "id": "23066b38-ea9c-4dd3-b2f5-32912a67fce4", "title": "Predator Qwen rerun \u2014 23066b38-ea9c-4dd3-b2f5-32912a67fce4", "date": "2026-05-04", "hardware": "pavilion", "engine": "llamacpp", "harness": "pavilion-a3b-1", "model_family": "qwen3", "model_sizes": [ "qwen3:30b-a3b-iq2xxs" ], "task_kind": null, "headline": "`Answer chars` is `len(message.content)`. Both averaged over the 3 warm runs.", "summary_md_path": "runs/23066b38-ea9c-4dd3-b2f5-32912a67fce4.md", "raw_jsonl_path": "runs/23066b38-ea9c-4dd3-b2f5-32912a67fce4.jsonl", "log_path": "runs/23066b38-ea9c-4dd3-b2f5-32912a67fce4.log", "synthesis_doc": "A3B_AND_CPU_OVERNIGHT_2026-05-05.md", "cells": [ { "cell_id": "pavilion:llamacpp:qwen3:30b-a3b-iq2xxs-think500", "n_calls": 14, "n_errors": 0, "duration_ms_mean": 72439.92, "duration_ms_p50": 57784.0, "tokens_per_sec_mean": 5.88, "tokens_per_sec_max": 8.19 } ], "n_calls_total": 14, "n_errors_total": 0, "started_at": "2026-05-04T22:11:43Z", "tags": [ "a3b", "iq2", "pavilion", "qwen3" ], "status": "complete", "visibility": "draft", "cells_full": [ { "cell_id": "pavilion:llamacpp:qwen3:30b-a3b-iq2xxs-think500", "n_calls": 14, "n_errors": 0, "duration_ms_mean": 72439.92, "duration_ms_p50": 57784.0, "tokens_per_sec_mean": 5.88, "tokens_per_sec_max": 8.19, "calls": [ { "type": "vram_snapshot", "model_key": "qwen3-30b-a3b-pavilion", "cell_id": "pavilion:llamacpp:qwen3:30b-a3b-iq2xxs-think500", "raw": "3842, 163, 4096" }, { "type": "llama_bench", "model_key": "qwen3-30b-a3b-pavilion", "cell_id": "pavilion:llamacpp:qwen3:30b-a3b-iq2xxs-think500", "raw": "llama-bench.exe : ggml_cuda_init: found 1 CUDA devices (Total VRAM: 4095 MiB):\r\nAt line:1 char:1\r\n+ & 'D:\\WeeyugaTools\\llama.cpp\\llama-bench.exe' -m 'D:\\WeeyugaModels\\ll ...\r\n+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\r\n + CategoryInfo : NotSpecified: (ggml_cuda_init:...RAM: 4095 MiB)::String) [], RemoteException\r\n + FullyQualifiedErrorId : NativeCommandError\r\n \r\n Device 0: NVIDIA GeForce GTX 1050, compute capability 6.1, VMM: yes, VRAM: 4095 MiB\r\nload_backend: loaded CUDA backend from D:\\WeeyugaTools\\llama.cpp\\ggml-cuda.dll\r\nload_backend: loaded RPC backend from D:\\WeeyugaTools\\llama.cpp\\ggml-rpc.dll\r\nload_backend: loaded CPU backend from D:\\WeeyugaTools\\llama.cpp\\ggml-cpu-haswell.dll\r\n| model | size | params | backend | ngl | test | t/s |\r\n| ------------------------------ | ---------: | ---------: | ---------- | --: | --------------: | -------------------: |\r\n| qwen3moe 30B.A3B IQ2_XXS - 2.0625 bpw | 9.65 GiB | 30.53 B | CUDA | 12 | pp256 | 6.27 \u00b1 2.87 |\r\n| qwen3moe 30B.A3B IQ2_XXS - 2.0625 bpw | 9.65 GiB | 30.53 B | CUDA | 12 | tg64 | 4.90 \u00b1 0.67 |\r\n\r\nbuild: 665abc609 (8951)\r\n\r" }, { "type": "call", "cell_id": "pavilion:llamacpp:qwen3:30b-a3b-iq2xxs-think500", "model_key": "qwen3-30b-a3b-pavilion", "prompt_id": "hello", "run_idx": 0, "phase": "cold", "total_duration_ms": 226760, "prompt_tokens": 9, "completion_tokens": 64, "tokens_per_sec": 0.28223672605397776, "finish_reason": "length", "reasoning_chars": 273, "content_chars": 0, "error": null }, { "type": "call", "cell_id": "pavilion:llamacpp:qwen3:30b-a3b-iq2xxs-think500", "model_key": "qwen3-30b-a3b-pavilion", "prompt_id": "hello", "run_idx": 1, "phase": "warm", "total_duration_ms": 15935, "prompt_tokens": 9, "completion_tokens": 64, "tokens_per_sec": 4.016316284907436, "finish_reason": "length", "reasoning_chars": 273, "content_chars": 0, "error": null }, { "type": "call", "cell_id": "pavilion:llamacpp:qwen3:30b-a3b-iq2xxs-think500", "model_key": "qwen3-30b-a3b-pavilion", "prompt_id": "hello", "run_idx": 2, "phase": "warm", "total_duration_ms": 16991, "prompt_tokens": 9, "completion_tokens": 64, "tokens_per_sec": 3.7667000176564063, "finish_reason": "length", "reasoning_chars": 266, "content_chars": 0, "error": null }, { "type": "call", "cell_id": "pavilion:llamacpp:qwen3:30b-a3b-iq2xxs-think500", "model_key": "qwen3-30b-a3b-pavilion", "prompt_id": "hello", "run_idx": 3, "phase": "warm", "total_duration_ms": 11898, "prompt_tokens": 9, "completion_tokens": 64, "tokens_per_sec": 5.379055303412338, "finish_reason": "length", "reasoning_chars": 283, "content_chars": 0, "error": null }, { "type": "call", "cell_id": "pavilion:llamacpp:qwen3:30b-a3b-iq2xxs-think500", "model_key": "qwen3-30b-a3b-pavilion", "prompt_id": "P-MEDIUM", "run_idx": 0, "phase": "cold", "total_duration_ms": 132262, "prompt_tokens": 22, "completion_tokens": 461, "tokens_per_sec": 3.4855060410397543, "finish_reason": "stop", "reasoning_chars": 1700, "content_chars": 607, "error": null }, { "type": "call", "cell_id": "pavilion:llamacpp:qwen3:30b-a3b-iq2xxs-think500", "model_key": "qwen3-30b-a3b-pavilion", "prompt_id": "P-MEDIUM", "run_idx": 1, "phase": "warm", "total_duration_ms": 61800, "prompt_tokens": 22, "completion_tokens": 459, "tokens_per_sec": 7.427184466019418, "finish_reason": "stop", "reasoning_chars": 1693, "content_chars": 460, "error": null }, { "type": "call", "cell_id": "pavilion:llamacpp:qwen3:30b-a3b-iq2xxs-think500", "model_key": "qwen3-30b-a3b-pavilion", "prompt_id": "P-MEDIUM", "run_idx": 2, "phase": "warm", "total_duration_ms": 49471, "prompt_tokens": 22, "completion_tokens": 405, "tokens_per_sec": 8.186614380141902, "finish_reason": "stop", "reasoning_chars": 1552, "content_chars": 460, "error": null }, { "type": "call", "cell_id": "pavilion:llamacpp:qwen3:30b-a3b-iq2xxs-think500", "model_key": "qwen3-30b-a3b-pavilion", "prompt_id": "P-MEDIUM", "run_idx": 3, "phase": "warm", "total_duration_ms": 44543, "prompt_tokens": 22, "completion_tokens": 363, "tokens_per_sec": 8.149428641986395, "finish_reason": "stop", "reasoning_chars": 1344, "content_chars": 513, "error": null }, { "type": "call", "cell_id": "pavilion:llamacpp:qwen3:30b-a3b-iq2xxs-think500", "model_key": "qwen3-30b-a3b-pavilion", "prompt_id": "P-HARD", "run_idx": 0, "phase": "cold", "total_duration_ms": 125661, "prompt_tokens": 30, "completion_tokens": 742, "tokens_per_sec": 5.904775546907951, "finish_reason": "stop", "reasoning_chars": 1254, "content_chars": 1825, "error": null }, { "type": "call", "cell_id": "pavilion:llamacpp:qwen3:30b-a3b-iq2xxs-think500", "model_key": "qwen3-30b-a3b-pavilion", "prompt_id": "P-HARD", "run_idx": 1, "phase": "warm", "total_duration_ms": 68390, "prompt_tokens": 30, "completion_tokens": 547, "tokens_per_sec": 7.998245357508408, "finish_reason": "stop", "reasoning_chars": 1204, "content_chars": 1099, "error": null }, { "type": "call", "cell_id": "pavilion:llamacpp:qwen3:30b-a3b-iq2xxs-think500", "model_key": "qwen3-30b-a3b-pavilion", "prompt_id": "P-HARD", "run_idx": 2, "phase": "warm", "total_duration_ms": 56872, "prompt_tokens": 30, "completion_tokens": 456, "tokens_per_sec": 8.018005345336897, "finish_reason": "stop", "reasoning_chars": 898, "content_chars": 1099, "error": null }, { "type": "call", "cell_id": "pavilion:llamacpp:qwen3:30b-a3b-iq2xxs-think500", "model_key": "qwen3-30b-a3b-pavilion", "prompt_id": "P-HARD", "run_idx": 3, "phase": "warm", "total_duration_ms": 58696, "prompt_tokens": 30, "completion_tokens": 467, "tokens_per_sec": 7.956249148153196, "finish_reason": "stop", "reasoning_chars": 956, "content_chars": 983, "error": null } ] } ], "meta_record": { "type": "meta", "benchmark_run_id": "23066b38-ea9c-4dd3-b2f5-32912a67fce4", "harness_version": "pavilion-a3b-1", "started_at_utc": "2026-05-04T22:11:43Z", "host": "Slobodans-MacBook-Air.local", "node": "pavilion", "engine": "llamacpp", "pavilion_target_url": "http://10.8.0.3:11437", "purpose": "Sloba 2026-05-04: Qwen3-30B-A3B (MoE) on weaker Pavilion hardware (GTX 1050 4GB / 16GB RAM) for cross-machine A3B comparison", "models": { "qwen3-30b-a3b-pavilion": { "cell_id": "pavilion:llamacpp:qwen3:30b-a3b-iq2xxs-think500", "gguf": "Qwen3-30B-A3B-UD-IQ2_XXS.gguf", "reasoning_budget": 500, "ngl": 12, "ctx": 2048, "note": "30B total / 3B active MoE; 9.9GB IQ2_XXS; partial CPU offload heavy" } } }, "packaged_at": "2026-05-05T17:43:34Z" }