{ "id": "23066b38-ea9c-4dd3-b2f5-32912a67fce4", "title": "Predator Qwen rerun", "date": "2026-05-04", "started_at": "2026-05-04T22:11:43Z", "git_sha": "f06800254dfd046e154c94f663fb7f9f66c79ae3", "hardware": "pavilion", "hardware_label": "Pavilion \u00b7 HP laptop \u00b7 GTX 1050 4 GB \u00b7 16 GB RAM \u00b7 i7-9750H", "engine": "llamacpp", "harness": "pavilion-a3b-1", "model_family": "qwen3", "model_sizes": [ "qwen3:30b-a3b-iq2xxs" ], "task_kind": "chat", "tags": [ "a3b", "iq2", "pavilion", "qwen3" ], "headline": "14 calls across 1 cell(s); ~5.9 tok/s mean; p50 57.8s", "janie_blurb_md": null, "janie_blurb_status": "pending", "caveat": null, "caveat_severity": null, "methodology_ref": "A3B_AND_CPU_OVERNIGHT_2026-05-05", "methodology_deviations_md": null, "results_table": [ { "label": "qwen3:30b-a3b-iq2xxs-think500", "cell_id": "pavilion:llamacpp:qwen3:30b-a3b-iq2xxs-think500", "n_calls": 14, "n_errors": 0, "duration_ms_mean": 72439.92, "duration_ms_p50": 57784.0, "duration_ms_p95": 174786.1, "tokens_per_sec_mean": 5.88, "tokens_per_sec_p50": 6.67, "tokens_per_sec_p95": 8.17, "tokens_per_sec_max": 8.19, "memory_mb": null, "watts_avg": null, "notes": null } ], "cells": [ { "cell_id": "pavilion:llamacpp:qwen3:30b-a3b-iq2xxs-think500", "n_calls": 14, "n_errors": 0, "duration_ms_mean": 72439.92, "duration_ms_p50": 57784.0, "duration_ms_p95": 174786.1, "tokens_per_sec_mean": 5.88, "tokens_per_sec_p50": 6.67, "tokens_per_sec_p95": 8.17, "tokens_per_sec_max": 8.19 } ], "n_calls_total": 14, "n_errors_total": 0, "chart_spec": { "kind": "bar", "x": "cell", "y": "tokens_per_sec_mean", "series": [ "qwen3:30b-a3b-iq2xxs-think500" ], "data_url": "/data/23066b38-ea9c-4dd3-b2f5-32912a67fce4/run.jsonl" }, "site_grade": "flagship", "site_grade_reason": null, "raw_data_urls": { "jsonl": "/data/23066b38-ea9c-4dd3-b2f5-32912a67fce4/run.jsonl", "log": "/data/23066b38-ea9c-4dd3-b2f5-32912a67fce4/run.log", "md": "/data/23066b38-ea9c-4dd3-b2f5-32912a67fce4/run.md", "metadata": "/data/23066b38-ea9c-4dd3-b2f5-32912a67fce4/metadata.json", "gitea_dir": "https://git.weeyuga.com/slobodanmargetic988/weeyuga-benchmarks-public/src/branch/main/runs/23066b38-ea9c-4dd3-b2f5-32912a67fce4/" }, "synthesis_doc": "A3B_AND_CPU_OVERNIGHT_2026-05-05.md", "synthesis_docs_all": [ "A3B_AND_CPU_OVERNIGHT_2026-05-05.md", "A3B_CROSS_MACHINE_2026-05-05.md" ], "related_ids": [], "status": "complete", "visibility": "draft", "cells_full": [ { "cell_id": "pavilion:llamacpp:qwen3:30b-a3b-iq2xxs-think500", "n_calls": 14, "n_errors": 0, "duration_ms_mean": 72439.92, "duration_ms_p50": 57784.0, "duration_ms_p95": 174786.1, "tokens_per_sec_mean": 5.88, "tokens_per_sec_p50": 6.67, "tokens_per_sec_p95": 8.17, "tokens_per_sec_max": 8.19, "calls": [ { "type": "vram_snapshot", "model_key": "qwen3-30b-a3b-pavilion", "cell_id": "pavilion:llamacpp:qwen3:30b-a3b-iq2xxs-think500", "raw": "3842, 163, 4096" }, { "type": "llama_bench", "model_key": "qwen3-30b-a3b-pavilion", "cell_id": "pavilion:llamacpp:qwen3:30b-a3b-iq2xxs-think500", "raw": "llama-bench.exe : ggml_cuda_init: found 1 CUDA devices (Total VRAM: 4095 MiB):\r\nAt line:1 char:1\r\n+ & 'D:\\WeeyugaTools\\llama.cpp\\llama-bench.exe' -m 'D:\\WeeyugaModels\\ll ...\r\n+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\r\n + CategoryInfo : NotSpecified: (ggml_cuda_init:...RAM: 4095 MiB)::String) [], RemoteException\r\n + FullyQualifiedErrorId : NativeCommandError\r\n \r\n Device 0: NVIDIA GeForce GTX 1050, compute capability 6.1, VMM: yes, VRAM: 4095 MiB\r\nload_backend: loaded CUDA backend from D:\\WeeyugaTools\\llama.cpp\\ggml-cuda.dll\r\nload_backend: loaded RPC backend from D:\\WeeyugaTools\\llama.cpp\\ggml-rpc.dll\r\nload_backend: loaded CPU backend from D:\\WeeyugaTools\\llama.cpp\\ggml-cpu-haswell.dll\r\n| model | size | params | backend | ngl | test | t/s |\r\n| ------------------------------ | ---------: | ---------: | ---------- | --: | --------------: | -------------------: |\r\n| qwen3moe 30B.A3B IQ2_XXS - 2.0625 bpw | 9.65 GiB | 30.53 B | CUDA | 12 | pp256 | 6.27 \u00b1 2.87 |\r\n| qwen3moe 30B.A3B IQ2_XXS - 2.0625 bpw | 9.65 GiB | 30.53 B | CUDA | 12 | tg64 | 4.90 \u00b1 0.67 |\r\n\r\nbuild: 665abc609 (8951)\r\n\r" }, { "type": "call", "cell_id": "pavilion:llamacpp:qwen3:30b-a3b-iq2xxs-think500", "model_key": "qwen3-30b-a3b-pavilion", "prompt_id": "hello", "run_idx": 0, "phase": "cold", "total_duration_ms": 226760, "prompt_tokens": 9, "completion_tokens": 64, "tokens_per_sec": 0.28223672605397776, "finish_reason": "length", "reasoning_chars": 273, "content_chars": 0, "error": null }, { "type": "call", "cell_id": "pavilion:llamacpp:qwen3:30b-a3b-iq2xxs-think500", "model_key": "qwen3-30b-a3b-pavilion", "prompt_id": "hello", "run_idx": 1, "phase": "warm", "total_duration_ms": 15935, "prompt_tokens": 9, "completion_tokens": 64, "tokens_per_sec": 4.016316284907436, "finish_reason": "length", "reasoning_chars": 273, "content_chars": 0, "error": null }, { "type": "call", "cell_id": "pavilion:llamacpp:qwen3:30b-a3b-iq2xxs-think500", "model_key": "qwen3-30b-a3b-pavilion", "prompt_id": "hello", "run_idx": 2, "phase": "warm", "total_duration_ms": 16991, "prompt_tokens": 9, "completion_tokens": 64, "tokens_per_sec": 3.7667000176564063, "finish_reason": "length", "reasoning_chars": 266, "content_chars": 0, "error": null }, { "type": "call", "cell_id": "pavilion:llamacpp:qwen3:30b-a3b-iq2xxs-think500", "model_key": "qwen3-30b-a3b-pavilion", "prompt_id": "hello", "run_idx": 3, "phase": "warm", "total_duration_ms": 11898, "prompt_tokens": 9, "completion_tokens": 64, "tokens_per_sec": 5.379055303412338, "finish_reason": "length", "reasoning_chars": 283, "content_chars": 0, "error": null }, { "type": "call", "cell_id": "pavilion:llamacpp:qwen3:30b-a3b-iq2xxs-think500", "model_key": "qwen3-30b-a3b-pavilion", "prompt_id": "P-MEDIUM", "run_idx": 0, "phase": "cold", "total_duration_ms": 132262, "prompt_tokens": 22, "completion_tokens": 461, "tokens_per_sec": 3.4855060410397543, "finish_reason": "stop", "reasoning_chars": 1700, "content_chars": 607, "error": null }, { "type": "call", "cell_id": "pavilion:llamacpp:qwen3:30b-a3b-iq2xxs-think500", "model_key": "qwen3-30b-a3b-pavilion", "prompt_id": "P-MEDIUM", "run_idx": 1, "phase": "warm", "total_duration_ms": 61800, "prompt_tokens": 22, "completion_tokens": 459, "tokens_per_sec": 7.427184466019418, "finish_reason": "stop", "reasoning_chars": 1693, "content_chars": 460, "error": null }, { "type": "call", "cell_id": "pavilion:llamacpp:qwen3:30b-a3b-iq2xxs-think500", "model_key": "qwen3-30b-a3b-pavilion", "prompt_id": "P-MEDIUM", "run_idx": 2, "phase": "warm", "total_duration_ms": 49471, "prompt_tokens": 22, "completion_tokens": 405, "tokens_per_sec": 8.186614380141902, "finish_reason": "stop", "reasoning_chars": 1552, "content_chars": 460, "error": null }, { "type": "call", "cell_id": "pavilion:llamacpp:qwen3:30b-a3b-iq2xxs-think500", "model_key": "qwen3-30b-a3b-pavilion", "prompt_id": "P-MEDIUM", "run_idx": 3, "phase": "warm", "total_duration_ms": 44543, "prompt_tokens": 22, "completion_tokens": 363, "tokens_per_sec": 8.149428641986395, "finish_reason": "stop", "reasoning_chars": 1344, "content_chars": 513, "error": null }, { "type": "call", "cell_id": "pavilion:llamacpp:qwen3:30b-a3b-iq2xxs-think500", "model_key": "qwen3-30b-a3b-pavilion", "prompt_id": "P-HARD", "run_idx": 0, "phase": "cold", "total_duration_ms": 125661, "prompt_tokens": 30, "completion_tokens": 742, "tokens_per_sec": 5.904775546907951, "finish_reason": "stop", "reasoning_chars": 1254, "content_chars": 1825, "error": null }, { "type": "call", "cell_id": "pavilion:llamacpp:qwen3:30b-a3b-iq2xxs-think500", "model_key": "qwen3-30b-a3b-pavilion", "prompt_id": "P-HARD", "run_idx": 1, "phase": "warm", "total_duration_ms": 68390, "prompt_tokens": 30, "completion_tokens": 547, "tokens_per_sec": 7.998245357508408, "finish_reason": "stop", "reasoning_chars": 1204, "content_chars": 1099, "error": null }, { "type": "call", "cell_id": "pavilion:llamacpp:qwen3:30b-a3b-iq2xxs-think500", "model_key": "qwen3-30b-a3b-pavilion", "prompt_id": "P-HARD", "run_idx": 2, "phase": "warm", "total_duration_ms": 56872, "prompt_tokens": 30, "completion_tokens": 456, "tokens_per_sec": 8.018005345336897, "finish_reason": "stop", "reasoning_chars": 898, "content_chars": 1099, "error": null }, { "type": "call", "cell_id": "pavilion:llamacpp:qwen3:30b-a3b-iq2xxs-think500", "model_key": "qwen3-30b-a3b-pavilion", "prompt_id": "P-HARD", "run_idx": 3, "phase": "warm", "total_duration_ms": 58696, "prompt_tokens": 30, "completion_tokens": 467, "tokens_per_sec": 7.956249148153196, "finish_reason": "stop", "reasoning_chars": 956, "content_chars": 983, "error": null } ] } ], "meta_record": { "type": "meta", "benchmark_run_id": "23066b38-ea9c-4dd3-b2f5-32912a67fce4", "harness_version": "pavilion-a3b-1", "started_at_utc": "2026-05-04T22:11:43Z", "host": "Slobodans-MacBook-Air.local", "node": "pavilion", "engine": "llamacpp", "pavilion_target_url": "http://10.8.0.3:11437", "purpose": "Sloba 2026-05-04: Qwen3-30B-A3B (MoE) on weaker Pavilion hardware (GTX 1050 4GB / 16GB RAM) for cross-machine A3B comparison", "models": { "qwen3-30b-a3b-pavilion": { "cell_id": "pavilion:llamacpp:qwen3:30b-a3b-iq2xxs-think500", "gguf": "Qwen3-30B-A3B-UD-IQ2_XXS.gguf", "reasoning_budget": 500, "ngl": 12, "ctx": 2048, "note": "30B total / 3B active MoE; 9.9GB IQ2_XXS; partial CPU offload heavy" } } }, "packaged_at": "2026-05-06T08:04:27Z" }