{ "id": "fe4fa289-67a9-48e8-8a80-d5d88c875208", "title": "predator-a3b-1 \u2014 qwen3 on predator", "date": "2026-05-04", "hardware": "predator", "engine": "llamacpp", "harness": "predator-a3b-1", "model_family": "qwen3", "model_sizes": [ "qwen3:30b-a3b" ], "task_kind": null, "headline": "2 calls across 1 cell(s)", "summary_md_path": null, "raw_jsonl_path": "runs/fe4fa289-67a9-48e8-8a80-d5d88c875208.jsonl", "log_path": "runs/fe4fa289-67a9-48e8-8a80-d5d88c875208.log", "synthesis_doc": null, "cells": [ { "cell_id": "predator:llamacpp:qwen3:30b-a3b-q4km-think500", "n_calls": 2, "n_errors": 0, "duration_ms_mean": null, "duration_ms_p50": null, "tokens_per_sec_mean": null, "tokens_per_sec_max": null } ], "n_calls_total": 2, "n_errors_total": 0, "started_at": "2026-05-04T18:50:19Z", "tags": [ "a3b", "predator", "qwen3" ], "status": "complete", "visibility": "draft", "cells_full": [ { "cell_id": "predator:llamacpp:qwen3:30b-a3b-q4km-think500", "n_calls": 2, "n_errors": 0, "duration_ms_mean": null, "duration_ms_p50": null, "tokens_per_sec_mean": null, "tokens_per_sec_max": null, "calls": [ { "type": "vram_snapshot", "model_key": "qwen3-30b-a3b", "cell_id": "predator:llamacpp:qwen3:30b-a3b-q4km-think500", "raw": "5787, 243, 6144" }, { "type": "llama_bench", "model_key": "qwen3-30b-a3b", "cell_id": "predator:llamacpp:qwen3:30b-a3b-q4km-think500", "raw": "llama-bench.exe : ggml_cuda_init: found 1 CUDA devices (Total VRAM: 6143 MiB):\r\nAt line:1 char:1\r\n+ & 'D:\\WeeyugaTools\\llama.cpp\\llama-bench.exe' -m 'D:\\WeeyugaModels\\ll ...\r\n+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\r\n + CategoryInfo : NotSpecified: (ggml_cuda_init:...RAM: 6143 MiB)::String) [], RemoteException\r\n + FullyQualifiedErrorId : NativeCommandError\r\n \r\n Device 0: NVIDIA GeForce GTX 1060, compute capability 6.1, VMM: yes, VRAM: 6143 MiB\r\nload_backend: loaded CUDA backend from D:\\WeeyugaTools\\llama.cpp\\ggml-cuda.dll\r\nload_backend: loaded RPC backend from D:\\WeeyugaTools\\llama.cpp\\ggml-rpc.dll\r\nload_backend: loaded CPU backend from D:\\WeeyugaTools\\llama.cpp\\ggml-cpu-haswell.dll\r\n\r" } ] } ], "meta_record": { "type": "meta", "benchmark_run_id": "fe4fa289-67a9-48e8-8a80-d5d88c875208", "harness_version": "predator-a3b-1", "started_at_utc": "2026-05-04T18:50:19Z", "host": "Slobodans-MacBook-Air.local", "node": "predator", "engine": "llamacpp", "predator_target_url": "http://10.8.0.7:11436", "purpose": "Sloba 2026-05-04: Qwen3-30B-A3B (MoE, 3B active) on GTX 1060 6GB w/ partial offload + reasoning_budget=500", "models": { "qwen3-30b-a3b": { "cell_id": "predator:llamacpp:qwen3:30b-a3b-q4km-think500", "gguf": "Qwen3-30B-A3B-Q4_K_M.gguf", "reasoning_budget": 500, "note": "30B total / 3B active MoE; 17.7GB Q4_K_M; partial CPU offload via mmap" } } }, "packaged_at": "2026-05-05T17:43:34Z" }