{ "id": "fe4fa289-67a9-48e8-8a80-d5d88c875208", "title": "predator-a3b-1 \u2014 qwen3 on predator", "date": "2026-05-04", "started_at": "2026-05-04T18:50:19Z", "git_sha": "f06800254dfd046e154c94f663fb7f9f66c79ae3", "hardware": "predator", "hardware_label": "Predator \u00b7 gaming laptop \u00b7 GTX 1060 6 GB \u00b7 28 GB RAM", "engine": "llamacpp", "harness": "predator-a3b-1", "model_family": "qwen3", "model_sizes": [ "qwen3:30b-a3b" ], "task_kind": "chat", "tags": [ "a3b", "predator", "qwen3" ], "headline": "2 calls across 1 cell(s)", "janie_blurb_md": null, "janie_blurb_status": "pending", "caveat": null, "caveat_severity": null, "methodology_ref": "HARNESS#predator-a3b-1", "methodology_deviations_md": null, "results_table": [ { "label": "qwen3:30b-a3b-q4km-think500", "cell_id": "predator:llamacpp:qwen3:30b-a3b-q4km-think500", "n_calls": 2, "n_errors": 0, "duration_ms_mean": null, "duration_ms_p50": null, "duration_ms_p95": null, "tokens_per_sec_mean": null, "tokens_per_sec_p50": null, "tokens_per_sec_p95": null, "tokens_per_sec_max": null, "memory_mb": null, "watts_avg": null, "notes": null } ], "cells": [ { "cell_id": "predator:llamacpp:qwen3:30b-a3b-q4km-think500", "n_calls": 2, "n_errors": 0, "duration_ms_mean": null, "duration_ms_p50": null, "duration_ms_p95": null, "tokens_per_sec_mean": null, "tokens_per_sec_p50": null, "tokens_per_sec_p95": null, "tokens_per_sec_max": null } ], "n_calls_total": 2, "n_errors_total": 0, "chart_spec": { "kind": "bar", "x": "cell", "y": "tokens_per_sec_mean", "series": [ "qwen3:30b-a3b-q4km-think500" ], "data_url": "/data/fe4fa289-67a9-48e8-8a80-d5d88c875208/run.jsonl" }, "site_grade": "archive-only", "site_grade_reason": "no documented method (no synthesis doc and no run.md)", "raw_data_urls": { "jsonl": "/data/fe4fa289-67a9-48e8-8a80-d5d88c875208/run.jsonl", "log": "/data/fe4fa289-67a9-48e8-8a80-d5d88c875208/run.log", "md": null, "metadata": "/data/fe4fa289-67a9-48e8-8a80-d5d88c875208/metadata.json", "gitea_dir": "https://git.weeyuga.com/slobodanmargetic988/weeyuga-benchmarks-public/src/branch/main/runs/fe4fa289-67a9-48e8-8a80-d5d88c875208/" }, "synthesis_doc": null, "synthesis_docs_all": [], "related_ids": [], "status": "complete", "visibility": "draft", "cells_full": [ { "cell_id": "predator:llamacpp:qwen3:30b-a3b-q4km-think500", "n_calls": 2, "n_errors": 0, "duration_ms_mean": null, "duration_ms_p50": null, "duration_ms_p95": null, "tokens_per_sec_mean": null, "tokens_per_sec_p50": null, "tokens_per_sec_p95": null, "tokens_per_sec_max": null, "calls": [ { "type": "vram_snapshot", "model_key": "qwen3-30b-a3b", "cell_id": "predator:llamacpp:qwen3:30b-a3b-q4km-think500", "raw": "5787, 243, 6144" }, { "type": "llama_bench", "model_key": "qwen3-30b-a3b", "cell_id": "predator:llamacpp:qwen3:30b-a3b-q4km-think500", "raw": "llama-bench.exe : ggml_cuda_init: found 1 CUDA devices (Total VRAM: 6143 MiB):\r\nAt line:1 char:1\r\n+ & 'D:\\WeeyugaTools\\llama.cpp\\llama-bench.exe' -m 'D:\\WeeyugaModels\\ll ...\r\n+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\r\n + CategoryInfo : NotSpecified: (ggml_cuda_init:...RAM: 6143 MiB)::String) [], RemoteException\r\n + FullyQualifiedErrorId : NativeCommandError\r\n \r\n Device 0: NVIDIA GeForce GTX 1060, compute capability 6.1, VMM: yes, VRAM: 6143 MiB\r\nload_backend: loaded CUDA backend from D:\\WeeyugaTools\\llama.cpp\\ggml-cuda.dll\r\nload_backend: loaded RPC backend from D:\\WeeyugaTools\\llama.cpp\\ggml-rpc.dll\r\nload_backend: loaded CPU backend from D:\\WeeyugaTools\\llama.cpp\\ggml-cpu-haswell.dll\r\n\r" } ] } ], "meta_record": { "type": "meta", "benchmark_run_id": "fe4fa289-67a9-48e8-8a80-d5d88c875208", "harness_version": "predator-a3b-1", "started_at_utc": "2026-05-04T18:50:19Z", "host": "Slobodans-MacBook-Air.local", "node": "predator", "engine": "llamacpp", "predator_target_url": "http://10.8.0.7:11436", "purpose": "Sloba 2026-05-04: Qwen3-30B-A3B (MoE, 3B active) on GTX 1060 6GB w/ partial offload + reasoning_budget=500", "models": { "qwen3-30b-a3b": { "cell_id": "predator:llamacpp:qwen3:30b-a3b-q4km-think500", "gguf": "Qwen3-30B-A3B-Q4_K_M.gguf", "reasoning_budget": 500, "note": "30B total / 3B active MoE; 17.7GB Q4_K_M; partial CPU offload via mmap" } } }, "packaged_at": "2026-05-06T08:04:27Z" }