{ "id": "09d8fbde-0008-49bb-99da-03eeaca72be1", "title": "Predator trio bench", "date": "2026-05-04", "started_at": "2026-05-04T16:01:52Z", "git_sha": "3ff8bd1808a1df00426d9b447559c774e4704c39", "hardware": "predator", "hardware_label": "Predator \u00b7 gaming laptop \u00b7 GTX 1060 6 GB \u00b7 28 GB RAM", "engine": "llamacpp", "harness": "predator-trio-1", "model_family": "gemma/granite/qwen3.5", "model_sizes": [ "granite-4.1:8b", "gemma-4:e4b-it", "qwen3.5:9b" ], "task_kind": "chat", "tags": [ "gemma", "granite", "predator", "qwen3", "qwen3.5", "trio" ], "headline": "42 calls across 3 cell(s); ~16.5 tok/s mean; p50 9.6s", "janie_blurb_md": null, "janie_blurb_status": "pending", "caveat": null, "caveat_severity": null, "methodology_ref": "A3B_AND_CPU_OVERNIGHT_2026-05-05", "methodology_deviations_md": null, "results_table": [ { "label": "granite-4.1:8b-q4km", "cell_id": "predator:llamacpp:granite-4.1:8b-q4km", "n_calls": 14, "n_errors": 0, "duration_ms_mean": 8831.33, "duration_ms_p50": 6403.0, "duration_ms_p95": 19524.25, "tokens_per_sec_mean": 13.49, "tokens_per_sec_p50": 15.16, "tokens_per_sec_p95": 15.73, "tokens_per_sec_max": 15.75, "memory_mb": null, "watts_avg": null, "notes": null }, { "label": "gemma-4:e4b-it-q4km", "cell_id": "predator:llamacpp:gemma-4:e4b-it-q4km", "n_calls": 14, "n_errors": 0, "duration_ms_mean": 9888.42, "duration_ms_p50": 8199.0, "duration_ms_p95": 21724.5, "tokens_per_sec_mean": 21.82, "tokens_per_sec_p50": 22.91, "tokens_per_sec_p95": 23.64, "tokens_per_sec_max": 23.66, "memory_mb": null, "watts_avg": null, "notes": null }, { "label": "qwen3.5:9b-q4km", "cell_id": "predator:llamacpp:qwen3.5:9b-q4km", "n_calls": 14, "n_errors": 0, "duration_ms_mean": 37136.33, "duration_ms_p50": 35324.5, "duration_ms_p95": 71874.15, "tokens_per_sec_mean": 14.04, "tokens_per_sec_p50": 14.42, "tokens_per_sec_p95": 14.57, "tokens_per_sec_max": 14.57, "memory_mb": null, "watts_avg": null, "notes": null } ], "cells": [ { "cell_id": "predator:llamacpp:granite-4.1:8b-q4km", "n_calls": 14, "n_errors": 0, "duration_ms_mean": 8831.33, "duration_ms_p50": 6403.0, "duration_ms_p95": 19524.25, "tokens_per_sec_mean": 13.49, "tokens_per_sec_p50": 15.16, "tokens_per_sec_p95": 15.73, "tokens_per_sec_max": 15.75, "cold_warm_split": { "cold": { "n_calls": 3, "duration_ms_mean": 9384.0, "duration_ms_p50": 6095.0, "tokens_per_sec_mean": 11.89 }, "warm": { "n_calls": 9, "duration_ms_mean": 8647.11, "duration_ms_p50": 6602.0, "tokens_per_sec_mean": 14.03 } }, "per_prompt": { "unknown": { "n_calls": 2, "n_errors": 0, "duration_ms_mean": null, "duration_ms_p50": null, "tokens_per_sec_mean": null, "tokens_per_sec_p50": null }, "hello": { "n_calls": 4, "n_errors": 0, "duration_ms_mean": 1121.5, "duration_ms_p50": 918.0, "tokens_per_sec_mean": 9.73, "tokens_per_sec_p50": 10.89 }, "P-MEDIUM": { "n_calls": 4, "n_errors": 0, "duration_ms_mean": 6472.5, "duration_ms_p50": 6403.0, "tokens_per_sec_mean": 15.05, "tokens_per_sec_p50": 15.16 }, "P-HARD": { "n_calls": 4, "n_errors": 0, "duration_ms_mean": 18900.0, "duration_ms_p50": 18835.5, "tokens_per_sec_mean": 15.7, "tokens_per_sec_p50": 15.72 } }, "chars_split": null, "timeline": [ { "prompt_id": null, "run_idx": null, "phase": null, "duration_ms": null, "tokens_per_sec": null, "error": null }, { "prompt_id": null, "run_idx": null, "phase": null, "duration_ms": null, "tokens_per_sec": null, "error": null }, { "prompt_id": "hello", "run_idx": 0, "phase": "cold", "duration_ms": 1782, "tokens_per_sec": 5.611672278338945, "error": null }, { "prompt_id": "hello", "run_idx": 1, "phase": "warm", "duration_ms": 919, "tokens_per_sec": 10.88139281828074, "error": null }, { "prompt_id": "hello", "run_idx": 2, "phase": "warm", "duration_ms": 917, "tokens_per_sec": 10.905125408942203, "error": null }, { "prompt_id": "hello", "run_idx": 3, "phase": "warm", "duration_ms": 868, "tokens_per_sec": 11.52073732718894, "error": null }, { "prompt_id": "P-MEDIUM", "run_idx": 0, "phase": "cold", "duration_ms": 6095, "tokens_per_sec": 14.438063986874488, "error": null }, { "prompt_id": "P-MEDIUM", "run_idx": 1, "phase": "warm", "duration_ms": 6204, "tokens_per_sec": 15.151515151515152, "error": null }, { "prompt_id": "P-MEDIUM", "run_idx": 2, "phase": "warm", "duration_ms": 6989, "tokens_per_sec": 15.16669051366433, "error": null }, { "prompt_id": "P-MEDIUM", "run_idx": 3, "phase": "warm", "duration_ms": 6602, "tokens_per_sec": 15.44986367767343, "error": null }, { "prompt_id": "P-HARD", "run_idx": 0, "phase": "cold", "duration_ms": 20275, "tokens_per_sec": 15.635018495684342, "error": null }, { "prompt_id": "P-HARD", "run_idx": 1, "phase": "warm", "duration_ms": 18761, "tokens_per_sec": 15.72410852299984, "error": null }, { "prompt_id": "P-HARD", "run_idx": 2, "phase": "warm", "duration_ms": 18910, "tokens_per_sec": 15.70597567424643, "error": null }, { "prompt_id": "P-HARD", "run_idx": 3, "phase": "warm", "duration_ms": 17654, "tokens_per_sec": 15.747139458479666, "error": null } ] }, { "cell_id": "predator:llamacpp:gemma-4:e4b-it-q4km", "n_calls": 14, "n_errors": 0, "duration_ms_mean": 9888.42, "duration_ms_p50": 8199.0, "duration_ms_p95": 21724.5, "tokens_per_sec_mean": 21.82, "tokens_per_sec_p50": 22.91, "tokens_per_sec_p95": 23.64, "tokens_per_sec_max": 23.66, "cold_warm_split": { "cold": { "n_calls": 3, "duration_ms_mean": 11939.0, "duration_ms_p50": 3858.0, "tokens_per_sec_mean": 21.37 }, "warm": { "n_calls": 9, "duration_ms_mean": 9204.89, "duration_ms_p50": 12258.0, "tokens_per_sec_mean": 21.97 } }, "per_prompt": { "unknown": { "n_calls": 2, "n_errors": 0, "duration_ms_mean": null, "duration_ms_p50": null, "tokens_per_sec_mean": null, "tokens_per_sec_p50": null }, "hello": { "n_calls": 4, "n_errors": 0, "duration_ms_mean": 2534.5, "duration_ms_p50": 2884.0, "tokens_per_sec_mean": 19.17, "tokens_per_sec_p50": 20.06 }, "P-MEDIUM": { "n_calls": 4, "n_errors": 0, "duration_ms_mean": 8322.5, "duration_ms_p50": 8199.0, "tokens_per_sec_mean": 22.77, "tokens_per_sec_p50": 22.85 }, "P-HARD": { "n_calls": 4, "n_errors": 0, "duration_ms_mean": 18808.25, "duration_ms_p50": 16034.0, "tokens_per_sec_mean": 23.52, "tokens_per_sec_p50": 23.53 } }, "chars_split": null, "timeline": [ { "prompt_id": null, "run_idx": null, "phase": null, "duration_ms": null, "tokens_per_sec": null, "error": null }, { "prompt_id": null, "run_idx": null, "phase": null, "duration_ms": null, "tokens_per_sec": null, "error": null }, { "prompt_id": "hello", "run_idx": 0, "phase": "cold", "duration_ms": 3519, "tokens_per_sec": 18.186984938903098, "error": null }, { "prompt_id": "hello", "run_idx": 1, "phase": "warm", "duration_ms": 851, "tokens_per_sec": 14.10105757931845, "error": null }, { "prompt_id": "hello", "run_idx": 2, "phase": "warm", "duration_ms": 2918, "tokens_per_sec": 21.93283070596299, "error": null }, { "prompt_id": "hello", "run_idx": 3, "phase": "warm", "duration_ms": 2850, "tokens_per_sec": 22.45614035087719, "error": null }, { "prompt_id": "P-MEDIUM", "run_idx": 0, "phase": "cold", "duration_ms": 3858, "tokens_per_sec": 22.291342664593053, "error": null }, { "prompt_id": "P-MEDIUM", "run_idx": 1, "phase": "warm", "duration_ms": 4140, "tokens_per_sec": 21.73913043478261, "error": null }, { "prompt_id": "P-MEDIUM", "run_idx": 2, "phase": "warm", "duration_ms": 12258, "tokens_per_sec": 23.413281122532226, "error": null }, { "prompt_id": "P-MEDIUM", "run_idx": 3, "phase": "warm", "duration_ms": 13034, "tokens_per_sec": 23.63050483351235, "error": null }, { "prompt_id": "P-HARD", "run_idx": 0, "phase": "cold", "duration_ms": 28440, "tokens_per_sec": 23.628691983122362, "error": null }, { "prompt_id": "P-HARD", "run_idx": 1, "phase": "warm", "duration_ms": 16230, "tokens_per_sec": 23.65988909426987, "error": null }, { "prompt_id": "P-HARD", "run_idx": 2, "phase": "warm", "duration_ms": 15838, "tokens_per_sec": 23.424674832680896, "error": null }, { "prompt_id": "P-HARD", "run_idx": 3, "phase": "warm", "duration_ms": 14725, "tokens_per_sec": 23.3616298811545, "error": null } ] }, { "cell_id": "predator:llamacpp:qwen3.5:9b-q4km", "n_calls": 14, "n_errors": 0, "duration_ms_mean": 37136.33, "duration_ms_p50": 35324.5, "duration_ms_p95": 71874.15, "tokens_per_sec_mean": 14.04, "tokens_per_sec_p50": 14.42, "tokens_per_sec_p95": 14.57, "tokens_per_sec_max": 14.57, "cold_warm_split": { "cold": { "n_calls": 3, "duration_ms_mean": 38309.67, "duration_ms_p50": 35678.0, "tokens_per_sec_mean": 13.28 }, "warm": { "n_calls": 9, "duration_ms_mean": 36745.22, "duration_ms_p50": 35311.0, "tokens_per_sec_mean": 14.29 } }, "per_prompt": { "unknown": { "n_calls": 2, "n_errors": 0, "duration_ms_mean": null, "duration_ms_p50": null, "tokens_per_sec_mean": null, "tokens_per_sec_p50": null }, "hello": { "n_calls": 4, "n_errors": 0, "duration_ms_mean": 4857.5, "duration_ms_p50": 4646.0, "tokens_per_sec_mean": 13.25, "tokens_per_sec_p50": 13.78 }, "P-MEDIUM": { "n_calls": 4, "n_errors": 0, "duration_ms_mean": 35390.5, "duration_ms_p50": 35324.5, "tokens_per_sec_mean": 14.47, "tokens_per_sec_p50": 14.49 }, "P-HARD": { "n_calls": 4, "n_errors": 0, "duration_ms_mean": 71161.0, "duration_ms_p50": 70320.5, "tokens_per_sec_mean": 14.4, "tokens_per_sec_p50": 14.56 } }, "chars_split": null, "timeline": [ { "prompt_id": null, "run_idx": null, "phase": null, "duration_ms": null, "tokens_per_sec": null, "error": null }, { "prompt_id": null, "run_idx": null, "phase": null, "duration_ms": null, "tokens_per_sec": null, "error": null }, { "prompt_id": "hello", "run_idx": 0, "phase": "cold", "duration_ms": 5514, "tokens_per_sec": 11.606819006166122, "error": null }, { "prompt_id": "hello", "run_idx": 1, "phase": "warm", "duration_ms": 4659, "tokens_per_sec": 13.736853402017601, "error": null }, { "prompt_id": "hello", "run_idx": 2, "phase": "warm", "duration_ms": 4633, "tokens_per_sec": 13.813943449169004, "error": null }, { "prompt_id": "hello", "run_idx": 3, "phase": "warm", "duration_ms": 4624, "tokens_per_sec": 13.84083044982699, "error": null }, { "prompt_id": "P-MEDIUM", "run_idx": 0, "phase": "cold", "duration_ms": 35678, "tokens_per_sec": 14.350580189472506, "error": null }, { "prompt_id": "P-MEDIUM", "run_idx": 1, "phase": "warm", "duration_ms": 35311, "tokens_per_sec": 14.499730962023166, "error": null }, { "prompt_id": "P-MEDIUM", "run_idx": 2, "phase": "warm", "duration_ms": 35338, "tokens_per_sec": 14.488652442130284, "error": null }, { "prompt_id": "P-MEDIUM", "run_idx": 3, "phase": "warm", "duration_ms": 35235, "tokens_per_sec": 14.531006101887328, "error": null }, { "prompt_id": "P-HARD", "run_idx": 0, "phase": "cold", "duration_ms": 73737, "tokens_per_sec": 13.887193674817256, "error": null }, { "prompt_id": "P-HARD", "run_idx": 1, "phase": "warm", "duration_ms": 70291, "tokens_per_sec": 14.568010129319545, "error": null }, { "prompt_id": "P-HARD", "run_idx": 2, "phase": "warm", "duration_ms": 70266, "tokens_per_sec": 14.573193294054022, "error": null }, { "prompt_id": "P-HARD", "run_idx": 3, "phase": "warm", "duration_ms": 70350, "tokens_per_sec": 14.555792466240229, "error": null } ] } ], "n_calls_total": 42, "n_errors_total": 0, "chart_spec": { "kind": "bar", "x": "cell", "y": "tokens_per_sec_mean", "series": [ "granite-4.1:8b-q4km", "gemma-4:e4b-it-q4km", "qwen3.5:9b-q4km" ], "data_url": "/data/09d8fbde-0008-49bb-99da-03eeaca72be1/run.jsonl" }, "site_grade": "flagship", "site_grade_reason": null, "raw_data_urls": { "jsonl": "/data/09d8fbde-0008-49bb-99da-03eeaca72be1/run.jsonl", "log": "/data/09d8fbde-0008-49bb-99da-03eeaca72be1/run.log", "md": "/data/09d8fbde-0008-49bb-99da-03eeaca72be1/run.md", "metadata": "/data/09d8fbde-0008-49bb-99da-03eeaca72be1/metadata.json", "gitea_dir": "https://git.weeyuga.com/slobodanmargetic988/weeyuga-benchmarks-public/src/branch/main/runs/09d8fbde-0008-49bb-99da-03eeaca72be1/" }, "synthesis_doc": "A3B_AND_CPU_OVERNIGHT_2026-05-05.md", "synthesis_docs_all": [ "A3B_AND_CPU_OVERNIGHT_2026-05-05.md", "A3B_CROSS_MACHINE_2026-05-05.md", "SITE_DATA_AUDIT_AND_MIGRATION_PLAN_2026-05-06.md" ], "related_ids": [], "status": "complete", "visibility": "draft", "cells_full": [ { "cell_id": "predator:llamacpp:granite-4.1:8b-q4km", "n_calls": 14, "n_errors": 0, "duration_ms_mean": 8831.33, "duration_ms_p50": 6403.0, "duration_ms_p95": 19524.25, "tokens_per_sec_mean": 13.49, "tokens_per_sec_p50": 15.16, "tokens_per_sec_p95": 15.73, "tokens_per_sec_max": 15.75, "cold_warm_split": { "cold": { "n_calls": 3, "duration_ms_mean": 9384.0, "duration_ms_p50": 6095.0, "tokens_per_sec_mean": 11.89 }, "warm": { "n_calls": 9, "duration_ms_mean": 8647.11, "duration_ms_p50": 6602.0, "tokens_per_sec_mean": 14.03 } }, "per_prompt": { "unknown": { "n_calls": 2, "n_errors": 0, "duration_ms_mean": null, "duration_ms_p50": null, "tokens_per_sec_mean": null, "tokens_per_sec_p50": null }, "hello": { "n_calls": 4, "n_errors": 0, "duration_ms_mean": 1121.5, "duration_ms_p50": 918.0, "tokens_per_sec_mean": 9.73, "tokens_per_sec_p50": 10.89 }, "P-MEDIUM": { "n_calls": 4, "n_errors": 0, "duration_ms_mean": 6472.5, "duration_ms_p50": 6403.0, "tokens_per_sec_mean": 15.05, "tokens_per_sec_p50": 15.16 }, "P-HARD": { "n_calls": 4, "n_errors": 0, "duration_ms_mean": 18900.0, "duration_ms_p50": 18835.5, "tokens_per_sec_mean": 15.7, "tokens_per_sec_p50": 15.72 } }, "chars_split": null, "timeline": [ { "prompt_id": null, "run_idx": null, "phase": null, "duration_ms": null, "tokens_per_sec": null, "error": null }, { "prompt_id": null, "run_idx": null, "phase": null, "duration_ms": null, "tokens_per_sec": null, "error": null }, { "prompt_id": "hello", "run_idx": 0, "phase": "cold", "duration_ms": 1782, "tokens_per_sec": 5.611672278338945, "error": null }, { "prompt_id": "hello", "run_idx": 1, "phase": "warm", "duration_ms": 919, "tokens_per_sec": 10.88139281828074, "error": null }, { "prompt_id": "hello", "run_idx": 2, "phase": "warm", "duration_ms": 917, "tokens_per_sec": 10.905125408942203, "error": null }, { "prompt_id": "hello", "run_idx": 3, "phase": "warm", "duration_ms": 868, "tokens_per_sec": 11.52073732718894, "error": null }, { "prompt_id": "P-MEDIUM", "run_idx": 0, "phase": "cold", "duration_ms": 6095, "tokens_per_sec": 14.438063986874488, "error": null }, { "prompt_id": "P-MEDIUM", "run_idx": 1, "phase": "warm", "duration_ms": 6204, "tokens_per_sec": 15.151515151515152, "error": null }, { "prompt_id": "P-MEDIUM", "run_idx": 2, "phase": "warm", "duration_ms": 6989, "tokens_per_sec": 15.16669051366433, "error": null }, { "prompt_id": "P-MEDIUM", "run_idx": 3, "phase": "warm", "duration_ms": 6602, "tokens_per_sec": 15.44986367767343, "error": null }, { "prompt_id": "P-HARD", "run_idx": 0, "phase": "cold", "duration_ms": 20275, "tokens_per_sec": 15.635018495684342, "error": null }, { "prompt_id": "P-HARD", "run_idx": 1, "phase": "warm", "duration_ms": 18761, "tokens_per_sec": 15.72410852299984, "error": null }, { "prompt_id": "P-HARD", "run_idx": 2, "phase": "warm", "duration_ms": 18910, "tokens_per_sec": 15.70597567424643, "error": null }, { "prompt_id": "P-HARD", "run_idx": 3, "phase": "warm", "duration_ms": 17654, "tokens_per_sec": 15.747139458479666, "error": null } ], "calls": [ { "type": "vram_snapshot", "model_key": "granite", "cell_id": "predator:llamacpp:granite-4.1:8b-q4km", "raw": "5990, 40, 6144" }, { "type": "llama_bench", "model_key": "granite", "cell_id": "predator:llamacpp:granite-4.1:8b-q4km", "raw": "llama-bench.exe : ggml_cuda_init: found 1 CUDA devices (Total VRAM: 6143 MiB):\r\nAt line:1 char:1\r\n+ & 'D:\\WeeyugaTools\\llama.cpp\\llama-bench.exe' -m 'D:\\WeeyugaModels\\ll ...\r\n+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\r\n + CategoryInfo : NotSpecified: (ggml_cuda_init:...RAM: 6143 MiB)::String) [], RemoteException\r\n + FullyQualifiedErrorId : NativeCommandError\r\n \r\n Device 0: NVIDIA GeForce GTX 1060, compute capability 6.1, VMM: yes, VRAM: 6143 MiB\r\nload_backend: loaded CUDA backend from D:\\WeeyugaTools\\llama.cpp\\ggml-cuda.dll\r\nload_backend: loaded RPC backend from D:\\WeeyugaTools\\llama.cpp\\ggml-rpc.dll\r\nload_backend: loaded CPU backend from D:\\WeeyugaTools\\llama.cpp\\ggml-cpu-haswell.dll\r\n| model | size | params | backend | ngl | test | t/s |\r\n| ------------------------------ | ---------: | ---------: | ---------- | --: | --------------: | -------------------: |\r\n| granite 3B Q4_K - Medium | 4.98 GiB | 8.79 B | CUDA | 99 | pp512 | 142.89 \u00b1 0.51 |\r\n| granite 3B Q4_K - Medium | 4.98 GiB | 8.79 B | CUDA | 99 | tg128 | 12.19 \u00b1 0.04 |\r\n\r\nbuild: 665abc609 (8951)\r\n\r" }, { "type": "call", "cell_id": "predator:llamacpp:granite-4.1:8b-q4km", "model_key": "granite", "prompt_id": "hello", "run_idx": 0, "phase": "cold", "total_duration_ms": 1782, "prompt_tokens": 9, "completion_tokens": 10, "tokens_per_sec": 5.611672278338945, "finish_reason": "stop", "error": null }, { "type": "call", "cell_id": "predator:llamacpp:granite-4.1:8b-q4km", "model_key": "granite", "prompt_id": "hello", "run_idx": 1, "phase": "warm", "total_duration_ms": 919, "prompt_tokens": 9, "completion_tokens": 10, "tokens_per_sec": 10.88139281828074, "finish_reason": "stop", "error": null }, { "type": "call", "cell_id": "predator:llamacpp:granite-4.1:8b-q4km", "model_key": "granite", "prompt_id": "hello", "run_idx": 2, "phase": "warm", "total_duration_ms": 917, "prompt_tokens": 9, "completion_tokens": 10, "tokens_per_sec": 10.905125408942203, "finish_reason": "stop", "error": null }, { "type": "call", "cell_id": "predator:llamacpp:granite-4.1:8b-q4km", "model_key": "granite", "prompt_id": "hello", "run_idx": 3, "phase": "warm", "total_duration_ms": 868, "prompt_tokens": 9, "completion_tokens": 10, "tokens_per_sec": 11.52073732718894, "finish_reason": "stop", "error": null }, { "type": "call", "cell_id": "predator:llamacpp:granite-4.1:8b-q4km", "model_key": "granite", "prompt_id": "P-MEDIUM", "run_idx": 0, "phase": "cold", "total_duration_ms": 6095, "prompt_tokens": 22, "completion_tokens": 88, "tokens_per_sec": 14.438063986874488, "finish_reason": "stop", "error": null }, { "type": "call", "cell_id": "predator:llamacpp:granite-4.1:8b-q4km", "model_key": "granite", "prompt_id": "P-MEDIUM", "run_idx": 1, "phase": "warm", "total_duration_ms": 6204, "prompt_tokens": 22, "completion_tokens": 94, "tokens_per_sec": 15.151515151515152, "finish_reason": "stop", "error": null }, { "type": "call", "cell_id": "predator:llamacpp:granite-4.1:8b-q4km", "model_key": "granite", "prompt_id": "P-MEDIUM", "run_idx": 2, "phase": "warm", "total_duration_ms": 6989, "prompt_tokens": 22, "completion_tokens": 106, "tokens_per_sec": 15.16669051366433, "finish_reason": "stop", "error": null }, { "type": "call", "cell_id": "predator:llamacpp:granite-4.1:8b-q4km", "model_key": "granite", "prompt_id": "P-MEDIUM", "run_idx": 3, "phase": "warm", "total_duration_ms": 6602, "prompt_tokens": 22, "completion_tokens": 102, "tokens_per_sec": 15.44986367767343, "finish_reason": "stop", "error": null }, { "type": "call", "cell_id": "predator:llamacpp:granite-4.1:8b-q4km", "model_key": "granite", "prompt_id": "P-HARD", "run_idx": 0, "phase": "cold", "total_duration_ms": 20275, "prompt_tokens": 28, "completion_tokens": 317, "tokens_per_sec": 15.635018495684342, "finish_reason": "stop", "error": null }, { "type": "call", "cell_id": "predator:llamacpp:granite-4.1:8b-q4km", "model_key": "granite", "prompt_id": "P-HARD", "run_idx": 1, "phase": "warm", "total_duration_ms": 18761, "prompt_tokens": 28, "completion_tokens": 295, "tokens_per_sec": 15.72410852299984, "finish_reason": "stop", "error": null }, { "type": "call", "cell_id": "predator:llamacpp:granite-4.1:8b-q4km", "model_key": "granite", "prompt_id": "P-HARD", "run_idx": 2, "phase": "warm", "total_duration_ms": 18910, "prompt_tokens": 28, "completion_tokens": 297, "tokens_per_sec": 15.70597567424643, "finish_reason": "stop", "error": null }, { "type": "call", "cell_id": "predator:llamacpp:granite-4.1:8b-q4km", "model_key": "granite", "prompt_id": "P-HARD", "run_idx": 3, "phase": "warm", "total_duration_ms": 17654, "prompt_tokens": 28, "completion_tokens": 278, "tokens_per_sec": 15.747139458479666, "finish_reason": "stop", "error": null } ] }, { "cell_id": "predator:llamacpp:gemma-4:e4b-it-q4km", "n_calls": 14, "n_errors": 0, "duration_ms_mean": 9888.42, "duration_ms_p50": 8199.0, "duration_ms_p95": 21724.5, "tokens_per_sec_mean": 21.82, "tokens_per_sec_p50": 22.91, "tokens_per_sec_p95": 23.64, "tokens_per_sec_max": 23.66, "cold_warm_split": { "cold": { "n_calls": 3, "duration_ms_mean": 11939.0, "duration_ms_p50": 3858.0, "tokens_per_sec_mean": 21.37 }, "warm": { "n_calls": 9, "duration_ms_mean": 9204.89, "duration_ms_p50": 12258.0, "tokens_per_sec_mean": 21.97 } }, "per_prompt": { "unknown": { "n_calls": 2, "n_errors": 0, "duration_ms_mean": null, "duration_ms_p50": null, "tokens_per_sec_mean": null, "tokens_per_sec_p50": null }, "hello": { "n_calls": 4, "n_errors": 0, "duration_ms_mean": 2534.5, "duration_ms_p50": 2884.0, "tokens_per_sec_mean": 19.17, "tokens_per_sec_p50": 20.06 }, "P-MEDIUM": { "n_calls": 4, "n_errors": 0, "duration_ms_mean": 8322.5, "duration_ms_p50": 8199.0, "tokens_per_sec_mean": 22.77, "tokens_per_sec_p50": 22.85 }, "P-HARD": { "n_calls": 4, "n_errors": 0, "duration_ms_mean": 18808.25, "duration_ms_p50": 16034.0, "tokens_per_sec_mean": 23.52, "tokens_per_sec_p50": 23.53 } }, "chars_split": null, "timeline": [ { "prompt_id": null, "run_idx": null, "phase": null, "duration_ms": null, "tokens_per_sec": null, "error": null }, { "prompt_id": null, "run_idx": null, "phase": null, "duration_ms": null, "tokens_per_sec": null, "error": null }, { "prompt_id": "hello", "run_idx": 0, "phase": "cold", "duration_ms": 3519, "tokens_per_sec": 18.186984938903098, "error": null }, { "prompt_id": "hello", "run_idx": 1, "phase": "warm", "duration_ms": 851, "tokens_per_sec": 14.10105757931845, "error": null }, { "prompt_id": "hello", "run_idx": 2, "phase": "warm", "duration_ms": 2918, "tokens_per_sec": 21.93283070596299, "error": null }, { "prompt_id": "hello", "run_idx": 3, "phase": "warm", "duration_ms": 2850, "tokens_per_sec": 22.45614035087719, "error": null }, { "prompt_id": "P-MEDIUM", "run_idx": 0, "phase": "cold", "duration_ms": 3858, "tokens_per_sec": 22.291342664593053, "error": null }, { "prompt_id": "P-MEDIUM", "run_idx": 1, "phase": "warm", "duration_ms": 4140, "tokens_per_sec": 21.73913043478261, "error": null }, { "prompt_id": "P-MEDIUM", "run_idx": 2, "phase": "warm", "duration_ms": 12258, "tokens_per_sec": 23.413281122532226, "error": null }, { "prompt_id": "P-MEDIUM", "run_idx": 3, "phase": "warm", "duration_ms": 13034, "tokens_per_sec": 23.63050483351235, "error": null }, { "prompt_id": "P-HARD", "run_idx": 0, "phase": "cold", "duration_ms": 28440, "tokens_per_sec": 23.628691983122362, "error": null }, { "prompt_id": "P-HARD", "run_idx": 1, "phase": "warm", "duration_ms": 16230, "tokens_per_sec": 23.65988909426987, "error": null }, { "prompt_id": "P-HARD", "run_idx": 2, "phase": "warm", "duration_ms": 15838, "tokens_per_sec": 23.424674832680896, "error": null }, { "prompt_id": "P-HARD", "run_idx": 3, "phase": "warm", "duration_ms": 14725, "tokens_per_sec": 23.3616298811545, "error": null } ], "calls": [ { "type": "vram_snapshot", "model_key": "gemma", "cell_id": "predator:llamacpp:gemma-4:e4b-it-q4km", "raw": "4408, 1622, 6144" }, { "type": "llama_bench", "model_key": "gemma", "cell_id": "predator:llamacpp:gemma-4:e4b-it-q4km", "raw": "llama-bench.exe : ggml_cuda_init: found 1 CUDA devices (Total VRAM: 6143 MiB):\r\nAt line:1 char:1\r\n+ & 'D:\\WeeyugaTools\\llama.cpp\\llama-bench.exe' -m 'D:\\WeeyugaModels\\ll ...\r\n+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\r\n + CategoryInfo : NotSpecified: (ggml_cuda_init:...RAM: 6143 MiB)::String) [], RemoteException\r\n + FullyQualifiedErrorId : NativeCommandError\r\n \r\n Device 0: NVIDIA GeForce GTX 1060, compute capability 6.1, VMM: yes, VRAM: 6143 MiB\r\nload_backend: loaded CUDA backend from D:\\WeeyugaTools\\llama.cpp\\ggml-cuda.dll\r\nload_backend: loaded RPC backend from D:\\WeeyugaTools\\llama.cpp\\ggml-rpc.dll\r\nload_backend: loaded CPU backend from D:\\WeeyugaTools\\llama.cpp\\ggml-cpu-haswell.dll\r\n| model | size | params | backend | ngl | test | t/s |\r\n| ------------------------------ | ---------: | ---------: | ---------- | --: | --------------: | -------------------: |\r\n| gemma4 E4B Q4_K - Medium | 4.62 GiB | 7.52 B | CUDA | 99 | pp512 | 498.70 \u00b1 0.89 |\r\n| gemma4 E4B Q4_K - Medium | 4.62 GiB | 7.52 B | CUDA | 99 | tg128 | 24.53 \u00b1 0.06 |\r\n\r\nbuild: 665abc609 (8951)\r\n\r" }, { "type": "call", "cell_id": "predator:llamacpp:gemma-4:e4b-it-q4km", "model_key": "gemma", "prompt_id": "hello", "run_idx": 0, "phase": "cold", "total_duration_ms": 3519, "prompt_tokens": 17, "completion_tokens": 64, "tokens_per_sec": 18.186984938903098, "finish_reason": "length", "error": null }, { "type": "call", "cell_id": "predator:llamacpp:gemma-4:e4b-it-q4km", "model_key": "gemma", "prompt_id": "hello", "run_idx": 1, "phase": "warm", "total_duration_ms": 851, "prompt_tokens": 17, "completion_tokens": 12, "tokens_per_sec": 14.10105757931845, "finish_reason": "stop", "error": null }, { "type": "call", "cell_id": "predator:llamacpp:gemma-4:e4b-it-q4km", "model_key": "gemma", "prompt_id": "hello", "run_idx": 2, "phase": "warm", "total_duration_ms": 2918, "prompt_tokens": 17, "completion_tokens": 64, "tokens_per_sec": 21.93283070596299, "finish_reason": "length", "error": null }, { "type": "call", "cell_id": "predator:llamacpp:gemma-4:e4b-it-q4km", "model_key": "gemma", "prompt_id": "hello", "run_idx": 3, "phase": "warm", "total_duration_ms": 2850, "prompt_tokens": 17, "completion_tokens": 64, "tokens_per_sec": 22.45614035087719, "finish_reason": "length", "error": null }, { "type": "call", "cell_id": "predator:llamacpp:gemma-4:e4b-it-q4km", "model_key": "gemma", "prompt_id": "P-MEDIUM", "run_idx": 0, "phase": "cold", "total_duration_ms": 3858, "prompt_tokens": 29, "completion_tokens": 86, "tokens_per_sec": 22.291342664593053, "finish_reason": "stop", "error": null }, { "type": "call", "cell_id": "predator:llamacpp:gemma-4:e4b-it-q4km", "model_key": "gemma", "prompt_id": "P-MEDIUM", "run_idx": 1, "phase": "warm", "total_duration_ms": 4140, "prompt_tokens": 29, "completion_tokens": 90, "tokens_per_sec": 21.73913043478261, "finish_reason": "stop", "error": null }, { "type": "call", "cell_id": "predator:llamacpp:gemma-4:e4b-it-q4km", "model_key": "gemma", "prompt_id": "P-MEDIUM", "run_idx": 2, "phase": "warm", "total_duration_ms": 12258, "prompt_tokens": 29, "completion_tokens": 287, "tokens_per_sec": 23.413281122532226, "finish_reason": "stop", "error": null }, { "type": "call", "cell_id": "predator:llamacpp:gemma-4:e4b-it-q4km", "model_key": "gemma", "prompt_id": "P-MEDIUM", "run_idx": 3, "phase": "warm", "total_duration_ms": 13034, "prompt_tokens": 29, "completion_tokens": 308, "tokens_per_sec": 23.63050483351235, "finish_reason": "stop", "error": null }, { "type": "call", "cell_id": "predator:llamacpp:gemma-4:e4b-it-q4km", "model_key": "gemma", "prompt_id": "P-HARD", "run_idx": 0, "phase": "cold", "total_duration_ms": 28440, "prompt_tokens": 37, "completion_tokens": 672, "tokens_per_sec": 23.628691983122362, "finish_reason": "stop", "error": null }, { "type": "call", "cell_id": "predator:llamacpp:gemma-4:e4b-it-q4km", "model_key": "gemma", "prompt_id": "P-HARD", "run_idx": 1, "phase": "warm", "total_duration_ms": 16230, "prompt_tokens": 37, "completion_tokens": 384, "tokens_per_sec": 23.65988909426987, "finish_reason": "stop", "error": null }, { "type": "call", "cell_id": "predator:llamacpp:gemma-4:e4b-it-q4km", "model_key": "gemma", "prompt_id": "P-HARD", "run_idx": 2, "phase": "warm", "total_duration_ms": 15838, "prompt_tokens": 37, "completion_tokens": 371, "tokens_per_sec": 23.424674832680896, "finish_reason": "stop", "error": null }, { "type": "call", "cell_id": "predator:llamacpp:gemma-4:e4b-it-q4km", "model_key": "gemma", "prompt_id": "P-HARD", "run_idx": 3, "phase": "warm", "total_duration_ms": 14725, "prompt_tokens": 37, "completion_tokens": 344, "tokens_per_sec": 23.3616298811545, "finish_reason": "stop", "error": null } ] }, { "cell_id": "predator:llamacpp:qwen3.5:9b-q4km", "n_calls": 14, "n_errors": 0, "duration_ms_mean": 37136.33, "duration_ms_p50": 35324.5, "duration_ms_p95": 71874.15, "tokens_per_sec_mean": 14.04, "tokens_per_sec_p50": 14.42, "tokens_per_sec_p95": 14.57, "tokens_per_sec_max": 14.57, "cold_warm_split": { "cold": { "n_calls": 3, "duration_ms_mean": 38309.67, "duration_ms_p50": 35678.0, "tokens_per_sec_mean": 13.28 }, "warm": { "n_calls": 9, "duration_ms_mean": 36745.22, "duration_ms_p50": 35311.0, "tokens_per_sec_mean": 14.29 } }, "per_prompt": { "unknown": { "n_calls": 2, "n_errors": 0, "duration_ms_mean": null, "duration_ms_p50": null, "tokens_per_sec_mean": null, "tokens_per_sec_p50": null }, "hello": { "n_calls": 4, "n_errors": 0, "duration_ms_mean": 4857.5, "duration_ms_p50": 4646.0, "tokens_per_sec_mean": 13.25, "tokens_per_sec_p50": 13.78 }, "P-MEDIUM": { "n_calls": 4, "n_errors": 0, "duration_ms_mean": 35390.5, "duration_ms_p50": 35324.5, "tokens_per_sec_mean": 14.47, "tokens_per_sec_p50": 14.49 }, "P-HARD": { "n_calls": 4, "n_errors": 0, "duration_ms_mean": 71161.0, "duration_ms_p50": 70320.5, "tokens_per_sec_mean": 14.4, "tokens_per_sec_p50": 14.56 } }, "chars_split": null, "timeline": [ { "prompt_id": null, "run_idx": null, "phase": null, "duration_ms": null, "tokens_per_sec": null, "error": null }, { "prompt_id": null, "run_idx": null, "phase": null, "duration_ms": null, "tokens_per_sec": null, "error": null }, { "prompt_id": "hello", "run_idx": 0, "phase": "cold", "duration_ms": 5514, "tokens_per_sec": 11.606819006166122, "error": null }, { "prompt_id": "hello", "run_idx": 1, "phase": "warm", "duration_ms": 4659, "tokens_per_sec": 13.736853402017601, "error": null }, { "prompt_id": "hello", "run_idx": 2, "phase": "warm", "duration_ms": 4633, "tokens_per_sec": 13.813943449169004, "error": null }, { "prompt_id": "hello", "run_idx": 3, "phase": "warm", "duration_ms": 4624, "tokens_per_sec": 13.84083044982699, "error": null }, { "prompt_id": "P-MEDIUM", "run_idx": 0, "phase": "cold", "duration_ms": 35678, "tokens_per_sec": 14.350580189472506, "error": null }, { "prompt_id": "P-MEDIUM", "run_idx": 1, "phase": "warm", "duration_ms": 35311, "tokens_per_sec": 14.499730962023166, "error": null }, { "prompt_id": "P-MEDIUM", "run_idx": 2, "phase": "warm", "duration_ms": 35338, "tokens_per_sec": 14.488652442130284, "error": null }, { "prompt_id": "P-MEDIUM", "run_idx": 3, "phase": "warm", "duration_ms": 35235, "tokens_per_sec": 14.531006101887328, "error": null }, { "prompt_id": "P-HARD", "run_idx": 0, "phase": "cold", "duration_ms": 73737, "tokens_per_sec": 13.887193674817256, "error": null }, { "prompt_id": "P-HARD", "run_idx": 1, "phase": "warm", "duration_ms": 70291, "tokens_per_sec": 14.568010129319545, "error": null }, { "prompt_id": "P-HARD", "run_idx": 2, "phase": "warm", "duration_ms": 70266, "tokens_per_sec": 14.573193294054022, "error": null }, { "prompt_id": "P-HARD", "run_idx": 3, "phase": "warm", "duration_ms": 70350, "tokens_per_sec": 14.555792466240229, "error": null } ], "calls": [ { "type": "vram_snapshot", "model_key": "qwen", "cell_id": "predator:llamacpp:qwen3.5:9b-q4km", "raw": "5954, 76, 6144" }, { "type": "llama_bench", "model_key": "qwen", "cell_id": "predator:llamacpp:qwen3.5:9b-q4km", "raw": "llama-bench.exe : ggml_cuda_init: found 1 CUDA devices (Total VRAM: 6143 MiB):\r\nAt line:1 char:1\r\n+ & 'D:\\WeeyugaTools\\llama.cpp\\llama-bench.exe' -m 'D:\\WeeyugaModels\\ll ...\r\n+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\r\n + CategoryInfo : NotSpecified: (ggml_cuda_init:...RAM: 6143 MiB)::String) [], RemoteException\r\n + FullyQualifiedErrorId : NativeCommandError\r\n \r\n Device 0: NVIDIA GeForce GTX 1060, compute capability 6.1, VMM: yes, VRAM: 6143 MiB\r\nload_backend: loaded CUDA backend from D:\\WeeyugaTools\\llama.cpp\\ggml-cuda.dll\r\nload_backend: loaded RPC backend from D:\\WeeyugaTools\\llama.cpp\\ggml-rpc.dll\r\nload_backend: loaded CPU backend from D:\\WeeyugaTools\\llama.cpp\\ggml-cpu-haswell.dll\r\n| model | size | params | backend | ngl | test | t/s |\r\n| ------------------------------ | ---------: | ---------: | ---------- | --: | --------------: | -------------------: |\r\n| qwen35 9B Q4_K - Medium | 5.28 GiB | 8.95 B | CUDA | 99 | pp512 | 63.64 \u00b1 0.14 |\r\n| qwen35 9B Q4_K - Medium | 5.28 GiB | 8.95 B | CUDA | 99 | tg128 | 7.43 \u00b1 0.01 |\r\n\r\nbuild: 665abc609 (8951)\r\n\r" }, { "type": "call", "cell_id": "predator:llamacpp:qwen3.5:9b-q4km", "model_key": "qwen", "prompt_id": "hello", "run_idx": 0, "phase": "cold", "total_duration_ms": 5514, "prompt_tokens": 11, "completion_tokens": 64, "tokens_per_sec": 11.606819006166122, "finish_reason": "length", "error": null }, { "type": "call", "cell_id": "predator:llamacpp:qwen3.5:9b-q4km", "model_key": "qwen", "prompt_id": "hello", "run_idx": 1, "phase": "warm", "total_duration_ms": 4659, "prompt_tokens": 11, "completion_tokens": 64, "tokens_per_sec": 13.736853402017601, "finish_reason": "length", "error": null }, { "type": "call", "cell_id": "predator:llamacpp:qwen3.5:9b-q4km", "model_key": "qwen", "prompt_id": "hello", "run_idx": 2, "phase": "warm", "total_duration_ms": 4633, "prompt_tokens": 11, "completion_tokens": 64, "tokens_per_sec": 13.813943449169004, "finish_reason": "length", "error": null }, { "type": "call", "cell_id": "predator:llamacpp:qwen3.5:9b-q4km", "model_key": "qwen", "prompt_id": "hello", "run_idx": 3, "phase": "warm", "total_duration_ms": 4624, "prompt_tokens": 11, "completion_tokens": 64, "tokens_per_sec": 13.84083044982699, "finish_reason": "length", "error": null }, { "type": "call", "cell_id": "predator:llamacpp:qwen3.5:9b-q4km", "model_key": "qwen", "prompt_id": "P-MEDIUM", "run_idx": 0, "phase": "cold", "total_duration_ms": 35678, "prompt_tokens": 24, "completion_tokens": 512, "tokens_per_sec": 14.350580189472506, "finish_reason": "length", "error": null }, { "type": "call", "cell_id": "predator:llamacpp:qwen3.5:9b-q4km", "model_key": "qwen", "prompt_id": "P-MEDIUM", "run_idx": 1, "phase": "warm", "total_duration_ms": 35311, "prompt_tokens": 24, "completion_tokens": 512, "tokens_per_sec": 14.499730962023166, "finish_reason": "length", "error": null }, { "type": "call", "cell_id": "predator:llamacpp:qwen3.5:9b-q4km", "model_key": "qwen", "prompt_id": "P-MEDIUM", "run_idx": 2, "phase": "warm", "total_duration_ms": 35338, "prompt_tokens": 24, "completion_tokens": 512, "tokens_per_sec": 14.488652442130284, "finish_reason": "length", "error": null }, { "type": "call", "cell_id": "predator:llamacpp:qwen3.5:9b-q4km", "model_key": "qwen", "prompt_id": "P-MEDIUM", "run_idx": 3, "phase": "warm", "total_duration_ms": 35235, "prompt_tokens": 24, "completion_tokens": 512, "tokens_per_sec": 14.531006101887328, "finish_reason": "length", "error": null }, { "type": "call", "cell_id": "predator:llamacpp:qwen3.5:9b-q4km", "model_key": "qwen", "prompt_id": "P-HARD", "run_idx": 0, "phase": "cold", "total_duration_ms": 73737, "prompt_tokens": 32, "completion_tokens": 1024, "tokens_per_sec": 13.887193674817256, "finish_reason": "length", "error": null }, { "type": "call", "cell_id": "predator:llamacpp:qwen3.5:9b-q4km", "model_key": "qwen", "prompt_id": "P-HARD", "run_idx": 1, "phase": "warm", "total_duration_ms": 70291, "prompt_tokens": 32, "completion_tokens": 1024, "tokens_per_sec": 14.568010129319545, "finish_reason": "length", "error": null }, { "type": "call", "cell_id": "predator:llamacpp:qwen3.5:9b-q4km", "model_key": "qwen", "prompt_id": "P-HARD", "run_idx": 2, "phase": "warm", "total_duration_ms": 70266, "prompt_tokens": 32, "completion_tokens": 1024, "tokens_per_sec": 14.573193294054022, "finish_reason": "length", "error": null }, { "type": "call", "cell_id": "predator:llamacpp:qwen3.5:9b-q4km", "model_key": "qwen", "prompt_id": "P-HARD", "run_idx": 3, "phase": "warm", "total_duration_ms": 70350, "prompt_tokens": 32, "completion_tokens": 1024, "tokens_per_sec": 14.555792466240229, "finish_reason": "length", "error": null } ] } ], "meta_record": { "type": "meta", "benchmark_run_id": "09d8fbde-0008-49bb-99da-03eeaca72be1", "harness_version": "predator-trio-1", "started_at_utc": "2026-05-04T16:01:52Z", "host": "Slobodans-MacBook-Air.local", "models": { "granite": { "cell_id": "predator:llamacpp:granite-4.1:8b-q4km", "gguf": "granite-4.1-8b-Q4_K_M.gguf" }, "gemma": { "cell_id": "predator:llamacpp:gemma-4:e4b-it-q4km", "gguf": "gemma-4-E4B-it-Q4_K_M.gguf" }, "qwen": { "cell_id": "predator:llamacpp:qwen3.5:9b-q4km", "gguf": "Qwen3.5-9B-Q4_K_M.gguf" } }, "node": "predator", "engine": "llamacpp", "predator_target_url": "http://10.8.0.7:11436" }, "packaged_at": "2026-05-06T12:28:24Z" }