diff --git a/catalogue.json b/catalogue.json index 13e85d0..d2e40fb 100644 --- a/catalogue.json +++ b/catalogue.json @@ -1,19 +1,78 @@ { - "schema_version": "1.0-draft", - "generated_at": "2026-05-05T17:43:34Z", - "git_sha": "d15ba45042cd26173e1696b3e47fd9e4fbf2613d", + "schema_version": "1.0", + "generated_at": "2026-05-06T08:04:27Z", + "git_sha": "f06800254dfd046e154c94f663fb7f9f66c79ae3", "site_base_url": "https://benchmarks.weeyuga.com", - "data_base_url_lean": "https://benchmarks.weeyuga.com/data", - "gitea_archive_url_lean": "https://git.weeyuga.com/slobodanmargetic988/weeyuga-benchmarks-public", + "data_base_url": "https://benchmarks.weeyuga.com/data", + "gitea_archive_url": "https://git.weeyuga.com/slobodanmargetic988/weeyuga-benchmarks-public", + "controlled_vocab": { + "hardware": [ + "cicd", + "mac", + "pavilion", + "predator", + "runpod", + "vps50" + ], + "engine": [ + "llamacpp", + "ollama", + "vllm", + "mlx", + "cpu", + "weeyuga" + ], + "task_kind": [ + "chat", + "code", + "cold-start", + "long-context", + "param-sweep", + "throughput", + "tool-use", + "vision" + ], + "site_grade": [ + "archive-only", + "flagship", + "standard" + ], + "janie_blurb_status": [ + "drafted", + "pending", + "published", + "vera-reviewed" + ], + "caveat_severity": [ + "info", + "reframe-pending", + "warning" + ], + "chart_spec_kind": [ + "bar", + "line", + "scatter" + ] + }, + "site_grade_count": 10, + "site_grade_breakdown": { + "flagship": 4, + "standard": 6, + "archive-only": 11 + }, + "total_runs_in_archive": 21, "benchmarks": [ { "id": "91751afd-068a-477b-8f40-6e1963f803f1", - "title": "vps50-cpu-matrix-1 \u2014 gemma/phi-4/qwen2.5/qwen3 on vps50", + "title": "vps50-cpu-matrix-1 \u2014 gemma/phi/qwen2.5/qwen3 on vps50", "date": "2026-05-05", + "started_at": "2026-05-05T10:23:13Z", + "git_sha": "f06800254dfd046e154c94f663fb7f9f66c79ae3", "hardware": "vps50", + "hardware_label": "VPS50 \u00b7 cloud \u00b7 16 vCPU AMD EPYC \u00b7 62 GB RAM \u00b7 no GPU", "engine": "llamacpp", "harness": "vps50-cpu-matrix-1", - "model_family": "gemma/phi-4/qwen2.5/qwen3", + "model_family": "gemma/phi/qwen2.5/qwen3", "model_sizes": [ "phi-4", "gemma-4-26b-a4b", @@ -21,12 +80,105 @@ "qwen2.5-72b", "gemma-4-26b-a4b-it" ], - "task_kind": null, + "task_kind": "param-sweep", + "tags": [ + "a3b", + "cpu", + "gemma", + "matrix", + "qwen2.5", + "qwen3", + "vps50" + ], "headline": "2 calls across 5 cell(s)", - "summary_md_path": null, - "raw_jsonl_path": "runs/91751afd-068a-477b-8f40-6e1963f803f1.jsonl", - "log_path": "runs/91751afd-068a-477b-8f40-6e1963f803f1.log", - "synthesis_doc": "A3B_AND_CPU_OVERNIGHT_2026-05-05.md", + "janie_blurb_md": null, + "janie_blurb_status": "pending", + "caveat": null, + "caveat_severity": null, + "methodology_ref": "A3B_AND_CPU_OVERNIGHT_2026-05-05", + "methodology_deviations_md": null, + "results_table": [ + { + "label": "phi-4", + "cell_id": "vps50:llamacpp:phi-4", + "n_calls": 0, + "n_errors": 0, + "duration_ms_mean": null, + "duration_ms_p50": null, + "duration_ms_p95": null, + "tokens_per_sec_mean": null, + "tokens_per_sec_p50": null, + "tokens_per_sec_p95": null, + "tokens_per_sec_max": null, + "memory_mb": null, + "watts_avg": null, + "notes": null + }, + { + "label": "gemma-4-26b-a4b", + "cell_id": "vps50:llamacpp:gemma-4-26b-a4b", + "n_calls": 0, + "n_errors": 0, + "duration_ms_mean": null, + "duration_ms_p50": null, + "duration_ms_p95": null, + "tokens_per_sec_mean": null, + "tokens_per_sec_p50": null, + "tokens_per_sec_p95": null, + "tokens_per_sec_max": null, + "memory_mb": null, + "watts_avg": null, + "notes": null + }, + { + "label": "qwen3-30b-a3b", + "cell_id": "vps50:llamacpp:qwen3-30b-a3b", + "n_calls": 0, + "n_errors": 0, + "duration_ms_mean": null, + "duration_ms_p50": null, + "duration_ms_p95": null, + "tokens_per_sec_mean": null, + "tokens_per_sec_p50": null, + "tokens_per_sec_p95": null, + "tokens_per_sec_max": null, + "memory_mb": null, + "watts_avg": null, + "notes": null + }, + { + "label": "qwen2.5-72b", + "cell_id": "vps50:llamacpp:qwen2.5-72b", + "n_calls": 0, + "n_errors": 0, + "duration_ms_mean": null, + "duration_ms_p50": null, + "duration_ms_p95": null, + "tokens_per_sec_mean": null, + "tokens_per_sec_p50": null, + "tokens_per_sec_p95": null, + "tokens_per_sec_max": null, + "memory_mb": null, + "watts_avg": null, + "notes": null + }, + { + "label": "gemma-4-26b-a4b-it-q4km-cpu-ctx32k", + "cell_id": "vps50:llamacpp:gemma-4-26b-a4b-it-q4km-cpu-ctx32k", + "n_calls": 2, + "n_errors": 0, + "duration_ms_mean": null, + "duration_ms_p50": null, + "duration_ms_p95": null, + "tokens_per_sec_mean": null, + "tokens_per_sec_p50": null, + "tokens_per_sec_p95": null, + "tokens_per_sec_max": null, + "memory_mb": null, + "watts_avg": null, + "notes": null + } + ], "cells": [ { "cell_id": "vps50:llamacpp:phi-4", @@ -34,7 +186,10 @@ "n_errors": 0, "duration_ms_mean": null, "duration_ms_p50": null, + "duration_ms_p95": null, "tokens_per_sec_mean": null, + "tokens_per_sec_p50": null, + "tokens_per_sec_p95": null, "tokens_per_sec_max": null }, { @@ -43,7 +198,10 @@ "n_errors": 0, "duration_ms_mean": null, "duration_ms_p50": null, + "duration_ms_p95": null, "tokens_per_sec_mean": null, + "tokens_per_sec_p50": null, + "tokens_per_sec_p95": null, "tokens_per_sec_max": null }, { @@ -52,7 +210,10 @@ "n_errors": 0, "duration_ms_mean": null, "duration_ms_p50": null, + "duration_ms_p95": null, "tokens_per_sec_mean": null, + "tokens_per_sec_p50": null, + "tokens_per_sec_p95": null, "tokens_per_sec_max": null }, { @@ -61,7 +222,10 @@ "n_errors": 0, "duration_ms_mean": null, "duration_ms_p50": null, + "duration_ms_p95": null, "tokens_per_sec_mean": null, + "tokens_per_sec_p50": null, + "tokens_per_sec_p95": null, "tokens_per_sec_max": null }, { @@ -70,21 +234,46 @@ "n_errors": 0, "duration_ms_mean": null, "duration_ms_p50": null, + "duration_ms_p95": null, "tokens_per_sec_mean": null, + "tokens_per_sec_p50": null, + "tokens_per_sec_p95": null, "tokens_per_sec_max": null } ], "n_calls_total": 2, "n_errors_total": 0, - "started_at": "2026-05-05T10:23:13Z", - "tags": [ - "a3b", - "cpu", - "gemma", - "matrix", - "qwen2.5", - "qwen3", - "vps50" + "chart_spec": { + "kind": "line", + "x": "param_value", + "y": "tokens_per_sec_mean", + "series": [ + "phi-4", + "gemma-4-26b-a4b", + "qwen3-30b-a3b", + "qwen2.5-72b", + "gemma-4-26b-a4b-it-q4km-cpu-ctx32k" + ], + "data_url": "/data/91751afd-068a-477b-8f40-6e1963f803f1/run.jsonl" + }, + "site_grade": "standard", + "site_grade_reason": null, + "raw_data_urls": { + "jsonl": "/data/91751afd-068a-477b-8f40-6e1963f803f1/run.jsonl", + "log": "/data/91751afd-068a-477b-8f40-6e1963f803f1/run.log", + "md": null, + "metadata": "/data/91751afd-068a-477b-8f40-6e1963f803f1/metadata.json", + "gitea_dir": "https://git.weeyuga.com/slobodanmargetic988/weeyuga-benchmarks-public/src/branch/main/runs/91751afd-068a-477b-8f40-6e1963f803f1/" + }, + "synthesis_doc": "A3B_AND_CPU_OVERNIGHT_2026-05-05.md", + "synthesis_docs_all": [ + "A3B_AND_CPU_OVERNIGHT_2026-05-05.md" + ], + "related_ids": [ + "212d6278-1b9b-45e9-8aae-7eed4d4ec822", + "86241185-24da-4f8e-b254-a60c9465e353", + "cef4b219-6b65-4740-8c3b-5b68aa10cf3f", + "1bf57c9a-fd7a-49aa-90de-cd1907b15ddd" ], "status": "complete", "visibility": "draft" @@ -93,19 +282,82 @@ "id": "79afe366-1055-4e45-adf6-593864a530e8", "title": "predator-a3b-ngl6-ctx-1 \u2014 qwen3 on predator", "date": "2026-05-05", + "started_at": "2026-05-05T09:44:34Z", + "git_sha": "f06800254dfd046e154c94f663fb7f9f66c79ae3", "hardware": "predator", + "hardware_label": "Predator \u00b7 gaming laptop \u00b7 GTX 1060 6 GB \u00b7 28 GB RAM", "engine": "llamacpp", "harness": "predator-a3b-ngl6-ctx-1", "model_family": "qwen3", "model_sizes": [ "qwen3:30b-a3b-iq2m" ], - "task_kind": null, + "task_kind": "param-sweep", + "tags": [ + "a3b", + "ctx", + "iq2", + "ngl", + "predator", + "qwen3" + ], "headline": "45 calls across 3 cell(s); ~5.2 tok/s mean; p50 84.8s", - "summary_md_path": null, - "raw_jsonl_path": "runs/79afe366-1055-4e45-adf6-593864a530e8.jsonl", - "log_path": "runs/79afe366-1055-4e45-adf6-593864a530e8.log", - "synthesis_doc": "A3B_AND_CPU_OVERNIGHT_2026-05-05.md", + "janie_blurb_md": null, + "janie_blurb_status": "pending", + "caveat": null, + "caveat_severity": null, + "methodology_ref": "A3B_AND_CPU_OVERNIGHT_2026-05-05", + "methodology_deviations_md": null, + "results_table": [ + { + "label": "qwen3:30b-a3b-iq2m-think500-ngl6-ctx64k", + "cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl6-ctx64k", + "n_calls": 15, + "n_errors": 0, + "duration_ms_mean": 81326.17, + "duration_ms_p50": 73215.0, + "duration_ms_p95": 152048.1, + "tokens_per_sec_mean": 5.42, + "tokens_per_sec_p50": 5.35, + "tokens_per_sec_p95": 5.91, + "tokens_per_sec_max": 5.93, + "memory_mb": null, + "watts_avg": null, + "notes": null + }, + { + "label": "qwen3:30b-a3b-iq2m-think500-ngl6-ctx96k", + "cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl6-ctx96k", + "n_calls": 15, + "n_errors": 0, + "duration_ms_mean": 84532.92, + "duration_ms_p50": 99578.5, + "duration_ms_p95": 152059.4, + "tokens_per_sec_mean": 5.43, + "tokens_per_sec_p50": 5.38, + "tokens_per_sec_p95": 5.78, + "tokens_per_sec_max": 5.94, + "memory_mb": null, + "watts_avg": null, + "notes": null + }, + { + "label": "qwen3:30b-a3b-iq2m-think500-ngl6-ctx131k", + "cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl6-ctx131k", + "n_calls": 15, + "n_errors": 0, + "duration_ms_mean": 91748.5, + "duration_ms_p50": 98184.0, + "duration_ms_p95": 161421.6, + "tokens_per_sec_mean": 4.79, + "tokens_per_sec_p50": 4.8, + "tokens_per_sec_p95": 4.93, + "tokens_per_sec_max": 4.93, + "memory_mb": null, + "watts_avg": null, + "notes": null + } + ], "cells": [ { "cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl6-ctx64k", @@ -113,7 +365,10 @@ "n_errors": 0, "duration_ms_mean": 81326.17, "duration_ms_p50": 73215.0, + "duration_ms_p95": 152048.1, "tokens_per_sec_mean": 5.42, + "tokens_per_sec_p50": 5.35, + "tokens_per_sec_p95": 5.91, "tokens_per_sec_max": 5.93 }, { @@ -122,7 +377,10 @@ "n_errors": 0, "duration_ms_mean": 84532.92, "duration_ms_p50": 99578.5, + "duration_ms_p95": 152059.4, "tokens_per_sec_mean": 5.43, + "tokens_per_sec_p50": 5.38, + "tokens_per_sec_p95": 5.78, "tokens_per_sec_max": 5.94 }, { @@ -131,20 +389,44 @@ "n_errors": 0, "duration_ms_mean": 91748.5, "duration_ms_p50": 98184.0, + "duration_ms_p95": 161421.6, "tokens_per_sec_mean": 4.79, + "tokens_per_sec_p50": 4.8, + "tokens_per_sec_p95": 4.93, "tokens_per_sec_max": 4.93 } ], "n_calls_total": 45, "n_errors_total": 0, - "started_at": "2026-05-05T09:44:34Z", - "tags": [ - "a3b", - "ctx", - "iq2", - "ngl", - "predator", - "qwen3" + "chart_spec": { + "kind": "line", + "x": "param_value", + "y": "tokens_per_sec_mean", + "series": [ + "qwen3:30b-a3b-iq2m-think500-ngl6-ctx64k", + "qwen3:30b-a3b-iq2m-think500-ngl6-ctx96k", + "qwen3:30b-a3b-iq2m-think500-ngl6-ctx131k" + ], + "data_url": "/data/79afe366-1055-4e45-adf6-593864a530e8/run.jsonl" + }, + "site_grade": "standard", + "site_grade_reason": null, + "raw_data_urls": { + "jsonl": "/data/79afe366-1055-4e45-adf6-593864a530e8/run.jsonl", + "log": "/data/79afe366-1055-4e45-adf6-593864a530e8/run.log", + "md": null, + "metadata": "/data/79afe366-1055-4e45-adf6-593864a530e8/metadata.json", + "gitea_dir": "https://git.weeyuga.com/slobodanmargetic988/weeyuga-benchmarks-public/src/branch/main/runs/79afe366-1055-4e45-adf6-593864a530e8/" + }, + "synthesis_doc": "A3B_AND_CPU_OVERNIGHT_2026-05-05.md", + "synthesis_docs_all": [ + "A3B_AND_CPU_OVERNIGHT_2026-05-05.md" + ], + "related_ids": [ + "ad28cb95-5134-40b5-8514-c8a381f83d87", + "b37836bf-d1a2-4d6b-a732-aff89da1fa07", + "e08a7051-a856-4053-b34b-561d7ce5e8cf", + "5fb2913d-6500-4ecf-9e97-d43f7dd61145" ], "status": "complete", "visibility": "draft" @@ -153,19 +435,130 @@ "id": "e08a7051-a856-4053-b34b-561d7ce5e8cf", "title": "predator-a3b-ngl-ctx-2d-1 \u2014 qwen3 on predator", "date": "2026-05-05", + "started_at": "2026-05-05T07:43:00Z", + "git_sha": "f06800254dfd046e154c94f663fb7f9f66c79ae3", "hardware": "predator", + "hardware_label": "Predator \u00b7 gaming laptop \u00b7 GTX 1060 6 GB \u00b7 28 GB RAM", "engine": "llamacpp", "harness": "predator-a3b-ngl-ctx-2d-1", "model_family": "qwen3", "model_sizes": [ "qwen3:30b-a3b-iq2m" ], - "task_kind": null, + "task_kind": "param-sweep", + "tags": [ + "a3b", + "ctx", + "iq2", + "ngl", + "predator", + "qwen3" + ], "headline": "90 calls across 6 cell(s); ~5.9 tok/s mean; p50 73.4s", - "summary_md_path": null, - "raw_jsonl_path": "runs/e08a7051-a856-4053-b34b-561d7ce5e8cf.jsonl", - "log_path": "runs/e08a7051-a856-4053-b34b-561d7ce5e8cf.log", - "synthesis_doc": "A3B_AND_CPU_OVERNIGHT_2026-05-05.md", + "janie_blurb_md": null, + "janie_blurb_status": "pending", + "caveat": null, + "caveat_severity": null, + "methodology_ref": "A3B_AND_CPU_OVERNIGHT_2026-05-05", + "methodology_deviations_md": null, + "results_table": [ + { + "label": "qwen3:30b-a3b-iq2m-think500-ngl12-ctx64k", + "cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl12-ctx64k", + "n_calls": 15, + "n_errors": 0, + "duration_ms_mean": 85573.42, + "duration_ms_p50": 78940.0, + "duration_ms_p95": 159864.3, + "tokens_per_sec_mean": 5.48, + "tokens_per_sec_p50": 5.31, + "tokens_per_sec_p95": 6.79, + "tokens_per_sec_max": 6.8, + "memory_mb": null, + "watts_avg": null, + "notes": null + }, + { + "label": "qwen3:30b-a3b-iq2m-think500-ngl12-ctx96k", + "cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl12-ctx96k", + "n_calls": 15, + "n_errors": 0, + "duration_ms_mean": 71805.83, + "duration_ms_p50": 58541.0, + "duration_ms_p95": 140580.5, + "tokens_per_sec_mean": 5.79, + "tokens_per_sec_p50": 5.75, + "tokens_per_sec_p95": 6.47, + "tokens_per_sec_max": 6.7, + "memory_mb": null, + "watts_avg": null, + "notes": null + }, + { + "label": "qwen3:30b-a3b-iq2m-think500-ngl12-ctx131k", + "cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl12-ctx131k", + "n_calls": 15, + "n_errors": 0, + "duration_ms_mean": 96406.17, + "duration_ms_p50": 105842.0, + "duration_ms_p95": 154492.35, + "tokens_per_sec_mean": 5.01, + "tokens_per_sec_p50": 5.13, + "tokens_per_sec_p95": 5.97, + "tokens_per_sec_max": 6.29, + "memory_mb": null, + "watts_avg": null, + "notes": null + }, + { + "label": "qwen3:30b-a3b-iq2m-think500-ngl24-ctx48k", + "cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl24-ctx48k", + "n_calls": 15, + "n_errors": 0, + "duration_ms_mean": 65539.5, + "duration_ms_p50": 64606.5, + "duration_ms_p95": 118069.75, + "tokens_per_sec_mean": 6.6, + "tokens_per_sec_p50": 6.59, + "tokens_per_sec_p95": 6.89, + "tokens_per_sec_max": 6.91, + "memory_mb": null, + "watts_avg": null, + "notes": null + }, + { + "label": "qwen3:30b-a3b-iq2m-think500-ngl24-ctx64k", + "cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl24-ctx64k", + "n_calls": 15, + "n_errors": 0, + "duration_ms_mean": 71267.58, + "duration_ms_p50": 80265.5, + "duration_ms_p95": 128231.3, + "tokens_per_sec_mean": 6.41, + "tokens_per_sec_p50": 6.39, + "tokens_per_sec_p95": 6.88, + "tokens_per_sec_max": 6.9, + "memory_mb": null, + "watts_avg": null, + "notes": null + }, + { + "label": "qwen3:30b-a3b-iq2m-think500-ngl24-ctx96k", + "cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl24-ctx96k", + "n_calls": 15, + "n_errors": 0, + "duration_ms_mean": 69299.5, + "duration_ms_p50": 68355.0, + "duration_ms_p95": 124990.95, + "tokens_per_sec_mean": 6.08, + "tokens_per_sec_p50": 6.44, + "tokens_per_sec_p95": 6.68, + "tokens_per_sec_max": 6.69, + "memory_mb": null, + "watts_avg": null, + "notes": null + } + ], "cells": [ { "cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl12-ctx64k", @@ -173,7 +566,10 @@ "n_errors": 0, "duration_ms_mean": 85573.42, "duration_ms_p50": 78940.0, + "duration_ms_p95": 159864.3, "tokens_per_sec_mean": 5.48, + "tokens_per_sec_p50": 5.31, + "tokens_per_sec_p95": 6.79, "tokens_per_sec_max": 6.8 }, { @@ -182,7 +578,10 @@ "n_errors": 0, "duration_ms_mean": 71805.83, "duration_ms_p50": 58541.0, + "duration_ms_p95": 140580.5, "tokens_per_sec_mean": 5.79, + "tokens_per_sec_p50": 5.75, + "tokens_per_sec_p95": 6.47, "tokens_per_sec_max": 6.7 }, { @@ -191,7 +590,10 @@ "n_errors": 0, "duration_ms_mean": 96406.17, "duration_ms_p50": 105842.0, + "duration_ms_p95": 154492.35, "tokens_per_sec_mean": 5.01, + "tokens_per_sec_p50": 5.13, + "tokens_per_sec_p95": 5.97, "tokens_per_sec_max": 6.29 }, { @@ -200,7 +602,10 @@ "n_errors": 0, "duration_ms_mean": 65539.5, "duration_ms_p50": 64606.5, + "duration_ms_p95": 118069.75, "tokens_per_sec_mean": 6.6, + "tokens_per_sec_p50": 6.59, + "tokens_per_sec_p95": 6.89, "tokens_per_sec_max": 6.91 }, { @@ -209,7 +614,10 @@ "n_errors": 0, "duration_ms_mean": 71267.58, "duration_ms_p50": 80265.5, + "duration_ms_p95": 128231.3, "tokens_per_sec_mean": 6.41, + "tokens_per_sec_p50": 6.39, + "tokens_per_sec_p95": 6.88, "tokens_per_sec_max": 6.9 }, { @@ -218,44 +626,167 @@ "n_errors": 0, "duration_ms_mean": 69299.5, "duration_ms_p50": 68355.0, + "duration_ms_p95": 124990.95, "tokens_per_sec_mean": 6.08, + "tokens_per_sec_p50": 6.44, + "tokens_per_sec_p95": 6.68, "tokens_per_sec_max": 6.69 } ], "n_calls_total": 90, "n_errors_total": 0, - "started_at": "2026-05-05T07:43:00Z", - "tags": [ - "a3b", - "ctx", - "iq2", - "ngl", - "predator", - "qwen3" + "chart_spec": { + "kind": "line", + "x": "param_value", + "y": "tokens_per_sec_mean", + "series": [ + "qwen3:30b-a3b-iq2m-think500-ngl12-ctx64k", + "qwen3:30b-a3b-iq2m-think500-ngl12-ctx96k", + "qwen3:30b-a3b-iq2m-think500-ngl12-ctx131k", + "qwen3:30b-a3b-iq2m-think500-ngl24-ctx48k", + "qwen3:30b-a3b-iq2m-think500-ngl24-ctx64k", + "qwen3:30b-a3b-iq2m-think500-ngl24-ctx96k" + ], + "data_url": "/data/e08a7051-a856-4053-b34b-561d7ce5e8cf/run.jsonl" + }, + "site_grade": "standard", + "site_grade_reason": null, + "raw_data_urls": { + "jsonl": "/data/e08a7051-a856-4053-b34b-561d7ce5e8cf/run.jsonl", + "log": "/data/e08a7051-a856-4053-b34b-561d7ce5e8cf/run.log", + "md": null, + "metadata": "/data/e08a7051-a856-4053-b34b-561d7ce5e8cf/metadata.json", + "gitea_dir": "https://git.weeyuga.com/slobodanmargetic988/weeyuga-benchmarks-public/src/branch/main/runs/e08a7051-a856-4053-b34b-561d7ce5e8cf/" + }, + "synthesis_doc": "A3B_AND_CPU_OVERNIGHT_2026-05-05.md", + "synthesis_docs_all": [ + "A3B_AND_CPU_OVERNIGHT_2026-05-05.md" + ], + "related_ids": [ + "79afe366-1055-4e45-adf6-593864a530e8", + "ad28cb95-5134-40b5-8514-c8a381f83d87", + "b37836bf-d1a2-4d6b-a732-aff89da1fa07", + "5fb2913d-6500-4ecf-9e97-d43f7dd61145" ], "status": "complete", "visibility": "draft" }, { "id": "cef4b219-6b65-4740-8c3b-5b68aa10cf3f", - "title": "vps50-cpu-matrix-1 \u2014 gemma/phi-4/qwen2.5/qwen3 on vps50", + "title": "vps50-cpu-matrix-1 \u2014 gemma/phi/qwen2.5/qwen3 on vps50", "date": "2026-05-05", + "started_at": "2026-05-05T07:00:10Z", + "git_sha": "f06800254dfd046e154c94f663fb7f9f66c79ae3", "hardware": "vps50", + "hardware_label": "VPS50 \u00b7 cloud \u00b7 16 vCPU AMD EPYC \u00b7 62 GB RAM \u00b7 no GPU", "engine": "llamacpp", "harness": "vps50-cpu-matrix-1", - "model_family": "gemma/phi-4/qwen2.5/qwen3", + "model_family": "gemma/phi/qwen2.5/qwen3", "model_sizes": [ "phi-4", "gemma-4-26b-a4b", "qwen3-30b-a3b", "qwen2.5-72b" ], - "task_kind": null, + "task_kind": "param-sweep", + "tags": [ + "a3b", + "cpu", + "gemma", + "matrix", + "qwen2.5", + "qwen3", + "vps50" + ], "headline": "6 calls across 5 cell(s); ~0.0 tok/s mean; p50 527.8s", - "summary_md_path": null, - "raw_jsonl_path": "runs/cef4b219-6b65-4740-8c3b-5b68aa10cf3f.jsonl", - "log_path": "runs/cef4b219-6b65-4740-8c3b-5b68aa10cf3f.log", - "synthesis_doc": "A3B_AND_CPU_OVERNIGHT_2026-05-05.md", + "janie_blurb_md": null, + "janie_blurb_status": "pending", + "caveat": null, + "caveat_severity": null, + "methodology_ref": "A3B_AND_CPU_OVERNIGHT_2026-05-05", + "methodology_deviations_md": null, + "results_table": [ + { + "label": "phi-4", + "cell_id": "vps50:llamacpp:phi-4", + "n_calls": 0, + "n_errors": 0, + "duration_ms_mean": null, + "duration_ms_p50": null, + "duration_ms_p95": null, + "tokens_per_sec_mean": null, + "tokens_per_sec_p50": null, + "tokens_per_sec_p95": null, + "tokens_per_sec_max": null, + "memory_mb": null, + "watts_avg": null, + "notes": null + }, + { + "label": "gemma-4-26b-a4b", + "cell_id": "vps50:llamacpp:gemma-4-26b-a4b", + "n_calls": 0, + "n_errors": 0, + "duration_ms_mean": null, + "duration_ms_p50": null, + "duration_ms_p95": null, + "tokens_per_sec_mean": null, + "tokens_per_sec_p50": null, + "tokens_per_sec_p95": null, + "tokens_per_sec_max": null, + "memory_mb": null, + "watts_avg": null, + "notes": null + }, + { + "label": "qwen3-30b-a3b", + "cell_id": "vps50:llamacpp:qwen3-30b-a3b", + "n_calls": 0, + "n_errors": 0, + "duration_ms_mean": null, + "duration_ms_p50": null, + "duration_ms_p95": null, + "tokens_per_sec_mean": null, + "tokens_per_sec_p50": null, + "tokens_per_sec_p95": null, + "tokens_per_sec_max": null, + "memory_mb": null, + "watts_avg": null, + "notes": null + }, + { + "label": "qwen2.5-72b", + "cell_id": "vps50:llamacpp:qwen2.5-72b", + "n_calls": 0, + "n_errors": 0, + "duration_ms_mean": null, + "duration_ms_p50": null, + "duration_ms_p95": null, + "tokens_per_sec_mean": null, + "tokens_per_sec_p50": null, + "tokens_per_sec_p95": null, + "tokens_per_sec_max": null, + "memory_mb": null, + "watts_avg": null, + "notes": null + }, + { + "label": "phi-4-q4km-cpu-ctx32k", + "cell_id": "vps50:llamacpp:phi-4-q4km-cpu-ctx32k", + "n_calls": 6, + "n_errors": 0, + "duration_ms_mean": 548939.5, + "duration_ms_p50": 527802.5, + "duration_ms_p95": 614865.75, + "tokens_per_sec_mean": 0.02, + "tokens_per_sec_p50": 0.02, + "tokens_per_sec_p95": 0.02, + "tokens_per_sec_max": 0.02, + "memory_mb": null, + "watts_avg": null, + "notes": null + } + ], "cells": [ { "cell_id": "vps50:llamacpp:phi-4", @@ -263,7 +794,10 @@ "n_errors": 0, "duration_ms_mean": null, "duration_ms_p50": null, + "duration_ms_p95": null, "tokens_per_sec_mean": null, + "tokens_per_sec_p50": null, + "tokens_per_sec_p95": null, "tokens_per_sec_max": null }, { @@ -272,7 +806,10 @@ "n_errors": 0, "duration_ms_mean": null, "duration_ms_p50": null, + "duration_ms_p95": null, "tokens_per_sec_mean": null, + "tokens_per_sec_p50": null, + "tokens_per_sec_p95": null, "tokens_per_sec_max": null }, { @@ -281,7 +818,10 @@ "n_errors": 0, "duration_ms_mean": null, "duration_ms_p50": null, + "duration_ms_p95": null, "tokens_per_sec_mean": null, + "tokens_per_sec_p50": null, + "tokens_per_sec_p95": null, "tokens_per_sec_max": null }, { @@ -290,7 +830,10 @@ "n_errors": 0, "duration_ms_mean": null, "duration_ms_p50": null, + "duration_ms_p95": null, "tokens_per_sec_mean": null, + "tokens_per_sec_p50": null, + "tokens_per_sec_p95": null, "tokens_per_sec_max": null }, { @@ -299,13 +842,68 @@ "n_errors": 0, "duration_ms_mean": 548939.5, "duration_ms_p50": 527802.5, + "duration_ms_p95": 614865.75, "tokens_per_sec_mean": 0.02, + "tokens_per_sec_p50": 0.02, + "tokens_per_sec_p95": 0.02, "tokens_per_sec_max": 0.02 } ], "n_calls_total": 6, "n_errors_total": 0, - "started_at": "2026-05-05T07:00:10Z", + "chart_spec": { + "kind": "line", + "x": "param_value", + "y": "tokens_per_sec_mean", + "series": [ + "phi-4", + "gemma-4-26b-a4b", + "qwen3-30b-a3b", + "qwen2.5-72b", + "phi-4-q4km-cpu-ctx32k" + ], + "data_url": "/data/cef4b219-6b65-4740-8c3b-5b68aa10cf3f/run.jsonl" + }, + "site_grade": "standard", + "site_grade_reason": null, + "raw_data_urls": { + "jsonl": "/data/cef4b219-6b65-4740-8c3b-5b68aa10cf3f/run.jsonl", + "log": "/data/cef4b219-6b65-4740-8c3b-5b68aa10cf3f/run.log", + "md": null, + "metadata": "/data/cef4b219-6b65-4740-8c3b-5b68aa10cf3f/metadata.json", + "gitea_dir": "https://git.weeyuga.com/slobodanmargetic988/weeyuga-benchmarks-public/src/branch/main/runs/cef4b219-6b65-4740-8c3b-5b68aa10cf3f/" + }, + "synthesis_doc": "A3B_AND_CPU_OVERNIGHT_2026-05-05.md", + "synthesis_docs_all": [ + "A3B_AND_CPU_OVERNIGHT_2026-05-05.md" + ], + "related_ids": [ + "212d6278-1b9b-45e9-8aae-7eed4d4ec822", + "86241185-24da-4f8e-b254-a60c9465e353", + "91751afd-068a-477b-8f40-6e1963f803f1", + "1bf57c9a-fd7a-49aa-90de-cd1907b15ddd" + ], + "status": "complete", + "visibility": "draft" + }, + { + "id": "86241185-24da-4f8e-b254-a60c9465e353", + "title": "vps50-cpu-matrix-1 \u2014 gemma/phi/qwen2.5/qwen3 on vps50", + "date": "2026-05-05", + "started_at": "2026-05-05T03:07:15Z", + "git_sha": "f06800254dfd046e154c94f663fb7f9f66c79ae3", + "hardware": "vps50", + "hardware_label": "VPS50 \u00b7 cloud \u00b7 16 vCPU AMD EPYC \u00b7 62 GB RAM \u00b7 no GPU", + "engine": "llamacpp", + "harness": "vps50-cpu-matrix-1", + "model_family": "gemma/phi/qwen2.5/qwen3", + "model_sizes": [ + "phi-4", + "gemma-4-26b-a4b", + "qwen3-30b-a3b", + "qwen2.5-72b" + ], + "task_kind": "param-sweep", "tags": [ "a3b", "cpu", @@ -315,29 +913,79 @@ "qwen3", "vps50" ], - "status": "complete", - "visibility": "draft" - }, - { - "id": "86241185-24da-4f8e-b254-a60c9465e353", - "title": "vps50-cpu-matrix-1 \u2014 gemma/phi-4/qwen2.5/qwen3 on vps50", - "date": "2026-05-05", - "hardware": "vps50", - "engine": "llamacpp", - "harness": "vps50-cpu-matrix-1", - "model_family": "gemma/phi-4/qwen2.5/qwen3", - "model_sizes": [ - "phi-4", - "gemma-4-26b-a4b", - "qwen3-30b-a3b", - "qwen2.5-72b" - ], - "task_kind": null, "headline": "no calls landed", - "summary_md_path": null, - "raw_jsonl_path": "runs/86241185-24da-4f8e-b254-a60c9465e353.jsonl", - "log_path": "runs/86241185-24da-4f8e-b254-a60c9465e353.log", - "synthesis_doc": null, + "janie_blurb_md": null, + "janie_blurb_status": "pending", + "caveat": null, + "caveat_severity": null, + "methodology_ref": "HARNESS#vps50-cpu-matrix-1", + "methodology_deviations_md": null, + "results_table": [ + { + "label": "phi-4", + "cell_id": "vps50:llamacpp:phi-4", + "n_calls": 0, + "n_errors": 0, + "duration_ms_mean": null, + "duration_ms_p50": null, + "duration_ms_p95": null, + "tokens_per_sec_mean": null, + "tokens_per_sec_p50": null, + "tokens_per_sec_p95": null, + "tokens_per_sec_max": null, + "memory_mb": null, + "watts_avg": null, + "notes": null + }, + { + "label": "gemma-4-26b-a4b", + "cell_id": "vps50:llamacpp:gemma-4-26b-a4b", + "n_calls": 0, + "n_errors": 0, + "duration_ms_mean": null, + "duration_ms_p50": null, + "duration_ms_p95": null, + "tokens_per_sec_mean": null, + "tokens_per_sec_p50": null, + "tokens_per_sec_p95": null, + "tokens_per_sec_max": null, + "memory_mb": null, + "watts_avg": null, + "notes": null + }, + { + "label": "qwen3-30b-a3b", + "cell_id": "vps50:llamacpp:qwen3-30b-a3b", + "n_calls": 0, + "n_errors": 0, + "duration_ms_mean": null, + "duration_ms_p50": null, + "duration_ms_p95": null, + "tokens_per_sec_mean": null, + "tokens_per_sec_p50": null, + "tokens_per_sec_p95": null, + "tokens_per_sec_max": null, + "memory_mb": null, + "watts_avg": null, + "notes": null + }, + { + "label": "qwen2.5-72b", + "cell_id": "vps50:llamacpp:qwen2.5-72b", + "n_calls": 0, + "n_errors": 0, + "duration_ms_mean": null, + "duration_ms_p50": null, + "duration_ms_p95": null, + "tokens_per_sec_mean": null, + "tokens_per_sec_p50": null, + "tokens_per_sec_p95": null, + "tokens_per_sec_max": null, + "memory_mb": null, + "watts_avg": null, + "notes": null + } + ], "cells": [ { "cell_id": "vps50:llamacpp:phi-4", @@ -345,7 +993,10 @@ "n_errors": 0, "duration_ms_mean": null, "duration_ms_p50": null, + "duration_ms_p95": null, "tokens_per_sec_mean": null, + "tokens_per_sec_p50": null, + "tokens_per_sec_p95": null, "tokens_per_sec_max": null }, { @@ -354,7 +1005,10 @@ "n_errors": 0, "duration_ms_mean": null, "duration_ms_p50": null, + "duration_ms_p95": null, "tokens_per_sec_mean": null, + "tokens_per_sec_p50": null, + "tokens_per_sec_p95": null, "tokens_per_sec_max": null }, { @@ -363,7 +1017,10 @@ "n_errors": 0, "duration_ms_mean": null, "duration_ms_p50": null, + "duration_ms_p95": null, "tokens_per_sec_mean": null, + "tokens_per_sec_p50": null, + "tokens_per_sec_p95": null, "tokens_per_sec_max": null }, { @@ -372,21 +1029,43 @@ "n_errors": 0, "duration_ms_mean": null, "duration_ms_p50": null, + "duration_ms_p95": null, "tokens_per_sec_mean": null, + "tokens_per_sec_p50": null, + "tokens_per_sec_p95": null, "tokens_per_sec_max": null } ], "n_calls_total": 0, "n_errors_total": 0, - "started_at": "2026-05-05T03:07:15Z", - "tags": [ - "a3b", - "cpu", - "gemma", - "matrix", - "qwen2.5", - "qwen3", - "vps50" + "chart_spec": { + "kind": "line", + "x": "param_value", + "y": "tokens_per_sec_mean", + "series": [ + "phi-4", + "gemma-4-26b-a4b", + "qwen3-30b-a3b", + "qwen2.5-72b" + ], + "data_url": "/data/86241185-24da-4f8e-b254-a60c9465e353/run.jsonl" + }, + "site_grade": "archive-only", + "site_grade_reason": "status=meta-only", + "raw_data_urls": { + "jsonl": "/data/86241185-24da-4f8e-b254-a60c9465e353/run.jsonl", + "log": "/data/86241185-24da-4f8e-b254-a60c9465e353/run.log", + "md": null, + "metadata": "/data/86241185-24da-4f8e-b254-a60c9465e353/metadata.json", + "gitea_dir": "https://git.weeyuga.com/slobodanmargetic988/weeyuga-benchmarks-public/src/branch/main/runs/86241185-24da-4f8e-b254-a60c9465e353/" + }, + "synthesis_doc": null, + "synthesis_docs_all": [], + "related_ids": [ + "212d6278-1b9b-45e9-8aae-7eed4d4ec822", + "91751afd-068a-477b-8f40-6e1963f803f1", + "cef4b219-6b65-4740-8c3b-5b68aa10cf3f", + "1bf57c9a-fd7a-49aa-90de-cd1907b15ddd" ], "status": "meta-only", "visibility": "draft" @@ -395,19 +1074,46 @@ "id": "1bf57c9a-fd7a-49aa-90de-cd1907b15ddd", "title": "vps50-gemma-e4b-1 \u2014 gemma on vps50", "date": "2026-05-05", + "started_at": "2026-05-05T03:07:13Z", + "git_sha": "f06800254dfd046e154c94f663fb7f9f66c79ae3", "hardware": "vps50", + "hardware_label": "VPS50 \u00b7 cloud \u00b7 16 vCPU AMD EPYC \u00b7 62 GB RAM \u00b7 no GPU", "engine": "llamacpp", "harness": "vps50-gemma-e4b-1", "model_family": "gemma", "model_sizes": [ "gemma-4-e4b" ], - "task_kind": null, + "task_kind": "chat", + "tags": [ + "gemma", + "vps50" + ], "headline": "no calls landed", - "summary_md_path": null, - "raw_jsonl_path": "runs/1bf57c9a-fd7a-49aa-90de-cd1907b15ddd.jsonl", - "log_path": "runs/1bf57c9a-fd7a-49aa-90de-cd1907b15ddd.log", - "synthesis_doc": null, + "janie_blurb_md": null, + "janie_blurb_status": "pending", + "caveat": null, + "caveat_severity": null, + "methodology_ref": "HARNESS#vps50-gemma-e4b-1", + "methodology_deviations_md": null, + "results_table": [ + { + "label": "gemma-4-e4b", + "cell_id": "vps50:llamacpp:gemma-4-e4b", + "n_calls": 0, + "n_errors": 0, + "duration_ms_mean": null, + "duration_ms_p50": null, + "duration_ms_p95": null, + "tokens_per_sec_mean": null, + "tokens_per_sec_p50": null, + "tokens_per_sec_p95": null, + "tokens_per_sec_max": null, + "memory_mb": null, + "watts_avg": null, + "notes": null + } + ], "cells": [ { "cell_id": "vps50:llamacpp:gemma-4-e4b", @@ -415,16 +1121,40 @@ "n_errors": 0, "duration_ms_mean": null, "duration_ms_p50": null, + "duration_ms_p95": null, "tokens_per_sec_mean": null, + "tokens_per_sec_p50": null, + "tokens_per_sec_p95": null, "tokens_per_sec_max": null } ], "n_calls_total": 0, "n_errors_total": 0, - "started_at": "2026-05-05T03:07:13Z", - "tags": [ - "gemma", - "vps50" + "chart_spec": { + "kind": "bar", + "x": "cell", + "y": "tokens_per_sec_mean", + "series": [ + "gemma-4-e4b" + ], + "data_url": "/data/1bf57c9a-fd7a-49aa-90de-cd1907b15ddd/run.jsonl" + }, + "site_grade": "archive-only", + "site_grade_reason": "status=meta-only", + "raw_data_urls": { + "jsonl": "/data/1bf57c9a-fd7a-49aa-90de-cd1907b15ddd/run.jsonl", + "log": "/data/1bf57c9a-fd7a-49aa-90de-cd1907b15ddd/run.log", + "md": null, + "metadata": "/data/1bf57c9a-fd7a-49aa-90de-cd1907b15ddd/metadata.json", + "gitea_dir": "https://git.weeyuga.com/slobodanmargetic988/weeyuga-benchmarks-public/src/branch/main/runs/1bf57c9a-fd7a-49aa-90de-cd1907b15ddd/" + }, + "synthesis_doc": null, + "synthesis_docs_all": [], + "related_ids": [ + "b54c61c0-b6b8-44ac-bc3e-a515df0f0499", + "212d6278-1b9b-45e9-8aae-7eed4d4ec822", + "86241185-24da-4f8e-b254-a60c9465e353", + "91751afd-068a-477b-8f40-6e1963f803f1" ], "status": "meta-only", "visibility": "draft" @@ -433,19 +1163,46 @@ "id": "b54c61c0-b6b8-44ac-bc3e-a515df0f0499", "title": "vps50-gemma-e4b-1 \u2014 gemma on vps50", "date": "2026-05-04", + "started_at": "2026-05-04T23:53:09Z", + "git_sha": "f06800254dfd046e154c94f663fb7f9f66c79ae3", "hardware": "vps50", + "hardware_label": "VPS50 \u00b7 cloud \u00b7 16 vCPU AMD EPYC \u00b7 62 GB RAM \u00b7 no GPU", "engine": "llamacpp", "harness": "vps50-gemma-e4b-1", "model_family": "gemma", "model_sizes": [ "gemma-4-e4b" ], - "task_kind": null, + "task_kind": "chat", + "tags": [ + "gemma", + "vps50" + ], "headline": "no calls landed", - "summary_md_path": null, - "raw_jsonl_path": "runs/b54c61c0-b6b8-44ac-bc3e-a515df0f0499.jsonl", - "log_path": "runs/b54c61c0-b6b8-44ac-bc3e-a515df0f0499.log", - "synthesis_doc": null, + "janie_blurb_md": null, + "janie_blurb_status": "pending", + "caveat": null, + "caveat_severity": null, + "methodology_ref": "HARNESS#vps50-gemma-e4b-1", + "methodology_deviations_md": null, + "results_table": [ + { + "label": "gemma-4-e4b", + "cell_id": "vps50:llamacpp:gemma-4-e4b", + "n_calls": 0, + "n_errors": 0, + "duration_ms_mean": null, + "duration_ms_p50": null, + "duration_ms_p95": null, + "tokens_per_sec_mean": null, + "tokens_per_sec_p50": null, + "tokens_per_sec_p95": null, + "tokens_per_sec_max": null, + "memory_mb": null, + "watts_avg": null, + "notes": null + } + ], "cells": [ { "cell_id": "vps50:llamacpp:gemma-4-e4b", @@ -453,16 +1210,40 @@ "n_errors": 0, "duration_ms_mean": null, "duration_ms_p50": null, + "duration_ms_p95": null, "tokens_per_sec_mean": null, + "tokens_per_sec_p50": null, + "tokens_per_sec_p95": null, "tokens_per_sec_max": null } ], "n_calls_total": 0, "n_errors_total": 0, - "started_at": "2026-05-04T23:53:09Z", - "tags": [ - "gemma", - "vps50" + "chart_spec": { + "kind": "bar", + "x": "cell", + "y": "tokens_per_sec_mean", + "series": [ + "gemma-4-e4b" + ], + "data_url": "/data/b54c61c0-b6b8-44ac-bc3e-a515df0f0499/run.jsonl" + }, + "site_grade": "archive-only", + "site_grade_reason": "status=meta-only", + "raw_data_urls": { + "jsonl": "/data/b54c61c0-b6b8-44ac-bc3e-a515df0f0499/run.jsonl", + "log": "/data/b54c61c0-b6b8-44ac-bc3e-a515df0f0499/run.log", + "md": null, + "metadata": "/data/b54c61c0-b6b8-44ac-bc3e-a515df0f0499/metadata.json", + "gitea_dir": "https://git.weeyuga.com/slobodanmargetic988/weeyuga-benchmarks-public/src/branch/main/runs/b54c61c0-b6b8-44ac-bc3e-a515df0f0499/" + }, + "synthesis_doc": null, + "synthesis_docs_all": [], + "related_ids": [ + "1bf57c9a-fd7a-49aa-90de-cd1907b15ddd", + "212d6278-1b9b-45e9-8aae-7eed4d4ec822", + "86241185-24da-4f8e-b254-a60c9465e353", + "91751afd-068a-477b-8f40-6e1963f803f1" ], "status": "meta-only", "visibility": "draft" @@ -471,51 +1252,17 @@ "id": "ad28cb95-5134-40b5-8514-c8a381f83d87", "title": "predator-a3b-ctx-sweep-1 \u2014 qwen3 on predator", "date": "2026-05-04", + "started_at": "2026-05-04T23:48:01Z", + "git_sha": "f06800254dfd046e154c94f663fb7f9f66c79ae3", "hardware": "predator", + "hardware_label": "Predator \u00b7 gaming laptop \u00b7 GTX 1060 6 GB \u00b7 28 GB RAM", "engine": "llamacpp", "harness": "predator-a3b-ctx-sweep-1", "model_family": "qwen3", "model_sizes": [ "qwen3:30b-a3b-iq2m" ], - "task_kind": null, - "headline": "45 calls across 3 cell(s); ~5.0 tok/s mean; p50 83.9s", - "summary_md_path": null, - "raw_jsonl_path": "runs/ad28cb95-5134-40b5-8514-c8a381f83d87.jsonl", - "log_path": "runs/ad28cb95-5134-40b5-8514-c8a381f83d87.log", - "synthesis_doc": "A3B_AND_CPU_OVERNIGHT_2026-05-05.md", - "cells": [ - { - "cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl36-ctx32k", - "n_calls": 15, - "n_errors": 0, - "duration_ms_mean": 82375.0, - "duration_ms_p50": 69292.5, - "tokens_per_sec_mean": 4.69, - "tokens_per_sec_max": 4.95 - }, - { - "cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl6-ctx64k", - "n_calls": 15, - "n_errors": 0, - "duration_ms_mean": 79021.08, - "duration_ms_p50": 83884.0, - "tokens_per_sec_mean": 5.46, - "tokens_per_sec_max": 5.83 - }, - { - "cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl6-ctx128k", - "n_calls": 15, - "n_errors": 0, - "duration_ms_mean": 88976.42, - "duration_ms_p50": 97634.0, - "tokens_per_sec_mean": 4.79, - "tokens_per_sec_max": 5.12 - } - ], - "n_calls_total": 45, - "n_errors_total": 0, - "started_at": "2026-05-04T23:48:01Z", + "task_kind": "param-sweep", "tags": [ "a3b", "ctx", @@ -524,70 +1271,154 @@ "qwen3", "sweep" ], + "headline": "45 calls across 3 cell(s); ~5.0 tok/s mean; p50 83.9s", + "janie_blurb_md": null, + "janie_blurb_status": "pending", + "caveat": null, + "caveat_severity": null, + "methodology_ref": "A3B_AND_CPU_OVERNIGHT_2026-05-05", + "methodology_deviations_md": null, + "results_table": [ + { + "label": "qwen3:30b-a3b-iq2m-think500-ngl36-ctx32k", + "cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl36-ctx32k", + "n_calls": 15, + "n_errors": 0, + "duration_ms_mean": 82375.0, + "duration_ms_p50": 69292.5, + "duration_ms_p95": 170672.15, + "tokens_per_sec_mean": 4.69, + "tokens_per_sec_p50": 4.74, + "tokens_per_sec_p95": 4.92, + "tokens_per_sec_max": 4.95, + "memory_mb": null, + "watts_avg": null, + "notes": null + }, + { + "label": "qwen3:30b-a3b-iq2m-think500-ngl6-ctx64k", + "cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl6-ctx64k", + "n_calls": 15, + "n_errors": 0, + "duration_ms_mean": 79021.08, + "duration_ms_p50": 83884.0, + "duration_ms_p95": 146975.5, + "tokens_per_sec_mean": 5.46, + "tokens_per_sec_p50": 5.41, + "tokens_per_sec_p95": 5.73, + "tokens_per_sec_max": 5.83, + "memory_mb": null, + "watts_avg": null, + "notes": null + }, + { + "label": "qwen3:30b-a3b-iq2m-think500-ngl6-ctx128k", + "cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl6-ctx128k", + "n_calls": 15, + "n_errors": 0, + "duration_ms_mean": 88976.42, + "duration_ms_p50": 97634.0, + "duration_ms_p95": 162935.6, + "tokens_per_sec_mean": 4.79, + "tokens_per_sec_p50": 4.77, + "tokens_per_sec_p95": 5.0, + "tokens_per_sec_max": 5.12, + "memory_mb": null, + "watts_avg": null, + "notes": null + } + ], + "cells": [ + { + "cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl36-ctx32k", + "n_calls": 15, + "n_errors": 0, + "duration_ms_mean": 82375.0, + "duration_ms_p50": 69292.5, + "duration_ms_p95": 170672.15, + "tokens_per_sec_mean": 4.69, + "tokens_per_sec_p50": 4.74, + "tokens_per_sec_p95": 4.92, + "tokens_per_sec_max": 4.95 + }, + { + "cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl6-ctx64k", + "n_calls": 15, + "n_errors": 0, + "duration_ms_mean": 79021.08, + "duration_ms_p50": 83884.0, + "duration_ms_p95": 146975.5, + "tokens_per_sec_mean": 5.46, + "tokens_per_sec_p50": 5.41, + "tokens_per_sec_p95": 5.73, + "tokens_per_sec_max": 5.83 + }, + { + "cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl6-ctx128k", + "n_calls": 15, + "n_errors": 0, + "duration_ms_mean": 88976.42, + "duration_ms_p50": 97634.0, + "duration_ms_p95": 162935.6, + "tokens_per_sec_mean": 4.79, + "tokens_per_sec_p50": 4.77, + "tokens_per_sec_p95": 5.0, + "tokens_per_sec_max": 5.12 + } + ], + "n_calls_total": 45, + "n_errors_total": 0, + "chart_spec": { + "kind": "line", + "x": "param_value", + "y": "tokens_per_sec_mean", + "series": [ + "qwen3:30b-a3b-iq2m-think500-ngl36-ctx32k", + "qwen3:30b-a3b-iq2m-think500-ngl6-ctx64k", + "qwen3:30b-a3b-iq2m-think500-ngl6-ctx128k" + ], + "data_url": "/data/ad28cb95-5134-40b5-8514-c8a381f83d87/run.jsonl" + }, + "site_grade": "standard", + "site_grade_reason": null, + "raw_data_urls": { + "jsonl": "/data/ad28cb95-5134-40b5-8514-c8a381f83d87/run.jsonl", + "log": "/data/ad28cb95-5134-40b5-8514-c8a381f83d87/run.log", + "md": null, + "metadata": "/data/ad28cb95-5134-40b5-8514-c8a381f83d87/metadata.json", + "gitea_dir": "https://git.weeyuga.com/slobodanmargetic988/weeyuga-benchmarks-public/src/branch/main/runs/ad28cb95-5134-40b5-8514-c8a381f83d87/" + }, + "synthesis_doc": "A3B_AND_CPU_OVERNIGHT_2026-05-05.md", + "synthesis_docs_all": [ + "A3B_AND_CPU_OVERNIGHT_2026-05-05.md" + ], + "related_ids": [ + "79afe366-1055-4e45-adf6-593864a530e8", + "b37836bf-d1a2-4d6b-a732-aff89da1fa07", + "e08a7051-a856-4053-b34b-561d7ce5e8cf", + "5fb2913d-6500-4ecf-9e97-d43f7dd61145" + ], "status": "complete", "visibility": "draft" }, { "id": "212d6278-1b9b-45e9-8aae-7eed4d4ec822", - "title": "vps50-cpu-matrix-1 \u2014 gemma/phi-4/qwen2.5/qwen3 on vps50", + "title": "vps50-cpu-matrix-1 \u2014 gemma/phi/qwen2.5/qwen3 on vps50", "date": "2026-05-04", + "started_at": "2026-05-04T23:39:53Z", + "git_sha": "f06800254dfd046e154c94f663fb7f9f66c79ae3", "hardware": "vps50", + "hardware_label": "VPS50 \u00b7 cloud \u00b7 16 vCPU AMD EPYC \u00b7 62 GB RAM \u00b7 no GPU", "engine": "llamacpp", "harness": "vps50-cpu-matrix-1", - "model_family": "gemma/phi-4/qwen2.5/qwen3", + "model_family": "gemma/phi/qwen2.5/qwen3", "model_sizes": [ "phi-4", "gemma-4-26b-a4b", "qwen3-30b-a3b", "qwen2.5-72b" ], - "task_kind": null, - "headline": "no calls landed", - "summary_md_path": null, - "raw_jsonl_path": "runs/212d6278-1b9b-45e9-8aae-7eed4d4ec822.jsonl", - "log_path": "runs/212d6278-1b9b-45e9-8aae-7eed4d4ec822.log", - "synthesis_doc": "A3B_AND_CPU_OVERNIGHT_2026-05-05.md", - "cells": [ - { - "cell_id": "vps50:llamacpp:phi-4", - "n_calls": 0, - "n_errors": 0, - "duration_ms_mean": null, - "duration_ms_p50": null, - "tokens_per_sec_mean": null, - "tokens_per_sec_max": null - }, - { - "cell_id": "vps50:llamacpp:gemma-4-26b-a4b", - "n_calls": 0, - "n_errors": 0, - "duration_ms_mean": null, - "duration_ms_p50": null, - "tokens_per_sec_mean": null, - "tokens_per_sec_max": null - }, - { - "cell_id": "vps50:llamacpp:qwen3-30b-a3b", - "n_calls": 0, - "n_errors": 0, - "duration_ms_mean": null, - "duration_ms_p50": null, - "tokens_per_sec_mean": null, - "tokens_per_sec_max": null - }, - { - "cell_id": "vps50:llamacpp:qwen2.5-72b", - "n_calls": 0, - "n_errors": 0, - "duration_ms_mean": null, - "duration_ms_p50": null, - "tokens_per_sec_mean": null, - "tokens_per_sec_max": null - } - ], - "n_calls_total": 0, - "n_errors_total": 0, - "started_at": "2026-05-04T23:39:53Z", + "task_kind": "param-sweep", "tags": [ "a3b", "cpu", @@ -597,6 +1428,162 @@ "qwen3", "vps50" ], + "headline": "no calls landed", + "janie_blurb_md": null, + "janie_blurb_status": "pending", + "caveat": null, + "caveat_severity": null, + "methodology_ref": "A3B_AND_CPU_OVERNIGHT_2026-05-05", + "methodology_deviations_md": null, + "results_table": [ + { + "label": "phi-4", + "cell_id": "vps50:llamacpp:phi-4", + "n_calls": 0, + "n_errors": 0, + "duration_ms_mean": null, + "duration_ms_p50": null, + "duration_ms_p95": null, + "tokens_per_sec_mean": null, + "tokens_per_sec_p50": null, + "tokens_per_sec_p95": null, + "tokens_per_sec_max": null, + "memory_mb": null, + "watts_avg": null, + "notes": null + }, + { + "label": "gemma-4-26b-a4b", + "cell_id": "vps50:llamacpp:gemma-4-26b-a4b", + "n_calls": 0, + "n_errors": 0, + "duration_ms_mean": null, + "duration_ms_p50": null, + "duration_ms_p95": null, + "tokens_per_sec_mean": null, + "tokens_per_sec_p50": null, + "tokens_per_sec_p95": null, + "tokens_per_sec_max": null, + "memory_mb": null, + "watts_avg": null, + "notes": null + }, + { + "label": "qwen3-30b-a3b", + "cell_id": "vps50:llamacpp:qwen3-30b-a3b", + "n_calls": 0, + "n_errors": 0, + "duration_ms_mean": null, + "duration_ms_p50": null, + "duration_ms_p95": null, + "tokens_per_sec_mean": null, + "tokens_per_sec_p50": null, + "tokens_per_sec_p95": null, + "tokens_per_sec_max": null, + "memory_mb": null, + "watts_avg": null, + "notes": null + }, + { + "label": "qwen2.5-72b", + "cell_id": "vps50:llamacpp:qwen2.5-72b", + "n_calls": 0, + "n_errors": 0, + "duration_ms_mean": null, + "duration_ms_p50": null, + "duration_ms_p95": null, + "tokens_per_sec_mean": null, + "tokens_per_sec_p50": null, + "tokens_per_sec_p95": null, + "tokens_per_sec_max": null, + "memory_mb": null, + "watts_avg": null, + "notes": null + } + ], + "cells": [ + { + "cell_id": "vps50:llamacpp:phi-4", + "n_calls": 0, + "n_errors": 0, + "duration_ms_mean": null, + "duration_ms_p50": null, + "duration_ms_p95": null, + "tokens_per_sec_mean": null, + "tokens_per_sec_p50": null, + "tokens_per_sec_p95": null, + "tokens_per_sec_max": null + }, + { + "cell_id": "vps50:llamacpp:gemma-4-26b-a4b", + "n_calls": 0, + "n_errors": 0, + "duration_ms_mean": null, + "duration_ms_p50": null, + "duration_ms_p95": null, + "tokens_per_sec_mean": null, + "tokens_per_sec_p50": null, + "tokens_per_sec_p95": null, + "tokens_per_sec_max": null + }, + { + "cell_id": "vps50:llamacpp:qwen3-30b-a3b", + "n_calls": 0, + "n_errors": 0, + "duration_ms_mean": null, + "duration_ms_p50": null, + "duration_ms_p95": null, + "tokens_per_sec_mean": null, + "tokens_per_sec_p50": null, + "tokens_per_sec_p95": null, + "tokens_per_sec_max": null + }, + { + "cell_id": "vps50:llamacpp:qwen2.5-72b", + "n_calls": 0, + "n_errors": 0, + "duration_ms_mean": null, + "duration_ms_p50": null, + "duration_ms_p95": null, + "tokens_per_sec_mean": null, + "tokens_per_sec_p50": null, + "tokens_per_sec_p95": null, + "tokens_per_sec_max": null + } + ], + "n_calls_total": 0, + "n_errors_total": 0, + "chart_spec": { + "kind": "line", + "x": "param_value", + "y": "tokens_per_sec_mean", + "series": [ + "phi-4", + "gemma-4-26b-a4b", + "qwen3-30b-a3b", + "qwen2.5-72b" + ], + "data_url": "/data/212d6278-1b9b-45e9-8aae-7eed4d4ec822/run.jsonl" + }, + "site_grade": "archive-only", + "site_grade_reason": "status=meta-only", + "raw_data_urls": { + "jsonl": "/data/212d6278-1b9b-45e9-8aae-7eed4d4ec822/run.jsonl", + "log": "/data/212d6278-1b9b-45e9-8aae-7eed4d4ec822/run.log", + "md": null, + "metadata": "/data/212d6278-1b9b-45e9-8aae-7eed4d4ec822/metadata.json", + "gitea_dir": "https://git.weeyuga.com/slobodanmargetic988/weeyuga-benchmarks-public/src/branch/main/runs/212d6278-1b9b-45e9-8aae-7eed4d4ec822/" + }, + "synthesis_doc": "A3B_AND_CPU_OVERNIGHT_2026-05-05.md", + "synthesis_docs_all": [ + "A3B_AND_CPU_OVERNIGHT_2026-05-05.md" + ], + "related_ids": [ + "86241185-24da-4f8e-b254-a60c9465e353", + "91751afd-068a-477b-8f40-6e1963f803f1", + "cef4b219-6b65-4740-8c3b-5b68aa10cf3f", + "1bf57c9a-fd7a-49aa-90de-cd1907b15ddd" + ], "status": "meta-only", "visibility": "draft" }, @@ -604,51 +1591,17 @@ "id": "b37836bf-d1a2-4d6b-a732-aff89da1fa07", "title": "predator-a3b-ngl-matrix-1 \u2014 qwen3 on predator", "date": "2026-05-04", + "started_at": "2026-05-04T22:50:47Z", + "git_sha": "f06800254dfd046e154c94f663fb7f9f66c79ae3", "hardware": "predator", + "hardware_label": "Predator \u00b7 gaming laptop \u00b7 GTX 1060 6 GB \u00b7 28 GB RAM", "engine": "llamacpp", "harness": "predator-a3b-ngl-matrix-1", "model_family": "qwen3", "model_sizes": [ "qwen3:30b-a3b-iq2m" ], - "task_kind": null, - "headline": "42 calls across 3 cell(s); ~5.7 tok/s mean; p50 71.5s", - "summary_md_path": null, - "raw_jsonl_path": "runs/b37836bf-d1a2-4d6b-a732-aff89da1fa07.jsonl", - "log_path": "runs/b37836bf-d1a2-4d6b-a732-aff89da1fa07.log", - "synthesis_doc": "A3B_NGL_RETUNE_2026-05-05.md", - "cells": [ - { - "cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ctx32k-ngl6", - "n_calls": 14, - "n_errors": 0, - "duration_ms_mean": 85436.42, - "duration_ms_p50": 92483.0, - "tokens_per_sec_mean": 4.95, - "tokens_per_sec_max": 5.15 - }, - { - "cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ctx32k-ngl12", - "n_calls": 14, - "n_errors": 0, - "duration_ms_mean": 75013.58, - "duration_ms_p50": 81347.0, - "tokens_per_sec_mean": 5.59, - "tokens_per_sec_max": 6.63 - }, - { - "cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ctx32k-ngl24", - "n_calls": 14, - "n_errors": 0, - "duration_ms_mean": 62607.33, - "duration_ms_p50": 67049.0, - "tokens_per_sec_mean": 6.58, - "tokens_per_sec_max": 6.89 - } - ], - "n_calls_total": 42, - "n_errors_total": 0, - "started_at": "2026-05-04T22:50:47Z", + "task_kind": "param-sweep", "tags": [ "a3b", "iq2", @@ -657,26 +1610,183 @@ "predator", "qwen3" ], + "headline": "42 calls across 3 cell(s); ~5.7 tok/s mean; p50 71.5s", + "janie_blurb_md": null, + "janie_blurb_status": "pending", + "caveat": null, + "caveat_severity": null, + "methodology_ref": "A3B_AND_CPU_OVERNIGHT_2026-05-05", + "methodology_deviations_md": null, + "results_table": [ + { + "label": "qwen3:30b-a3b-iq2m-think500-ctx32k-ngl6", + "cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ctx32k-ngl6", + "n_calls": 14, + "n_errors": 0, + "duration_ms_mean": 85436.42, + "duration_ms_p50": 92483.0, + "duration_ms_p95": 154201.8, + "tokens_per_sec_mean": 4.95, + "tokens_per_sec_p50": 4.98, + "tokens_per_sec_p95": 5.15, + "tokens_per_sec_max": 5.15, + "memory_mb": null, + "watts_avg": null, + "notes": null + }, + { + "label": "qwen3:30b-a3b-iq2m-think500-ctx32k-ngl12", + "cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ctx32k-ngl12", + "n_calls": 14, + "n_errors": 0, + "duration_ms_mean": 75013.58, + "duration_ms_p50": 81347.0, + "duration_ms_p95": 146474.55, + "tokens_per_sec_mean": 5.59, + "tokens_per_sec_p50": 5.36, + "tokens_per_sec_p95": 6.61, + "tokens_per_sec_max": 6.63, + "memory_mb": null, + "watts_avg": null, + "notes": null + }, + { + "label": "qwen3:30b-a3b-iq2m-think500-ctx32k-ngl24", + "cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ctx32k-ngl24", + "n_calls": 14, + "n_errors": 0, + "duration_ms_mean": 62607.33, + "duration_ms_p50": 67049.0, + "duration_ms_p95": 123041.65, + "tokens_per_sec_mean": 6.58, + "tokens_per_sec_p50": 6.68, + "tokens_per_sec_p95": 6.88, + "tokens_per_sec_max": 6.89, + "memory_mb": null, + "watts_avg": null, + "notes": null + } + ], + "cells": [ + { + "cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ctx32k-ngl6", + "n_calls": 14, + "n_errors": 0, + "duration_ms_mean": 85436.42, + "duration_ms_p50": 92483.0, + "duration_ms_p95": 154201.8, + "tokens_per_sec_mean": 4.95, + "tokens_per_sec_p50": 4.98, + "tokens_per_sec_p95": 5.15, + "tokens_per_sec_max": 5.15 + }, + { + "cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ctx32k-ngl12", + "n_calls": 14, + "n_errors": 0, + "duration_ms_mean": 75013.58, + "duration_ms_p50": 81347.0, + "duration_ms_p95": 146474.55, + "tokens_per_sec_mean": 5.59, + "tokens_per_sec_p50": 5.36, + "tokens_per_sec_p95": 6.61, + "tokens_per_sec_max": 6.63 + }, + { + "cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ctx32k-ngl24", + "n_calls": 14, + "n_errors": 0, + "duration_ms_mean": 62607.33, + "duration_ms_p50": 67049.0, + "duration_ms_p95": 123041.65, + "tokens_per_sec_mean": 6.58, + "tokens_per_sec_p50": 6.68, + "tokens_per_sec_p95": 6.88, + "tokens_per_sec_max": 6.89 + } + ], + "n_calls_total": 42, + "n_errors_total": 0, + "chart_spec": { + "kind": "line", + "x": "param_value", + "y": "tokens_per_sec_mean", + "series": [ + "qwen3:30b-a3b-iq2m-think500-ctx32k-ngl6", + "qwen3:30b-a3b-iq2m-think500-ctx32k-ngl12", + "qwen3:30b-a3b-iq2m-think500-ctx32k-ngl24" + ], + "data_url": "/data/b37836bf-d1a2-4d6b-a732-aff89da1fa07/run.jsonl" + }, + "site_grade": "standard", + "site_grade_reason": null, + "raw_data_urls": { + "jsonl": "/data/b37836bf-d1a2-4d6b-a732-aff89da1fa07/run.jsonl", + "log": "/data/b37836bf-d1a2-4d6b-a732-aff89da1fa07/run.log", + "md": null, + "metadata": "/data/b37836bf-d1a2-4d6b-a732-aff89da1fa07/metadata.json", + "gitea_dir": "https://git.weeyuga.com/slobodanmargetic988/weeyuga-benchmarks-public/src/branch/main/runs/b37836bf-d1a2-4d6b-a732-aff89da1fa07/" + }, + "synthesis_doc": "A3B_AND_CPU_OVERNIGHT_2026-05-05.md", + "synthesis_docs_all": [ + "A3B_AND_CPU_OVERNIGHT_2026-05-05.md", + "A3B_NGL_RETUNE_2026-05-05.md" + ], + "related_ids": [ + "79afe366-1055-4e45-adf6-593864a530e8", + "ad28cb95-5134-40b5-8514-c8a381f83d87", + "e08a7051-a856-4053-b34b-561d7ce5e8cf", + "5fb2913d-6500-4ecf-9e97-d43f7dd61145" + ], "status": "complete", "visibility": "draft" }, { "id": "23066b38-ea9c-4dd3-b2f5-32912a67fce4", - "title": "Predator Qwen rerun \u2014 23066b38-ea9c-4dd3-b2f5-32912a67fce4", + "title": "Predator Qwen rerun", "date": "2026-05-04", + "started_at": "2026-05-04T22:11:43Z", + "git_sha": "f06800254dfd046e154c94f663fb7f9f66c79ae3", "hardware": "pavilion", + "hardware_label": "Pavilion \u00b7 HP laptop \u00b7 GTX 1050 4 GB \u00b7 16 GB RAM \u00b7 i7-9750H", "engine": "llamacpp", "harness": "pavilion-a3b-1", "model_family": "qwen3", "model_sizes": [ "qwen3:30b-a3b-iq2xxs" ], - "task_kind": null, - "headline": "`Answer chars` is `len(message.content)`. Both averaged over the 3 warm runs.", - "summary_md_path": "runs/23066b38-ea9c-4dd3-b2f5-32912a67fce4.md", - "raw_jsonl_path": "runs/23066b38-ea9c-4dd3-b2f5-32912a67fce4.jsonl", - "log_path": "runs/23066b38-ea9c-4dd3-b2f5-32912a67fce4.log", - "synthesis_doc": "A3B_AND_CPU_OVERNIGHT_2026-05-05.md", + "task_kind": "chat", + "tags": [ + "a3b", + "iq2", + "pavilion", + "qwen3" + ], + "headline": "14 calls across 1 cell(s); ~5.9 tok/s mean; p50 57.8s", + "janie_blurb_md": null, + "janie_blurb_status": "pending", + "caveat": null, + "caveat_severity": null, + "methodology_ref": "A3B_AND_CPU_OVERNIGHT_2026-05-05", + "methodology_deviations_md": null, + "results_table": [ + { + "label": "qwen3:30b-a3b-iq2xxs-think500", + "cell_id": "pavilion:llamacpp:qwen3:30b-a3b-iq2xxs-think500", + "n_calls": 14, + "n_errors": 0, + "duration_ms_mean": 72439.92, + "duration_ms_p50": 57784.0, + "duration_ms_p95": 174786.1, + "tokens_per_sec_mean": 5.88, + "tokens_per_sec_p50": 6.67, + "tokens_per_sec_p95": 8.17, + "tokens_per_sec_max": 8.19, + "memory_mb": null, + "watts_avg": null, + "notes": null + } + ], "cells": [ { "cell_id": "pavilion:llamacpp:qwen3:30b-a3b-iq2xxs-think500", @@ -684,39 +1794,93 @@ "n_errors": 0, "duration_ms_mean": 72439.92, "duration_ms_p50": 57784.0, + "duration_ms_p95": 174786.1, "tokens_per_sec_mean": 5.88, + "tokens_per_sec_p50": 6.67, + "tokens_per_sec_p95": 8.17, "tokens_per_sec_max": 8.19 } ], "n_calls_total": 14, "n_errors_total": 0, - "started_at": "2026-05-04T22:11:43Z", - "tags": [ - "a3b", - "iq2", - "pavilion", - "qwen3" + "chart_spec": { + "kind": "bar", + "x": "cell", + "y": "tokens_per_sec_mean", + "series": [ + "qwen3:30b-a3b-iq2xxs-think500" + ], + "data_url": "/data/23066b38-ea9c-4dd3-b2f5-32912a67fce4/run.jsonl" + }, + "site_grade": "flagship", + "site_grade_reason": null, + "raw_data_urls": { + "jsonl": "/data/23066b38-ea9c-4dd3-b2f5-32912a67fce4/run.jsonl", + "log": "/data/23066b38-ea9c-4dd3-b2f5-32912a67fce4/run.log", + "md": "/data/23066b38-ea9c-4dd3-b2f5-32912a67fce4/run.md", + "metadata": "/data/23066b38-ea9c-4dd3-b2f5-32912a67fce4/metadata.json", + "gitea_dir": "https://git.weeyuga.com/slobodanmargetic988/weeyuga-benchmarks-public/src/branch/main/runs/23066b38-ea9c-4dd3-b2f5-32912a67fce4/" + }, + "synthesis_doc": "A3B_AND_CPU_OVERNIGHT_2026-05-05.md", + "synthesis_docs_all": [ + "A3B_AND_CPU_OVERNIGHT_2026-05-05.md", + "A3B_CROSS_MACHINE_2026-05-05.md" + ], + "related_ids": [ + "487c8a51-7c5c-417d-a0c5-4b6e8491e11f", + "5f4d9e97-4294-4e69-9e6c-f318bca1ce44", + "d1cff064-9141-48e0-b1d2-729b134e0543", + "5fb2913d-6500-4ecf-9e97-d43f7dd61145" ], "status": "complete", "visibility": "draft" }, { "id": "d1cff064-9141-48e0-b1d2-729b134e0543", - "title": "Predator Qwen rerun \u2014 d1cff064-9141-48e0-b1d2-729b134e0543", + "title": "Predator Qwen rerun", "date": "2026-05-04", + "started_at": "2026-05-04T22:02:28Z", + "git_sha": "f06800254dfd046e154c94f663fb7f9f66c79ae3", "hardware": "pavilion", + "hardware_label": "Pavilion \u00b7 HP laptop \u00b7 GTX 1050 4 GB \u00b7 16 GB RAM \u00b7 i7-9750H", "engine": "llamacpp", "harness": "pavilion-a3b-1", "model_family": "qwen3", "model_sizes": [ "qwen3:30b-a3b-iq2xxs" ], - "task_kind": null, - "headline": "`Answer chars` is `len(message.content)`. Both averaged over the 3 warm runs.", - "summary_md_path": "runs/d1cff064-9141-48e0-b1d2-729b134e0543.md", - "raw_jsonl_path": "runs/d1cff064-9141-48e0-b1d2-729b134e0543.jsonl", - "log_path": "runs/d1cff064-9141-48e0-b1d2-729b134e0543.log", - "synthesis_doc": null, + "task_kind": "chat", + "tags": [ + "a3b", + "iq2", + "pavilion", + "qwen3" + ], + "headline": "no calls landed", + "janie_blurb_md": null, + "janie_blurb_status": "pending", + "caveat": null, + "caveat_severity": null, + "methodology_ref": "HARNESS#pavilion-a3b-1", + "methodology_deviations_md": null, + "results_table": [ + { + "label": "qwen3:30b-a3b-iq2xxs-think500", + "cell_id": "pavilion:llamacpp:qwen3:30b-a3b-iq2xxs-think500", + "n_calls": 0, + "n_errors": 0, + "duration_ms_mean": null, + "duration_ms_p50": null, + "duration_ms_p95": null, + "tokens_per_sec_mean": null, + "tokens_per_sec_p50": null, + "tokens_per_sec_p95": null, + "tokens_per_sec_max": null, + "memory_mb": null, + "watts_avg": null, + "notes": null + } + ], "cells": [ { "cell_id": "pavilion:llamacpp:qwen3:30b-a3b-iq2xxs-think500", @@ -724,39 +1888,90 @@ "n_errors": 0, "duration_ms_mean": null, "duration_ms_p50": null, + "duration_ms_p95": null, "tokens_per_sec_mean": null, + "tokens_per_sec_p50": null, + "tokens_per_sec_p95": null, "tokens_per_sec_max": null } ], "n_calls_total": 0, "n_errors_total": 0, - "started_at": "2026-05-04T22:02:28Z", - "tags": [ - "a3b", - "iq2", - "pavilion", - "qwen3" + "chart_spec": { + "kind": "bar", + "x": "cell", + "y": "tokens_per_sec_mean", + "series": [ + "qwen3:30b-a3b-iq2xxs-think500" + ], + "data_url": "/data/d1cff064-9141-48e0-b1d2-729b134e0543/run.jsonl" + }, + "site_grade": "archive-only", + "site_grade_reason": "status=meta-only", + "raw_data_urls": { + "jsonl": "/data/d1cff064-9141-48e0-b1d2-729b134e0543/run.jsonl", + "log": "/data/d1cff064-9141-48e0-b1d2-729b134e0543/run.log", + "md": "/data/d1cff064-9141-48e0-b1d2-729b134e0543/run.md", + "metadata": "/data/d1cff064-9141-48e0-b1d2-729b134e0543/metadata.json", + "gitea_dir": "https://git.weeyuga.com/slobodanmargetic988/weeyuga-benchmarks-public/src/branch/main/runs/d1cff064-9141-48e0-b1d2-729b134e0543/" + }, + "synthesis_doc": null, + "synthesis_docs_all": [], + "related_ids": [ + "23066b38-ea9c-4dd3-b2f5-32912a67fce4", + "487c8a51-7c5c-417d-a0c5-4b6e8491e11f", + "5f4d9e97-4294-4e69-9e6c-f318bca1ce44", + "5fb2913d-6500-4ecf-9e97-d43f7dd61145" ], "status": "meta-only", "visibility": "draft" }, { "id": "5fb2913d-6500-4ecf-9e97-d43f7dd61145", - "title": "Predator Qwen rerun \u2014 5fb2913d-6500-4ecf-9e97-d43f7dd61145", + "title": "Predator Qwen rerun", "date": "2026-05-04", + "started_at": "2026-05-04T22:02:17Z", + "git_sha": "f06800254dfd046e154c94f663fb7f9f66c79ae3", "hardware": "predator", + "hardware_label": "Predator \u00b7 gaming laptop \u00b7 GTX 1060 6 GB \u00b7 28 GB RAM", "engine": "llamacpp", "harness": "predator-a3b-1", "model_family": "qwen3", "model_sizes": [ "qwen3:30b-a3b-iq2m" ], - "task_kind": null, - "headline": "`Answer chars` is `len(message.content)`. Both averaged over the 3 warm runs.", - "summary_md_path": "runs/5fb2913d-6500-4ecf-9e97-d43f7dd61145.md", - "raw_jsonl_path": "runs/5fb2913d-6500-4ecf-9e97-d43f7dd61145.jsonl", - "log_path": "runs/5fb2913d-6500-4ecf-9e97-d43f7dd61145.log", - "synthesis_doc": "A3B_NGL_RETUNE_2026-05-05.md", + "task_kind": "chat", + "tags": [ + "a3b", + "iq2", + "predator", + "qwen3" + ], + "headline": "14 calls across 1 cell(s); ~3.9 tok/s mean; p50 101.9s", + "janie_blurb_md": null, + "janie_blurb_status": "pending", + "caveat": null, + "caveat_severity": null, + "methodology_ref": "A3B_AND_CPU_OVERNIGHT_2026-05-05", + "methodology_deviations_md": null, + "results_table": [ + { + "label": "qwen3:30b-a3b-iq2m-think500", + "cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500", + "n_calls": 14, + "n_errors": 0, + "duration_ms_mean": 107312.08, + "duration_ms_p50": 101920.0, + "duration_ms_p95": 211459.2, + "tokens_per_sec_mean": 3.87, + "tokens_per_sec_p50": 3.99, + "tokens_per_sec_p95": 4.06, + "tokens_per_sec_max": 4.06, + "memory_mb": null, + "watts_avg": null, + "notes": null + } + ], "cells": [ { "cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500", @@ -764,18 +1979,44 @@ "n_errors": 0, "duration_ms_mean": 107312.08, "duration_ms_p50": 101920.0, + "duration_ms_p95": 211459.2, "tokens_per_sec_mean": 3.87, + "tokens_per_sec_p50": 3.99, + "tokens_per_sec_p95": 4.06, "tokens_per_sec_max": 4.06 } ], "n_calls_total": 14, "n_errors_total": 0, - "started_at": "2026-05-04T22:02:17Z", - "tags": [ - "a3b", - "iq2", - "predator", - "qwen3" + "chart_spec": { + "kind": "bar", + "x": "cell", + "y": "tokens_per_sec_mean", + "series": [ + "qwen3:30b-a3b-iq2m-think500" + ], + "data_url": "/data/5fb2913d-6500-4ecf-9e97-d43f7dd61145/run.jsonl" + }, + "site_grade": "flagship", + "site_grade_reason": null, + "raw_data_urls": { + "jsonl": "/data/5fb2913d-6500-4ecf-9e97-d43f7dd61145/run.jsonl", + "log": "/data/5fb2913d-6500-4ecf-9e97-d43f7dd61145/run.log", + "md": "/data/5fb2913d-6500-4ecf-9e97-d43f7dd61145/run.md", + "metadata": "/data/5fb2913d-6500-4ecf-9e97-d43f7dd61145/metadata.json", + "gitea_dir": "https://git.weeyuga.com/slobodanmargetic988/weeyuga-benchmarks-public/src/branch/main/runs/5fb2913d-6500-4ecf-9e97-d43f7dd61145/" + }, + "synthesis_doc": "A3B_AND_CPU_OVERNIGHT_2026-05-05.md", + "synthesis_docs_all": [ + "A3B_AND_CPU_OVERNIGHT_2026-05-05.md", + "A3B_CROSS_MACHINE_2026-05-05.md", + "A3B_NGL_RETUNE_2026-05-05.md" + ], + "related_ids": [ + "8ec3c8cb-58a7-4f33-974c-9c1bfb7824ef", + "fe4fa289-67a9-48e8-8a80-d5d88c875208", + "79afe366-1055-4e45-adf6-593864a530e8", + "ad28cb95-5134-40b5-8514-c8a381f83d87" ], "status": "complete", "visibility": "draft" @@ -784,19 +2025,48 @@ "id": "8ec3c8cb-58a7-4f33-974c-9c1bfb7824ef", "title": "predator-a3b-1 \u2014 qwen3 on predator", "date": "2026-05-04", + "started_at": "2026-05-04T21:24:11Z", + "git_sha": "f06800254dfd046e154c94f663fb7f9f66c79ae3", "hardware": "predator", + "hardware_label": "Predator \u00b7 gaming laptop \u00b7 GTX 1060 6 GB \u00b7 28 GB RAM", "engine": "llamacpp", "harness": "predator-a3b-1", "model_family": "qwen3", "model_sizes": [ "qwen3:30b-a3b-iq2m" ], - "task_kind": null, + "task_kind": "chat", + "tags": [ + "a3b", + "iq2", + "predator", + "qwen3" + ], "headline": "no calls landed", - "summary_md_path": null, - "raw_jsonl_path": "runs/8ec3c8cb-58a7-4f33-974c-9c1bfb7824ef.jsonl", - "log_path": "runs/8ec3c8cb-58a7-4f33-974c-9c1bfb7824ef.log", - "synthesis_doc": null, + "janie_blurb_md": null, + "janie_blurb_status": "pending", + "caveat": null, + "caveat_severity": null, + "methodology_ref": "HARNESS#predator-a3b-1", + "methodology_deviations_md": null, + "results_table": [ + { + "label": "qwen3:30b-a3b-iq2m-think500", + "cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500", + "n_calls": 0, + "n_errors": 0, + "duration_ms_mean": null, + "duration_ms_p50": null, + "duration_ms_p95": null, + "tokens_per_sec_mean": null, + "tokens_per_sec_p50": null, + "tokens_per_sec_p95": null, + "tokens_per_sec_max": null, + "memory_mb": null, + "watts_avg": null, + "notes": null + } + ], "cells": [ { "cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500", @@ -804,39 +2074,90 @@ "n_errors": 0, "duration_ms_mean": null, "duration_ms_p50": null, + "duration_ms_p95": null, "tokens_per_sec_mean": null, + "tokens_per_sec_p50": null, + "tokens_per_sec_p95": null, "tokens_per_sec_max": null } ], "n_calls_total": 0, "n_errors_total": 0, - "started_at": "2026-05-04T21:24:11Z", - "tags": [ - "a3b", - "iq2", - "predator", - "qwen3" + "chart_spec": { + "kind": "bar", + "x": "cell", + "y": "tokens_per_sec_mean", + "series": [ + "qwen3:30b-a3b-iq2m-think500" + ], + "data_url": "/data/8ec3c8cb-58a7-4f33-974c-9c1bfb7824ef/run.jsonl" + }, + "site_grade": "archive-only", + "site_grade_reason": "status=meta-only", + "raw_data_urls": { + "jsonl": "/data/8ec3c8cb-58a7-4f33-974c-9c1bfb7824ef/run.jsonl", + "log": "/data/8ec3c8cb-58a7-4f33-974c-9c1bfb7824ef/run.log", + "md": null, + "metadata": "/data/8ec3c8cb-58a7-4f33-974c-9c1bfb7824ef/metadata.json", + "gitea_dir": "https://git.weeyuga.com/slobodanmargetic988/weeyuga-benchmarks-public/src/branch/main/runs/8ec3c8cb-58a7-4f33-974c-9c1bfb7824ef/" + }, + "synthesis_doc": null, + "synthesis_docs_all": [], + "related_ids": [ + "5fb2913d-6500-4ecf-9e97-d43f7dd61145", + "fe4fa289-67a9-48e8-8a80-d5d88c875208", + "79afe366-1055-4e45-adf6-593864a530e8", + "ad28cb95-5134-40b5-8514-c8a381f83d87" ], "status": "meta-only", "visibility": "draft" }, { "id": "487c8a51-7c5c-417d-a0c5-4b6e8491e11f", - "title": "Predator Qwen rerun \u2014 487c8a51-7c5c-417d-a0c5-4b6e8491e11f", + "title": "Predator Qwen rerun", "date": "2026-05-04", + "started_at": "2026-05-04T21:24:10Z", + "git_sha": "f06800254dfd046e154c94f663fb7f9f66c79ae3", "hardware": "pavilion", + "hardware_label": "Pavilion \u00b7 HP laptop \u00b7 GTX 1050 4 GB \u00b7 16 GB RAM \u00b7 i7-9750H", "engine": "llamacpp", "harness": "pavilion-a3b-1", "model_family": "qwen3", "model_sizes": [ "qwen3:30b-a3b-iq2xxs" ], - "task_kind": null, - "headline": "`Answer chars` is `len(message.content)`. Both averaged over the 3 warm runs.", - "summary_md_path": "runs/487c8a51-7c5c-417d-a0c5-4b6e8491e11f.md", - "raw_jsonl_path": "runs/487c8a51-7c5c-417d-a0c5-4b6e8491e11f.jsonl", - "log_path": "runs/487c8a51-7c5c-417d-a0c5-4b6e8491e11f.log", - "synthesis_doc": null, + "task_kind": "chat", + "tags": [ + "a3b", + "iq2", + "pavilion", + "qwen3" + ], + "headline": "no calls landed", + "janie_blurb_md": null, + "janie_blurb_status": "pending", + "caveat": null, + "caveat_severity": null, + "methodology_ref": "HARNESS#pavilion-a3b-1", + "methodology_deviations_md": null, + "results_table": [ + { + "label": "qwen3:30b-a3b-iq2xxs-think500", + "cell_id": "pavilion:llamacpp:qwen3:30b-a3b-iq2xxs-think500", + "n_calls": 0, + "n_errors": 0, + "duration_ms_mean": null, + "duration_ms_p50": null, + "duration_ms_p95": null, + "tokens_per_sec_mean": null, + "tokens_per_sec_p50": null, + "tokens_per_sec_p95": null, + "tokens_per_sec_max": null, + "memory_mb": null, + "watts_avg": null, + "notes": null + } + ], "cells": [ { "cell_id": "pavilion:llamacpp:qwen3:30b-a3b-iq2xxs-think500", @@ -844,18 +2165,40 @@ "n_errors": 0, "duration_ms_mean": null, "duration_ms_p50": null, + "duration_ms_p95": null, "tokens_per_sec_mean": null, + "tokens_per_sec_p50": null, + "tokens_per_sec_p95": null, "tokens_per_sec_max": null } ], "n_calls_total": 0, "n_errors_total": 0, - "started_at": "2026-05-04T21:24:10Z", - "tags": [ - "a3b", - "iq2", - "pavilion", - "qwen3" + "chart_spec": { + "kind": "bar", + "x": "cell", + "y": "tokens_per_sec_mean", + "series": [ + "qwen3:30b-a3b-iq2xxs-think500" + ], + "data_url": "/data/487c8a51-7c5c-417d-a0c5-4b6e8491e11f/run.jsonl" + }, + "site_grade": "archive-only", + "site_grade_reason": "status=meta-only", + "raw_data_urls": { + "jsonl": "/data/487c8a51-7c5c-417d-a0c5-4b6e8491e11f/run.jsonl", + "log": "/data/487c8a51-7c5c-417d-a0c5-4b6e8491e11f/run.log", + "md": "/data/487c8a51-7c5c-417d-a0c5-4b6e8491e11f/run.md", + "metadata": "/data/487c8a51-7c5c-417d-a0c5-4b6e8491e11f/metadata.json", + "gitea_dir": "https://git.weeyuga.com/slobodanmargetic988/weeyuga-benchmarks-public/src/branch/main/runs/487c8a51-7c5c-417d-a0c5-4b6e8491e11f/" + }, + "synthesis_doc": null, + "synthesis_docs_all": [], + "related_ids": [ + "23066b38-ea9c-4dd3-b2f5-32912a67fce4", + "5f4d9e97-4294-4e69-9e6c-f318bca1ce44", + "d1cff064-9141-48e0-b1d2-729b134e0543", + "5fb2913d-6500-4ecf-9e97-d43f7dd61145" ], "status": "meta-only", "visibility": "draft" @@ -864,19 +2207,48 @@ "id": "5f4d9e97-4294-4e69-9e6c-f318bca1ce44", "title": "pavilion-a3b-1 \u2014 qwen3 on pavilion", "date": "2026-05-04", + "started_at": "2026-05-04T20:38:52Z", + "git_sha": "f06800254dfd046e154c94f663fb7f9f66c79ae3", "hardware": "pavilion", + "hardware_label": "Pavilion \u00b7 HP laptop \u00b7 GTX 1050 4 GB \u00b7 16 GB RAM \u00b7 i7-9750H", "engine": "llamacpp", "harness": "pavilion-a3b-1", "model_family": "qwen3", "model_sizes": [ "qwen3:30b-a3b-iq2xxs" ], - "task_kind": null, + "task_kind": "chat", + "tags": [ + "a3b", + "iq2", + "pavilion", + "qwen3" + ], "headline": "no calls landed", - "summary_md_path": null, - "raw_jsonl_path": "runs/5f4d9e97-4294-4e69-9e6c-f318bca1ce44.jsonl", - "log_path": "runs/5f4d9e97-4294-4e69-9e6c-f318bca1ce44.log", - "synthesis_doc": null, + "janie_blurb_md": null, + "janie_blurb_status": "pending", + "caveat": null, + "caveat_severity": null, + "methodology_ref": "HARNESS#pavilion-a3b-1", + "methodology_deviations_md": null, + "results_table": [ + { + "label": "qwen3:30b-a3b-iq2xxs-think500", + "cell_id": "pavilion:llamacpp:qwen3:30b-a3b-iq2xxs-think500", + "n_calls": 0, + "n_errors": 0, + "duration_ms_mean": null, + "duration_ms_p50": null, + "duration_ms_p95": null, + "tokens_per_sec_mean": null, + "tokens_per_sec_p50": null, + "tokens_per_sec_p95": null, + "tokens_per_sec_max": null, + "memory_mb": null, + "watts_avg": null, + "notes": null + } + ], "cells": [ { "cell_id": "pavilion:llamacpp:qwen3:30b-a3b-iq2xxs-think500", @@ -884,18 +2256,40 @@ "n_errors": 0, "duration_ms_mean": null, "duration_ms_p50": null, + "duration_ms_p95": null, "tokens_per_sec_mean": null, + "tokens_per_sec_p50": null, + "tokens_per_sec_p95": null, "tokens_per_sec_max": null } ], "n_calls_total": 0, "n_errors_total": 0, - "started_at": "2026-05-04T20:38:52Z", - "tags": [ - "a3b", - "iq2", - "pavilion", - "qwen3" + "chart_spec": { + "kind": "bar", + "x": "cell", + "y": "tokens_per_sec_mean", + "series": [ + "qwen3:30b-a3b-iq2xxs-think500" + ], + "data_url": "/data/5f4d9e97-4294-4e69-9e6c-f318bca1ce44/run.jsonl" + }, + "site_grade": "archive-only", + "site_grade_reason": "status=meta-only", + "raw_data_urls": { + "jsonl": "/data/5f4d9e97-4294-4e69-9e6c-f318bca1ce44/run.jsonl", + "log": "/data/5f4d9e97-4294-4e69-9e6c-f318bca1ce44/run.log", + "md": null, + "metadata": "/data/5f4d9e97-4294-4e69-9e6c-f318bca1ce44/metadata.json", + "gitea_dir": "https://git.weeyuga.com/slobodanmargetic988/weeyuga-benchmarks-public/src/branch/main/runs/5f4d9e97-4294-4e69-9e6c-f318bca1ce44/" + }, + "synthesis_doc": null, + "synthesis_docs_all": [], + "related_ids": [ + "23066b38-ea9c-4dd3-b2f5-32912a67fce4", + "487c8a51-7c5c-417d-a0c5-4b6e8491e11f", + "d1cff064-9141-48e0-b1d2-729b134e0543", + "5fb2913d-6500-4ecf-9e97-d43f7dd61145" ], "status": "meta-only", "visibility": "draft" @@ -904,19 +2298,47 @@ "id": "fe4fa289-67a9-48e8-8a80-d5d88c875208", "title": "predator-a3b-1 \u2014 qwen3 on predator", "date": "2026-05-04", + "started_at": "2026-05-04T18:50:19Z", + "git_sha": "f06800254dfd046e154c94f663fb7f9f66c79ae3", "hardware": "predator", + "hardware_label": "Predator \u00b7 gaming laptop \u00b7 GTX 1060 6 GB \u00b7 28 GB RAM", "engine": "llamacpp", "harness": "predator-a3b-1", "model_family": "qwen3", "model_sizes": [ "qwen3:30b-a3b" ], - "task_kind": null, + "task_kind": "chat", + "tags": [ + "a3b", + "predator", + "qwen3" + ], "headline": "2 calls across 1 cell(s)", - "summary_md_path": null, - "raw_jsonl_path": "runs/fe4fa289-67a9-48e8-8a80-d5d88c875208.jsonl", - "log_path": "runs/fe4fa289-67a9-48e8-8a80-d5d88c875208.log", - "synthesis_doc": null, + "janie_blurb_md": null, + "janie_blurb_status": "pending", + "caveat": null, + "caveat_severity": null, + "methodology_ref": "HARNESS#predator-a3b-1", + "methodology_deviations_md": null, + "results_table": [ + { + "label": "qwen3:30b-a3b-q4km-think500", + "cell_id": "predator:llamacpp:qwen3:30b-a3b-q4km-think500", + "n_calls": 2, + "n_errors": 0, + "duration_ms_mean": null, + "duration_ms_p50": null, + "duration_ms_p95": null, + "tokens_per_sec_mean": null, + "tokens_per_sec_p50": null, + "tokens_per_sec_p95": null, + "tokens_per_sec_max": null, + "memory_mb": null, + "watts_avg": null, + "notes": null + } + ], "cells": [ { "cell_id": "predator:llamacpp:qwen3:30b-a3b-q4km-think500", @@ -924,26 +2346,52 @@ "n_errors": 0, "duration_ms_mean": null, "duration_ms_p50": null, + "duration_ms_p95": null, "tokens_per_sec_mean": null, + "tokens_per_sec_p50": null, + "tokens_per_sec_p95": null, "tokens_per_sec_max": null } ], "n_calls_total": 2, "n_errors_total": 0, - "started_at": "2026-05-04T18:50:19Z", - "tags": [ - "a3b", - "predator", - "qwen3" + "chart_spec": { + "kind": "bar", + "x": "cell", + "y": "tokens_per_sec_mean", + "series": [ + "qwen3:30b-a3b-q4km-think500" + ], + "data_url": "/data/fe4fa289-67a9-48e8-8a80-d5d88c875208/run.jsonl" + }, + "site_grade": "archive-only", + "site_grade_reason": "no documented method (no synthesis doc and no run.md)", + "raw_data_urls": { + "jsonl": "/data/fe4fa289-67a9-48e8-8a80-d5d88c875208/run.jsonl", + "log": "/data/fe4fa289-67a9-48e8-8a80-d5d88c875208/run.log", + "md": null, + "metadata": "/data/fe4fa289-67a9-48e8-8a80-d5d88c875208/metadata.json", + "gitea_dir": "https://git.weeyuga.com/slobodanmargetic988/weeyuga-benchmarks-public/src/branch/main/runs/fe4fa289-67a9-48e8-8a80-d5d88c875208/" + }, + "synthesis_doc": null, + "synthesis_docs_all": [], + "related_ids": [ + "5fb2913d-6500-4ecf-9e97-d43f7dd61145", + "8ec3c8cb-58a7-4f33-974c-9c1bfb7824ef", + "79afe366-1055-4e45-adf6-593864a530e8", + "ad28cb95-5134-40b5-8514-c8a381f83d87" ], "status": "complete", "visibility": "draft" }, { "id": "fba9d9b1-cc5d-40bc-9e21-beafbb72c65d", - "title": "Predator Qwen rerun \u2014 fba9d9b1-cc5d-40bc-9e21-beafbb72c65d", + "title": "Predator Qwen rerun", "date": "2026-05-04", + "started_at": "2026-05-04T17:49:07Z", + "git_sha": "f06800254dfd046e154c94f663fb7f9f66c79ae3", "hardware": "predator", + "hardware_label": "Predator \u00b7 gaming laptop \u00b7 GTX 1060 6 GB \u00b7 28 GB RAM", "engine": "llamacpp", "harness": "predator-qwen-rerun-1", "model_family": "qwen3/qwen3.5", @@ -951,12 +2399,70 @@ "qwen3.5:9b", "qwen3:14b" ], - "task_kind": null, - "headline": "`Answer chars` is `len(message.content)`. Both averaged over the 3 warm runs.", - "summary_md_path": "runs/fba9d9b1-cc5d-40bc-9e21-beafbb72c65d.md", - "raw_jsonl_path": "runs/fba9d9b1-cc5d-40bc-9e21-beafbb72c65d.jsonl", - "log_path": "runs/fba9d9b1-cc5d-40bc-9e21-beafbb72c65d.log", - "synthesis_doc": "A3B_AND_CPU_OVERNIGHT_2026-05-05.md", + "task_kind": "chat", + "tags": [ + "predator", + "qwen3", + "qwen3.5", + "rerun" + ], + "headline": "36 calls across 3 cell(s); ~10.9 tok/s mean; p50 24.1s", + "janie_blurb_md": null, + "janie_blurb_status": "pending", + "caveat": null, + "caveat_severity": null, + "methodology_ref": "A3B_AND_CPU_OVERNIGHT_2026-05-05", + "methodology_deviations_md": null, + "results_table": [ + { + "label": "qwen3.5:9b-q4km-think500", + "cell_id": "predator:llamacpp:qwen3.5:9b-q4km-think500", + "n_calls": 14, + "n_errors": 0, + "duration_ms_mean": 30474.33, + "duration_ms_p50": 33717.5, + "duration_ms_p95": 52611.55, + "tokens_per_sec_mean": 14.25, + "tokens_per_sec_p50": 14.91, + "tokens_per_sec_p95": 15.3, + "tokens_per_sec_max": 15.32, + "memory_mb": null, + "watts_avg": null, + "notes": null + }, + { + "label": "qwen3.5:9b-q4km-nothink", + "cell_id": "predator:llamacpp:qwen3.5:9b-q4km-nothink", + "n_calls": 14, + "n_errors": 0, + "duration_ms_mean": 10425.75, + "duration_ms_p50": 7200.0, + "duration_ms_p95": 23998.85, + "tokens_per_sec_mean": 12.55, + "tokens_per_sec_p50": 14.37, + "tokens_per_sec_p95": 14.93, + "tokens_per_sec_max": 14.95, + "memory_mb": null, + "watts_avg": null, + "notes": null + }, + { + "label": "qwen3:14b-q4km", + "cell_id": "predator:llamacpp:qwen3:14b-q4km", + "n_calls": 8, + "n_errors": 0, + "duration_ms_mean": 130876.17, + "duration_ms_p50": 78946.0, + "duration_ms_p95": 277160.0, + "tokens_per_sec_mean": 1.06, + "tokens_per_sec_p50": 1.09, + "tokens_per_sec_p95": 1.28, + "tokens_per_sec_max": 1.33, + "memory_mb": null, + "watts_avg": null, + "notes": null + } + ], "cells": [ { "cell_id": "predator:llamacpp:qwen3.5:9b-q4km-think500", @@ -964,7 +2470,10 @@ "n_errors": 0, "duration_ms_mean": 30474.33, "duration_ms_p50": 33717.5, + "duration_ms_p95": 52611.55, "tokens_per_sec_mean": 14.25, + "tokens_per_sec_p50": 14.91, + "tokens_per_sec_p95": 15.3, "tokens_per_sec_max": 15.32 }, { @@ -973,7 +2482,10 @@ "n_errors": 0, "duration_ms_mean": 10425.75, "duration_ms_p50": 7200.0, + "duration_ms_p95": 23998.85, "tokens_per_sec_mean": 12.55, + "tokens_per_sec_p50": 14.37, + "tokens_per_sec_p95": 14.93, "tokens_per_sec_max": 14.95 }, { @@ -982,27 +2494,57 @@ "n_errors": 0, "duration_ms_mean": 130876.17, "duration_ms_p50": 78946.0, + "duration_ms_p95": 277160.0, "tokens_per_sec_mean": 1.06, + "tokens_per_sec_p50": 1.09, + "tokens_per_sec_p95": 1.28, "tokens_per_sec_max": 1.33 } ], "n_calls_total": 36, "n_errors_total": 0, - "started_at": "2026-05-04T17:49:07Z", - "tags": [ - "predator", - "qwen3", - "qwen3.5", - "rerun" + "chart_spec": { + "kind": "bar", + "x": "cell", + "y": "tokens_per_sec_mean", + "series": [ + "qwen3.5:9b-q4km-think500", + "qwen3.5:9b-q4km-nothink", + "qwen3:14b-q4km" + ], + "data_url": "/data/fba9d9b1-cc5d-40bc-9e21-beafbb72c65d/run.jsonl" + }, + "site_grade": "flagship", + "site_grade_reason": null, + "raw_data_urls": { + "jsonl": "/data/fba9d9b1-cc5d-40bc-9e21-beafbb72c65d/run.jsonl", + "log": "/data/fba9d9b1-cc5d-40bc-9e21-beafbb72c65d/run.log", + "md": "/data/fba9d9b1-cc5d-40bc-9e21-beafbb72c65d/run.md", + "metadata": "/data/fba9d9b1-cc5d-40bc-9e21-beafbb72c65d/metadata.json", + "gitea_dir": "https://git.weeyuga.com/slobodanmargetic988/weeyuga-benchmarks-public/src/branch/main/runs/fba9d9b1-cc5d-40bc-9e21-beafbb72c65d/" + }, + "synthesis_doc": "A3B_AND_CPU_OVERNIGHT_2026-05-05.md", + "synthesis_docs_all": [ + "A3B_AND_CPU_OVERNIGHT_2026-05-05.md", + "A3B_CROSS_MACHINE_2026-05-05.md" + ], + "related_ids": [ + "09d8fbde-0008-49bb-99da-03eeaca72be1", + "5fb2913d-6500-4ecf-9e97-d43f7dd61145", + "8ec3c8cb-58a7-4f33-974c-9c1bfb7824ef", + "fe4fa289-67a9-48e8-8a80-d5d88c875208" ], "status": "complete", "visibility": "draft" }, { "id": "09d8fbde-0008-49bb-99da-03eeaca72be1", - "title": "Predator trio bench \u2014 09d8fbde-0008-49bb-99da-03eeaca72be1", + "title": "Predator trio bench", "date": "2026-05-04", + "started_at": "2026-05-04T16:01:52Z", + "git_sha": "f06800254dfd046e154c94f663fb7f9f66c79ae3", "hardware": "predator", + "hardware_label": "Predator \u00b7 gaming laptop \u00b7 GTX 1060 6 GB \u00b7 28 GB RAM", "engine": "llamacpp", "harness": "predator-trio-1", "model_family": "gemma/granite/qwen3.5", @@ -1011,44 +2553,7 @@ "gemma-4:e4b-it", "qwen3.5:9b" ], - "task_kind": null, - "headline": "includes prefill time, so true generation rate is slightly higher. Use llama-bench's", - "summary_md_path": "runs/09d8fbde-0008-49bb-99da-03eeaca72be1.md", - "raw_jsonl_path": "runs/09d8fbde-0008-49bb-99da-03eeaca72be1.jsonl", - "log_path": "runs/09d8fbde-0008-49bb-99da-03eeaca72be1.log", - "synthesis_doc": "A3B_AND_CPU_OVERNIGHT_2026-05-05.md", - "cells": [ - { - "cell_id": "predator:llamacpp:granite-4.1:8b-q4km", - "n_calls": 14, - "n_errors": 0, - "duration_ms_mean": 8831.33, - "duration_ms_p50": 6403.0, - "tokens_per_sec_mean": 13.49, - "tokens_per_sec_max": 15.75 - }, - { - "cell_id": "predator:llamacpp:gemma-4:e4b-it-q4km", - "n_calls": 14, - "n_errors": 0, - "duration_ms_mean": 9888.42, - "duration_ms_p50": 8199.0, - "tokens_per_sec_mean": 21.82, - "tokens_per_sec_max": 23.66 - }, - { - "cell_id": "predator:llamacpp:qwen3.5:9b-q4km", - "n_calls": 14, - "n_errors": 0, - "duration_ms_mean": 37136.33, - "duration_ms_p50": 35324.5, - "tokens_per_sec_mean": 14.04, - "tokens_per_sec_max": 14.57 - } - ], - "n_calls_total": 42, - "n_errors_total": 0, - "started_at": "2026-05-04T16:01:52Z", + "task_kind": "chat", "tags": [ "gemma", "granite", @@ -1057,6 +2562,134 @@ "qwen3.5", "trio" ], + "headline": "42 calls across 3 cell(s); ~16.5 tok/s mean; p50 9.6s", + "janie_blurb_md": null, + "janie_blurb_status": "pending", + "caveat": null, + "caveat_severity": null, + "methodology_ref": "A3B_AND_CPU_OVERNIGHT_2026-05-05", + "methodology_deviations_md": null, + "results_table": [ + { + "label": "granite-4.1:8b-q4km", + "cell_id": "predator:llamacpp:granite-4.1:8b-q4km", + "n_calls": 14, + "n_errors": 0, + "duration_ms_mean": 8831.33, + "duration_ms_p50": 6403.0, + "duration_ms_p95": 19524.25, + "tokens_per_sec_mean": 13.49, + "tokens_per_sec_p50": 15.16, + "tokens_per_sec_p95": 15.73, + "tokens_per_sec_max": 15.75, + "memory_mb": null, + "watts_avg": null, + "notes": null + }, + { + "label": "gemma-4:e4b-it-q4km", + "cell_id": "predator:llamacpp:gemma-4:e4b-it-q4km", + "n_calls": 14, + "n_errors": 0, + "duration_ms_mean": 9888.42, + "duration_ms_p50": 8199.0, + "duration_ms_p95": 21724.5, + "tokens_per_sec_mean": 21.82, + "tokens_per_sec_p50": 22.91, + "tokens_per_sec_p95": 23.64, + "tokens_per_sec_max": 23.66, + "memory_mb": null, + "watts_avg": null, + "notes": null + }, + { + "label": "qwen3.5:9b-q4km", + "cell_id": "predator:llamacpp:qwen3.5:9b-q4km", + "n_calls": 14, + "n_errors": 0, + "duration_ms_mean": 37136.33, + "duration_ms_p50": 35324.5, + "duration_ms_p95": 71874.15, + "tokens_per_sec_mean": 14.04, + "tokens_per_sec_p50": 14.42, + "tokens_per_sec_p95": 14.57, + "tokens_per_sec_max": 14.57, + "memory_mb": null, + "watts_avg": null, + "notes": null + } + ], + "cells": [ + { + "cell_id": "predator:llamacpp:granite-4.1:8b-q4km", + "n_calls": 14, + "n_errors": 0, + "duration_ms_mean": 8831.33, + "duration_ms_p50": 6403.0, + "duration_ms_p95": 19524.25, + "tokens_per_sec_mean": 13.49, + "tokens_per_sec_p50": 15.16, + "tokens_per_sec_p95": 15.73, + "tokens_per_sec_max": 15.75 + }, + { + "cell_id": "predator:llamacpp:gemma-4:e4b-it-q4km", + "n_calls": 14, + "n_errors": 0, + "duration_ms_mean": 9888.42, + "duration_ms_p50": 8199.0, + "duration_ms_p95": 21724.5, + "tokens_per_sec_mean": 21.82, + "tokens_per_sec_p50": 22.91, + "tokens_per_sec_p95": 23.64, + "tokens_per_sec_max": 23.66 + }, + { + "cell_id": "predator:llamacpp:qwen3.5:9b-q4km", + "n_calls": 14, + "n_errors": 0, + "duration_ms_mean": 37136.33, + "duration_ms_p50": 35324.5, + "duration_ms_p95": 71874.15, + "tokens_per_sec_mean": 14.04, + "tokens_per_sec_p50": 14.42, + "tokens_per_sec_p95": 14.57, + "tokens_per_sec_max": 14.57 + } + ], + "n_calls_total": 42, + "n_errors_total": 0, + "chart_spec": { + "kind": "bar", + "x": "cell", + "y": "tokens_per_sec_mean", + "series": [ + "granite-4.1:8b-q4km", + "gemma-4:e4b-it-q4km", + "qwen3.5:9b-q4km" + ], + "data_url": "/data/09d8fbde-0008-49bb-99da-03eeaca72be1/run.jsonl" + }, + "site_grade": "flagship", + "site_grade_reason": null, + "raw_data_urls": { + "jsonl": "/data/09d8fbde-0008-49bb-99da-03eeaca72be1/run.jsonl", + "log": "/data/09d8fbde-0008-49bb-99da-03eeaca72be1/run.log", + "md": "/data/09d8fbde-0008-49bb-99da-03eeaca72be1/run.md", + "metadata": "/data/09d8fbde-0008-49bb-99da-03eeaca72be1/metadata.json", + "gitea_dir": "https://git.weeyuga.com/slobodanmargetic988/weeyuga-benchmarks-public/src/branch/main/runs/09d8fbde-0008-49bb-99da-03eeaca72be1/" + }, + "synthesis_doc": "A3B_AND_CPU_OVERNIGHT_2026-05-05.md", + "synthesis_docs_all": [ + "A3B_AND_CPU_OVERNIGHT_2026-05-05.md", + "A3B_CROSS_MACHINE_2026-05-05.md" + ], + "related_ids": [ + "5fb2913d-6500-4ecf-9e97-d43f7dd61145", + "8ec3c8cb-58a7-4f33-974c-9c1bfb7824ef", + "fba9d9b1-cc5d-40bc-9e21-beafbb72c65d", + "fe4fa289-67a9-48e8-8a80-d5d88c875208" + ], "status": "complete", "visibility": "draft" }, @@ -1064,13 +2697,16 @@ "id": "ad057f5b-ed3f-4a95-a38e-361be310ffd6", "title": "pavilion-weeyuga-v3 \u2014 qwen2.5/qwen2.5-coder/qwen3/qwen3.5 on pavilion", "date": "2026-04-29", + "started_at": "2026-04-29T09:51:46Z", + "git_sha": "371ce70c8708ce0aedbe4f3bf096f8e8f8b11d43", "hardware": "pavilion", + "hardware_label": "Pavilion \u00b7 HP laptop \u00b7 GTX 1050 4 GB \u00b7 16 GB RAM \u00b7 i7-9750H", "engine": "weeyuga", "harness": "pavilion-weeyuga-v3", "model_family": "qwen2.5/qwen2.5-coder/qwen3/qwen3.5", "model_sizes": [ "qwen3.5:4b", - "qwen3.5:35b-a3b-uncensored-iq1m", + "qwen3.5:35b-a3b-uncensored", "qwen3.5:35b-a3b-iq2s", "qwen3.5:9b-q6k", "qwen3.5:9b", @@ -1085,12 +2721,281 @@ "qwen2.5-coder:1.5b", "qwen2.5-coder:0.5b" ], - "task_kind": null, + "task_kind": "chat", + "tags": [ + "a3b", + "iq2", + "pavilion", + "qwen2.5", + "qwen3", + "qwen3.5", + "weeyuga" + ], "headline": "96 calls across 16 cell(s); 12 errors", - "summary_md_path": null, - "raw_jsonl_path": "runs/ad057f5b-ed3f-4a95-a38e-361be310ffd6.jsonl", - "log_path": null, - "synthesis_doc": null, + "janie_blurb_md": null, + "janie_blurb_status": "pending", + "caveat": null, + "caveat_severity": null, + "methodology_ref": "PAVILION_LLAMACPP_VS_OLLAMA_v0_INCOMPLETE", + "methodology_deviations_md": null, + "results_table": [ + { + "label": "qwen3.5:4b", + "cell_id": "pavilion:weeyuga:qwen3.5:4b", + "n_calls": 6, + "n_errors": 0, + "duration_ms_mean": null, + "duration_ms_p50": null, + "duration_ms_p95": null, + "tokens_per_sec_mean": null, + "tokens_per_sec_p50": null, + "tokens_per_sec_p95": null, + "tokens_per_sec_max": null, + "memory_mb": null, + "watts_avg": null, + "notes": null + }, + { + "label": "qwen3.5:35b-a3b-uncensored-iq1m", + "cell_id": "pavilion:weeyuga:qwen3.5:35b-a3b-uncensored-iq1m", + "n_calls": 6, + "n_errors": 0, + "duration_ms_mean": null, + "duration_ms_p50": null, + "duration_ms_p95": null, + "tokens_per_sec_mean": null, + "tokens_per_sec_p50": null, + "tokens_per_sec_p95": null, + "tokens_per_sec_max": null, + "memory_mb": null, + "watts_avg": null, + "notes": null + }, + { + "label": "qwen3.5:35b-a3b-iq2s", + "cell_id": "pavilion:weeyuga:qwen3.5:35b-a3b-iq2s", + "n_calls": 6, + "n_errors": 0, + "duration_ms_mean": null, + "duration_ms_p50": null, + "duration_ms_p95": null, + "tokens_per_sec_mean": null, + "tokens_per_sec_p50": null, + "tokens_per_sec_p95": null, + "tokens_per_sec_max": null, + "memory_mb": null, + "watts_avg": null, + "notes": null + }, + { + "label": "qwen3.5:9b-q6k", + "cell_id": "pavilion:weeyuga:qwen3.5:9b-q6k", + "n_calls": 6, + "n_errors": 0, + "duration_ms_mean": null, + "duration_ms_p50": null, + "duration_ms_p95": null, + "tokens_per_sec_mean": null, + "tokens_per_sec_p50": null, + "tokens_per_sec_p95": null, + "tokens_per_sec_max": null, + "memory_mb": null, + "watts_avg": null, + "notes": null + }, + { + "label": "qwen3.5:9b-q4km", + "cell_id": "pavilion:weeyuga:qwen3.5:9b-q4km", + "n_calls": 6, + "n_errors": 0, + "duration_ms_mean": null, + "duration_ms_p50": null, + "duration_ms_p95": null, + "tokens_per_sec_mean": null, + "tokens_per_sec_p50": null, + "tokens_per_sec_p95": null, + "tokens_per_sec_max": null, + "memory_mb": null, + "watts_avg": null, + "notes": null + }, + { + "label": "qwen3.5:2b", + "cell_id": "pavilion:weeyuga:qwen3.5:2b", + "n_calls": 6, + "n_errors": 0, + "duration_ms_mean": null, + "duration_ms_p50": null, + "duration_ms_p95": null, + "tokens_per_sec_mean": null, + "tokens_per_sec_p50": null, + "tokens_per_sec_p95": null, + "tokens_per_sec_max": null, + "memory_mb": null, + "watts_avg": null, + "notes": null + }, + { + "label": "qwen3.5:0.8b", + "cell_id": "pavilion:weeyuga:qwen3.5:0.8b", + "n_calls": 6, + "n_errors": 0, + "duration_ms_mean": null, + "duration_ms_p50": null, + "duration_ms_p95": null, + "tokens_per_sec_mean": null, + "tokens_per_sec_p50": null, + "tokens_per_sec_p95": null, + "tokens_per_sec_max": null, + "memory_mb": null, + "watts_avg": null, + "notes": null + }, + { + "label": "qwen3.5:9b", + "cell_id": "pavilion:weeyuga:qwen3.5:9b", + "n_calls": 6, + "n_errors": 0, + "duration_ms_mean": null, + "duration_ms_p50": null, + "duration_ms_p95": null, + "tokens_per_sec_mean": null, + "tokens_per_sec_p50": null, + "tokens_per_sec_p95": null, + "tokens_per_sec_max": null, + "memory_mb": null, + "watts_avg": null, + "notes": null + }, + { + "label": "qwen2.5-coder:14b", + "cell_id": "pavilion:weeyuga:qwen2.5-coder:14b", + "n_calls": 6, + "n_errors": 6, + "duration_ms_mean": null, + "duration_ms_p50": null, + "duration_ms_p95": null, + "tokens_per_sec_mean": null, + "tokens_per_sec_p50": null, + "tokens_per_sec_p95": null, + "tokens_per_sec_max": null, + "memory_mb": null, + "watts_avg": null, + "notes": null + }, + { + "label": "qwen2.5-coder:3b", + "cell_id": "pavilion:weeyuga:qwen2.5-coder:3b", + "n_calls": 6, + "n_errors": 0, + "duration_ms_mean": null, + "duration_ms_p50": null, + "duration_ms_p95": null, + "tokens_per_sec_mean": null, + "tokens_per_sec_p50": null, + "tokens_per_sec_p95": null, + "tokens_per_sec_max": null, + "memory_mb": null, + "watts_avg": null, + "notes": null + }, + { + "label": "qwen3:14b", + "cell_id": "pavilion:weeyuga:qwen3:14b", + "n_calls": 6, + "n_errors": 4, + "duration_ms_mean": null, + "duration_ms_p50": null, + "duration_ms_p95": null, + "tokens_per_sec_mean": null, + "tokens_per_sec_p50": null, + "tokens_per_sec_p95": null, + "tokens_per_sec_max": null, + "memory_mb": null, + "watts_avg": null, + "notes": null + }, + { + "label": "qwen3:8b", + "cell_id": "pavilion:weeyuga:qwen3:8b", + "n_calls": 6, + "n_errors": 2, + "duration_ms_mean": null, + "duration_ms_p50": null, + "duration_ms_p95": null, + "tokens_per_sec_mean": null, + "tokens_per_sec_p50": null, + "tokens_per_sec_p95": null, + "tokens_per_sec_max": null, + "memory_mb": null, + "watts_avg": null, + "notes": null + }, + { + "label": "qwen3:4b", + "cell_id": "pavilion:weeyuga:qwen3:4b", + "n_calls": 6, + "n_errors": 0, + "duration_ms_mean": null, + "duration_ms_p50": null, + "duration_ms_p95": null, + "tokens_per_sec_mean": null, + "tokens_per_sec_p50": null, + "tokens_per_sec_p95": null, + "tokens_per_sec_max": null, + "memory_mb": null, + "watts_avg": null, + "notes": null + }, + { + "label": "qwen2.5:3b", + "cell_id": "pavilion:weeyuga:qwen2.5:3b", + "n_calls": 6, + "n_errors": 0, + "duration_ms_mean": null, + "duration_ms_p50": null, + "duration_ms_p95": null, + "tokens_per_sec_mean": null, + "tokens_per_sec_p50": null, + "tokens_per_sec_p95": null, + "tokens_per_sec_max": null, + "memory_mb": null, + "watts_avg": null, + "notes": null + }, + { + "label": "qwen2.5-coder:1.5b", + "cell_id": "pavilion:weeyuga:qwen2.5-coder:1.5b", + "n_calls": 6, + "n_errors": 0, + "duration_ms_mean": null, + "duration_ms_p50": null, + "duration_ms_p95": null, + "tokens_per_sec_mean": null, + "tokens_per_sec_p50": null, + "tokens_per_sec_p95": null, + "tokens_per_sec_max": null, + "memory_mb": null, + "watts_avg": null, + "notes": null + }, + { + "label": "qwen2.5-coder:0.5b", + "cell_id": "pavilion:weeyuga:qwen2.5-coder:0.5b", + "n_calls": 6, + "n_errors": 0, + "duration_ms_mean": null, + "duration_ms_p50": null, + "duration_ms_p95": null, + "tokens_per_sec_mean": null, + "tokens_per_sec_p50": null, + "tokens_per_sec_p95": null, + "tokens_per_sec_max": null, + "memory_mb": null, + "watts_avg": null, + "notes": null + } + ], "cells": [ { "cell_id": "pavilion:weeyuga:qwen3.5:4b", @@ -1098,7 +3003,10 @@ "n_errors": 0, "duration_ms_mean": null, "duration_ms_p50": null, + "duration_ms_p95": null, "tokens_per_sec_mean": null, + "tokens_per_sec_p50": null, + "tokens_per_sec_p95": null, "tokens_per_sec_max": null }, { @@ -1107,7 +3015,10 @@ "n_errors": 0, "duration_ms_mean": null, "duration_ms_p50": null, + "duration_ms_p95": null, "tokens_per_sec_mean": null, + "tokens_per_sec_p50": null, + "tokens_per_sec_p95": null, "tokens_per_sec_max": null }, { @@ -1116,7 +3027,10 @@ "n_errors": 0, "duration_ms_mean": null, "duration_ms_p50": null, + "duration_ms_p95": null, "tokens_per_sec_mean": null, + "tokens_per_sec_p50": null, + "tokens_per_sec_p95": null, "tokens_per_sec_max": null }, { @@ -1125,7 +3039,10 @@ "n_errors": 0, "duration_ms_mean": null, "duration_ms_p50": null, + "duration_ms_p95": null, "tokens_per_sec_mean": null, + "tokens_per_sec_p50": null, + "tokens_per_sec_p95": null, "tokens_per_sec_max": null }, { @@ -1134,7 +3051,10 @@ "n_errors": 0, "duration_ms_mean": null, "duration_ms_p50": null, + "duration_ms_p95": null, "tokens_per_sec_mean": null, + "tokens_per_sec_p50": null, + "tokens_per_sec_p95": null, "tokens_per_sec_max": null }, { @@ -1143,7 +3063,10 @@ "n_errors": 0, "duration_ms_mean": null, "duration_ms_p50": null, + "duration_ms_p95": null, "tokens_per_sec_mean": null, + "tokens_per_sec_p50": null, + "tokens_per_sec_p95": null, "tokens_per_sec_max": null }, { @@ -1152,7 +3075,10 @@ "n_errors": 0, "duration_ms_mean": null, "duration_ms_p50": null, + "duration_ms_p95": null, "tokens_per_sec_mean": null, + "tokens_per_sec_p50": null, + "tokens_per_sec_p95": null, "tokens_per_sec_max": null }, { @@ -1161,7 +3087,10 @@ "n_errors": 0, "duration_ms_mean": null, "duration_ms_p50": null, + "duration_ms_p95": null, "tokens_per_sec_mean": null, + "tokens_per_sec_p50": null, + "tokens_per_sec_p95": null, "tokens_per_sec_max": null }, { @@ -1170,7 +3099,10 @@ "n_errors": 6, "duration_ms_mean": null, "duration_ms_p50": null, + "duration_ms_p95": null, "tokens_per_sec_mean": null, + "tokens_per_sec_p50": null, + "tokens_per_sec_p95": null, "tokens_per_sec_max": null }, { @@ -1179,7 +3111,10 @@ "n_errors": 0, "duration_ms_mean": null, "duration_ms_p50": null, + "duration_ms_p95": null, "tokens_per_sec_mean": null, + "tokens_per_sec_p50": null, + "tokens_per_sec_p95": null, "tokens_per_sec_max": null }, { @@ -1188,7 +3123,10 @@ "n_errors": 4, "duration_ms_mean": null, "duration_ms_p50": null, + "duration_ms_p95": null, "tokens_per_sec_mean": null, + "tokens_per_sec_p50": null, + "tokens_per_sec_p95": null, "tokens_per_sec_max": null }, { @@ -1197,7 +3135,10 @@ "n_errors": 2, "duration_ms_mean": null, "duration_ms_p50": null, + "duration_ms_p95": null, "tokens_per_sec_mean": null, + "tokens_per_sec_p50": null, + "tokens_per_sec_p95": null, "tokens_per_sec_max": null }, { @@ -1206,7 +3147,10 @@ "n_errors": 0, "duration_ms_mean": null, "duration_ms_p50": null, + "duration_ms_p95": null, "tokens_per_sec_mean": null, + "tokens_per_sec_p50": null, + "tokens_per_sec_p95": null, "tokens_per_sec_max": null }, { @@ -1215,7 +3159,10 @@ "n_errors": 0, "duration_ms_mean": null, "duration_ms_p50": null, + "duration_ms_p95": null, "tokens_per_sec_mean": null, + "tokens_per_sec_p50": null, + "tokens_per_sec_p95": null, "tokens_per_sec_max": null }, { @@ -1224,7 +3171,10 @@ "n_errors": 0, "duration_ms_mean": null, "duration_ms_p50": null, + "duration_ms_p95": null, "tokens_per_sec_mean": null, + "tokens_per_sec_p50": null, + "tokens_per_sec_p95": null, "tokens_per_sec_max": null }, { @@ -1233,21 +3183,57 @@ "n_errors": 0, "duration_ms_mean": null, "duration_ms_p50": null, + "duration_ms_p95": null, "tokens_per_sec_mean": null, + "tokens_per_sec_p50": null, + "tokens_per_sec_p95": null, "tokens_per_sec_max": null } ], "n_calls_total": 96, "n_errors_total": 12, - "started_at": "2026-04-29T09:51:46Z", - "tags": [ - "a3b", - "iq2", - "pavilion", - "qwen2.5", - "qwen3", - "qwen3.5", - "weeyuga" + "chart_spec": { + "kind": "bar", + "x": "cell", + "y": "tokens_per_sec_mean", + "series": [ + "qwen3.5:4b", + "qwen3.5:35b-a3b-uncensored-iq1m", + "qwen3.5:35b-a3b-iq2s", + "qwen3.5:9b-q6k", + "qwen3.5:9b-q4km", + "qwen3.5:2b", + "qwen3.5:0.8b", + "qwen3.5:9b", + "qwen2.5-coder:14b", + "qwen2.5-coder:3b", + "qwen3:14b", + "qwen3:8b", + "qwen3:4b", + "qwen2.5:3b", + "qwen2.5-coder:1.5b", + "qwen2.5-coder:0.5b" + ], + "data_url": "/data/ad057f5b-ed3f-4a95-a38e-361be310ffd6/run.jsonl" + }, + "site_grade": "archive-only", + "site_grade_reason": "no documented method (no synthesis doc and no run.md)", + "raw_data_urls": { + "jsonl": "/data/ad057f5b-ed3f-4a95-a38e-361be310ffd6/run.jsonl", + "log": null, + "md": null, + "metadata": "/data/ad057f5b-ed3f-4a95-a38e-361be310ffd6/metadata.json", + "gitea_dir": "https://git.weeyuga.com/slobodanmargetic988/weeyuga-benchmarks-public/src/branch/main/runs/ad057f5b-ed3f-4a95-a38e-361be310ffd6/" + }, + "synthesis_doc": "PAVILION_LLAMACPP_VS_OLLAMA_v0_INCOMPLETE.md", + "synthesis_docs_all": [ + "PAVILION_LLAMACPP_VS_OLLAMA_v0_INCOMPLETE.md" + ], + "related_ids": [ + "ff1131ca-d021-4e06-8616-4b4cdb54e97e", + "23066b38-ea9c-4dd3-b2f5-32912a67fce4", + "487c8a51-7c5c-417d-a0c5-4b6e8491e11f", + "5f4d9e97-4294-4e69-9e6c-f318bca1ce44" ], "status": "complete", "visibility": "draft" @@ -1256,13 +3242,16 @@ "id": "ff1131ca-d021-4e06-8616-4b4cdb54e97e", "title": "pavilion-weeyuga-v1 \u2014 qwen2.5/qwen2.5-coder/qwen3/qwen3.5 on pavilion", "date": "2026-04-28", + "started_at": "2026-04-28T21:03:46Z", + "git_sha": "9934892784228748586130d8abbacd82a919aee2", "hardware": "pavilion", + "hardware_label": "Pavilion \u00b7 HP laptop \u00b7 GTX 1050 4 GB \u00b7 16 GB RAM \u00b7 i7-9750H", "engine": "weeyuga", "harness": "pavilion-weeyuga-v1", "model_family": "qwen2.5/qwen2.5-coder/qwen3/qwen3.5", "model_sizes": [ "qwen3.5:4b", - "qwen3.5:35b-a3b-uncensored-iq1m", + "qwen3.5:35b-a3b-uncensored", "qwen3.5:35b-a3b-iq2s", "qwen3.5:9b-q6k", "qwen3.5:9b", @@ -1277,161 +3266,7 @@ "qwen2.5-coder:1.5b", "qwen2.5-coder:0.5b" ], - "task_kind": null, - "headline": "17 calls across 16 cell(s); 3 errors", - "summary_md_path": null, - "raw_jsonl_path": "runs/ff1131ca-d021-4e06-8616-4b4cdb54e97e.jsonl", - "log_path": null, - "synthesis_doc": "PAVILION_WEEYUGA_v1.md", - "cells": [ - { - "cell_id": "pavilion:weeyuga:qwen3.5:4b", - "n_calls": 2, - "n_errors": 0, - "duration_ms_mean": null, - "duration_ms_p50": null, - "tokens_per_sec_mean": null, - "tokens_per_sec_max": null - }, - { - "cell_id": "pavilion:weeyuga:qwen3.5:35b-a3b-uncensored-iq1m", - "n_calls": 1, - "n_errors": 0, - "duration_ms_mean": null, - "duration_ms_p50": null, - "tokens_per_sec_mean": null, - "tokens_per_sec_max": null - }, - { - "cell_id": "pavilion:weeyuga:qwen3.5:35b-a3b-iq2s", - "n_calls": 1, - "n_errors": 0, - "duration_ms_mean": null, - "duration_ms_p50": null, - "tokens_per_sec_mean": null, - "tokens_per_sec_max": null - }, - { - "cell_id": "pavilion:weeyuga:qwen3.5:9b-q6k", - "n_calls": 1, - "n_errors": 0, - "duration_ms_mean": null, - "duration_ms_p50": null, - "tokens_per_sec_mean": null, - "tokens_per_sec_max": null - }, - { - "cell_id": "pavilion:weeyuga:qwen3.5:9b-q4km", - "n_calls": 1, - "n_errors": 0, - "duration_ms_mean": null, - "duration_ms_p50": null, - "tokens_per_sec_mean": null, - "tokens_per_sec_max": null - }, - { - "cell_id": "pavilion:weeyuga:qwen3.5:2b", - "n_calls": 1, - "n_errors": 0, - "duration_ms_mean": null, - "duration_ms_p50": null, - "tokens_per_sec_mean": null, - "tokens_per_sec_max": null - }, - { - "cell_id": "pavilion:weeyuga:qwen3.5:0.8b", - "n_calls": 1, - "n_errors": 0, - "duration_ms_mean": null, - "duration_ms_p50": null, - "tokens_per_sec_mean": null, - "tokens_per_sec_max": null - }, - { - "cell_id": "pavilion:weeyuga:qwen3.5:9b", - "n_calls": 1, - "n_errors": 0, - "duration_ms_mean": null, - "duration_ms_p50": null, - "tokens_per_sec_mean": null, - "tokens_per_sec_max": null - }, - { - "cell_id": "pavilion:weeyuga:qwen2.5-coder:14b", - "n_calls": 1, - "n_errors": 1, - "duration_ms_mean": null, - "duration_ms_p50": null, - "tokens_per_sec_mean": null, - "tokens_per_sec_max": null - }, - { - "cell_id": "pavilion:weeyuga:qwen2.5-coder:3b", - "n_calls": 1, - "n_errors": 0, - "duration_ms_mean": null, - "duration_ms_p50": null, - "tokens_per_sec_mean": null, - "tokens_per_sec_max": null - }, - { - "cell_id": "pavilion:weeyuga:qwen3:14b", - "n_calls": 1, - "n_errors": 1, - "duration_ms_mean": null, - "duration_ms_p50": null, - "tokens_per_sec_mean": null, - "tokens_per_sec_max": null - }, - { - "cell_id": "pavilion:weeyuga:qwen3:8b", - "n_calls": 1, - "n_errors": 1, - "duration_ms_mean": null, - "duration_ms_p50": null, - "tokens_per_sec_mean": null, - "tokens_per_sec_max": null - }, - { - "cell_id": "pavilion:weeyuga:qwen3:4b", - "n_calls": 1, - "n_errors": 0, - "duration_ms_mean": null, - "duration_ms_p50": null, - "tokens_per_sec_mean": null, - "tokens_per_sec_max": null - }, - { - "cell_id": "pavilion:weeyuga:qwen2.5:3b", - "n_calls": 1, - "n_errors": 0, - "duration_ms_mean": null, - "duration_ms_p50": null, - "tokens_per_sec_mean": null, - "tokens_per_sec_max": null - }, - { - "cell_id": "pavilion:weeyuga:qwen2.5-coder:1.5b", - "n_calls": 1, - "n_errors": 0, - "duration_ms_mean": null, - "duration_ms_p50": null, - "tokens_per_sec_mean": null, - "tokens_per_sec_max": null - }, - { - "cell_id": "pavilion:weeyuga:qwen2.5-coder:0.5b", - "n_calls": 1, - "n_errors": 0, - "duration_ms_mean": null, - "duration_ms_p50": null, - "tokens_per_sec_mean": null, - "tokens_per_sec_max": null - } - ], - "n_calls_total": 17, - "n_errors_total": 3, - "started_at": "2026-04-28T21:03:46Z", + "task_kind": "chat", "tags": [ "a3b", "iq2", @@ -1441,6 +3276,511 @@ "qwen3.5", "weeyuga" ], + "headline": "17 calls across 16 cell(s); 3 errors", + "janie_blurb_md": null, + "janie_blurb_status": "pending", + "caveat": null, + "caveat_severity": null, + "methodology_ref": "PAVILION_WEEYUGA_v1", + "methodology_deviations_md": null, + "results_table": [ + { + "label": "qwen3.5:4b", + "cell_id": "pavilion:weeyuga:qwen3.5:4b", + "n_calls": 2, + "n_errors": 0, + "duration_ms_mean": null, + "duration_ms_p50": null, + "duration_ms_p95": null, + "tokens_per_sec_mean": null, + "tokens_per_sec_p50": null, + "tokens_per_sec_p95": null, + "tokens_per_sec_max": null, + "memory_mb": null, + "watts_avg": null, + "notes": null + }, + { + "label": "qwen3.5:35b-a3b-uncensored-iq1m", + "cell_id": "pavilion:weeyuga:qwen3.5:35b-a3b-uncensored-iq1m", + "n_calls": 1, + "n_errors": 0, + "duration_ms_mean": null, + "duration_ms_p50": null, + "duration_ms_p95": null, + "tokens_per_sec_mean": null, + "tokens_per_sec_p50": null, + "tokens_per_sec_p95": null, + "tokens_per_sec_max": null, + "memory_mb": null, + "watts_avg": null, + "notes": null + }, + { + "label": "qwen3.5:35b-a3b-iq2s", + "cell_id": "pavilion:weeyuga:qwen3.5:35b-a3b-iq2s", + "n_calls": 1, + "n_errors": 0, + "duration_ms_mean": null, + "duration_ms_p50": null, + "duration_ms_p95": null, + "tokens_per_sec_mean": null, + "tokens_per_sec_p50": null, + "tokens_per_sec_p95": null, + "tokens_per_sec_max": null, + "memory_mb": null, + "watts_avg": null, + "notes": null + }, + { + "label": "qwen3.5:9b-q6k", + "cell_id": "pavilion:weeyuga:qwen3.5:9b-q6k", + "n_calls": 1, + "n_errors": 0, + "duration_ms_mean": null, + "duration_ms_p50": null, + "duration_ms_p95": null, + "tokens_per_sec_mean": null, + "tokens_per_sec_p50": null, + "tokens_per_sec_p95": null, + "tokens_per_sec_max": null, + "memory_mb": null, + "watts_avg": null, + "notes": null + }, + { + "label": "qwen3.5:9b-q4km", + "cell_id": "pavilion:weeyuga:qwen3.5:9b-q4km", + "n_calls": 1, + "n_errors": 0, + "duration_ms_mean": null, + "duration_ms_p50": null, + "duration_ms_p95": null, + "tokens_per_sec_mean": null, + "tokens_per_sec_p50": null, + "tokens_per_sec_p95": null, + "tokens_per_sec_max": null, + "memory_mb": null, + "watts_avg": null, + "notes": null + }, + { + "label": "qwen3.5:2b", + "cell_id": "pavilion:weeyuga:qwen3.5:2b", + "n_calls": 1, + "n_errors": 0, + "duration_ms_mean": null, + "duration_ms_p50": null, + "duration_ms_p95": null, + "tokens_per_sec_mean": null, + "tokens_per_sec_p50": null, + "tokens_per_sec_p95": null, + "tokens_per_sec_max": null, + "memory_mb": null, + "watts_avg": null, + "notes": null + }, + { + "label": "qwen3.5:0.8b", + "cell_id": "pavilion:weeyuga:qwen3.5:0.8b", + "n_calls": 1, + "n_errors": 0, + "duration_ms_mean": null, + "duration_ms_p50": null, + "duration_ms_p95": null, + "tokens_per_sec_mean": null, + "tokens_per_sec_p50": null, + "tokens_per_sec_p95": null, + "tokens_per_sec_max": null, + "memory_mb": null, + "watts_avg": null, + "notes": null + }, + { + "label": "qwen3.5:9b", + "cell_id": "pavilion:weeyuga:qwen3.5:9b", + "n_calls": 1, + "n_errors": 0, + "duration_ms_mean": null, + "duration_ms_p50": null, + "duration_ms_p95": null, + "tokens_per_sec_mean": null, + "tokens_per_sec_p50": null, + "tokens_per_sec_p95": null, + "tokens_per_sec_max": null, + "memory_mb": null, + "watts_avg": null, + "notes": null + }, + { + "label": "qwen2.5-coder:14b", + "cell_id": "pavilion:weeyuga:qwen2.5-coder:14b", + "n_calls": 1, + "n_errors": 1, + "duration_ms_mean": null, + "duration_ms_p50": null, + "duration_ms_p95": null, + "tokens_per_sec_mean": null, + "tokens_per_sec_p50": null, + "tokens_per_sec_p95": null, + "tokens_per_sec_max": null, + "memory_mb": null, + "watts_avg": null, + "notes": null + }, + { + "label": "qwen2.5-coder:3b", + "cell_id": "pavilion:weeyuga:qwen2.5-coder:3b", + "n_calls": 1, + "n_errors": 0, + "duration_ms_mean": null, + "duration_ms_p50": null, + "duration_ms_p95": null, + "tokens_per_sec_mean": null, + "tokens_per_sec_p50": null, + "tokens_per_sec_p95": null, + "tokens_per_sec_max": null, + "memory_mb": null, + "watts_avg": null, + "notes": null + }, + { + "label": "qwen3:14b", + "cell_id": "pavilion:weeyuga:qwen3:14b", + "n_calls": 1, + "n_errors": 1, + "duration_ms_mean": null, + "duration_ms_p50": null, + "duration_ms_p95": null, + "tokens_per_sec_mean": null, + "tokens_per_sec_p50": null, + "tokens_per_sec_p95": null, + "tokens_per_sec_max": null, + "memory_mb": null, + "watts_avg": null, + "notes": null + }, + { + "label": "qwen3:8b", + "cell_id": "pavilion:weeyuga:qwen3:8b", + "n_calls": 1, + "n_errors": 1, + "duration_ms_mean": null, + "duration_ms_p50": null, + "duration_ms_p95": null, + "tokens_per_sec_mean": null, + "tokens_per_sec_p50": null, + "tokens_per_sec_p95": null, + "tokens_per_sec_max": null, + "memory_mb": null, + "watts_avg": null, + "notes": null + }, + { + "label": "qwen3:4b", + "cell_id": "pavilion:weeyuga:qwen3:4b", + "n_calls": 1, + "n_errors": 0, + "duration_ms_mean": null, + "duration_ms_p50": null, + "duration_ms_p95": null, + "tokens_per_sec_mean": null, + "tokens_per_sec_p50": null, + "tokens_per_sec_p95": null, + "tokens_per_sec_max": null, + "memory_mb": null, + "watts_avg": null, + "notes": null + }, + { + "label": "qwen2.5:3b", + "cell_id": "pavilion:weeyuga:qwen2.5:3b", + "n_calls": 1, + "n_errors": 0, + "duration_ms_mean": null, + "duration_ms_p50": null, + "duration_ms_p95": null, + "tokens_per_sec_mean": null, + "tokens_per_sec_p50": null, + "tokens_per_sec_p95": null, + "tokens_per_sec_max": null, + "memory_mb": null, + "watts_avg": null, + "notes": null + }, + { + "label": "qwen2.5-coder:1.5b", + "cell_id": "pavilion:weeyuga:qwen2.5-coder:1.5b", + "n_calls": 1, + "n_errors": 0, + "duration_ms_mean": null, + "duration_ms_p50": null, + "duration_ms_p95": null, + "tokens_per_sec_mean": null, + "tokens_per_sec_p50": null, + "tokens_per_sec_p95": null, + "tokens_per_sec_max": null, + "memory_mb": null, + "watts_avg": null, + "notes": null + }, + { + "label": "qwen2.5-coder:0.5b", + "cell_id": "pavilion:weeyuga:qwen2.5-coder:0.5b", + "n_calls": 1, + "n_errors": 0, + "duration_ms_mean": null, + "duration_ms_p50": null, + "duration_ms_p95": null, + "tokens_per_sec_mean": null, + "tokens_per_sec_p50": null, + "tokens_per_sec_p95": null, + "tokens_per_sec_max": null, + "memory_mb": null, + "watts_avg": null, + "notes": null + } + ], + "cells": [ + { + "cell_id": "pavilion:weeyuga:qwen3.5:4b", + "n_calls": 2, + "n_errors": 0, + "duration_ms_mean": null, + "duration_ms_p50": null, + "duration_ms_p95": null, + "tokens_per_sec_mean": null, + "tokens_per_sec_p50": null, + "tokens_per_sec_p95": null, + "tokens_per_sec_max": null + }, + { + "cell_id": "pavilion:weeyuga:qwen3.5:35b-a3b-uncensored-iq1m", + "n_calls": 1, + "n_errors": 0, + "duration_ms_mean": null, + "duration_ms_p50": null, + "duration_ms_p95": null, + "tokens_per_sec_mean": null, + "tokens_per_sec_p50": null, + "tokens_per_sec_p95": null, + "tokens_per_sec_max": null + }, + { + "cell_id": "pavilion:weeyuga:qwen3.5:35b-a3b-iq2s", + "n_calls": 1, + "n_errors": 0, + "duration_ms_mean": null, + "duration_ms_p50": null, + "duration_ms_p95": null, + "tokens_per_sec_mean": null, + "tokens_per_sec_p50": null, + "tokens_per_sec_p95": null, + "tokens_per_sec_max": null + }, + { + "cell_id": "pavilion:weeyuga:qwen3.5:9b-q6k", + "n_calls": 1, + "n_errors": 0, + "duration_ms_mean": null, + "duration_ms_p50": null, + "duration_ms_p95": null, + "tokens_per_sec_mean": null, + "tokens_per_sec_p50": null, + "tokens_per_sec_p95": null, + "tokens_per_sec_max": null + }, + { + "cell_id": "pavilion:weeyuga:qwen3.5:9b-q4km", + "n_calls": 1, + "n_errors": 0, + "duration_ms_mean": null, + "duration_ms_p50": null, + "duration_ms_p95": null, + "tokens_per_sec_mean": null, + "tokens_per_sec_p50": null, + "tokens_per_sec_p95": null, + "tokens_per_sec_max": null + }, + { + "cell_id": "pavilion:weeyuga:qwen3.5:2b", + "n_calls": 1, + "n_errors": 0, + "duration_ms_mean": null, + "duration_ms_p50": null, + "duration_ms_p95": null, + "tokens_per_sec_mean": null, + "tokens_per_sec_p50": null, + "tokens_per_sec_p95": null, + "tokens_per_sec_max": null + }, + { + "cell_id": "pavilion:weeyuga:qwen3.5:0.8b", + "n_calls": 1, + "n_errors": 0, + "duration_ms_mean": null, + "duration_ms_p50": null, + "duration_ms_p95": null, + "tokens_per_sec_mean": null, + "tokens_per_sec_p50": null, + "tokens_per_sec_p95": null, + "tokens_per_sec_max": null + }, + { + "cell_id": "pavilion:weeyuga:qwen3.5:9b", + "n_calls": 1, + "n_errors": 0, + "duration_ms_mean": null, + "duration_ms_p50": null, + "duration_ms_p95": null, + "tokens_per_sec_mean": null, + "tokens_per_sec_p50": null, + "tokens_per_sec_p95": null, + "tokens_per_sec_max": null + }, + { + "cell_id": "pavilion:weeyuga:qwen2.5-coder:14b", + "n_calls": 1, + "n_errors": 1, + "duration_ms_mean": null, + "duration_ms_p50": null, + "duration_ms_p95": null, + "tokens_per_sec_mean": null, + "tokens_per_sec_p50": null, + "tokens_per_sec_p95": null, + "tokens_per_sec_max": null + }, + { + "cell_id": "pavilion:weeyuga:qwen2.5-coder:3b", + "n_calls": 1, + "n_errors": 0, + "duration_ms_mean": null, + "duration_ms_p50": null, + "duration_ms_p95": null, + "tokens_per_sec_mean": null, + "tokens_per_sec_p50": null, + "tokens_per_sec_p95": null, + "tokens_per_sec_max": null + }, + { + "cell_id": "pavilion:weeyuga:qwen3:14b", + "n_calls": 1, + "n_errors": 1, + "duration_ms_mean": null, + "duration_ms_p50": null, + "duration_ms_p95": null, + "tokens_per_sec_mean": null, + "tokens_per_sec_p50": null, + "tokens_per_sec_p95": null, + "tokens_per_sec_max": null + }, + { + "cell_id": "pavilion:weeyuga:qwen3:8b", + "n_calls": 1, + "n_errors": 1, + "duration_ms_mean": null, + "duration_ms_p50": null, + "duration_ms_p95": null, + "tokens_per_sec_mean": null, + "tokens_per_sec_p50": null, + "tokens_per_sec_p95": null, + "tokens_per_sec_max": null + }, + { + "cell_id": "pavilion:weeyuga:qwen3:4b", + "n_calls": 1, + "n_errors": 0, + "duration_ms_mean": null, + "duration_ms_p50": null, + "duration_ms_p95": null, + "tokens_per_sec_mean": null, + "tokens_per_sec_p50": null, + "tokens_per_sec_p95": null, + "tokens_per_sec_max": null + }, + { + "cell_id": "pavilion:weeyuga:qwen2.5:3b", + "n_calls": 1, + "n_errors": 0, + "duration_ms_mean": null, + "duration_ms_p50": null, + "duration_ms_p95": null, + "tokens_per_sec_mean": null, + "tokens_per_sec_p50": null, + "tokens_per_sec_p95": null, + "tokens_per_sec_max": null + }, + { + "cell_id": "pavilion:weeyuga:qwen2.5-coder:1.5b", + "n_calls": 1, + "n_errors": 0, + "duration_ms_mean": null, + "duration_ms_p50": null, + "duration_ms_p95": null, + "tokens_per_sec_mean": null, + "tokens_per_sec_p50": null, + "tokens_per_sec_p95": null, + "tokens_per_sec_max": null + }, + { + "cell_id": "pavilion:weeyuga:qwen2.5-coder:0.5b", + "n_calls": 1, + "n_errors": 0, + "duration_ms_mean": null, + "duration_ms_p50": null, + "duration_ms_p95": null, + "tokens_per_sec_mean": null, + "tokens_per_sec_p50": null, + "tokens_per_sec_p95": null, + "tokens_per_sec_max": null + } + ], + "n_calls_total": 17, + "n_errors_total": 3, + "chart_spec": { + "kind": "bar", + "x": "cell", + "y": "tokens_per_sec_mean", + "series": [ + "qwen3.5:4b", + "qwen3.5:35b-a3b-uncensored-iq1m", + "qwen3.5:35b-a3b-iq2s", + "qwen3.5:9b-q6k", + "qwen3.5:9b-q4km", + "qwen3.5:2b", + "qwen3.5:0.8b", + "qwen3.5:9b", + "qwen2.5-coder:14b", + "qwen2.5-coder:3b", + "qwen3:14b", + "qwen3:8b", + "qwen3:4b", + "qwen2.5:3b", + "qwen2.5-coder:1.5b", + "qwen2.5-coder:0.5b" + ], + "data_url": "/data/ff1131ca-d021-4e06-8616-4b4cdb54e97e/run.jsonl" + }, + "site_grade": "archive-only", + "site_grade_reason": "error rate 3/17", + "raw_data_urls": { + "jsonl": "/data/ff1131ca-d021-4e06-8616-4b4cdb54e97e/run.jsonl", + "log": null, + "md": null, + "metadata": "/data/ff1131ca-d021-4e06-8616-4b4cdb54e97e/metadata.json", + "gitea_dir": "https://git.weeyuga.com/slobodanmargetic988/weeyuga-benchmarks-public/src/branch/main/runs/ff1131ca-d021-4e06-8616-4b4cdb54e97e/" + }, + "synthesis_doc": "PAVILION_WEEYUGA_v1.md", + "synthesis_docs_all": [ + "PAVILION_WEEYUGA_v1.md", + "PAVILION_LLAMACPP_VS_OLLAMA_v0_INCOMPLETE.md" + ], + "related_ids": [ + "ad057f5b-ed3f-4a95-a38e-361be310ffd6", + "23066b38-ea9c-4dd3-b2f5-32912a67fce4", + "487c8a51-7c5c-417d-a0c5-4b6e8491e11f", + "5f4d9e97-4294-4e69-9e6c-f318bca1ce44" + ], "status": "complete", "visibility": "draft" } diff --git a/runs/09d8fbde-0008-49bb-99da-03eeaca72be1/metadata.json b/runs/09d8fbde-0008-49bb-99da-03eeaca72be1/metadata.json index d31e841..dddb2ee 100644 --- a/runs/09d8fbde-0008-49bb-99da-03eeaca72be1/metadata.json +++ b/runs/09d8fbde-0008-49bb-99da-03eeaca72be1/metadata.json @@ -1,8 +1,11 @@ { "id": "09d8fbde-0008-49bb-99da-03eeaca72be1", - "title": "Predator trio bench \u2014 09d8fbde-0008-49bb-99da-03eeaca72be1", + "title": "Predator trio bench", "date": "2026-05-04", + "started_at": "2026-05-04T16:01:52Z", + "git_sha": "f06800254dfd046e154c94f663fb7f9f66c79ae3", "hardware": "predator", + "hardware_label": "Predator \u00b7 gaming laptop \u00b7 GTX 1060 6 GB \u00b7 28 GB RAM", "engine": "llamacpp", "harness": "predator-trio-1", "model_family": "gemma/granite/qwen3.5", @@ -11,44 +14,7 @@ "gemma-4:e4b-it", "qwen3.5:9b" ], - "task_kind": null, - "headline": "includes prefill time, so true generation rate is slightly higher. Use llama-bench's", - "summary_md_path": "runs/09d8fbde-0008-49bb-99da-03eeaca72be1.md", - "raw_jsonl_path": "runs/09d8fbde-0008-49bb-99da-03eeaca72be1.jsonl", - "log_path": "runs/09d8fbde-0008-49bb-99da-03eeaca72be1.log", - "synthesis_doc": "A3B_AND_CPU_OVERNIGHT_2026-05-05.md", - "cells": [ - { - "cell_id": "predator:llamacpp:granite-4.1:8b-q4km", - "n_calls": 14, - "n_errors": 0, - "duration_ms_mean": 8831.33, - "duration_ms_p50": 6403.0, - "tokens_per_sec_mean": 13.49, - "tokens_per_sec_max": 15.75 - }, - { - "cell_id": "predator:llamacpp:gemma-4:e4b-it-q4km", - "n_calls": 14, - "n_errors": 0, - "duration_ms_mean": 9888.42, - "duration_ms_p50": 8199.0, - "tokens_per_sec_mean": 21.82, - "tokens_per_sec_max": 23.66 - }, - { - "cell_id": "predator:llamacpp:qwen3.5:9b-q4km", - "n_calls": 14, - "n_errors": 0, - "duration_ms_mean": 37136.33, - "duration_ms_p50": 35324.5, - "tokens_per_sec_mean": 14.04, - "tokens_per_sec_max": 14.57 - } - ], - "n_calls_total": 42, - "n_errors_total": 0, - "started_at": "2026-05-04T16:01:52Z", + "task_kind": "chat", "tags": [ "gemma", "granite", @@ -57,6 +23,129 @@ "qwen3.5", "trio" ], + "headline": "42 calls across 3 cell(s); ~16.5 tok/s mean; p50 9.6s", + "janie_blurb_md": null, + "janie_blurb_status": "pending", + "caveat": null, + "caveat_severity": null, + "methodology_ref": "A3B_AND_CPU_OVERNIGHT_2026-05-05", + "methodology_deviations_md": null, + "results_table": [ + { + "label": "granite-4.1:8b-q4km", + "cell_id": "predator:llamacpp:granite-4.1:8b-q4km", + "n_calls": 14, + "n_errors": 0, + "duration_ms_mean": 8831.33, + "duration_ms_p50": 6403.0, + "duration_ms_p95": 19524.25, + "tokens_per_sec_mean": 13.49, + "tokens_per_sec_p50": 15.16, + "tokens_per_sec_p95": 15.73, + "tokens_per_sec_max": 15.75, + "memory_mb": null, + "watts_avg": null, + "notes": null + }, + { + "label": "gemma-4:e4b-it-q4km", + "cell_id": "predator:llamacpp:gemma-4:e4b-it-q4km", + "n_calls": 14, + "n_errors": 0, + "duration_ms_mean": 9888.42, + "duration_ms_p50": 8199.0, + "duration_ms_p95": 21724.5, + "tokens_per_sec_mean": 21.82, + "tokens_per_sec_p50": 22.91, + "tokens_per_sec_p95": 23.64, + "tokens_per_sec_max": 23.66, + "memory_mb": null, + "watts_avg": null, + "notes": null + }, + { + "label": "qwen3.5:9b-q4km", + "cell_id": "predator:llamacpp:qwen3.5:9b-q4km", + "n_calls": 14, + "n_errors": 0, + "duration_ms_mean": 37136.33, + "duration_ms_p50": 35324.5, + "duration_ms_p95": 71874.15, + "tokens_per_sec_mean": 14.04, + "tokens_per_sec_p50": 14.42, + "tokens_per_sec_p95": 14.57, + "tokens_per_sec_max": 14.57, + "memory_mb": null, + "watts_avg": null, + "notes": null + } + ], + "cells": [ + { + "cell_id": "predator:llamacpp:granite-4.1:8b-q4km", + "n_calls": 14, + "n_errors": 0, + "duration_ms_mean": 8831.33, + "duration_ms_p50": 6403.0, + "duration_ms_p95": 19524.25, + "tokens_per_sec_mean": 13.49, + "tokens_per_sec_p50": 15.16, + "tokens_per_sec_p95": 15.73, + "tokens_per_sec_max": 15.75 + }, + { + "cell_id": "predator:llamacpp:gemma-4:e4b-it-q4km", + "n_calls": 14, + "n_errors": 0, + "duration_ms_mean": 9888.42, + "duration_ms_p50": 8199.0, + "duration_ms_p95": 21724.5, + "tokens_per_sec_mean": 21.82, + "tokens_per_sec_p50": 22.91, + "tokens_per_sec_p95": 23.64, + "tokens_per_sec_max": 23.66 + }, + { + "cell_id": "predator:llamacpp:qwen3.5:9b-q4km", + "n_calls": 14, + "n_errors": 0, + "duration_ms_mean": 37136.33, + "duration_ms_p50": 35324.5, + "duration_ms_p95": 71874.15, + "tokens_per_sec_mean": 14.04, + "tokens_per_sec_p50": 14.42, + "tokens_per_sec_p95": 14.57, + "tokens_per_sec_max": 14.57 + } + ], + "n_calls_total": 42, + "n_errors_total": 0, + "chart_spec": { + "kind": "bar", + "x": "cell", + "y": "tokens_per_sec_mean", + "series": [ + "granite-4.1:8b-q4km", + "gemma-4:e4b-it-q4km", + "qwen3.5:9b-q4km" + ], + "data_url": "/data/09d8fbde-0008-49bb-99da-03eeaca72be1/run.jsonl" + }, + "site_grade": "flagship", + "site_grade_reason": null, + "raw_data_urls": { + "jsonl": "/data/09d8fbde-0008-49bb-99da-03eeaca72be1/run.jsonl", + "log": "/data/09d8fbde-0008-49bb-99da-03eeaca72be1/run.log", + "md": "/data/09d8fbde-0008-49bb-99da-03eeaca72be1/run.md", + "metadata": "/data/09d8fbde-0008-49bb-99da-03eeaca72be1/metadata.json", + "gitea_dir": "https://git.weeyuga.com/slobodanmargetic988/weeyuga-benchmarks-public/src/branch/main/runs/09d8fbde-0008-49bb-99da-03eeaca72be1/" + }, + "synthesis_doc": "A3B_AND_CPU_OVERNIGHT_2026-05-05.md", + "synthesis_docs_all": [ + "A3B_AND_CPU_OVERNIGHT_2026-05-05.md", + "A3B_CROSS_MACHINE_2026-05-05.md" + ], + "related_ids": [], "status": "complete", "visibility": "draft", "cells_full": [ @@ -66,7 +155,10 @@ "n_errors": 0, "duration_ms_mean": 8831.33, "duration_ms_p50": 6403.0, + "duration_ms_p95": 19524.25, "tokens_per_sec_mean": 13.49, + "tokens_per_sec_p50": 15.16, + "tokens_per_sec_p95": 15.73, "tokens_per_sec_max": 15.75, "calls": [ { @@ -257,7 +349,10 @@ "n_errors": 0, "duration_ms_mean": 9888.42, "duration_ms_p50": 8199.0, + "duration_ms_p95": 21724.5, "tokens_per_sec_mean": 21.82, + "tokens_per_sec_p50": 22.91, + "tokens_per_sec_p95": 23.64, "tokens_per_sec_max": 23.66, "calls": [ { @@ -448,7 +543,10 @@ "n_errors": 0, "duration_ms_mean": 37136.33, "duration_ms_p50": 35324.5, + "duration_ms_p95": 71874.15, "tokens_per_sec_mean": 14.04, + "tokens_per_sec_p50": 14.42, + "tokens_per_sec_p95": 14.57, "tokens_per_sec_max": 14.57, "calls": [ { @@ -658,5 +756,5 @@ "engine": "llamacpp", "predator_target_url": "http://10.8.0.7:11436" }, - "packaged_at": "2026-05-05T17:43:34Z" + "packaged_at": "2026-05-06T08:04:27Z" } diff --git a/runs/1bf57c9a-fd7a-49aa-90de-cd1907b15ddd/metadata.json b/runs/1bf57c9a-fd7a-49aa-90de-cd1907b15ddd/metadata.json index 4710aea..76498d4 100644 --- a/runs/1bf57c9a-fd7a-49aa-90de-cd1907b15ddd/metadata.json +++ b/runs/1bf57c9a-fd7a-49aa-90de-cd1907b15ddd/metadata.json @@ -2,19 +2,46 @@ "id": "1bf57c9a-fd7a-49aa-90de-cd1907b15ddd", "title": "vps50-gemma-e4b-1 \u2014 gemma on vps50", "date": "2026-05-05", + "started_at": "2026-05-05T03:07:13Z", + "git_sha": "f06800254dfd046e154c94f663fb7f9f66c79ae3", "hardware": "vps50", + "hardware_label": "VPS50 \u00b7 cloud \u00b7 16 vCPU AMD EPYC \u00b7 62 GB RAM \u00b7 no GPU", "engine": "llamacpp", "harness": "vps50-gemma-e4b-1", "model_family": "gemma", "model_sizes": [ "gemma-4-e4b" ], - "task_kind": null, + "task_kind": "chat", + "tags": [ + "gemma", + "vps50" + ], "headline": "no calls landed", - "summary_md_path": null, - "raw_jsonl_path": "runs/1bf57c9a-fd7a-49aa-90de-cd1907b15ddd.jsonl", - "log_path": "runs/1bf57c9a-fd7a-49aa-90de-cd1907b15ddd.log", - "synthesis_doc": null, + "janie_blurb_md": null, + "janie_blurb_status": "pending", + "caveat": null, + "caveat_severity": null, + "methodology_ref": "HARNESS#vps50-gemma-e4b-1", + "methodology_deviations_md": null, + "results_table": [ + { + "label": "gemma-4-e4b", + "cell_id": "vps50:llamacpp:gemma-4-e4b", + "n_calls": 0, + "n_errors": 0, + "duration_ms_mean": null, + "duration_ms_p50": null, + "duration_ms_p95": null, + "tokens_per_sec_mean": null, + "tokens_per_sec_p50": null, + "tokens_per_sec_p95": null, + "tokens_per_sec_max": null, + "memory_mb": null, + "watts_avg": null, + "notes": null + } + ], "cells": [ { "cell_id": "vps50:llamacpp:gemma-4-e4b", @@ -22,17 +49,36 @@ "n_errors": 0, "duration_ms_mean": null, "duration_ms_p50": null, + "duration_ms_p95": null, "tokens_per_sec_mean": null, + "tokens_per_sec_p50": null, + "tokens_per_sec_p95": null, "tokens_per_sec_max": null } ], "n_calls_total": 0, "n_errors_total": 0, - "started_at": "2026-05-05T03:07:13Z", - "tags": [ - "gemma", - "vps50" - ], + "chart_spec": { + "kind": "bar", + "x": "cell", + "y": "tokens_per_sec_mean", + "series": [ + "gemma-4-e4b" + ], + "data_url": "/data/1bf57c9a-fd7a-49aa-90de-cd1907b15ddd/run.jsonl" + }, + "site_grade": "archive-only", + "site_grade_reason": "status=meta-only", + "raw_data_urls": { + "jsonl": "/data/1bf57c9a-fd7a-49aa-90de-cd1907b15ddd/run.jsonl", + "log": "/data/1bf57c9a-fd7a-49aa-90de-cd1907b15ddd/run.log", + "md": null, + "metadata": "/data/1bf57c9a-fd7a-49aa-90de-cd1907b15ddd/metadata.json", + "gitea_dir": "https://git.weeyuga.com/slobodanmargetic988/weeyuga-benchmarks-public/src/branch/main/runs/1bf57c9a-fd7a-49aa-90de-cd1907b15ddd/" + }, + "synthesis_doc": null, + "synthesis_docs_all": [], + "related_ids": [], "status": "meta-only", "visibility": "draft", "cells_full": [ @@ -42,7 +88,10 @@ "n_errors": 0, "duration_ms_mean": null, "duration_ms_p50": null, + "duration_ms_p95": null, "tokens_per_sec_mean": null, + "tokens_per_sec_p50": null, + "tokens_per_sec_p95": null, "tokens_per_sec_max": null, "calls": [] } @@ -63,5 +112,5 @@ "ngl": 0, "ctx": 32768 }, - "packaged_at": "2026-05-05T17:43:34Z" + "packaged_at": "2026-05-06T08:04:27Z" } diff --git a/runs/212d6278-1b9b-45e9-8aae-7eed4d4ec822/metadata.json b/runs/212d6278-1b9b-45e9-8aae-7eed4d4ec822/metadata.json index 022ef87..60570ca 100644 --- a/runs/212d6278-1b9b-45e9-8aae-7eed4d4ec822/metadata.json +++ b/runs/212d6278-1b9b-45e9-8aae-7eed4d4ec822/metadata.json @@ -1,64 +1,21 @@ { "id": "212d6278-1b9b-45e9-8aae-7eed4d4ec822", - "title": "vps50-cpu-matrix-1 \u2014 gemma/phi-4/qwen2.5/qwen3 on vps50", + "title": "vps50-cpu-matrix-1 \u2014 gemma/phi/qwen2.5/qwen3 on vps50", "date": "2026-05-04", + "started_at": "2026-05-04T23:39:53Z", + "git_sha": "f06800254dfd046e154c94f663fb7f9f66c79ae3", "hardware": "vps50", + "hardware_label": "VPS50 \u00b7 cloud \u00b7 16 vCPU AMD EPYC \u00b7 62 GB RAM \u00b7 no GPU", "engine": "llamacpp", "harness": "vps50-cpu-matrix-1", - "model_family": "gemma/phi-4/qwen2.5/qwen3", + "model_family": "gemma/phi/qwen2.5/qwen3", "model_sizes": [ "phi-4", "gemma-4-26b-a4b", "qwen3-30b-a3b", "qwen2.5-72b" ], - "task_kind": null, - "headline": "no calls landed", - "summary_md_path": null, - "raw_jsonl_path": "runs/212d6278-1b9b-45e9-8aae-7eed4d4ec822.jsonl", - "log_path": "runs/212d6278-1b9b-45e9-8aae-7eed4d4ec822.log", - "synthesis_doc": "A3B_AND_CPU_OVERNIGHT_2026-05-05.md", - "cells": [ - { - "cell_id": "vps50:llamacpp:phi-4", - "n_calls": 0, - "n_errors": 0, - "duration_ms_mean": null, - "duration_ms_p50": null, - "tokens_per_sec_mean": null, - "tokens_per_sec_max": null - }, - { - "cell_id": "vps50:llamacpp:gemma-4-26b-a4b", - "n_calls": 0, - "n_errors": 0, - "duration_ms_mean": null, - "duration_ms_p50": null, - "tokens_per_sec_mean": null, - "tokens_per_sec_max": null - }, - { - "cell_id": "vps50:llamacpp:qwen3-30b-a3b", - "n_calls": 0, - "n_errors": 0, - "duration_ms_mean": null, - "duration_ms_p50": null, - "tokens_per_sec_mean": null, - "tokens_per_sec_max": null - }, - { - "cell_id": "vps50:llamacpp:qwen2.5-72b", - "n_calls": 0, - "n_errors": 0, - "duration_ms_mean": null, - "duration_ms_p50": null, - "tokens_per_sec_mean": null, - "tokens_per_sec_max": null - } - ], - "n_calls_total": 0, - "n_errors_total": 0, - "started_at": "2026-05-04T23:39:53Z", + "task_kind": "param-sweep", "tags": [ "a3b", "cpu", @@ -68,6 +25,157 @@ "qwen3", "vps50" ], + "headline": "no calls landed", + "janie_blurb_md": null, + "janie_blurb_status": "pending", + "caveat": null, + "caveat_severity": null, + "methodology_ref": "A3B_AND_CPU_OVERNIGHT_2026-05-05", + "methodology_deviations_md": null, + "results_table": [ + { + "label": "phi-4", + "cell_id": "vps50:llamacpp:phi-4", + "n_calls": 0, + "n_errors": 0, + "duration_ms_mean": null, + "duration_ms_p50": null, + "duration_ms_p95": null, + "tokens_per_sec_mean": null, + "tokens_per_sec_p50": null, + "tokens_per_sec_p95": null, + "tokens_per_sec_max": null, + "memory_mb": null, + "watts_avg": null, + "notes": null + }, + { + "label": "gemma-4-26b-a4b", + "cell_id": "vps50:llamacpp:gemma-4-26b-a4b", + "n_calls": 0, + "n_errors": 0, + "duration_ms_mean": null, + "duration_ms_p50": null, + "duration_ms_p95": null, + "tokens_per_sec_mean": null, + "tokens_per_sec_p50": null, + "tokens_per_sec_p95": null, + "tokens_per_sec_max": null, + "memory_mb": null, + "watts_avg": null, + "notes": null + }, + { + "label": "qwen3-30b-a3b", + "cell_id": "vps50:llamacpp:qwen3-30b-a3b", + "n_calls": 0, + "n_errors": 0, + "duration_ms_mean": null, + "duration_ms_p50": null, + "duration_ms_p95": null, + "tokens_per_sec_mean": null, + "tokens_per_sec_p50": null, + "tokens_per_sec_p95": null, + "tokens_per_sec_max": null, + "memory_mb": null, + "watts_avg": null, + "notes": null + }, + { + "label": "qwen2.5-72b", + "cell_id": "vps50:llamacpp:qwen2.5-72b", + "n_calls": 0, + "n_errors": 0, + "duration_ms_mean": null, + "duration_ms_p50": null, + "duration_ms_p95": null, + "tokens_per_sec_mean": null, + "tokens_per_sec_p50": null, + "tokens_per_sec_p95": null, + "tokens_per_sec_max": null, + "memory_mb": null, + "watts_avg": null, + "notes": null + } + ], + "cells": [ + { + "cell_id": "vps50:llamacpp:phi-4", + "n_calls": 0, + "n_errors": 0, + "duration_ms_mean": null, + "duration_ms_p50": null, + "duration_ms_p95": null, + "tokens_per_sec_mean": null, + "tokens_per_sec_p50": null, + "tokens_per_sec_p95": null, + "tokens_per_sec_max": null + }, + { + "cell_id": "vps50:llamacpp:gemma-4-26b-a4b", + "n_calls": 0, + "n_errors": 0, + "duration_ms_mean": null, + "duration_ms_p50": null, + "duration_ms_p95": null, + "tokens_per_sec_mean": null, + "tokens_per_sec_p50": null, + "tokens_per_sec_p95": null, + "tokens_per_sec_max": null + }, + { + "cell_id": "vps50:llamacpp:qwen3-30b-a3b", + "n_calls": 0, + "n_errors": 0, + "duration_ms_mean": null, + "duration_ms_p50": null, + "duration_ms_p95": null, + "tokens_per_sec_mean": null, + "tokens_per_sec_p50": null, + "tokens_per_sec_p95": null, + "tokens_per_sec_max": null + }, + { + "cell_id": "vps50:llamacpp:qwen2.5-72b", + "n_calls": 0, + "n_errors": 0, + "duration_ms_mean": null, + "duration_ms_p50": null, + "duration_ms_p95": null, + "tokens_per_sec_mean": null, + "tokens_per_sec_p50": null, + "tokens_per_sec_p95": null, + "tokens_per_sec_max": null + } + ], + "n_calls_total": 0, + "n_errors_total": 0, + "chart_spec": { + "kind": "line", + "x": "param_value", + "y": "tokens_per_sec_mean", + "series": [ + "phi-4", + "gemma-4-26b-a4b", + "qwen3-30b-a3b", + "qwen2.5-72b" + ], + "data_url": "/data/212d6278-1b9b-45e9-8aae-7eed4d4ec822/run.jsonl" + }, + "site_grade": "archive-only", + "site_grade_reason": "status=meta-only", + "raw_data_urls": { + "jsonl": "/data/212d6278-1b9b-45e9-8aae-7eed4d4ec822/run.jsonl", + "log": "/data/212d6278-1b9b-45e9-8aae-7eed4d4ec822/run.log", + "md": null, + "metadata": "/data/212d6278-1b9b-45e9-8aae-7eed4d4ec822/metadata.json", + "gitea_dir": "https://git.weeyuga.com/slobodanmargetic988/weeyuga-benchmarks-public/src/branch/main/runs/212d6278-1b9b-45e9-8aae-7eed4d4ec822/" + }, + "synthesis_doc": "A3B_AND_CPU_OVERNIGHT_2026-05-05.md", + "synthesis_docs_all": [ + "A3B_AND_CPU_OVERNIGHT_2026-05-05.md" + ], + "related_ids": [], "status": "meta-only", "visibility": "draft", "cells_full": [ @@ -77,7 +185,10 @@ "n_errors": 0, "duration_ms_mean": null, "duration_ms_p50": null, + "duration_ms_p95": null, "tokens_per_sec_mean": null, + "tokens_per_sec_p50": null, + "tokens_per_sec_p95": null, "tokens_per_sec_max": null, "calls": [] }, @@ -87,7 +198,10 @@ "n_errors": 0, "duration_ms_mean": null, "duration_ms_p50": null, + "duration_ms_p95": null, "tokens_per_sec_mean": null, + "tokens_per_sec_p50": null, + "tokens_per_sec_p95": null, "tokens_per_sec_max": null, "calls": [] }, @@ -97,7 +211,10 @@ "n_errors": 0, "duration_ms_mean": null, "duration_ms_p50": null, + "duration_ms_p95": null, "tokens_per_sec_mean": null, + "tokens_per_sec_p50": null, + "tokens_per_sec_p95": null, "tokens_per_sec_max": null, "calls": [] }, @@ -107,7 +224,10 @@ "n_errors": 0, "duration_ms_mean": null, "duration_ms_p50": null, + "duration_ms_p95": null, "tokens_per_sec_mean": null, + "tokens_per_sec_p50": null, + "tokens_per_sec_p95": null, "tokens_per_sec_max": null, "calls": [] } @@ -135,5 +255,5 @@ "qwen2.5-72b" ] }, - "packaged_at": "2026-05-05T17:43:34Z" + "packaged_at": "2026-05-06T08:04:27Z" } diff --git a/runs/23066b38-ea9c-4dd3-b2f5-32912a67fce4/metadata.json b/runs/23066b38-ea9c-4dd3-b2f5-32912a67fce4/metadata.json index 3f9ab54..fe6ff13 100644 --- a/runs/23066b38-ea9c-4dd3-b2f5-32912a67fce4/metadata.json +++ b/runs/23066b38-ea9c-4dd3-b2f5-32912a67fce4/metadata.json @@ -1,20 +1,49 @@ { "id": "23066b38-ea9c-4dd3-b2f5-32912a67fce4", - "title": "Predator Qwen rerun \u2014 23066b38-ea9c-4dd3-b2f5-32912a67fce4", + "title": "Predator Qwen rerun", "date": "2026-05-04", + "started_at": "2026-05-04T22:11:43Z", + "git_sha": "f06800254dfd046e154c94f663fb7f9f66c79ae3", "hardware": "pavilion", + "hardware_label": "Pavilion \u00b7 HP laptop \u00b7 GTX 1050 4 GB \u00b7 16 GB RAM \u00b7 i7-9750H", "engine": "llamacpp", "harness": "pavilion-a3b-1", "model_family": "qwen3", "model_sizes": [ "qwen3:30b-a3b-iq2xxs" ], - "task_kind": null, - "headline": "`Answer chars` is `len(message.content)`. Both averaged over the 3 warm runs.", - "summary_md_path": "runs/23066b38-ea9c-4dd3-b2f5-32912a67fce4.md", - "raw_jsonl_path": "runs/23066b38-ea9c-4dd3-b2f5-32912a67fce4.jsonl", - "log_path": "runs/23066b38-ea9c-4dd3-b2f5-32912a67fce4.log", - "synthesis_doc": "A3B_AND_CPU_OVERNIGHT_2026-05-05.md", + "task_kind": "chat", + "tags": [ + "a3b", + "iq2", + "pavilion", + "qwen3" + ], + "headline": "14 calls across 1 cell(s); ~5.9 tok/s mean; p50 57.8s", + "janie_blurb_md": null, + "janie_blurb_status": "pending", + "caveat": null, + "caveat_severity": null, + "methodology_ref": "A3B_AND_CPU_OVERNIGHT_2026-05-05", + "methodology_deviations_md": null, + "results_table": [ + { + "label": "qwen3:30b-a3b-iq2xxs-think500", + "cell_id": "pavilion:llamacpp:qwen3:30b-a3b-iq2xxs-think500", + "n_calls": 14, + "n_errors": 0, + "duration_ms_mean": 72439.92, + "duration_ms_p50": 57784.0, + "duration_ms_p95": 174786.1, + "tokens_per_sec_mean": 5.88, + "tokens_per_sec_p50": 6.67, + "tokens_per_sec_p95": 8.17, + "tokens_per_sec_max": 8.19, + "memory_mb": null, + "watts_avg": null, + "notes": null + } + ], "cells": [ { "cell_id": "pavilion:llamacpp:qwen3:30b-a3b-iq2xxs-think500", @@ -22,19 +51,39 @@ "n_errors": 0, "duration_ms_mean": 72439.92, "duration_ms_p50": 57784.0, + "duration_ms_p95": 174786.1, "tokens_per_sec_mean": 5.88, + "tokens_per_sec_p50": 6.67, + "tokens_per_sec_p95": 8.17, "tokens_per_sec_max": 8.19 } ], "n_calls_total": 14, "n_errors_total": 0, - "started_at": "2026-05-04T22:11:43Z", - "tags": [ - "a3b", - "iq2", - "pavilion", - "qwen3" + "chart_spec": { + "kind": "bar", + "x": "cell", + "y": "tokens_per_sec_mean", + "series": [ + "qwen3:30b-a3b-iq2xxs-think500" + ], + "data_url": "/data/23066b38-ea9c-4dd3-b2f5-32912a67fce4/run.jsonl" + }, + "site_grade": "flagship", + "site_grade_reason": null, + "raw_data_urls": { + "jsonl": "/data/23066b38-ea9c-4dd3-b2f5-32912a67fce4/run.jsonl", + "log": "/data/23066b38-ea9c-4dd3-b2f5-32912a67fce4/run.log", + "md": "/data/23066b38-ea9c-4dd3-b2f5-32912a67fce4/run.md", + "metadata": "/data/23066b38-ea9c-4dd3-b2f5-32912a67fce4/metadata.json", + "gitea_dir": "https://git.weeyuga.com/slobodanmargetic988/weeyuga-benchmarks-public/src/branch/main/runs/23066b38-ea9c-4dd3-b2f5-32912a67fce4/" + }, + "synthesis_doc": "A3B_AND_CPU_OVERNIGHT_2026-05-05.md", + "synthesis_docs_all": [ + "A3B_AND_CPU_OVERNIGHT_2026-05-05.md", + "A3B_CROSS_MACHINE_2026-05-05.md" ], + "related_ids": [], "status": "complete", "visibility": "draft", "cells_full": [ @@ -44,7 +93,10 @@ "n_errors": 0, "duration_ms_mean": 72439.92, "duration_ms_p50": 57784.0, + "duration_ms_p95": 174786.1, "tokens_per_sec_mean": 5.88, + "tokens_per_sec_p50": 6.67, + "tokens_per_sec_p95": 8.17, "tokens_per_sec_max": 8.19, "calls": [ { @@ -275,5 +327,5 @@ } } }, - "packaged_at": "2026-05-05T17:43:34Z" + "packaged_at": "2026-05-06T08:04:27Z" } diff --git a/runs/487c8a51-7c5c-417d-a0c5-4b6e8491e11f/metadata.json b/runs/487c8a51-7c5c-417d-a0c5-4b6e8491e11f/metadata.json index 8a9b2de..87117bf 100644 --- a/runs/487c8a51-7c5c-417d-a0c5-4b6e8491e11f/metadata.json +++ b/runs/487c8a51-7c5c-417d-a0c5-4b6e8491e11f/metadata.json @@ -1,20 +1,49 @@ { "id": "487c8a51-7c5c-417d-a0c5-4b6e8491e11f", - "title": "Predator Qwen rerun \u2014 487c8a51-7c5c-417d-a0c5-4b6e8491e11f", + "title": "Predator Qwen rerun", "date": "2026-05-04", + "started_at": "2026-05-04T21:24:10Z", + "git_sha": "f06800254dfd046e154c94f663fb7f9f66c79ae3", "hardware": "pavilion", + "hardware_label": "Pavilion \u00b7 HP laptop \u00b7 GTX 1050 4 GB \u00b7 16 GB RAM \u00b7 i7-9750H", "engine": "llamacpp", "harness": "pavilion-a3b-1", "model_family": "qwen3", "model_sizes": [ "qwen3:30b-a3b-iq2xxs" ], - "task_kind": null, - "headline": "`Answer chars` is `len(message.content)`. Both averaged over the 3 warm runs.", - "summary_md_path": "runs/487c8a51-7c5c-417d-a0c5-4b6e8491e11f.md", - "raw_jsonl_path": "runs/487c8a51-7c5c-417d-a0c5-4b6e8491e11f.jsonl", - "log_path": "runs/487c8a51-7c5c-417d-a0c5-4b6e8491e11f.log", - "synthesis_doc": null, + "task_kind": "chat", + "tags": [ + "a3b", + "iq2", + "pavilion", + "qwen3" + ], + "headline": "no calls landed", + "janie_blurb_md": null, + "janie_blurb_status": "pending", + "caveat": null, + "caveat_severity": null, + "methodology_ref": "HARNESS#pavilion-a3b-1", + "methodology_deviations_md": null, + "results_table": [ + { + "label": "qwen3:30b-a3b-iq2xxs-think500", + "cell_id": "pavilion:llamacpp:qwen3:30b-a3b-iq2xxs-think500", + "n_calls": 0, + "n_errors": 0, + "duration_ms_mean": null, + "duration_ms_p50": null, + "duration_ms_p95": null, + "tokens_per_sec_mean": null, + "tokens_per_sec_p50": null, + "tokens_per_sec_p95": null, + "tokens_per_sec_max": null, + "memory_mb": null, + "watts_avg": null, + "notes": null + } + ], "cells": [ { "cell_id": "pavilion:llamacpp:qwen3:30b-a3b-iq2xxs-think500", @@ -22,19 +51,36 @@ "n_errors": 0, "duration_ms_mean": null, "duration_ms_p50": null, + "duration_ms_p95": null, "tokens_per_sec_mean": null, + "tokens_per_sec_p50": null, + "tokens_per_sec_p95": null, "tokens_per_sec_max": null } ], "n_calls_total": 0, "n_errors_total": 0, - "started_at": "2026-05-04T21:24:10Z", - "tags": [ - "a3b", - "iq2", - "pavilion", - "qwen3" - ], + "chart_spec": { + "kind": "bar", + "x": "cell", + "y": "tokens_per_sec_mean", + "series": [ + "qwen3:30b-a3b-iq2xxs-think500" + ], + "data_url": "/data/487c8a51-7c5c-417d-a0c5-4b6e8491e11f/run.jsonl" + }, + "site_grade": "archive-only", + "site_grade_reason": "status=meta-only", + "raw_data_urls": { + "jsonl": "/data/487c8a51-7c5c-417d-a0c5-4b6e8491e11f/run.jsonl", + "log": "/data/487c8a51-7c5c-417d-a0c5-4b6e8491e11f/run.log", + "md": "/data/487c8a51-7c5c-417d-a0c5-4b6e8491e11f/run.md", + "metadata": "/data/487c8a51-7c5c-417d-a0c5-4b6e8491e11f/metadata.json", + "gitea_dir": "https://git.weeyuga.com/slobodanmargetic988/weeyuga-benchmarks-public/src/branch/main/runs/487c8a51-7c5c-417d-a0c5-4b6e8491e11f/" + }, + "synthesis_doc": null, + "synthesis_docs_all": [], + "related_ids": [], "status": "meta-only", "visibility": "draft", "cells_full": [ @@ -44,7 +90,10 @@ "n_errors": 0, "duration_ms_mean": null, "duration_ms_p50": null, + "duration_ms_p95": null, "tokens_per_sec_mean": null, + "tokens_per_sec_p50": null, + "tokens_per_sec_p95": null, "tokens_per_sec_max": null, "calls": [] } @@ -70,5 +119,5 @@ } } }, - "packaged_at": "2026-05-05T17:43:34Z" + "packaged_at": "2026-05-06T08:04:27Z" } diff --git a/runs/5f4d9e97-4294-4e69-9e6c-f318bca1ce44/metadata.json b/runs/5f4d9e97-4294-4e69-9e6c-f318bca1ce44/metadata.json index d80d20a..b171749 100644 --- a/runs/5f4d9e97-4294-4e69-9e6c-f318bca1ce44/metadata.json +++ b/runs/5f4d9e97-4294-4e69-9e6c-f318bca1ce44/metadata.json @@ -2,19 +2,48 @@ "id": "5f4d9e97-4294-4e69-9e6c-f318bca1ce44", "title": "pavilion-a3b-1 \u2014 qwen3 on pavilion", "date": "2026-05-04", + "started_at": "2026-05-04T20:38:52Z", + "git_sha": "f06800254dfd046e154c94f663fb7f9f66c79ae3", "hardware": "pavilion", + "hardware_label": "Pavilion \u00b7 HP laptop \u00b7 GTX 1050 4 GB \u00b7 16 GB RAM \u00b7 i7-9750H", "engine": "llamacpp", "harness": "pavilion-a3b-1", "model_family": "qwen3", "model_sizes": [ "qwen3:30b-a3b-iq2xxs" ], - "task_kind": null, + "task_kind": "chat", + "tags": [ + "a3b", + "iq2", + "pavilion", + "qwen3" + ], "headline": "no calls landed", - "summary_md_path": null, - "raw_jsonl_path": "runs/5f4d9e97-4294-4e69-9e6c-f318bca1ce44.jsonl", - "log_path": "runs/5f4d9e97-4294-4e69-9e6c-f318bca1ce44.log", - "synthesis_doc": null, + "janie_blurb_md": null, + "janie_blurb_status": "pending", + "caveat": null, + "caveat_severity": null, + "methodology_ref": "HARNESS#pavilion-a3b-1", + "methodology_deviations_md": null, + "results_table": [ + { + "label": "qwen3:30b-a3b-iq2xxs-think500", + "cell_id": "pavilion:llamacpp:qwen3:30b-a3b-iq2xxs-think500", + "n_calls": 0, + "n_errors": 0, + "duration_ms_mean": null, + "duration_ms_p50": null, + "duration_ms_p95": null, + "tokens_per_sec_mean": null, + "tokens_per_sec_p50": null, + "tokens_per_sec_p95": null, + "tokens_per_sec_max": null, + "memory_mb": null, + "watts_avg": null, + "notes": null + } + ], "cells": [ { "cell_id": "pavilion:llamacpp:qwen3:30b-a3b-iq2xxs-think500", @@ -22,19 +51,36 @@ "n_errors": 0, "duration_ms_mean": null, "duration_ms_p50": null, + "duration_ms_p95": null, "tokens_per_sec_mean": null, + "tokens_per_sec_p50": null, + "tokens_per_sec_p95": null, "tokens_per_sec_max": null } ], "n_calls_total": 0, "n_errors_total": 0, - "started_at": "2026-05-04T20:38:52Z", - "tags": [ - "a3b", - "iq2", - "pavilion", - "qwen3" - ], + "chart_spec": { + "kind": "bar", + "x": "cell", + "y": "tokens_per_sec_mean", + "series": [ + "qwen3:30b-a3b-iq2xxs-think500" + ], + "data_url": "/data/5f4d9e97-4294-4e69-9e6c-f318bca1ce44/run.jsonl" + }, + "site_grade": "archive-only", + "site_grade_reason": "status=meta-only", + "raw_data_urls": { + "jsonl": "/data/5f4d9e97-4294-4e69-9e6c-f318bca1ce44/run.jsonl", + "log": "/data/5f4d9e97-4294-4e69-9e6c-f318bca1ce44/run.log", + "md": null, + "metadata": "/data/5f4d9e97-4294-4e69-9e6c-f318bca1ce44/metadata.json", + "gitea_dir": "https://git.weeyuga.com/slobodanmargetic988/weeyuga-benchmarks-public/src/branch/main/runs/5f4d9e97-4294-4e69-9e6c-f318bca1ce44/" + }, + "synthesis_doc": null, + "synthesis_docs_all": [], + "related_ids": [], "status": "meta-only", "visibility": "draft", "cells_full": [ @@ -44,7 +90,10 @@ "n_errors": 0, "duration_ms_mean": null, "duration_ms_p50": null, + "duration_ms_p95": null, "tokens_per_sec_mean": null, + "tokens_per_sec_p50": null, + "tokens_per_sec_p95": null, "tokens_per_sec_max": null, "calls": [] } @@ -70,5 +119,5 @@ } } }, - "packaged_at": "2026-05-05T17:43:34Z" + "packaged_at": "2026-05-06T08:04:27Z" } diff --git a/runs/5fb2913d-6500-4ecf-9e97-d43f7dd61145/metadata.json b/runs/5fb2913d-6500-4ecf-9e97-d43f7dd61145/metadata.json index a4e1305..45402df 100644 --- a/runs/5fb2913d-6500-4ecf-9e97-d43f7dd61145/metadata.json +++ b/runs/5fb2913d-6500-4ecf-9e97-d43f7dd61145/metadata.json @@ -1,20 +1,49 @@ { "id": "5fb2913d-6500-4ecf-9e97-d43f7dd61145", - "title": "Predator Qwen rerun \u2014 5fb2913d-6500-4ecf-9e97-d43f7dd61145", + "title": "Predator Qwen rerun", "date": "2026-05-04", + "started_at": "2026-05-04T22:02:17Z", + "git_sha": "f06800254dfd046e154c94f663fb7f9f66c79ae3", "hardware": "predator", + "hardware_label": "Predator \u00b7 gaming laptop \u00b7 GTX 1060 6 GB \u00b7 28 GB RAM", "engine": "llamacpp", "harness": "predator-a3b-1", "model_family": "qwen3", "model_sizes": [ "qwen3:30b-a3b-iq2m" ], - "task_kind": null, - "headline": "`Answer chars` is `len(message.content)`. Both averaged over the 3 warm runs.", - "summary_md_path": "runs/5fb2913d-6500-4ecf-9e97-d43f7dd61145.md", - "raw_jsonl_path": "runs/5fb2913d-6500-4ecf-9e97-d43f7dd61145.jsonl", - "log_path": "runs/5fb2913d-6500-4ecf-9e97-d43f7dd61145.log", - "synthesis_doc": "A3B_NGL_RETUNE_2026-05-05.md", + "task_kind": "chat", + "tags": [ + "a3b", + "iq2", + "predator", + "qwen3" + ], + "headline": "14 calls across 1 cell(s); ~3.9 tok/s mean; p50 101.9s", + "janie_blurb_md": null, + "janie_blurb_status": "pending", + "caveat": null, + "caveat_severity": null, + "methodology_ref": "A3B_AND_CPU_OVERNIGHT_2026-05-05", + "methodology_deviations_md": null, + "results_table": [ + { + "label": "qwen3:30b-a3b-iq2m-think500", + "cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500", + "n_calls": 14, + "n_errors": 0, + "duration_ms_mean": 107312.08, + "duration_ms_p50": 101920.0, + "duration_ms_p95": 211459.2, + "tokens_per_sec_mean": 3.87, + "tokens_per_sec_p50": 3.99, + "tokens_per_sec_p95": 4.06, + "tokens_per_sec_max": 4.06, + "memory_mb": null, + "watts_avg": null, + "notes": null + } + ], "cells": [ { "cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500", @@ -22,19 +51,40 @@ "n_errors": 0, "duration_ms_mean": 107312.08, "duration_ms_p50": 101920.0, + "duration_ms_p95": 211459.2, "tokens_per_sec_mean": 3.87, + "tokens_per_sec_p50": 3.99, + "tokens_per_sec_p95": 4.06, "tokens_per_sec_max": 4.06 } ], "n_calls_total": 14, "n_errors_total": 0, - "started_at": "2026-05-04T22:02:17Z", - "tags": [ - "a3b", - "iq2", - "predator", - "qwen3" + "chart_spec": { + "kind": "bar", + "x": "cell", + "y": "tokens_per_sec_mean", + "series": [ + "qwen3:30b-a3b-iq2m-think500" + ], + "data_url": "/data/5fb2913d-6500-4ecf-9e97-d43f7dd61145/run.jsonl" + }, + "site_grade": "flagship", + "site_grade_reason": null, + "raw_data_urls": { + "jsonl": "/data/5fb2913d-6500-4ecf-9e97-d43f7dd61145/run.jsonl", + "log": "/data/5fb2913d-6500-4ecf-9e97-d43f7dd61145/run.log", + "md": "/data/5fb2913d-6500-4ecf-9e97-d43f7dd61145/run.md", + "metadata": "/data/5fb2913d-6500-4ecf-9e97-d43f7dd61145/metadata.json", + "gitea_dir": "https://git.weeyuga.com/slobodanmargetic988/weeyuga-benchmarks-public/src/branch/main/runs/5fb2913d-6500-4ecf-9e97-d43f7dd61145/" + }, + "synthesis_doc": "A3B_AND_CPU_OVERNIGHT_2026-05-05.md", + "synthesis_docs_all": [ + "A3B_AND_CPU_OVERNIGHT_2026-05-05.md", + "A3B_CROSS_MACHINE_2026-05-05.md", + "A3B_NGL_RETUNE_2026-05-05.md" ], + "related_ids": [], "status": "complete", "visibility": "draft", "cells_full": [ @@ -44,7 +94,10 @@ "n_errors": 0, "duration_ms_mean": 107312.08, "duration_ms_p50": 101920.0, + "duration_ms_p95": 211459.2, "tokens_per_sec_mean": 3.87, + "tokens_per_sec_p50": 3.99, + "tokens_per_sec_p95": 4.06, "tokens_per_sec_max": 4.06, "calls": [ { @@ -273,5 +326,5 @@ } } }, - "packaged_at": "2026-05-05T17:43:34Z" + "packaged_at": "2026-05-06T08:04:27Z" } diff --git a/runs/79afe366-1055-4e45-adf6-593864a530e8/metadata.json b/runs/79afe366-1055-4e45-adf6-593864a530e8/metadata.json index 98996dd..e16f2ca 100644 --- a/runs/79afe366-1055-4e45-adf6-593864a530e8/metadata.json +++ b/runs/79afe366-1055-4e45-adf6-593864a530e8/metadata.json @@ -2,51 +2,17 @@ "id": "79afe366-1055-4e45-adf6-593864a530e8", "title": "predator-a3b-ngl6-ctx-1 \u2014 qwen3 on predator", "date": "2026-05-05", + "started_at": "2026-05-05T09:44:34Z", + "git_sha": "f06800254dfd046e154c94f663fb7f9f66c79ae3", "hardware": "predator", + "hardware_label": "Predator \u00b7 gaming laptop \u00b7 GTX 1060 6 GB \u00b7 28 GB RAM", "engine": "llamacpp", "harness": "predator-a3b-ngl6-ctx-1", "model_family": "qwen3", "model_sizes": [ "qwen3:30b-a3b-iq2m" ], - "task_kind": null, - "headline": "45 calls across 3 cell(s); ~5.2 tok/s mean; p50 84.8s", - "summary_md_path": null, - "raw_jsonl_path": "runs/79afe366-1055-4e45-adf6-593864a530e8.jsonl", - "log_path": "runs/79afe366-1055-4e45-adf6-593864a530e8.log", - "synthesis_doc": "A3B_AND_CPU_OVERNIGHT_2026-05-05.md", - "cells": [ - { - "cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl6-ctx64k", - "n_calls": 15, - "n_errors": 0, - "duration_ms_mean": 81326.17, - "duration_ms_p50": 73215.0, - "tokens_per_sec_mean": 5.42, - "tokens_per_sec_max": 5.93 - }, - { - "cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl6-ctx96k", - "n_calls": 15, - "n_errors": 0, - "duration_ms_mean": 84532.92, - "duration_ms_p50": 99578.5, - "tokens_per_sec_mean": 5.43, - "tokens_per_sec_max": 5.94 - }, - { - "cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl6-ctx131k", - "n_calls": 15, - "n_errors": 0, - "duration_ms_mean": 91748.5, - "duration_ms_p50": 98184.0, - "tokens_per_sec_mean": 4.79, - "tokens_per_sec_max": 4.93 - } - ], - "n_calls_total": 45, - "n_errors_total": 0, - "started_at": "2026-05-05T09:44:34Z", + "task_kind": "param-sweep", "tags": [ "a3b", "ctx", @@ -55,6 +21,128 @@ "predator", "qwen3" ], + "headline": "45 calls across 3 cell(s); ~5.2 tok/s mean; p50 84.8s", + "janie_blurb_md": null, + "janie_blurb_status": "pending", + "caveat": null, + "caveat_severity": null, + "methodology_ref": "A3B_AND_CPU_OVERNIGHT_2026-05-05", + "methodology_deviations_md": null, + "results_table": [ + { + "label": "qwen3:30b-a3b-iq2m-think500-ngl6-ctx64k", + "cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl6-ctx64k", + "n_calls": 15, + "n_errors": 0, + "duration_ms_mean": 81326.17, + "duration_ms_p50": 73215.0, + "duration_ms_p95": 152048.1, + "tokens_per_sec_mean": 5.42, + "tokens_per_sec_p50": 5.35, + "tokens_per_sec_p95": 5.91, + "tokens_per_sec_max": 5.93, + "memory_mb": null, + "watts_avg": null, + "notes": null + }, + { + "label": "qwen3:30b-a3b-iq2m-think500-ngl6-ctx96k", + "cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl6-ctx96k", + "n_calls": 15, + "n_errors": 0, + "duration_ms_mean": 84532.92, + "duration_ms_p50": 99578.5, + "duration_ms_p95": 152059.4, + "tokens_per_sec_mean": 5.43, + "tokens_per_sec_p50": 5.38, + "tokens_per_sec_p95": 5.78, + "tokens_per_sec_max": 5.94, + "memory_mb": null, + "watts_avg": null, + "notes": null + }, + { + "label": "qwen3:30b-a3b-iq2m-think500-ngl6-ctx131k", + "cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl6-ctx131k", + "n_calls": 15, + "n_errors": 0, + "duration_ms_mean": 91748.5, + "duration_ms_p50": 98184.0, + "duration_ms_p95": 161421.6, + "tokens_per_sec_mean": 4.79, + "tokens_per_sec_p50": 4.8, + "tokens_per_sec_p95": 4.93, + "tokens_per_sec_max": 4.93, + "memory_mb": null, + "watts_avg": null, + "notes": null + } + ], + "cells": [ + { + "cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl6-ctx64k", + "n_calls": 15, + "n_errors": 0, + "duration_ms_mean": 81326.17, + "duration_ms_p50": 73215.0, + "duration_ms_p95": 152048.1, + "tokens_per_sec_mean": 5.42, + "tokens_per_sec_p50": 5.35, + "tokens_per_sec_p95": 5.91, + "tokens_per_sec_max": 5.93 + }, + { + "cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl6-ctx96k", + "n_calls": 15, + "n_errors": 0, + "duration_ms_mean": 84532.92, + "duration_ms_p50": 99578.5, + "duration_ms_p95": 152059.4, + "tokens_per_sec_mean": 5.43, + "tokens_per_sec_p50": 5.38, + "tokens_per_sec_p95": 5.78, + "tokens_per_sec_max": 5.94 + }, + { + "cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl6-ctx131k", + "n_calls": 15, + "n_errors": 0, + "duration_ms_mean": 91748.5, + "duration_ms_p50": 98184.0, + "duration_ms_p95": 161421.6, + "tokens_per_sec_mean": 4.79, + "tokens_per_sec_p50": 4.8, + "tokens_per_sec_p95": 4.93, + "tokens_per_sec_max": 4.93 + } + ], + "n_calls_total": 45, + "n_errors_total": 0, + "chart_spec": { + "kind": "line", + "x": "param_value", + "y": "tokens_per_sec_mean", + "series": [ + "qwen3:30b-a3b-iq2m-think500-ngl6-ctx64k", + "qwen3:30b-a3b-iq2m-think500-ngl6-ctx96k", + "qwen3:30b-a3b-iq2m-think500-ngl6-ctx131k" + ], + "data_url": "/data/79afe366-1055-4e45-adf6-593864a530e8/run.jsonl" + }, + "site_grade": "standard", + "site_grade_reason": null, + "raw_data_urls": { + "jsonl": "/data/79afe366-1055-4e45-adf6-593864a530e8/run.jsonl", + "log": "/data/79afe366-1055-4e45-adf6-593864a530e8/run.log", + "md": null, + "metadata": "/data/79afe366-1055-4e45-adf6-593864a530e8/metadata.json", + "gitea_dir": "https://git.weeyuga.com/slobodanmargetic988/weeyuga-benchmarks-public/src/branch/main/runs/79afe366-1055-4e45-adf6-593864a530e8/" + }, + "synthesis_doc": "A3B_AND_CPU_OVERNIGHT_2026-05-05.md", + "synthesis_docs_all": [ + "A3B_AND_CPU_OVERNIGHT_2026-05-05.md" + ], + "related_ids": [], "status": "complete", "visibility": "draft", "cells_full": [ @@ -64,7 +152,10 @@ "n_errors": 0, "duration_ms_mean": 81326.17, "duration_ms_p50": 73215.0, + "duration_ms_p95": 152048.1, "tokens_per_sec_mean": 5.42, + "tokens_per_sec_p50": 5.35, + "tokens_per_sec_p95": 5.91, "tokens_per_sec_max": 5.93, "calls": [ { @@ -315,7 +406,10 @@ "n_errors": 0, "duration_ms_mean": 84532.92, "duration_ms_p50": 99578.5, + "duration_ms_p95": 152059.4, "tokens_per_sec_mean": 5.43, + "tokens_per_sec_p50": 5.38, + "tokens_per_sec_p95": 5.78, "tokens_per_sec_max": 5.94, "calls": [ { @@ -566,7 +660,10 @@ "n_errors": 0, "duration_ms_mean": 91748.5, "duration_ms_p50": 98184.0, + "duration_ms_p95": 161421.6, "tokens_per_sec_mean": 4.79, + "tokens_per_sec_p50": 4.8, + "tokens_per_sec_p95": 4.93, "tokens_per_sec_max": 4.93, "calls": [ { @@ -845,5 +942,5 @@ "P-HARD": 2048 } }, - "packaged_at": "2026-05-05T17:43:34Z" + "packaged_at": "2026-05-06T08:04:27Z" } diff --git a/runs/86241185-24da-4f8e-b254-a60c9465e353/metadata.json b/runs/86241185-24da-4f8e-b254-a60c9465e353/metadata.json index 7e34f8d..cd56d97 100644 --- a/runs/86241185-24da-4f8e-b254-a60c9465e353/metadata.json +++ b/runs/86241185-24da-4f8e-b254-a60c9465e353/metadata.json @@ -1,64 +1,21 @@ { "id": "86241185-24da-4f8e-b254-a60c9465e353", - "title": "vps50-cpu-matrix-1 \u2014 gemma/phi-4/qwen2.5/qwen3 on vps50", + "title": "vps50-cpu-matrix-1 \u2014 gemma/phi/qwen2.5/qwen3 on vps50", "date": "2026-05-05", + "started_at": "2026-05-05T03:07:15Z", + "git_sha": "f06800254dfd046e154c94f663fb7f9f66c79ae3", "hardware": "vps50", + "hardware_label": "VPS50 \u00b7 cloud \u00b7 16 vCPU AMD EPYC \u00b7 62 GB RAM \u00b7 no GPU", "engine": "llamacpp", "harness": "vps50-cpu-matrix-1", - "model_family": "gemma/phi-4/qwen2.5/qwen3", + "model_family": "gemma/phi/qwen2.5/qwen3", "model_sizes": [ "phi-4", "gemma-4-26b-a4b", "qwen3-30b-a3b", "qwen2.5-72b" ], - "task_kind": null, - "headline": "no calls landed", - "summary_md_path": null, - "raw_jsonl_path": "runs/86241185-24da-4f8e-b254-a60c9465e353.jsonl", - "log_path": "runs/86241185-24da-4f8e-b254-a60c9465e353.log", - "synthesis_doc": null, - "cells": [ - { - "cell_id": "vps50:llamacpp:phi-4", - "n_calls": 0, - "n_errors": 0, - "duration_ms_mean": null, - "duration_ms_p50": null, - "tokens_per_sec_mean": null, - "tokens_per_sec_max": null - }, - { - "cell_id": "vps50:llamacpp:gemma-4-26b-a4b", - "n_calls": 0, - "n_errors": 0, - "duration_ms_mean": null, - "duration_ms_p50": null, - "tokens_per_sec_mean": null, - "tokens_per_sec_max": null - }, - { - "cell_id": "vps50:llamacpp:qwen3-30b-a3b", - "n_calls": 0, - "n_errors": 0, - "duration_ms_mean": null, - "duration_ms_p50": null, - "tokens_per_sec_mean": null, - "tokens_per_sec_max": null - }, - { - "cell_id": "vps50:llamacpp:qwen2.5-72b", - "n_calls": 0, - "n_errors": 0, - "duration_ms_mean": null, - "duration_ms_p50": null, - "tokens_per_sec_mean": null, - "tokens_per_sec_max": null - } - ], - "n_calls_total": 0, - "n_errors_total": 0, - "started_at": "2026-05-05T03:07:15Z", + "task_kind": "param-sweep", "tags": [ "a3b", "cpu", @@ -68,6 +25,155 @@ "qwen3", "vps50" ], + "headline": "no calls landed", + "janie_blurb_md": null, + "janie_blurb_status": "pending", + "caveat": null, + "caveat_severity": null, + "methodology_ref": "HARNESS#vps50-cpu-matrix-1", + "methodology_deviations_md": null, + "results_table": [ + { + "label": "phi-4", + "cell_id": "vps50:llamacpp:phi-4", + "n_calls": 0, + "n_errors": 0, + "duration_ms_mean": null, + "duration_ms_p50": null, + "duration_ms_p95": null, + "tokens_per_sec_mean": null, + "tokens_per_sec_p50": null, + "tokens_per_sec_p95": null, + "tokens_per_sec_max": null, + "memory_mb": null, + "watts_avg": null, + "notes": null + }, + { + "label": "gemma-4-26b-a4b", + "cell_id": "vps50:llamacpp:gemma-4-26b-a4b", + "n_calls": 0, + "n_errors": 0, + "duration_ms_mean": null, + "duration_ms_p50": null, + "duration_ms_p95": null, + "tokens_per_sec_mean": null, + "tokens_per_sec_p50": null, + "tokens_per_sec_p95": null, + "tokens_per_sec_max": null, + "memory_mb": null, + "watts_avg": null, + "notes": null + }, + { + "label": "qwen3-30b-a3b", + "cell_id": "vps50:llamacpp:qwen3-30b-a3b", + "n_calls": 0, + "n_errors": 0, + "duration_ms_mean": null, + "duration_ms_p50": null, + "duration_ms_p95": null, + "tokens_per_sec_mean": null, + "tokens_per_sec_p50": null, + "tokens_per_sec_p95": null, + "tokens_per_sec_max": null, + "memory_mb": null, + "watts_avg": null, + "notes": null + }, + { + "label": "qwen2.5-72b", + "cell_id": "vps50:llamacpp:qwen2.5-72b", + "n_calls": 0, + "n_errors": 0, + "duration_ms_mean": null, + "duration_ms_p50": null, + "duration_ms_p95": null, + "tokens_per_sec_mean": null, + "tokens_per_sec_p50": null, + "tokens_per_sec_p95": null, + "tokens_per_sec_max": null, + "memory_mb": null, + "watts_avg": null, + "notes": null + } + ], + "cells": [ + { + "cell_id": "vps50:llamacpp:phi-4", + "n_calls": 0, + "n_errors": 0, + "duration_ms_mean": null, + "duration_ms_p50": null, + "duration_ms_p95": null, + "tokens_per_sec_mean": null, + "tokens_per_sec_p50": null, + "tokens_per_sec_p95": null, + "tokens_per_sec_max": null + }, + { + "cell_id": "vps50:llamacpp:gemma-4-26b-a4b", + "n_calls": 0, + "n_errors": 0, + "duration_ms_mean": null, + "duration_ms_p50": null, + "duration_ms_p95": null, + "tokens_per_sec_mean": null, + "tokens_per_sec_p50": null, + "tokens_per_sec_p95": null, + "tokens_per_sec_max": null + }, + { + "cell_id": "vps50:llamacpp:qwen3-30b-a3b", + "n_calls": 0, + "n_errors": 0, + "duration_ms_mean": null, + "duration_ms_p50": null, + "duration_ms_p95": null, + "tokens_per_sec_mean": null, + "tokens_per_sec_p50": null, + "tokens_per_sec_p95": null, + "tokens_per_sec_max": null + }, + { + "cell_id": "vps50:llamacpp:qwen2.5-72b", + "n_calls": 0, + "n_errors": 0, + "duration_ms_mean": null, + "duration_ms_p50": null, + "duration_ms_p95": null, + "tokens_per_sec_mean": null, + "tokens_per_sec_p50": null, + "tokens_per_sec_p95": null, + "tokens_per_sec_max": null + } + ], + "n_calls_total": 0, + "n_errors_total": 0, + "chart_spec": { + "kind": "line", + "x": "param_value", + "y": "tokens_per_sec_mean", + "series": [ + "phi-4", + "gemma-4-26b-a4b", + "qwen3-30b-a3b", + "qwen2.5-72b" + ], + "data_url": "/data/86241185-24da-4f8e-b254-a60c9465e353/run.jsonl" + }, + "site_grade": "archive-only", + "site_grade_reason": "status=meta-only", + "raw_data_urls": { + "jsonl": "/data/86241185-24da-4f8e-b254-a60c9465e353/run.jsonl", + "log": "/data/86241185-24da-4f8e-b254-a60c9465e353/run.log", + "md": null, + "metadata": "/data/86241185-24da-4f8e-b254-a60c9465e353/metadata.json", + "gitea_dir": "https://git.weeyuga.com/slobodanmargetic988/weeyuga-benchmarks-public/src/branch/main/runs/86241185-24da-4f8e-b254-a60c9465e353/" + }, + "synthesis_doc": null, + "synthesis_docs_all": [], + "related_ids": [], "status": "meta-only", "visibility": "draft", "cells_full": [ @@ -77,7 +183,10 @@ "n_errors": 0, "duration_ms_mean": null, "duration_ms_p50": null, + "duration_ms_p95": null, "tokens_per_sec_mean": null, + "tokens_per_sec_p50": null, + "tokens_per_sec_p95": null, "tokens_per_sec_max": null, "calls": [] }, @@ -87,7 +196,10 @@ "n_errors": 0, "duration_ms_mean": null, "duration_ms_p50": null, + "duration_ms_p95": null, "tokens_per_sec_mean": null, + "tokens_per_sec_p50": null, + "tokens_per_sec_p95": null, "tokens_per_sec_max": null, "calls": [] }, @@ -97,7 +209,10 @@ "n_errors": 0, "duration_ms_mean": null, "duration_ms_p50": null, + "duration_ms_p95": null, "tokens_per_sec_mean": null, + "tokens_per_sec_p50": null, + "tokens_per_sec_p95": null, "tokens_per_sec_max": null, "calls": [] }, @@ -107,7 +222,10 @@ "n_errors": 0, "duration_ms_mean": null, "duration_ms_p50": null, + "duration_ms_p95": null, "tokens_per_sec_mean": null, + "tokens_per_sec_p50": null, + "tokens_per_sec_p95": null, "tokens_per_sec_max": null, "calls": [] } @@ -135,5 +253,5 @@ "qwen2.5-72b" ] }, - "packaged_at": "2026-05-05T17:43:34Z" + "packaged_at": "2026-05-06T08:04:27Z" } diff --git a/runs/8ec3c8cb-58a7-4f33-974c-9c1bfb7824ef/metadata.json b/runs/8ec3c8cb-58a7-4f33-974c-9c1bfb7824ef/metadata.json index 8f666d5..317b2ff 100644 --- a/runs/8ec3c8cb-58a7-4f33-974c-9c1bfb7824ef/metadata.json +++ b/runs/8ec3c8cb-58a7-4f33-974c-9c1bfb7824ef/metadata.json @@ -2,19 +2,48 @@ "id": "8ec3c8cb-58a7-4f33-974c-9c1bfb7824ef", "title": "predator-a3b-1 \u2014 qwen3 on predator", "date": "2026-05-04", + "started_at": "2026-05-04T21:24:11Z", + "git_sha": "f06800254dfd046e154c94f663fb7f9f66c79ae3", "hardware": "predator", + "hardware_label": "Predator \u00b7 gaming laptop \u00b7 GTX 1060 6 GB \u00b7 28 GB RAM", "engine": "llamacpp", "harness": "predator-a3b-1", "model_family": "qwen3", "model_sizes": [ "qwen3:30b-a3b-iq2m" ], - "task_kind": null, + "task_kind": "chat", + "tags": [ + "a3b", + "iq2", + "predator", + "qwen3" + ], "headline": "no calls landed", - "summary_md_path": null, - "raw_jsonl_path": "runs/8ec3c8cb-58a7-4f33-974c-9c1bfb7824ef.jsonl", - "log_path": "runs/8ec3c8cb-58a7-4f33-974c-9c1bfb7824ef.log", - "synthesis_doc": null, + "janie_blurb_md": null, + "janie_blurb_status": "pending", + "caveat": null, + "caveat_severity": null, + "methodology_ref": "HARNESS#predator-a3b-1", + "methodology_deviations_md": null, + "results_table": [ + { + "label": "qwen3:30b-a3b-iq2m-think500", + "cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500", + "n_calls": 0, + "n_errors": 0, + "duration_ms_mean": null, + "duration_ms_p50": null, + "duration_ms_p95": null, + "tokens_per_sec_mean": null, + "tokens_per_sec_p50": null, + "tokens_per_sec_p95": null, + "tokens_per_sec_max": null, + "memory_mb": null, + "watts_avg": null, + "notes": null + } + ], "cells": [ { "cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500", @@ -22,19 +51,36 @@ "n_errors": 0, "duration_ms_mean": null, "duration_ms_p50": null, + "duration_ms_p95": null, "tokens_per_sec_mean": null, + "tokens_per_sec_p50": null, + "tokens_per_sec_p95": null, "tokens_per_sec_max": null } ], "n_calls_total": 0, "n_errors_total": 0, - "started_at": "2026-05-04T21:24:11Z", - "tags": [ - "a3b", - "iq2", - "predator", - "qwen3" - ], + "chart_spec": { + "kind": "bar", + "x": "cell", + "y": "tokens_per_sec_mean", + "series": [ + "qwen3:30b-a3b-iq2m-think500" + ], + "data_url": "/data/8ec3c8cb-58a7-4f33-974c-9c1bfb7824ef/run.jsonl" + }, + "site_grade": "archive-only", + "site_grade_reason": "status=meta-only", + "raw_data_urls": { + "jsonl": "/data/8ec3c8cb-58a7-4f33-974c-9c1bfb7824ef/run.jsonl", + "log": "/data/8ec3c8cb-58a7-4f33-974c-9c1bfb7824ef/run.log", + "md": null, + "metadata": "/data/8ec3c8cb-58a7-4f33-974c-9c1bfb7824ef/metadata.json", + "gitea_dir": "https://git.weeyuga.com/slobodanmargetic988/weeyuga-benchmarks-public/src/branch/main/runs/8ec3c8cb-58a7-4f33-974c-9c1bfb7824ef/" + }, + "synthesis_doc": null, + "synthesis_docs_all": [], + "related_ids": [], "status": "meta-only", "visibility": "draft", "cells_full": [ @@ -44,7 +90,10 @@ "n_errors": 0, "duration_ms_mean": null, "duration_ms_p50": null, + "duration_ms_p95": null, "tokens_per_sec_mean": null, + "tokens_per_sec_p50": null, + "tokens_per_sec_p95": null, "tokens_per_sec_max": null, "calls": [] } @@ -68,5 +117,5 @@ } } }, - "packaged_at": "2026-05-05T17:43:34Z" + "packaged_at": "2026-05-06T08:04:27Z" } diff --git a/runs/91751afd-068a-477b-8f40-6e1963f803f1/metadata.json b/runs/91751afd-068a-477b-8f40-6e1963f803f1/metadata.json index 6a947d7..9239f57 100644 --- a/runs/91751afd-068a-477b-8f40-6e1963f803f1/metadata.json +++ b/runs/91751afd-068a-477b-8f40-6e1963f803f1/metadata.json @@ -1,11 +1,14 @@ { "id": "91751afd-068a-477b-8f40-6e1963f803f1", - "title": "vps50-cpu-matrix-1 \u2014 gemma/phi-4/qwen2.5/qwen3 on vps50", + "title": "vps50-cpu-matrix-1 \u2014 gemma/phi/qwen2.5/qwen3 on vps50", "date": "2026-05-05", + "started_at": "2026-05-05T10:23:13Z", + "git_sha": "f06800254dfd046e154c94f663fb7f9f66c79ae3", "hardware": "vps50", + "hardware_label": "VPS50 \u00b7 cloud \u00b7 16 vCPU AMD EPYC \u00b7 62 GB RAM \u00b7 no GPU", "engine": "llamacpp", "harness": "vps50-cpu-matrix-1", - "model_family": "gemma/phi-4/qwen2.5/qwen3", + "model_family": "gemma/phi/qwen2.5/qwen3", "model_sizes": [ "phi-4", "gemma-4-26b-a4b", @@ -13,62 +16,7 @@ "qwen2.5-72b", "gemma-4-26b-a4b-it" ], - "task_kind": null, - "headline": "2 calls across 5 cell(s)", - "summary_md_path": null, - "raw_jsonl_path": "runs/91751afd-068a-477b-8f40-6e1963f803f1.jsonl", - "log_path": "runs/91751afd-068a-477b-8f40-6e1963f803f1.log", - "synthesis_doc": "A3B_AND_CPU_OVERNIGHT_2026-05-05.md", - "cells": [ - { - "cell_id": "vps50:llamacpp:phi-4", - "n_calls": 0, - "n_errors": 0, - "duration_ms_mean": null, - "duration_ms_p50": null, - "tokens_per_sec_mean": null, - "tokens_per_sec_max": null - }, - { - "cell_id": "vps50:llamacpp:gemma-4-26b-a4b", - "n_calls": 0, - "n_errors": 0, - "duration_ms_mean": null, - "duration_ms_p50": null, - "tokens_per_sec_mean": null, - "tokens_per_sec_max": null - }, - { - "cell_id": "vps50:llamacpp:qwen3-30b-a3b", - "n_calls": 0, - "n_errors": 0, - "duration_ms_mean": null, - "duration_ms_p50": null, - "tokens_per_sec_mean": null, - "tokens_per_sec_max": null - }, - { - "cell_id": "vps50:llamacpp:qwen2.5-72b", - "n_calls": 0, - "n_errors": 0, - "duration_ms_mean": null, - "duration_ms_p50": null, - "tokens_per_sec_mean": null, - "tokens_per_sec_max": null - }, - { - "cell_id": "vps50:llamacpp:gemma-4-26b-a4b-it-q4km-cpu-ctx32k", - "n_calls": 2, - "n_errors": 0, - "duration_ms_mean": null, - "duration_ms_p50": null, - "tokens_per_sec_mean": null, - "tokens_per_sec_max": null - } - ], - "n_calls_total": 2, - "n_errors_total": 0, - "started_at": "2026-05-05T10:23:13Z", + "task_kind": "param-sweep", "tags": [ "a3b", "cpu", @@ -78,6 +26,186 @@ "qwen3", "vps50" ], + "headline": "2 calls across 5 cell(s)", + "janie_blurb_md": null, + "janie_blurb_status": "pending", + "caveat": null, + "caveat_severity": null, + "methodology_ref": "A3B_AND_CPU_OVERNIGHT_2026-05-05", + "methodology_deviations_md": null, + "results_table": [ + { + "label": "phi-4", + "cell_id": "vps50:llamacpp:phi-4", + "n_calls": 0, + "n_errors": 0, + "duration_ms_mean": null, + "duration_ms_p50": null, + "duration_ms_p95": null, + "tokens_per_sec_mean": null, + "tokens_per_sec_p50": null, + "tokens_per_sec_p95": null, + "tokens_per_sec_max": null, + "memory_mb": null, + "watts_avg": null, + "notes": null + }, + { + "label": "gemma-4-26b-a4b", + "cell_id": "vps50:llamacpp:gemma-4-26b-a4b", + "n_calls": 0, + "n_errors": 0, + "duration_ms_mean": null, + "duration_ms_p50": null, + "duration_ms_p95": null, + "tokens_per_sec_mean": null, + "tokens_per_sec_p50": null, + "tokens_per_sec_p95": null, + "tokens_per_sec_max": null, + "memory_mb": null, + "watts_avg": null, + "notes": null + }, + { + "label": "qwen3-30b-a3b", + "cell_id": "vps50:llamacpp:qwen3-30b-a3b", + "n_calls": 0, + "n_errors": 0, + "duration_ms_mean": null, + "duration_ms_p50": null, + "duration_ms_p95": null, + "tokens_per_sec_mean": null, + "tokens_per_sec_p50": null, + "tokens_per_sec_p95": null, + "tokens_per_sec_max": null, + "memory_mb": null, + "watts_avg": null, + "notes": null + }, + { + "label": "qwen2.5-72b", + "cell_id": "vps50:llamacpp:qwen2.5-72b", + "n_calls": 0, + "n_errors": 0, + "duration_ms_mean": null, + "duration_ms_p50": null, + "duration_ms_p95": null, + "tokens_per_sec_mean": null, + "tokens_per_sec_p50": null, + "tokens_per_sec_p95": null, + "tokens_per_sec_max": null, + "memory_mb": null, + "watts_avg": null, + "notes": null + }, + { + "label": "gemma-4-26b-a4b-it-q4km-cpu-ctx32k", + "cell_id": "vps50:llamacpp:gemma-4-26b-a4b-it-q4km-cpu-ctx32k", + "n_calls": 2, + "n_errors": 0, + "duration_ms_mean": null, + "duration_ms_p50": null, + "duration_ms_p95": null, + "tokens_per_sec_mean": null, + "tokens_per_sec_p50": null, + "tokens_per_sec_p95": null, + "tokens_per_sec_max": null, + "memory_mb": null, + "watts_avg": null, + "notes": null + } + ], + "cells": [ + { + "cell_id": "vps50:llamacpp:phi-4", + "n_calls": 0, + "n_errors": 0, + "duration_ms_mean": null, + "duration_ms_p50": null, + "duration_ms_p95": null, + "tokens_per_sec_mean": null, + "tokens_per_sec_p50": null, + "tokens_per_sec_p95": null, + "tokens_per_sec_max": null + }, + { + "cell_id": "vps50:llamacpp:gemma-4-26b-a4b", + "n_calls": 0, + "n_errors": 0, + "duration_ms_mean": null, + "duration_ms_p50": null, + "duration_ms_p95": null, + "tokens_per_sec_mean": null, + "tokens_per_sec_p50": null, + "tokens_per_sec_p95": null, + "tokens_per_sec_max": null + }, + { + "cell_id": "vps50:llamacpp:qwen3-30b-a3b", + "n_calls": 0, + "n_errors": 0, + "duration_ms_mean": null, + "duration_ms_p50": null, + "duration_ms_p95": null, + "tokens_per_sec_mean": null, + "tokens_per_sec_p50": null, + "tokens_per_sec_p95": null, + "tokens_per_sec_max": null + }, + { + "cell_id": "vps50:llamacpp:qwen2.5-72b", + "n_calls": 0, + "n_errors": 0, + "duration_ms_mean": null, + "duration_ms_p50": null, + "duration_ms_p95": null, + "tokens_per_sec_mean": null, + "tokens_per_sec_p50": null, + "tokens_per_sec_p95": null, + "tokens_per_sec_max": null + }, + { + "cell_id": "vps50:llamacpp:gemma-4-26b-a4b-it-q4km-cpu-ctx32k", + "n_calls": 2, + "n_errors": 0, + "duration_ms_mean": null, + "duration_ms_p50": null, + "duration_ms_p95": null, + "tokens_per_sec_mean": null, + "tokens_per_sec_p50": null, + "tokens_per_sec_p95": null, + "tokens_per_sec_max": null + } + ], + "n_calls_total": 2, + "n_errors_total": 0, + "chart_spec": { + "kind": "line", + "x": "param_value", + "y": "tokens_per_sec_mean", + "series": [ + "phi-4", + "gemma-4-26b-a4b", + "qwen3-30b-a3b", + "qwen2.5-72b", + "gemma-4-26b-a4b-it-q4km-cpu-ctx32k" + ], + "data_url": "/data/91751afd-068a-477b-8f40-6e1963f803f1/run.jsonl" + }, + "site_grade": "standard", + "site_grade_reason": null, + "raw_data_urls": { + "jsonl": "/data/91751afd-068a-477b-8f40-6e1963f803f1/run.jsonl", + "log": "/data/91751afd-068a-477b-8f40-6e1963f803f1/run.log", + "md": null, + "metadata": "/data/91751afd-068a-477b-8f40-6e1963f803f1/metadata.json", + "gitea_dir": "https://git.weeyuga.com/slobodanmargetic988/weeyuga-benchmarks-public/src/branch/main/runs/91751afd-068a-477b-8f40-6e1963f803f1/" + }, + "synthesis_doc": "A3B_AND_CPU_OVERNIGHT_2026-05-05.md", + "synthesis_docs_all": [ + "A3B_AND_CPU_OVERNIGHT_2026-05-05.md" + ], + "related_ids": [], "status": "complete", "visibility": "draft", "cells_full": [ @@ -87,7 +215,10 @@ "n_errors": 0, "duration_ms_mean": null, "duration_ms_p50": null, + "duration_ms_p95": null, "tokens_per_sec_mean": null, + "tokens_per_sec_p50": null, + "tokens_per_sec_p95": null, "tokens_per_sec_max": null, "calls": [] }, @@ -97,7 +228,10 @@ "n_errors": 0, "duration_ms_mean": null, "duration_ms_p50": null, + "duration_ms_p95": null, "tokens_per_sec_mean": null, + "tokens_per_sec_p50": null, + "tokens_per_sec_p95": null, "tokens_per_sec_max": null, "calls": [] }, @@ -107,7 +241,10 @@ "n_errors": 0, "duration_ms_mean": null, "duration_ms_p50": null, + "duration_ms_p95": null, "tokens_per_sec_mean": null, + "tokens_per_sec_p50": null, + "tokens_per_sec_p95": null, "tokens_per_sec_max": null, "calls": [] }, @@ -117,7 +254,10 @@ "n_errors": 0, "duration_ms_mean": null, "duration_ms_p50": null, + "duration_ms_p95": null, "tokens_per_sec_mean": null, + "tokens_per_sec_p50": null, + "tokens_per_sec_p95": null, "tokens_per_sec_max": null, "calls": [] }, @@ -127,7 +267,10 @@ "n_errors": 0, "duration_ms_mean": null, "duration_ms_p50": null, + "duration_ms_p95": null, "tokens_per_sec_mean": null, + "tokens_per_sec_p50": null, + "tokens_per_sec_p95": null, "tokens_per_sec_max": null, "calls": [ { @@ -168,5 +311,5 @@ "qwen2.5-72b" ] }, - "packaged_at": "2026-05-05T17:43:34Z" + "packaged_at": "2026-05-06T08:04:27Z" } diff --git a/runs/ad057f5b-ed3f-4a95-a38e-361be310ffd6/metadata.json b/runs/ad057f5b-ed3f-4a95-a38e-361be310ffd6/metadata.json index 8cea60c..50cf3de 100644 --- a/runs/ad057f5b-ed3f-4a95-a38e-361be310ffd6/metadata.json +++ b/runs/ad057f5b-ed3f-4a95-a38e-361be310ffd6/metadata.json @@ -2,13 +2,16 @@ "id": "ad057f5b-ed3f-4a95-a38e-361be310ffd6", "title": "pavilion-weeyuga-v3 \u2014 qwen2.5/qwen2.5-coder/qwen3/qwen3.5 on pavilion", "date": "2026-04-29", + "started_at": "2026-04-29T09:51:46Z", + "git_sha": "371ce70c8708ce0aedbe4f3bf096f8e8f8b11d43", "hardware": "pavilion", + "hardware_label": "Pavilion \u00b7 HP laptop \u00b7 GTX 1050 4 GB \u00b7 16 GB RAM \u00b7 i7-9750H", "engine": "weeyuga", "harness": "pavilion-weeyuga-v3", "model_family": "qwen2.5/qwen2.5-coder/qwen3/qwen3.5", "model_sizes": [ "qwen3.5:4b", - "qwen3.5:35b-a3b-uncensored-iq1m", + "qwen3.5:35b-a3b-uncensored", "qwen3.5:35b-a3b-iq2s", "qwen3.5:9b-q6k", "qwen3.5:9b", @@ -23,161 +26,7 @@ "qwen2.5-coder:1.5b", "qwen2.5-coder:0.5b" ], - "task_kind": null, - "headline": "96 calls across 16 cell(s); 12 errors", - "summary_md_path": null, - "raw_jsonl_path": "runs/ad057f5b-ed3f-4a95-a38e-361be310ffd6.jsonl", - "log_path": null, - "synthesis_doc": null, - "cells": [ - { - "cell_id": "pavilion:weeyuga:qwen3.5:4b", - "n_calls": 6, - "n_errors": 0, - "duration_ms_mean": null, - "duration_ms_p50": null, - "tokens_per_sec_mean": null, - "tokens_per_sec_max": null - }, - { - "cell_id": "pavilion:weeyuga:qwen3.5:35b-a3b-uncensored-iq1m", - "n_calls": 6, - "n_errors": 0, - "duration_ms_mean": null, - "duration_ms_p50": null, - "tokens_per_sec_mean": null, - "tokens_per_sec_max": null - }, - { - "cell_id": "pavilion:weeyuga:qwen3.5:35b-a3b-iq2s", - "n_calls": 6, - "n_errors": 0, - "duration_ms_mean": null, - "duration_ms_p50": null, - "tokens_per_sec_mean": null, - "tokens_per_sec_max": null - }, - { - "cell_id": "pavilion:weeyuga:qwen3.5:9b-q6k", - "n_calls": 6, - "n_errors": 0, - "duration_ms_mean": null, - "duration_ms_p50": null, - "tokens_per_sec_mean": null, - "tokens_per_sec_max": null - }, - { - "cell_id": "pavilion:weeyuga:qwen3.5:9b-q4km", - "n_calls": 6, - "n_errors": 0, - "duration_ms_mean": null, - "duration_ms_p50": null, - "tokens_per_sec_mean": null, - "tokens_per_sec_max": null - }, - { - "cell_id": "pavilion:weeyuga:qwen3.5:2b", - "n_calls": 6, - "n_errors": 0, - "duration_ms_mean": null, - "duration_ms_p50": null, - "tokens_per_sec_mean": null, - "tokens_per_sec_max": null - }, - { - "cell_id": "pavilion:weeyuga:qwen3.5:0.8b", - "n_calls": 6, - "n_errors": 0, - "duration_ms_mean": null, - "duration_ms_p50": null, - "tokens_per_sec_mean": null, - "tokens_per_sec_max": null - }, - { - "cell_id": "pavilion:weeyuga:qwen3.5:9b", - "n_calls": 6, - "n_errors": 0, - "duration_ms_mean": null, - "duration_ms_p50": null, - "tokens_per_sec_mean": null, - "tokens_per_sec_max": null - }, - { - "cell_id": "pavilion:weeyuga:qwen2.5-coder:14b", - "n_calls": 6, - "n_errors": 6, - "duration_ms_mean": null, - "duration_ms_p50": null, - "tokens_per_sec_mean": null, - "tokens_per_sec_max": null - }, - { - "cell_id": "pavilion:weeyuga:qwen2.5-coder:3b", - "n_calls": 6, - "n_errors": 0, - "duration_ms_mean": null, - "duration_ms_p50": null, - "tokens_per_sec_mean": null, - "tokens_per_sec_max": null - }, - { - "cell_id": "pavilion:weeyuga:qwen3:14b", - "n_calls": 6, - "n_errors": 4, - "duration_ms_mean": null, - "duration_ms_p50": null, - "tokens_per_sec_mean": null, - "tokens_per_sec_max": null - }, - { - "cell_id": "pavilion:weeyuga:qwen3:8b", - "n_calls": 6, - "n_errors": 2, - "duration_ms_mean": null, - "duration_ms_p50": null, - "tokens_per_sec_mean": null, - "tokens_per_sec_max": null - }, - { - "cell_id": "pavilion:weeyuga:qwen3:4b", - "n_calls": 6, - "n_errors": 0, - "duration_ms_mean": null, - "duration_ms_p50": null, - "tokens_per_sec_mean": null, - "tokens_per_sec_max": null - }, - { - "cell_id": "pavilion:weeyuga:qwen2.5:3b", - "n_calls": 6, - "n_errors": 0, - "duration_ms_mean": null, - "duration_ms_p50": null, - "tokens_per_sec_mean": null, - "tokens_per_sec_max": null - }, - { - "cell_id": "pavilion:weeyuga:qwen2.5-coder:1.5b", - "n_calls": 6, - "n_errors": 0, - "duration_ms_mean": null, - "duration_ms_p50": null, - "tokens_per_sec_mean": null, - "tokens_per_sec_max": null - }, - { - "cell_id": "pavilion:weeyuga:qwen2.5-coder:0.5b", - "n_calls": 6, - "n_errors": 0, - "duration_ms_mean": null, - "duration_ms_p50": null, - "tokens_per_sec_mean": null, - "tokens_per_sec_max": null - } - ], - "n_calls_total": 96, - "n_errors_total": 12, - "started_at": "2026-04-29T09:51:46Z", + "task_kind": "chat", "tags": [ "a3b", "iq2", @@ -187,6 +36,505 @@ "qwen3.5", "weeyuga" ], + "headline": "96 calls across 16 cell(s); 12 errors", + "janie_blurb_md": null, + "janie_blurb_status": "pending", + "caveat": null, + "caveat_severity": null, + "methodology_ref": "PAVILION_LLAMACPP_VS_OLLAMA_v0_INCOMPLETE", + "methodology_deviations_md": null, + "results_table": [ + { + "label": "qwen3.5:4b", + "cell_id": "pavilion:weeyuga:qwen3.5:4b", + "n_calls": 6, + "n_errors": 0, + "duration_ms_mean": null, + "duration_ms_p50": null, + "duration_ms_p95": null, + "tokens_per_sec_mean": null, + "tokens_per_sec_p50": null, + "tokens_per_sec_p95": null, + "tokens_per_sec_max": null, + "memory_mb": null, + "watts_avg": null, + "notes": null + }, + { + "label": "qwen3.5:35b-a3b-uncensored-iq1m", + "cell_id": "pavilion:weeyuga:qwen3.5:35b-a3b-uncensored-iq1m", + "n_calls": 6, + "n_errors": 0, + "duration_ms_mean": null, + "duration_ms_p50": null, + "duration_ms_p95": null, + "tokens_per_sec_mean": null, + "tokens_per_sec_p50": null, + "tokens_per_sec_p95": null, + "tokens_per_sec_max": null, + "memory_mb": null, + "watts_avg": null, + "notes": null + }, + { + "label": "qwen3.5:35b-a3b-iq2s", + "cell_id": "pavilion:weeyuga:qwen3.5:35b-a3b-iq2s", + "n_calls": 6, + "n_errors": 0, + "duration_ms_mean": null, + "duration_ms_p50": null, + "duration_ms_p95": null, + "tokens_per_sec_mean": null, + "tokens_per_sec_p50": null, + "tokens_per_sec_p95": null, + "tokens_per_sec_max": null, + "memory_mb": null, + "watts_avg": null, + "notes": null + }, + { + "label": "qwen3.5:9b-q6k", + "cell_id": "pavilion:weeyuga:qwen3.5:9b-q6k", + "n_calls": 6, + "n_errors": 0, + "duration_ms_mean": null, + "duration_ms_p50": null, + "duration_ms_p95": null, + "tokens_per_sec_mean": null, + "tokens_per_sec_p50": null, + "tokens_per_sec_p95": null, + "tokens_per_sec_max": null, + "memory_mb": null, + "watts_avg": null, + "notes": null + }, + { + "label": "qwen3.5:9b-q4km", + "cell_id": "pavilion:weeyuga:qwen3.5:9b-q4km", + "n_calls": 6, + "n_errors": 0, + "duration_ms_mean": null, + "duration_ms_p50": null, + "duration_ms_p95": null, + "tokens_per_sec_mean": null, + "tokens_per_sec_p50": null, + "tokens_per_sec_p95": null, + "tokens_per_sec_max": null, + "memory_mb": null, + "watts_avg": null, + "notes": null + }, + { + "label": "qwen3.5:2b", + "cell_id": "pavilion:weeyuga:qwen3.5:2b", + "n_calls": 6, + "n_errors": 0, + "duration_ms_mean": null, + "duration_ms_p50": null, + "duration_ms_p95": null, + "tokens_per_sec_mean": null, + "tokens_per_sec_p50": null, + "tokens_per_sec_p95": null, + "tokens_per_sec_max": null, + "memory_mb": null, + "watts_avg": null, + "notes": null + }, + { + "label": "qwen3.5:0.8b", + "cell_id": "pavilion:weeyuga:qwen3.5:0.8b", + "n_calls": 6, + "n_errors": 0, + "duration_ms_mean": null, + "duration_ms_p50": null, + "duration_ms_p95": null, + "tokens_per_sec_mean": null, + "tokens_per_sec_p50": null, + "tokens_per_sec_p95": null, + "tokens_per_sec_max": null, + "memory_mb": null, + "watts_avg": null, + "notes": null + }, + { + "label": "qwen3.5:9b", + "cell_id": "pavilion:weeyuga:qwen3.5:9b", + "n_calls": 6, + "n_errors": 0, + "duration_ms_mean": null, + "duration_ms_p50": null, + "duration_ms_p95": null, + "tokens_per_sec_mean": null, + "tokens_per_sec_p50": null, + "tokens_per_sec_p95": null, + "tokens_per_sec_max": null, + "memory_mb": null, + "watts_avg": null, + "notes": null + }, + { + "label": "qwen2.5-coder:14b", + "cell_id": "pavilion:weeyuga:qwen2.5-coder:14b", + "n_calls": 6, + "n_errors": 6, + "duration_ms_mean": null, + "duration_ms_p50": null, + "duration_ms_p95": null, + "tokens_per_sec_mean": null, + "tokens_per_sec_p50": null, + "tokens_per_sec_p95": null, + "tokens_per_sec_max": null, + "memory_mb": null, + "watts_avg": null, + "notes": null + }, + { + "label": "qwen2.5-coder:3b", + "cell_id": "pavilion:weeyuga:qwen2.5-coder:3b", + "n_calls": 6, + "n_errors": 0, + "duration_ms_mean": null, + "duration_ms_p50": null, + "duration_ms_p95": null, + "tokens_per_sec_mean": null, + "tokens_per_sec_p50": null, + "tokens_per_sec_p95": null, + "tokens_per_sec_max": null, + "memory_mb": null, + "watts_avg": null, + "notes": null + }, + { + "label": "qwen3:14b", + "cell_id": "pavilion:weeyuga:qwen3:14b", + "n_calls": 6, + "n_errors": 4, + "duration_ms_mean": null, + "duration_ms_p50": null, + "duration_ms_p95": null, + "tokens_per_sec_mean": null, + "tokens_per_sec_p50": null, + "tokens_per_sec_p95": null, + "tokens_per_sec_max": null, + "memory_mb": null, + "watts_avg": null, + "notes": null + }, + { + "label": "qwen3:8b", + "cell_id": "pavilion:weeyuga:qwen3:8b", + "n_calls": 6, + "n_errors": 2, + "duration_ms_mean": null, + "duration_ms_p50": null, + "duration_ms_p95": null, + "tokens_per_sec_mean": null, + "tokens_per_sec_p50": null, + "tokens_per_sec_p95": null, + "tokens_per_sec_max": null, + "memory_mb": null, + "watts_avg": null, + "notes": null + }, + { + "label": "qwen3:4b", + "cell_id": "pavilion:weeyuga:qwen3:4b", + "n_calls": 6, + "n_errors": 0, + "duration_ms_mean": null, + "duration_ms_p50": null, + "duration_ms_p95": null, + "tokens_per_sec_mean": null, + "tokens_per_sec_p50": null, + "tokens_per_sec_p95": null, + "tokens_per_sec_max": null, + "memory_mb": null, + "watts_avg": null, + "notes": null + }, + { + "label": "qwen2.5:3b", + "cell_id": "pavilion:weeyuga:qwen2.5:3b", + "n_calls": 6, + "n_errors": 0, + "duration_ms_mean": null, + "duration_ms_p50": null, + "duration_ms_p95": null, + "tokens_per_sec_mean": null, + "tokens_per_sec_p50": null, + "tokens_per_sec_p95": null, + "tokens_per_sec_max": null, + "memory_mb": null, + "watts_avg": null, + "notes": null + }, + { + "label": "qwen2.5-coder:1.5b", + "cell_id": "pavilion:weeyuga:qwen2.5-coder:1.5b", + "n_calls": 6, + "n_errors": 0, + "duration_ms_mean": null, + "duration_ms_p50": null, + "duration_ms_p95": null, + "tokens_per_sec_mean": null, + "tokens_per_sec_p50": null, + "tokens_per_sec_p95": null, + "tokens_per_sec_max": null, + "memory_mb": null, + "watts_avg": null, + "notes": null + }, + { + "label": "qwen2.5-coder:0.5b", + "cell_id": "pavilion:weeyuga:qwen2.5-coder:0.5b", + "n_calls": 6, + "n_errors": 0, + "duration_ms_mean": null, + "duration_ms_p50": null, + "duration_ms_p95": null, + "tokens_per_sec_mean": null, + "tokens_per_sec_p50": null, + "tokens_per_sec_p95": null, + "tokens_per_sec_max": null, + "memory_mb": null, + "watts_avg": null, + "notes": null + } + ], + "cells": [ + { + "cell_id": "pavilion:weeyuga:qwen3.5:4b", + "n_calls": 6, + "n_errors": 0, + "duration_ms_mean": null, + "duration_ms_p50": null, + "duration_ms_p95": null, + "tokens_per_sec_mean": null, + "tokens_per_sec_p50": null, + "tokens_per_sec_p95": null, + "tokens_per_sec_max": null + }, + { + "cell_id": "pavilion:weeyuga:qwen3.5:35b-a3b-uncensored-iq1m", + "n_calls": 6, + "n_errors": 0, + "duration_ms_mean": null, + "duration_ms_p50": null, + "duration_ms_p95": null, + "tokens_per_sec_mean": null, + "tokens_per_sec_p50": null, + "tokens_per_sec_p95": null, + "tokens_per_sec_max": null + }, + { + "cell_id": "pavilion:weeyuga:qwen3.5:35b-a3b-iq2s", + "n_calls": 6, + "n_errors": 0, + "duration_ms_mean": null, + "duration_ms_p50": null, + "duration_ms_p95": null, + "tokens_per_sec_mean": null, + "tokens_per_sec_p50": null, + "tokens_per_sec_p95": null, + "tokens_per_sec_max": null + }, + { + "cell_id": "pavilion:weeyuga:qwen3.5:9b-q6k", + "n_calls": 6, + "n_errors": 0, + "duration_ms_mean": null, + "duration_ms_p50": null, + "duration_ms_p95": null, + "tokens_per_sec_mean": null, + "tokens_per_sec_p50": null, + "tokens_per_sec_p95": null, + "tokens_per_sec_max": null + }, + { + "cell_id": "pavilion:weeyuga:qwen3.5:9b-q4km", + "n_calls": 6, + "n_errors": 0, + "duration_ms_mean": null, + "duration_ms_p50": null, + "duration_ms_p95": null, + "tokens_per_sec_mean": null, + "tokens_per_sec_p50": null, + "tokens_per_sec_p95": null, + "tokens_per_sec_max": null + }, + { + "cell_id": "pavilion:weeyuga:qwen3.5:2b", + "n_calls": 6, + "n_errors": 0, + "duration_ms_mean": null, + "duration_ms_p50": null, + "duration_ms_p95": null, + "tokens_per_sec_mean": null, + "tokens_per_sec_p50": null, + "tokens_per_sec_p95": null, + "tokens_per_sec_max": null + }, + { + "cell_id": "pavilion:weeyuga:qwen3.5:0.8b", + "n_calls": 6, + "n_errors": 0, + "duration_ms_mean": null, + "duration_ms_p50": null, + "duration_ms_p95": null, + "tokens_per_sec_mean": null, + "tokens_per_sec_p50": null, + "tokens_per_sec_p95": null, + "tokens_per_sec_max": null + }, + { + "cell_id": "pavilion:weeyuga:qwen3.5:9b", + "n_calls": 6, + "n_errors": 0, + "duration_ms_mean": null, + "duration_ms_p50": null, + "duration_ms_p95": null, + "tokens_per_sec_mean": null, + "tokens_per_sec_p50": null, + "tokens_per_sec_p95": null, + "tokens_per_sec_max": null + }, + { + "cell_id": "pavilion:weeyuga:qwen2.5-coder:14b", + "n_calls": 6, + "n_errors": 6, + "duration_ms_mean": null, + "duration_ms_p50": null, + "duration_ms_p95": null, + "tokens_per_sec_mean": null, + "tokens_per_sec_p50": null, + "tokens_per_sec_p95": null, + "tokens_per_sec_max": null + }, + { + "cell_id": "pavilion:weeyuga:qwen2.5-coder:3b", + "n_calls": 6, + "n_errors": 0, + "duration_ms_mean": null, + "duration_ms_p50": null, + "duration_ms_p95": null, + "tokens_per_sec_mean": null, + "tokens_per_sec_p50": null, + "tokens_per_sec_p95": null, + "tokens_per_sec_max": null + }, + { + "cell_id": "pavilion:weeyuga:qwen3:14b", + "n_calls": 6, + "n_errors": 4, + "duration_ms_mean": null, + "duration_ms_p50": null, + "duration_ms_p95": null, + "tokens_per_sec_mean": null, + "tokens_per_sec_p50": null, + "tokens_per_sec_p95": null, + "tokens_per_sec_max": null + }, + { + "cell_id": "pavilion:weeyuga:qwen3:8b", + "n_calls": 6, + "n_errors": 2, + "duration_ms_mean": null, + "duration_ms_p50": null, + "duration_ms_p95": null, + "tokens_per_sec_mean": null, + "tokens_per_sec_p50": null, + "tokens_per_sec_p95": null, + "tokens_per_sec_max": null + }, + { + "cell_id": "pavilion:weeyuga:qwen3:4b", + "n_calls": 6, + "n_errors": 0, + "duration_ms_mean": null, + "duration_ms_p50": null, + "duration_ms_p95": null, + "tokens_per_sec_mean": null, + "tokens_per_sec_p50": null, + "tokens_per_sec_p95": null, + "tokens_per_sec_max": null + }, + { + "cell_id": "pavilion:weeyuga:qwen2.5:3b", + "n_calls": 6, + "n_errors": 0, + "duration_ms_mean": null, + "duration_ms_p50": null, + "duration_ms_p95": null, + "tokens_per_sec_mean": null, + "tokens_per_sec_p50": null, + "tokens_per_sec_p95": null, + "tokens_per_sec_max": null + }, + { + "cell_id": "pavilion:weeyuga:qwen2.5-coder:1.5b", + "n_calls": 6, + "n_errors": 0, + "duration_ms_mean": null, + "duration_ms_p50": null, + "duration_ms_p95": null, + "tokens_per_sec_mean": null, + "tokens_per_sec_p50": null, + "tokens_per_sec_p95": null, + "tokens_per_sec_max": null + }, + { + "cell_id": "pavilion:weeyuga:qwen2.5-coder:0.5b", + "n_calls": 6, + "n_errors": 0, + "duration_ms_mean": null, + "duration_ms_p50": null, + "duration_ms_p95": null, + "tokens_per_sec_mean": null, + "tokens_per_sec_p50": null, + "tokens_per_sec_p95": null, + "tokens_per_sec_max": null + } + ], + "n_calls_total": 96, + "n_errors_total": 12, + "chart_spec": { + "kind": "bar", + "x": "cell", + "y": "tokens_per_sec_mean", + "series": [ + "qwen3.5:4b", + "qwen3.5:35b-a3b-uncensored-iq1m", + "qwen3.5:35b-a3b-iq2s", + "qwen3.5:9b-q6k", + "qwen3.5:9b-q4km", + "qwen3.5:2b", + "qwen3.5:0.8b", + "qwen3.5:9b", + "qwen2.5-coder:14b", + "qwen2.5-coder:3b", + "qwen3:14b", + "qwen3:8b", + "qwen3:4b", + "qwen2.5:3b", + "qwen2.5-coder:1.5b", + "qwen2.5-coder:0.5b" + ], + "data_url": "/data/ad057f5b-ed3f-4a95-a38e-361be310ffd6/run.jsonl" + }, + "site_grade": "archive-only", + "site_grade_reason": "no documented method (no synthesis doc and no run.md)", + "raw_data_urls": { + "jsonl": "/data/ad057f5b-ed3f-4a95-a38e-361be310ffd6/run.jsonl", + "log": null, + "md": null, + "metadata": "/data/ad057f5b-ed3f-4a95-a38e-361be310ffd6/metadata.json", + "gitea_dir": "https://git.weeyuga.com/slobodanmargetic988/weeyuga-benchmarks-public/src/branch/main/runs/ad057f5b-ed3f-4a95-a38e-361be310ffd6/" + }, + "synthesis_doc": "PAVILION_LLAMACPP_VS_OLLAMA_v0_INCOMPLETE.md", + "synthesis_docs_all": [ + "PAVILION_LLAMACPP_VS_OLLAMA_v0_INCOMPLETE.md" + ], + "related_ids": [], "status": "complete", "visibility": "draft", "cells_full": [ @@ -196,7 +544,10 @@ "n_errors": 0, "duration_ms_mean": null, "duration_ms_p50": null, + "duration_ms_p95": null, "tokens_per_sec_mean": null, + "tokens_per_sec_p50": null, + "tokens_per_sec_p95": null, "tokens_per_sec_max": null, "calls": [ { @@ -404,7 +755,10 @@ "n_errors": 0, "duration_ms_mean": null, "duration_ms_p50": null, + "duration_ms_p95": null, "tokens_per_sec_mean": null, + "tokens_per_sec_p50": null, + "tokens_per_sec_p95": null, "tokens_per_sec_max": null, "calls": [ { @@ -614,7 +968,10 @@ "n_errors": 0, "duration_ms_mean": null, "duration_ms_p50": null, + "duration_ms_p95": null, "tokens_per_sec_mean": null, + "tokens_per_sec_p50": null, + "tokens_per_sec_p95": null, "tokens_per_sec_max": null, "calls": [ { @@ -824,7 +1181,10 @@ "n_errors": 0, "duration_ms_mean": null, "duration_ms_p50": null, + "duration_ms_p95": null, "tokens_per_sec_mean": null, + "tokens_per_sec_p50": null, + "tokens_per_sec_p95": null, "tokens_per_sec_max": null, "calls": [ { @@ -1033,7 +1393,10 @@ "n_errors": 0, "duration_ms_mean": null, "duration_ms_p50": null, + "duration_ms_p95": null, "tokens_per_sec_mean": null, + "tokens_per_sec_p50": null, + "tokens_per_sec_p95": null, "tokens_per_sec_max": null, "calls": [ { @@ -1242,7 +1605,10 @@ "n_errors": 0, "duration_ms_mean": null, "duration_ms_p50": null, + "duration_ms_p95": null, "tokens_per_sec_mean": null, + "tokens_per_sec_p50": null, + "tokens_per_sec_p95": null, "tokens_per_sec_max": null, "calls": [ { @@ -1456,7 +1822,10 @@ "n_errors": 0, "duration_ms_mean": null, "duration_ms_p50": null, + "duration_ms_p95": null, "tokens_per_sec_mean": null, + "tokens_per_sec_p50": null, + "tokens_per_sec_p95": null, "tokens_per_sec_max": null, "calls": [ { @@ -1670,7 +2039,10 @@ "n_errors": 0, "duration_ms_mean": null, "duration_ms_p50": null, + "duration_ms_p95": null, "tokens_per_sec_mean": null, + "tokens_per_sec_p50": null, + "tokens_per_sec_p95": null, "tokens_per_sec_max": null, "calls": [ { @@ -1880,7 +2252,10 @@ "n_errors": 6, "duration_ms_mean": null, "duration_ms_p50": null, + "duration_ms_p95": null, "tokens_per_sec_mean": null, + "tokens_per_sec_p50": null, + "tokens_per_sec_p95": null, "tokens_per_sec_max": null, "calls": [ { @@ -2069,7 +2444,10 @@ "n_errors": 0, "duration_ms_mean": null, "duration_ms_p50": null, + "duration_ms_p95": null, "tokens_per_sec_mean": null, + "tokens_per_sec_p50": null, + "tokens_per_sec_p95": null, "tokens_per_sec_max": null, "calls": [ { @@ -2284,7 +2662,10 @@ "n_errors": 4, "duration_ms_mean": null, "duration_ms_p50": null, + "duration_ms_p95": null, "tokens_per_sec_mean": null, + "tokens_per_sec_p50": null, + "tokens_per_sec_p95": null, "tokens_per_sec_max": null, "calls": [ { @@ -2486,7 +2867,10 @@ "n_errors": 2, "duration_ms_mean": null, "duration_ms_p50": null, + "duration_ms_p95": null, "tokens_per_sec_mean": null, + "tokens_per_sec_p50": null, + "tokens_per_sec_p95": null, "tokens_per_sec_max": null, "calls": [ { @@ -2695,7 +3079,10 @@ "n_errors": 0, "duration_ms_mean": null, "duration_ms_p50": null, + "duration_ms_p95": null, "tokens_per_sec_mean": null, + "tokens_per_sec_p50": null, + "tokens_per_sec_p95": null, "tokens_per_sec_max": null, "calls": [ { @@ -2884,7 +3271,10 @@ "n_errors": 0, "duration_ms_mean": null, "duration_ms_p50": null, + "duration_ms_p95": null, "tokens_per_sec_mean": null, + "tokens_per_sec_p50": null, + "tokens_per_sec_p95": null, "tokens_per_sec_max": null, "calls": [ { @@ -3098,7 +3488,10 @@ "n_errors": 0, "duration_ms_mean": null, "duration_ms_p50": null, + "duration_ms_p95": null, "tokens_per_sec_mean": null, + "tokens_per_sec_p50": null, + "tokens_per_sec_p95": null, "tokens_per_sec_max": null, "calls": [ { @@ -3313,7 +3706,10 @@ "n_errors": 0, "duration_ms_mean": null, "duration_ms_p50": null, + "duration_ms_p95": null, "tokens_per_sec_mean": null, + "tokens_per_sec_p50": null, + "tokens_per_sec_p95": null, "tokens_per_sec_max": null, "calls": [ { @@ -3568,5 +3964,5 @@ "env_inference_route": null, "env_llamacpp_url": null }, - "packaged_at": "2026-05-05T17:43:34Z" + "packaged_at": "2026-05-06T08:04:27Z" } diff --git a/runs/ad28cb95-5134-40b5-8514-c8a381f83d87/metadata.json b/runs/ad28cb95-5134-40b5-8514-c8a381f83d87/metadata.json index 6442051..c1974ae 100644 --- a/runs/ad28cb95-5134-40b5-8514-c8a381f83d87/metadata.json +++ b/runs/ad28cb95-5134-40b5-8514-c8a381f83d87/metadata.json @@ -2,51 +2,17 @@ "id": "ad28cb95-5134-40b5-8514-c8a381f83d87", "title": "predator-a3b-ctx-sweep-1 \u2014 qwen3 on predator", "date": "2026-05-04", + "started_at": "2026-05-04T23:48:01Z", + "git_sha": "f06800254dfd046e154c94f663fb7f9f66c79ae3", "hardware": "predator", + "hardware_label": "Predator \u00b7 gaming laptop \u00b7 GTX 1060 6 GB \u00b7 28 GB RAM", "engine": "llamacpp", "harness": "predator-a3b-ctx-sweep-1", "model_family": "qwen3", "model_sizes": [ "qwen3:30b-a3b-iq2m" ], - "task_kind": null, - "headline": "45 calls across 3 cell(s); ~5.0 tok/s mean; p50 83.9s", - "summary_md_path": null, - "raw_jsonl_path": "runs/ad28cb95-5134-40b5-8514-c8a381f83d87.jsonl", - "log_path": "runs/ad28cb95-5134-40b5-8514-c8a381f83d87.log", - "synthesis_doc": "A3B_AND_CPU_OVERNIGHT_2026-05-05.md", - "cells": [ - { - "cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl36-ctx32k", - "n_calls": 15, - "n_errors": 0, - "duration_ms_mean": 82375.0, - "duration_ms_p50": 69292.5, - "tokens_per_sec_mean": 4.69, - "tokens_per_sec_max": 4.95 - }, - { - "cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl6-ctx64k", - "n_calls": 15, - "n_errors": 0, - "duration_ms_mean": 79021.08, - "duration_ms_p50": 83884.0, - "tokens_per_sec_mean": 5.46, - "tokens_per_sec_max": 5.83 - }, - { - "cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl6-ctx128k", - "n_calls": 15, - "n_errors": 0, - "duration_ms_mean": 88976.42, - "duration_ms_p50": 97634.0, - "tokens_per_sec_mean": 4.79, - "tokens_per_sec_max": 5.12 - } - ], - "n_calls_total": 45, - "n_errors_total": 0, - "started_at": "2026-05-04T23:48:01Z", + "task_kind": "param-sweep", "tags": [ "a3b", "ctx", @@ -55,6 +21,128 @@ "qwen3", "sweep" ], + "headline": "45 calls across 3 cell(s); ~5.0 tok/s mean; p50 83.9s", + "janie_blurb_md": null, + "janie_blurb_status": "pending", + "caveat": null, + "caveat_severity": null, + "methodology_ref": "A3B_AND_CPU_OVERNIGHT_2026-05-05", + "methodology_deviations_md": null, + "results_table": [ + { + "label": "qwen3:30b-a3b-iq2m-think500-ngl36-ctx32k", + "cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl36-ctx32k", + "n_calls": 15, + "n_errors": 0, + "duration_ms_mean": 82375.0, + "duration_ms_p50": 69292.5, + "duration_ms_p95": 170672.15, + "tokens_per_sec_mean": 4.69, + "tokens_per_sec_p50": 4.74, + "tokens_per_sec_p95": 4.92, + "tokens_per_sec_max": 4.95, + "memory_mb": null, + "watts_avg": null, + "notes": null + }, + { + "label": "qwen3:30b-a3b-iq2m-think500-ngl6-ctx64k", + "cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl6-ctx64k", + "n_calls": 15, + "n_errors": 0, + "duration_ms_mean": 79021.08, + "duration_ms_p50": 83884.0, + "duration_ms_p95": 146975.5, + "tokens_per_sec_mean": 5.46, + "tokens_per_sec_p50": 5.41, + "tokens_per_sec_p95": 5.73, + "tokens_per_sec_max": 5.83, + "memory_mb": null, + "watts_avg": null, + "notes": null + }, + { + "label": "qwen3:30b-a3b-iq2m-think500-ngl6-ctx128k", + "cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl6-ctx128k", + "n_calls": 15, + "n_errors": 0, + "duration_ms_mean": 88976.42, + "duration_ms_p50": 97634.0, + "duration_ms_p95": 162935.6, + "tokens_per_sec_mean": 4.79, + "tokens_per_sec_p50": 4.77, + "tokens_per_sec_p95": 5.0, + "tokens_per_sec_max": 5.12, + "memory_mb": null, + "watts_avg": null, + "notes": null + } + ], + "cells": [ + { + "cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl36-ctx32k", + "n_calls": 15, + "n_errors": 0, + "duration_ms_mean": 82375.0, + "duration_ms_p50": 69292.5, + "duration_ms_p95": 170672.15, + "tokens_per_sec_mean": 4.69, + "tokens_per_sec_p50": 4.74, + "tokens_per_sec_p95": 4.92, + "tokens_per_sec_max": 4.95 + }, + { + "cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl6-ctx64k", + "n_calls": 15, + "n_errors": 0, + "duration_ms_mean": 79021.08, + "duration_ms_p50": 83884.0, + "duration_ms_p95": 146975.5, + "tokens_per_sec_mean": 5.46, + "tokens_per_sec_p50": 5.41, + "tokens_per_sec_p95": 5.73, + "tokens_per_sec_max": 5.83 + }, + { + "cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl6-ctx128k", + "n_calls": 15, + "n_errors": 0, + "duration_ms_mean": 88976.42, + "duration_ms_p50": 97634.0, + "duration_ms_p95": 162935.6, + "tokens_per_sec_mean": 4.79, + "tokens_per_sec_p50": 4.77, + "tokens_per_sec_p95": 5.0, + "tokens_per_sec_max": 5.12 + } + ], + "n_calls_total": 45, + "n_errors_total": 0, + "chart_spec": { + "kind": "line", + "x": "param_value", + "y": "tokens_per_sec_mean", + "series": [ + "qwen3:30b-a3b-iq2m-think500-ngl36-ctx32k", + "qwen3:30b-a3b-iq2m-think500-ngl6-ctx64k", + "qwen3:30b-a3b-iq2m-think500-ngl6-ctx128k" + ], + "data_url": "/data/ad28cb95-5134-40b5-8514-c8a381f83d87/run.jsonl" + }, + "site_grade": "standard", + "site_grade_reason": null, + "raw_data_urls": { + "jsonl": "/data/ad28cb95-5134-40b5-8514-c8a381f83d87/run.jsonl", + "log": "/data/ad28cb95-5134-40b5-8514-c8a381f83d87/run.log", + "md": null, + "metadata": "/data/ad28cb95-5134-40b5-8514-c8a381f83d87/metadata.json", + "gitea_dir": "https://git.weeyuga.com/slobodanmargetic988/weeyuga-benchmarks-public/src/branch/main/runs/ad28cb95-5134-40b5-8514-c8a381f83d87/" + }, + "synthesis_doc": "A3B_AND_CPU_OVERNIGHT_2026-05-05.md", + "synthesis_docs_all": [ + "A3B_AND_CPU_OVERNIGHT_2026-05-05.md" + ], + "related_ids": [], "status": "complete", "visibility": "draft", "cells_full": [ @@ -64,7 +152,10 @@ "n_errors": 0, "duration_ms_mean": 82375.0, "duration_ms_p50": 69292.5, + "duration_ms_p95": 170672.15, "tokens_per_sec_mean": 4.69, + "tokens_per_sec_p50": 4.74, + "tokens_per_sec_p95": 4.92, "tokens_per_sec_max": 4.95, "calls": [ { @@ -315,7 +406,10 @@ "n_errors": 0, "duration_ms_mean": 79021.08, "duration_ms_p50": 83884.0, + "duration_ms_p95": 146975.5, "tokens_per_sec_mean": 5.46, + "tokens_per_sec_p50": 5.41, + "tokens_per_sec_p95": 5.73, "tokens_per_sec_max": 5.83, "calls": [ { @@ -566,7 +660,10 @@ "n_errors": 0, "duration_ms_mean": 88976.42, "duration_ms_p50": 97634.0, + "duration_ms_p95": 162935.6, "tokens_per_sec_mean": 4.79, + "tokens_per_sec_p50": 4.77, + "tokens_per_sec_p95": 5.0, "tokens_per_sec_max": 5.12, "calls": [ { @@ -840,5 +937,5 @@ ], "gguf": "Qwen3-30B-A3B-UD-IQ2_M.gguf" }, - "packaged_at": "2026-05-05T17:43:34Z" + "packaged_at": "2026-05-06T08:04:27Z" } diff --git a/runs/b37836bf-d1a2-4d6b-a732-aff89da1fa07/metadata.json b/runs/b37836bf-d1a2-4d6b-a732-aff89da1fa07/metadata.json index 01aa219..540b184 100644 --- a/runs/b37836bf-d1a2-4d6b-a732-aff89da1fa07/metadata.json +++ b/runs/b37836bf-d1a2-4d6b-a732-aff89da1fa07/metadata.json @@ -2,51 +2,17 @@ "id": "b37836bf-d1a2-4d6b-a732-aff89da1fa07", "title": "predator-a3b-ngl-matrix-1 \u2014 qwen3 on predator", "date": "2026-05-04", + "started_at": "2026-05-04T22:50:47Z", + "git_sha": "f06800254dfd046e154c94f663fb7f9f66c79ae3", "hardware": "predator", + "hardware_label": "Predator \u00b7 gaming laptop \u00b7 GTX 1060 6 GB \u00b7 28 GB RAM", "engine": "llamacpp", "harness": "predator-a3b-ngl-matrix-1", "model_family": "qwen3", "model_sizes": [ "qwen3:30b-a3b-iq2m" ], - "task_kind": null, - "headline": "42 calls across 3 cell(s); ~5.7 tok/s mean; p50 71.5s", - "summary_md_path": null, - "raw_jsonl_path": "runs/b37836bf-d1a2-4d6b-a732-aff89da1fa07.jsonl", - "log_path": "runs/b37836bf-d1a2-4d6b-a732-aff89da1fa07.log", - "synthesis_doc": "A3B_NGL_RETUNE_2026-05-05.md", - "cells": [ - { - "cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ctx32k-ngl6", - "n_calls": 14, - "n_errors": 0, - "duration_ms_mean": 85436.42, - "duration_ms_p50": 92483.0, - "tokens_per_sec_mean": 4.95, - "tokens_per_sec_max": 5.15 - }, - { - "cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ctx32k-ngl12", - "n_calls": 14, - "n_errors": 0, - "duration_ms_mean": 75013.58, - "duration_ms_p50": 81347.0, - "tokens_per_sec_mean": 5.59, - "tokens_per_sec_max": 6.63 - }, - { - "cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ctx32k-ngl24", - "n_calls": 14, - "n_errors": 0, - "duration_ms_mean": 62607.33, - "duration_ms_p50": 67049.0, - "tokens_per_sec_mean": 6.58, - "tokens_per_sec_max": 6.89 - } - ], - "n_calls_total": 42, - "n_errors_total": 0, - "started_at": "2026-05-04T22:50:47Z", + "task_kind": "param-sweep", "tags": [ "a3b", "iq2", @@ -55,6 +21,129 @@ "predator", "qwen3" ], + "headline": "42 calls across 3 cell(s); ~5.7 tok/s mean; p50 71.5s", + "janie_blurb_md": null, + "janie_blurb_status": "pending", + "caveat": null, + "caveat_severity": null, + "methodology_ref": "A3B_AND_CPU_OVERNIGHT_2026-05-05", + "methodology_deviations_md": null, + "results_table": [ + { + "label": "qwen3:30b-a3b-iq2m-think500-ctx32k-ngl6", + "cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ctx32k-ngl6", + "n_calls": 14, + "n_errors": 0, + "duration_ms_mean": 85436.42, + "duration_ms_p50": 92483.0, + "duration_ms_p95": 154201.8, + "tokens_per_sec_mean": 4.95, + "tokens_per_sec_p50": 4.98, + "tokens_per_sec_p95": 5.15, + "tokens_per_sec_max": 5.15, + "memory_mb": null, + "watts_avg": null, + "notes": null + }, + { + "label": "qwen3:30b-a3b-iq2m-think500-ctx32k-ngl12", + "cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ctx32k-ngl12", + "n_calls": 14, + "n_errors": 0, + "duration_ms_mean": 75013.58, + "duration_ms_p50": 81347.0, + "duration_ms_p95": 146474.55, + "tokens_per_sec_mean": 5.59, + "tokens_per_sec_p50": 5.36, + "tokens_per_sec_p95": 6.61, + "tokens_per_sec_max": 6.63, + "memory_mb": null, + "watts_avg": null, + "notes": null + }, + { + "label": "qwen3:30b-a3b-iq2m-think500-ctx32k-ngl24", + "cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ctx32k-ngl24", + "n_calls": 14, + "n_errors": 0, + "duration_ms_mean": 62607.33, + "duration_ms_p50": 67049.0, + "duration_ms_p95": 123041.65, + "tokens_per_sec_mean": 6.58, + "tokens_per_sec_p50": 6.68, + "tokens_per_sec_p95": 6.88, + "tokens_per_sec_max": 6.89, + "memory_mb": null, + "watts_avg": null, + "notes": null + } + ], + "cells": [ + { + "cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ctx32k-ngl6", + "n_calls": 14, + "n_errors": 0, + "duration_ms_mean": 85436.42, + "duration_ms_p50": 92483.0, + "duration_ms_p95": 154201.8, + "tokens_per_sec_mean": 4.95, + "tokens_per_sec_p50": 4.98, + "tokens_per_sec_p95": 5.15, + "tokens_per_sec_max": 5.15 + }, + { + "cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ctx32k-ngl12", + "n_calls": 14, + "n_errors": 0, + "duration_ms_mean": 75013.58, + "duration_ms_p50": 81347.0, + "duration_ms_p95": 146474.55, + "tokens_per_sec_mean": 5.59, + "tokens_per_sec_p50": 5.36, + "tokens_per_sec_p95": 6.61, + "tokens_per_sec_max": 6.63 + }, + { + "cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ctx32k-ngl24", + "n_calls": 14, + "n_errors": 0, + "duration_ms_mean": 62607.33, + "duration_ms_p50": 67049.0, + "duration_ms_p95": 123041.65, + "tokens_per_sec_mean": 6.58, + "tokens_per_sec_p50": 6.68, + "tokens_per_sec_p95": 6.88, + "tokens_per_sec_max": 6.89 + } + ], + "n_calls_total": 42, + "n_errors_total": 0, + "chart_spec": { + "kind": "line", + "x": "param_value", + "y": "tokens_per_sec_mean", + "series": [ + "qwen3:30b-a3b-iq2m-think500-ctx32k-ngl6", + "qwen3:30b-a3b-iq2m-think500-ctx32k-ngl12", + "qwen3:30b-a3b-iq2m-think500-ctx32k-ngl24" + ], + "data_url": "/data/b37836bf-d1a2-4d6b-a732-aff89da1fa07/run.jsonl" + }, + "site_grade": "standard", + "site_grade_reason": null, + "raw_data_urls": { + "jsonl": "/data/b37836bf-d1a2-4d6b-a732-aff89da1fa07/run.jsonl", + "log": "/data/b37836bf-d1a2-4d6b-a732-aff89da1fa07/run.log", + "md": null, + "metadata": "/data/b37836bf-d1a2-4d6b-a732-aff89da1fa07/metadata.json", + "gitea_dir": "https://git.weeyuga.com/slobodanmargetic988/weeyuga-benchmarks-public/src/branch/main/runs/b37836bf-d1a2-4d6b-a732-aff89da1fa07/" + }, + "synthesis_doc": "A3B_AND_CPU_OVERNIGHT_2026-05-05.md", + "synthesis_docs_all": [ + "A3B_AND_CPU_OVERNIGHT_2026-05-05.md", + "A3B_NGL_RETUNE_2026-05-05.md" + ], + "related_ids": [], "status": "complete", "visibility": "draft", "cells_full": [ @@ -64,7 +153,10 @@ "n_errors": 0, "duration_ms_mean": 85436.42, "duration_ms_p50": 92483.0, + "duration_ms_p95": 154201.8, "tokens_per_sec_mean": 4.95, + "tokens_per_sec_p50": 4.98, + "tokens_per_sec_p95": 5.15, "tokens_per_sec_max": 5.15, "calls": [ { @@ -293,7 +385,10 @@ "n_errors": 0, "duration_ms_mean": 75013.58, "duration_ms_p50": 81347.0, + "duration_ms_p95": 146474.55, "tokens_per_sec_mean": 5.59, + "tokens_per_sec_p50": 5.36, + "tokens_per_sec_p95": 6.61, "tokens_per_sec_max": 6.63, "calls": [ { @@ -522,7 +617,10 @@ "n_errors": 0, "duration_ms_mean": 62607.33, "duration_ms_p50": 67049.0, + "duration_ms_p95": 123041.65, "tokens_per_sec_mean": 6.58, + "tokens_per_sec_p50": 6.68, + "tokens_per_sec_p95": 6.88, "tokens_per_sec_max": 6.89, "calls": [ { @@ -766,5 +864,5 @@ "sampling": "T=0.6 top_p=0.95 top_k=20 (Qwen3-A3B README defaults)", "common_args": "--ctx-size 32768 --temp 0.6 --top-p 0.95 --top-k 20 --min-p 0.0 --reasoning-budget 500 --reasoning-format deepseek --jinja --no-warmup --host 0.0.0.0 --port 11436" }, - "packaged_at": "2026-05-05T17:43:34Z" + "packaged_at": "2026-05-06T08:04:27Z" } diff --git a/runs/b54c61c0-b6b8-44ac-bc3e-a515df0f0499/metadata.json b/runs/b54c61c0-b6b8-44ac-bc3e-a515df0f0499/metadata.json index 452d871..d1fa250 100644 --- a/runs/b54c61c0-b6b8-44ac-bc3e-a515df0f0499/metadata.json +++ b/runs/b54c61c0-b6b8-44ac-bc3e-a515df0f0499/metadata.json @@ -2,19 +2,46 @@ "id": "b54c61c0-b6b8-44ac-bc3e-a515df0f0499", "title": "vps50-gemma-e4b-1 \u2014 gemma on vps50", "date": "2026-05-04", + "started_at": "2026-05-04T23:53:09Z", + "git_sha": "f06800254dfd046e154c94f663fb7f9f66c79ae3", "hardware": "vps50", + "hardware_label": "VPS50 \u00b7 cloud \u00b7 16 vCPU AMD EPYC \u00b7 62 GB RAM \u00b7 no GPU", "engine": "llamacpp", "harness": "vps50-gemma-e4b-1", "model_family": "gemma", "model_sizes": [ "gemma-4-e4b" ], - "task_kind": null, + "task_kind": "chat", + "tags": [ + "gemma", + "vps50" + ], "headline": "no calls landed", - "summary_md_path": null, - "raw_jsonl_path": "runs/b54c61c0-b6b8-44ac-bc3e-a515df0f0499.jsonl", - "log_path": "runs/b54c61c0-b6b8-44ac-bc3e-a515df0f0499.log", - "synthesis_doc": null, + "janie_blurb_md": null, + "janie_blurb_status": "pending", + "caveat": null, + "caveat_severity": null, + "methodology_ref": "HARNESS#vps50-gemma-e4b-1", + "methodology_deviations_md": null, + "results_table": [ + { + "label": "gemma-4-e4b", + "cell_id": "vps50:llamacpp:gemma-4-e4b", + "n_calls": 0, + "n_errors": 0, + "duration_ms_mean": null, + "duration_ms_p50": null, + "duration_ms_p95": null, + "tokens_per_sec_mean": null, + "tokens_per_sec_p50": null, + "tokens_per_sec_p95": null, + "tokens_per_sec_max": null, + "memory_mb": null, + "watts_avg": null, + "notes": null + } + ], "cells": [ { "cell_id": "vps50:llamacpp:gemma-4-e4b", @@ -22,17 +49,36 @@ "n_errors": 0, "duration_ms_mean": null, "duration_ms_p50": null, + "duration_ms_p95": null, "tokens_per_sec_mean": null, + "tokens_per_sec_p50": null, + "tokens_per_sec_p95": null, "tokens_per_sec_max": null } ], "n_calls_total": 0, "n_errors_total": 0, - "started_at": "2026-05-04T23:53:09Z", - "tags": [ - "gemma", - "vps50" - ], + "chart_spec": { + "kind": "bar", + "x": "cell", + "y": "tokens_per_sec_mean", + "series": [ + "gemma-4-e4b" + ], + "data_url": "/data/b54c61c0-b6b8-44ac-bc3e-a515df0f0499/run.jsonl" + }, + "site_grade": "archive-only", + "site_grade_reason": "status=meta-only", + "raw_data_urls": { + "jsonl": "/data/b54c61c0-b6b8-44ac-bc3e-a515df0f0499/run.jsonl", + "log": "/data/b54c61c0-b6b8-44ac-bc3e-a515df0f0499/run.log", + "md": null, + "metadata": "/data/b54c61c0-b6b8-44ac-bc3e-a515df0f0499/metadata.json", + "gitea_dir": "https://git.weeyuga.com/slobodanmargetic988/weeyuga-benchmarks-public/src/branch/main/runs/b54c61c0-b6b8-44ac-bc3e-a515df0f0499/" + }, + "synthesis_doc": null, + "synthesis_docs_all": [], + "related_ids": [], "status": "meta-only", "visibility": "draft", "cells_full": [ @@ -42,7 +88,10 @@ "n_errors": 0, "duration_ms_mean": null, "duration_ms_p50": null, + "duration_ms_p95": null, "tokens_per_sec_mean": null, + "tokens_per_sec_p50": null, + "tokens_per_sec_p95": null, "tokens_per_sec_max": null, "calls": [] } @@ -63,5 +112,5 @@ "ngl": 0, "ctx": 32768 }, - "packaged_at": "2026-05-05T17:43:34Z" + "packaged_at": "2026-05-06T08:04:27Z" } diff --git a/runs/cef4b219-6b65-4740-8c3b-5b68aa10cf3f/metadata.json b/runs/cef4b219-6b65-4740-8c3b-5b68aa10cf3f/metadata.json index 218c803..985c62a 100644 --- a/runs/cef4b219-6b65-4740-8c3b-5b68aa10cf3f/metadata.json +++ b/runs/cef4b219-6b65-4740-8c3b-5b68aa10cf3f/metadata.json @@ -1,73 +1,21 @@ { "id": "cef4b219-6b65-4740-8c3b-5b68aa10cf3f", - "title": "vps50-cpu-matrix-1 \u2014 gemma/phi-4/qwen2.5/qwen3 on vps50", + "title": "vps50-cpu-matrix-1 \u2014 gemma/phi/qwen2.5/qwen3 on vps50", "date": "2026-05-05", + "started_at": "2026-05-05T07:00:10Z", + "git_sha": "f06800254dfd046e154c94f663fb7f9f66c79ae3", "hardware": "vps50", + "hardware_label": "VPS50 \u00b7 cloud \u00b7 16 vCPU AMD EPYC \u00b7 62 GB RAM \u00b7 no GPU", "engine": "llamacpp", "harness": "vps50-cpu-matrix-1", - "model_family": "gemma/phi-4/qwen2.5/qwen3", + "model_family": "gemma/phi/qwen2.5/qwen3", "model_sizes": [ "phi-4", "gemma-4-26b-a4b", "qwen3-30b-a3b", "qwen2.5-72b" ], - "task_kind": null, - "headline": "6 calls across 5 cell(s); ~0.0 tok/s mean; p50 527.8s", - "summary_md_path": null, - "raw_jsonl_path": "runs/cef4b219-6b65-4740-8c3b-5b68aa10cf3f.jsonl", - "log_path": "runs/cef4b219-6b65-4740-8c3b-5b68aa10cf3f.log", - "synthesis_doc": "A3B_AND_CPU_OVERNIGHT_2026-05-05.md", - "cells": [ - { - "cell_id": "vps50:llamacpp:phi-4", - "n_calls": 0, - "n_errors": 0, - "duration_ms_mean": null, - "duration_ms_p50": null, - "tokens_per_sec_mean": null, - "tokens_per_sec_max": null - }, - { - "cell_id": "vps50:llamacpp:gemma-4-26b-a4b", - "n_calls": 0, - "n_errors": 0, - "duration_ms_mean": null, - "duration_ms_p50": null, - "tokens_per_sec_mean": null, - "tokens_per_sec_max": null - }, - { - "cell_id": "vps50:llamacpp:qwen3-30b-a3b", - "n_calls": 0, - "n_errors": 0, - "duration_ms_mean": null, - "duration_ms_p50": null, - "tokens_per_sec_mean": null, - "tokens_per_sec_max": null - }, - { - "cell_id": "vps50:llamacpp:qwen2.5-72b", - "n_calls": 0, - "n_errors": 0, - "duration_ms_mean": null, - "duration_ms_p50": null, - "tokens_per_sec_mean": null, - "tokens_per_sec_max": null - }, - { - "cell_id": "vps50:llamacpp:phi-4-q4km-cpu-ctx32k", - "n_calls": 6, - "n_errors": 0, - "duration_ms_mean": 548939.5, - "duration_ms_p50": 527802.5, - "tokens_per_sec_mean": 0.02, - "tokens_per_sec_max": 0.02 - } - ], - "n_calls_total": 6, - "n_errors_total": 0, - "started_at": "2026-05-05T07:00:10Z", + "task_kind": "param-sweep", "tags": [ "a3b", "cpu", @@ -77,6 +25,186 @@ "qwen3", "vps50" ], + "headline": "6 calls across 5 cell(s); ~0.0 tok/s mean; p50 527.8s", + "janie_blurb_md": null, + "janie_blurb_status": "pending", + "caveat": null, + "caveat_severity": null, + "methodology_ref": "A3B_AND_CPU_OVERNIGHT_2026-05-05", + "methodology_deviations_md": null, + "results_table": [ + { + "label": "phi-4", + "cell_id": "vps50:llamacpp:phi-4", + "n_calls": 0, + "n_errors": 0, + "duration_ms_mean": null, + "duration_ms_p50": null, + "duration_ms_p95": null, + "tokens_per_sec_mean": null, + "tokens_per_sec_p50": null, + "tokens_per_sec_p95": null, + "tokens_per_sec_max": null, + "memory_mb": null, + "watts_avg": null, + "notes": null + }, + { + "label": "gemma-4-26b-a4b", + "cell_id": "vps50:llamacpp:gemma-4-26b-a4b", + "n_calls": 0, + "n_errors": 0, + "duration_ms_mean": null, + "duration_ms_p50": null, + "duration_ms_p95": null, + "tokens_per_sec_mean": null, + "tokens_per_sec_p50": null, + "tokens_per_sec_p95": null, + "tokens_per_sec_max": null, + "memory_mb": null, + "watts_avg": null, + "notes": null + }, + { + "label": "qwen3-30b-a3b", + "cell_id": "vps50:llamacpp:qwen3-30b-a3b", + "n_calls": 0, + "n_errors": 0, + "duration_ms_mean": null, + "duration_ms_p50": null, + "duration_ms_p95": null, + "tokens_per_sec_mean": null, + "tokens_per_sec_p50": null, + "tokens_per_sec_p95": null, + "tokens_per_sec_max": null, + "memory_mb": null, + "watts_avg": null, + "notes": null + }, + { + "label": "qwen2.5-72b", + "cell_id": "vps50:llamacpp:qwen2.5-72b", + "n_calls": 0, + "n_errors": 0, + "duration_ms_mean": null, + "duration_ms_p50": null, + "duration_ms_p95": null, + "tokens_per_sec_mean": null, + "tokens_per_sec_p50": null, + "tokens_per_sec_p95": null, + "tokens_per_sec_max": null, + "memory_mb": null, + "watts_avg": null, + "notes": null + }, + { + "label": "phi-4-q4km-cpu-ctx32k", + "cell_id": "vps50:llamacpp:phi-4-q4km-cpu-ctx32k", + "n_calls": 6, + "n_errors": 0, + "duration_ms_mean": 548939.5, + "duration_ms_p50": 527802.5, + "duration_ms_p95": 614865.75, + "tokens_per_sec_mean": 0.02, + "tokens_per_sec_p50": 0.02, + "tokens_per_sec_p95": 0.02, + "tokens_per_sec_max": 0.02, + "memory_mb": null, + "watts_avg": null, + "notes": null + } + ], + "cells": [ + { + "cell_id": "vps50:llamacpp:phi-4", + "n_calls": 0, + "n_errors": 0, + "duration_ms_mean": null, + "duration_ms_p50": null, + "duration_ms_p95": null, + "tokens_per_sec_mean": null, + "tokens_per_sec_p50": null, + "tokens_per_sec_p95": null, + "tokens_per_sec_max": null + }, + { + "cell_id": "vps50:llamacpp:gemma-4-26b-a4b", + "n_calls": 0, + "n_errors": 0, + "duration_ms_mean": null, + "duration_ms_p50": null, + "duration_ms_p95": null, + "tokens_per_sec_mean": null, + "tokens_per_sec_p50": null, + "tokens_per_sec_p95": null, + "tokens_per_sec_max": null + }, + { + "cell_id": "vps50:llamacpp:qwen3-30b-a3b", + "n_calls": 0, + "n_errors": 0, + "duration_ms_mean": null, + "duration_ms_p50": null, + "duration_ms_p95": null, + "tokens_per_sec_mean": null, + "tokens_per_sec_p50": null, + "tokens_per_sec_p95": null, + "tokens_per_sec_max": null + }, + { + "cell_id": "vps50:llamacpp:qwen2.5-72b", + "n_calls": 0, + "n_errors": 0, + "duration_ms_mean": null, + "duration_ms_p50": null, + "duration_ms_p95": null, + "tokens_per_sec_mean": null, + "tokens_per_sec_p50": null, + "tokens_per_sec_p95": null, + "tokens_per_sec_max": null + }, + { + "cell_id": "vps50:llamacpp:phi-4-q4km-cpu-ctx32k", + "n_calls": 6, + "n_errors": 0, + "duration_ms_mean": 548939.5, + "duration_ms_p50": 527802.5, + "duration_ms_p95": 614865.75, + "tokens_per_sec_mean": 0.02, + "tokens_per_sec_p50": 0.02, + "tokens_per_sec_p95": 0.02, + "tokens_per_sec_max": 0.02 + } + ], + "n_calls_total": 6, + "n_errors_total": 0, + "chart_spec": { + "kind": "line", + "x": "param_value", + "y": "tokens_per_sec_mean", + "series": [ + "phi-4", + "gemma-4-26b-a4b", + "qwen3-30b-a3b", + "qwen2.5-72b", + "phi-4-q4km-cpu-ctx32k" + ], + "data_url": "/data/cef4b219-6b65-4740-8c3b-5b68aa10cf3f/run.jsonl" + }, + "site_grade": "standard", + "site_grade_reason": null, + "raw_data_urls": { + "jsonl": "/data/cef4b219-6b65-4740-8c3b-5b68aa10cf3f/run.jsonl", + "log": "/data/cef4b219-6b65-4740-8c3b-5b68aa10cf3f/run.log", + "md": null, + "metadata": "/data/cef4b219-6b65-4740-8c3b-5b68aa10cf3f/metadata.json", + "gitea_dir": "https://git.weeyuga.com/slobodanmargetic988/weeyuga-benchmarks-public/src/branch/main/runs/cef4b219-6b65-4740-8c3b-5b68aa10cf3f/" + }, + "synthesis_doc": "A3B_AND_CPU_OVERNIGHT_2026-05-05.md", + "synthesis_docs_all": [ + "A3B_AND_CPU_OVERNIGHT_2026-05-05.md" + ], + "related_ids": [], "status": "complete", "visibility": "draft", "cells_full": [ @@ -86,7 +214,10 @@ "n_errors": 0, "duration_ms_mean": null, "duration_ms_p50": null, + "duration_ms_p95": null, "tokens_per_sec_mean": null, + "tokens_per_sec_p50": null, + "tokens_per_sec_p95": null, "tokens_per_sec_max": null, "calls": [] }, @@ -96,7 +227,10 @@ "n_errors": 0, "duration_ms_mean": null, "duration_ms_p50": null, + "duration_ms_p95": null, "tokens_per_sec_mean": null, + "tokens_per_sec_p50": null, + "tokens_per_sec_p95": null, "tokens_per_sec_max": null, "calls": [] }, @@ -106,7 +240,10 @@ "n_errors": 0, "duration_ms_mean": null, "duration_ms_p50": null, + "duration_ms_p95": null, "tokens_per_sec_mean": null, + "tokens_per_sec_p50": null, + "tokens_per_sec_p95": null, "tokens_per_sec_max": null, "calls": [] }, @@ -116,7 +253,10 @@ "n_errors": 0, "duration_ms_mean": null, "duration_ms_p50": null, + "duration_ms_p95": null, "tokens_per_sec_mean": null, + "tokens_per_sec_p50": null, + "tokens_per_sec_p95": null, "tokens_per_sec_max": null, "calls": [] }, @@ -126,7 +266,10 @@ "n_errors": 0, "duration_ms_mean": 548939.5, "duration_ms_p50": 527802.5, + "duration_ms_p95": 614865.75, "tokens_per_sec_mean": 0.02, + "tokens_per_sec_p50": 0.02, + "tokens_per_sec_p95": 0.02, "tokens_per_sec_max": 0.02, "calls": [ { @@ -231,5 +374,5 @@ "qwen2.5-72b" ] }, - "packaged_at": "2026-05-05T17:43:34Z" + "packaged_at": "2026-05-06T08:04:27Z" } diff --git a/runs/d1cff064-9141-48e0-b1d2-729b134e0543/metadata.json b/runs/d1cff064-9141-48e0-b1d2-729b134e0543/metadata.json index ba22cfc..131fc2e 100644 --- a/runs/d1cff064-9141-48e0-b1d2-729b134e0543/metadata.json +++ b/runs/d1cff064-9141-48e0-b1d2-729b134e0543/metadata.json @@ -1,20 +1,49 @@ { "id": "d1cff064-9141-48e0-b1d2-729b134e0543", - "title": "Predator Qwen rerun \u2014 d1cff064-9141-48e0-b1d2-729b134e0543", + "title": "Predator Qwen rerun", "date": "2026-05-04", + "started_at": "2026-05-04T22:02:28Z", + "git_sha": "f06800254dfd046e154c94f663fb7f9f66c79ae3", "hardware": "pavilion", + "hardware_label": "Pavilion \u00b7 HP laptop \u00b7 GTX 1050 4 GB \u00b7 16 GB RAM \u00b7 i7-9750H", "engine": "llamacpp", "harness": "pavilion-a3b-1", "model_family": "qwen3", "model_sizes": [ "qwen3:30b-a3b-iq2xxs" ], - "task_kind": null, - "headline": "`Answer chars` is `len(message.content)`. Both averaged over the 3 warm runs.", - "summary_md_path": "runs/d1cff064-9141-48e0-b1d2-729b134e0543.md", - "raw_jsonl_path": "runs/d1cff064-9141-48e0-b1d2-729b134e0543.jsonl", - "log_path": "runs/d1cff064-9141-48e0-b1d2-729b134e0543.log", - "synthesis_doc": null, + "task_kind": "chat", + "tags": [ + "a3b", + "iq2", + "pavilion", + "qwen3" + ], + "headline": "no calls landed", + "janie_blurb_md": null, + "janie_blurb_status": "pending", + "caveat": null, + "caveat_severity": null, + "methodology_ref": "HARNESS#pavilion-a3b-1", + "methodology_deviations_md": null, + "results_table": [ + { + "label": "qwen3:30b-a3b-iq2xxs-think500", + "cell_id": "pavilion:llamacpp:qwen3:30b-a3b-iq2xxs-think500", + "n_calls": 0, + "n_errors": 0, + "duration_ms_mean": null, + "duration_ms_p50": null, + "duration_ms_p95": null, + "tokens_per_sec_mean": null, + "tokens_per_sec_p50": null, + "tokens_per_sec_p95": null, + "tokens_per_sec_max": null, + "memory_mb": null, + "watts_avg": null, + "notes": null + } + ], "cells": [ { "cell_id": "pavilion:llamacpp:qwen3:30b-a3b-iq2xxs-think500", @@ -22,19 +51,36 @@ "n_errors": 0, "duration_ms_mean": null, "duration_ms_p50": null, + "duration_ms_p95": null, "tokens_per_sec_mean": null, + "tokens_per_sec_p50": null, + "tokens_per_sec_p95": null, "tokens_per_sec_max": null } ], "n_calls_total": 0, "n_errors_total": 0, - "started_at": "2026-05-04T22:02:28Z", - "tags": [ - "a3b", - "iq2", - "pavilion", - "qwen3" - ], + "chart_spec": { + "kind": "bar", + "x": "cell", + "y": "tokens_per_sec_mean", + "series": [ + "qwen3:30b-a3b-iq2xxs-think500" + ], + "data_url": "/data/d1cff064-9141-48e0-b1d2-729b134e0543/run.jsonl" + }, + "site_grade": "archive-only", + "site_grade_reason": "status=meta-only", + "raw_data_urls": { + "jsonl": "/data/d1cff064-9141-48e0-b1d2-729b134e0543/run.jsonl", + "log": "/data/d1cff064-9141-48e0-b1d2-729b134e0543/run.log", + "md": "/data/d1cff064-9141-48e0-b1d2-729b134e0543/run.md", + "metadata": "/data/d1cff064-9141-48e0-b1d2-729b134e0543/metadata.json", + "gitea_dir": "https://git.weeyuga.com/slobodanmargetic988/weeyuga-benchmarks-public/src/branch/main/runs/d1cff064-9141-48e0-b1d2-729b134e0543/" + }, + "synthesis_doc": null, + "synthesis_docs_all": [], + "related_ids": [], "status": "meta-only", "visibility": "draft", "cells_full": [ @@ -44,7 +90,10 @@ "n_errors": 0, "duration_ms_mean": null, "duration_ms_p50": null, + "duration_ms_p95": null, "tokens_per_sec_mean": null, + "tokens_per_sec_p50": null, + "tokens_per_sec_p95": null, "tokens_per_sec_max": null, "calls": [] } @@ -70,5 +119,5 @@ } } }, - "packaged_at": "2026-05-05T17:43:34Z" + "packaged_at": "2026-05-06T08:04:27Z" } diff --git a/runs/e08a7051-a856-4053-b34b-561d7ce5e8cf/metadata.json b/runs/e08a7051-a856-4053-b34b-561d7ce5e8cf/metadata.json index 21974b5..9348080 100644 --- a/runs/e08a7051-a856-4053-b34b-561d7ce5e8cf/metadata.json +++ b/runs/e08a7051-a856-4053-b34b-561d7ce5e8cf/metadata.json @@ -2,78 +2,17 @@ "id": "e08a7051-a856-4053-b34b-561d7ce5e8cf", "title": "predator-a3b-ngl-ctx-2d-1 \u2014 qwen3 on predator", "date": "2026-05-05", + "started_at": "2026-05-05T07:43:00Z", + "git_sha": "f06800254dfd046e154c94f663fb7f9f66c79ae3", "hardware": "predator", + "hardware_label": "Predator \u00b7 gaming laptop \u00b7 GTX 1060 6 GB \u00b7 28 GB RAM", "engine": "llamacpp", "harness": "predator-a3b-ngl-ctx-2d-1", "model_family": "qwen3", "model_sizes": [ "qwen3:30b-a3b-iq2m" ], - "task_kind": null, - "headline": "90 calls across 6 cell(s); ~5.9 tok/s mean; p50 73.4s", - "summary_md_path": null, - "raw_jsonl_path": "runs/e08a7051-a856-4053-b34b-561d7ce5e8cf.jsonl", - "log_path": "runs/e08a7051-a856-4053-b34b-561d7ce5e8cf.log", - "synthesis_doc": "A3B_AND_CPU_OVERNIGHT_2026-05-05.md", - "cells": [ - { - "cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl12-ctx64k", - "n_calls": 15, - "n_errors": 0, - "duration_ms_mean": 85573.42, - "duration_ms_p50": 78940.0, - "tokens_per_sec_mean": 5.48, - "tokens_per_sec_max": 6.8 - }, - { - "cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl12-ctx96k", - "n_calls": 15, - "n_errors": 0, - "duration_ms_mean": 71805.83, - "duration_ms_p50": 58541.0, - "tokens_per_sec_mean": 5.79, - "tokens_per_sec_max": 6.7 - }, - { - "cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl12-ctx131k", - "n_calls": 15, - "n_errors": 0, - "duration_ms_mean": 96406.17, - "duration_ms_p50": 105842.0, - "tokens_per_sec_mean": 5.01, - "tokens_per_sec_max": 6.29 - }, - { - "cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl24-ctx48k", - "n_calls": 15, - "n_errors": 0, - "duration_ms_mean": 65539.5, - "duration_ms_p50": 64606.5, - "tokens_per_sec_mean": 6.6, - "tokens_per_sec_max": 6.91 - }, - { - "cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl24-ctx64k", - "n_calls": 15, - "n_errors": 0, - "duration_ms_mean": 71267.58, - "duration_ms_p50": 80265.5, - "tokens_per_sec_mean": 6.41, - "tokens_per_sec_max": 6.9 - }, - { - "cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl24-ctx96k", - "n_calls": 15, - "n_errors": 0, - "duration_ms_mean": 69299.5, - "duration_ms_p50": 68355.0, - "tokens_per_sec_mean": 6.08, - "tokens_per_sec_max": 6.69 - } - ], - "n_calls_total": 90, - "n_errors_total": 0, - "started_at": "2026-05-05T07:43:00Z", + "task_kind": "param-sweep", "tags": [ "a3b", "ctx", @@ -82,6 +21,215 @@ "predator", "qwen3" ], + "headline": "90 calls across 6 cell(s); ~5.9 tok/s mean; p50 73.4s", + "janie_blurb_md": null, + "janie_blurb_status": "pending", + "caveat": null, + "caveat_severity": null, + "methodology_ref": "A3B_AND_CPU_OVERNIGHT_2026-05-05", + "methodology_deviations_md": null, + "results_table": [ + { + "label": "qwen3:30b-a3b-iq2m-think500-ngl12-ctx64k", + "cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl12-ctx64k", + "n_calls": 15, + "n_errors": 0, + "duration_ms_mean": 85573.42, + "duration_ms_p50": 78940.0, + "duration_ms_p95": 159864.3, + "tokens_per_sec_mean": 5.48, + "tokens_per_sec_p50": 5.31, + "tokens_per_sec_p95": 6.79, + "tokens_per_sec_max": 6.8, + "memory_mb": null, + "watts_avg": null, + "notes": null + }, + { + "label": "qwen3:30b-a3b-iq2m-think500-ngl12-ctx96k", + "cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl12-ctx96k", + "n_calls": 15, + "n_errors": 0, + "duration_ms_mean": 71805.83, + "duration_ms_p50": 58541.0, + "duration_ms_p95": 140580.5, + "tokens_per_sec_mean": 5.79, + "tokens_per_sec_p50": 5.75, + "tokens_per_sec_p95": 6.47, + "tokens_per_sec_max": 6.7, + "memory_mb": null, + "watts_avg": null, + "notes": null + }, + { + "label": "qwen3:30b-a3b-iq2m-think500-ngl12-ctx131k", + "cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl12-ctx131k", + "n_calls": 15, + "n_errors": 0, + "duration_ms_mean": 96406.17, + "duration_ms_p50": 105842.0, + "duration_ms_p95": 154492.35, + "tokens_per_sec_mean": 5.01, + "tokens_per_sec_p50": 5.13, + "tokens_per_sec_p95": 5.97, + "tokens_per_sec_max": 6.29, + "memory_mb": null, + "watts_avg": null, + "notes": null + }, + { + "label": "qwen3:30b-a3b-iq2m-think500-ngl24-ctx48k", + "cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl24-ctx48k", + "n_calls": 15, + "n_errors": 0, + "duration_ms_mean": 65539.5, + "duration_ms_p50": 64606.5, + "duration_ms_p95": 118069.75, + "tokens_per_sec_mean": 6.6, + "tokens_per_sec_p50": 6.59, + "tokens_per_sec_p95": 6.89, + "tokens_per_sec_max": 6.91, + "memory_mb": null, + "watts_avg": null, + "notes": null + }, + { + "label": "qwen3:30b-a3b-iq2m-think500-ngl24-ctx64k", + "cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl24-ctx64k", + "n_calls": 15, + "n_errors": 0, + "duration_ms_mean": 71267.58, + "duration_ms_p50": 80265.5, + "duration_ms_p95": 128231.3, + "tokens_per_sec_mean": 6.41, + "tokens_per_sec_p50": 6.39, + "tokens_per_sec_p95": 6.88, + "tokens_per_sec_max": 6.9, + "memory_mb": null, + "watts_avg": null, + "notes": null + }, + { + "label": "qwen3:30b-a3b-iq2m-think500-ngl24-ctx96k", + "cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl24-ctx96k", + "n_calls": 15, + "n_errors": 0, + "duration_ms_mean": 69299.5, + "duration_ms_p50": 68355.0, + "duration_ms_p95": 124990.95, + "tokens_per_sec_mean": 6.08, + "tokens_per_sec_p50": 6.44, + "tokens_per_sec_p95": 6.68, + "tokens_per_sec_max": 6.69, + "memory_mb": null, + "watts_avg": null, + "notes": null + } + ], + "cells": [ + { + "cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl12-ctx64k", + "n_calls": 15, + "n_errors": 0, + "duration_ms_mean": 85573.42, + "duration_ms_p50": 78940.0, + "duration_ms_p95": 159864.3, + "tokens_per_sec_mean": 5.48, + "tokens_per_sec_p50": 5.31, + "tokens_per_sec_p95": 6.79, + "tokens_per_sec_max": 6.8 + }, + { + "cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl12-ctx96k", + "n_calls": 15, + "n_errors": 0, + "duration_ms_mean": 71805.83, + "duration_ms_p50": 58541.0, + "duration_ms_p95": 140580.5, + "tokens_per_sec_mean": 5.79, + "tokens_per_sec_p50": 5.75, + "tokens_per_sec_p95": 6.47, + "tokens_per_sec_max": 6.7 + }, + { + "cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl12-ctx131k", + "n_calls": 15, + "n_errors": 0, + "duration_ms_mean": 96406.17, + "duration_ms_p50": 105842.0, + "duration_ms_p95": 154492.35, + "tokens_per_sec_mean": 5.01, + "tokens_per_sec_p50": 5.13, + "tokens_per_sec_p95": 5.97, + "tokens_per_sec_max": 6.29 + }, + { + "cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl24-ctx48k", + "n_calls": 15, + "n_errors": 0, + "duration_ms_mean": 65539.5, + "duration_ms_p50": 64606.5, + "duration_ms_p95": 118069.75, + "tokens_per_sec_mean": 6.6, + "tokens_per_sec_p50": 6.59, + "tokens_per_sec_p95": 6.89, + "tokens_per_sec_max": 6.91 + }, + { + "cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl24-ctx64k", + "n_calls": 15, + "n_errors": 0, + "duration_ms_mean": 71267.58, + "duration_ms_p50": 80265.5, + "duration_ms_p95": 128231.3, + "tokens_per_sec_mean": 6.41, + "tokens_per_sec_p50": 6.39, + "tokens_per_sec_p95": 6.88, + "tokens_per_sec_max": 6.9 + }, + { + "cell_id": "predator:llamacpp:qwen3:30b-a3b-iq2m-think500-ngl24-ctx96k", + "n_calls": 15, + "n_errors": 0, + "duration_ms_mean": 69299.5, + "duration_ms_p50": 68355.0, + "duration_ms_p95": 124990.95, + "tokens_per_sec_mean": 6.08, + "tokens_per_sec_p50": 6.44, + "tokens_per_sec_p95": 6.68, + "tokens_per_sec_max": 6.69 + } + ], + "n_calls_total": 90, + "n_errors_total": 0, + "chart_spec": { + "kind": "line", + "x": "param_value", + "y": "tokens_per_sec_mean", + "series": [ + "qwen3:30b-a3b-iq2m-think500-ngl12-ctx64k", + "qwen3:30b-a3b-iq2m-think500-ngl12-ctx96k", + "qwen3:30b-a3b-iq2m-think500-ngl12-ctx131k", + "qwen3:30b-a3b-iq2m-think500-ngl24-ctx48k", + "qwen3:30b-a3b-iq2m-think500-ngl24-ctx64k", + "qwen3:30b-a3b-iq2m-think500-ngl24-ctx96k" + ], + "data_url": "/data/e08a7051-a856-4053-b34b-561d7ce5e8cf/run.jsonl" + }, + "site_grade": "standard", + "site_grade_reason": null, + "raw_data_urls": { + "jsonl": "/data/e08a7051-a856-4053-b34b-561d7ce5e8cf/run.jsonl", + "log": "/data/e08a7051-a856-4053-b34b-561d7ce5e8cf/run.log", + "md": null, + "metadata": "/data/e08a7051-a856-4053-b34b-561d7ce5e8cf/metadata.json", + "gitea_dir": "https://git.weeyuga.com/slobodanmargetic988/weeyuga-benchmarks-public/src/branch/main/runs/e08a7051-a856-4053-b34b-561d7ce5e8cf/" + }, + "synthesis_doc": "A3B_AND_CPU_OVERNIGHT_2026-05-05.md", + "synthesis_docs_all": [ + "A3B_AND_CPU_OVERNIGHT_2026-05-05.md" + ], + "related_ids": [], "status": "complete", "visibility": "draft", "cells_full": [ @@ -91,7 +239,10 @@ "n_errors": 0, "duration_ms_mean": 85573.42, "duration_ms_p50": 78940.0, + "duration_ms_p95": 159864.3, "tokens_per_sec_mean": 5.48, + "tokens_per_sec_p50": 5.31, + "tokens_per_sec_p95": 6.79, "tokens_per_sec_max": 6.8, "calls": [ { @@ -342,7 +493,10 @@ "n_errors": 0, "duration_ms_mean": 71805.83, "duration_ms_p50": 58541.0, + "duration_ms_p95": 140580.5, "tokens_per_sec_mean": 5.79, + "tokens_per_sec_p50": 5.75, + "tokens_per_sec_p95": 6.47, "tokens_per_sec_max": 6.7, "calls": [ { @@ -593,7 +747,10 @@ "n_errors": 0, "duration_ms_mean": 96406.17, "duration_ms_p50": 105842.0, + "duration_ms_p95": 154492.35, "tokens_per_sec_mean": 5.01, + "tokens_per_sec_p50": 5.13, + "tokens_per_sec_p95": 5.97, "tokens_per_sec_max": 6.29, "calls": [ { @@ -844,7 +1001,10 @@ "n_errors": 0, "duration_ms_mean": 65539.5, "duration_ms_p50": 64606.5, + "duration_ms_p95": 118069.75, "tokens_per_sec_mean": 6.6, + "tokens_per_sec_p50": 6.59, + "tokens_per_sec_p95": 6.89, "tokens_per_sec_max": 6.91, "calls": [ { @@ -1095,7 +1255,10 @@ "n_errors": 0, "duration_ms_mean": 71267.58, "duration_ms_p50": 80265.5, + "duration_ms_p95": 128231.3, "tokens_per_sec_mean": 6.41, + "tokens_per_sec_p50": 6.39, + "tokens_per_sec_p95": 6.88, "tokens_per_sec_max": 6.9, "calls": [ { @@ -1346,7 +1509,10 @@ "n_errors": 0, "duration_ms_mean": 69299.5, "duration_ms_p50": 68355.0, + "duration_ms_p95": 124990.95, "tokens_per_sec_mean": 6.08, + "tokens_per_sec_p50": 6.44, + "tokens_per_sec_p95": 6.68, "tokens_per_sec_max": 6.69, "calls": [ { @@ -1641,5 +1807,5 @@ }, "methodology_notes": "Sloba 2026-05-05: 'max tokens should be bigger than reasoning budget'. Prior bench had hello max_tokens=256 < reasoning_budget=500 \u2192 content_chars=0. Bumped to 512/1024/2048 so reasoning + answer both fit." }, - "packaged_at": "2026-05-05T17:43:34Z" + "packaged_at": "2026-05-06T08:04:27Z" } diff --git a/runs/fba9d9b1-cc5d-40bc-9e21-beafbb72c65d/metadata.json b/runs/fba9d9b1-cc5d-40bc-9e21-beafbb72c65d/metadata.json index 0b9eb0b..a39bf2c 100644 --- a/runs/fba9d9b1-cc5d-40bc-9e21-beafbb72c65d/metadata.json +++ b/runs/fba9d9b1-cc5d-40bc-9e21-beafbb72c65d/metadata.json @@ -1,8 +1,11 @@ { "id": "fba9d9b1-cc5d-40bc-9e21-beafbb72c65d", - "title": "Predator Qwen rerun \u2014 fba9d9b1-cc5d-40bc-9e21-beafbb72c65d", + "title": "Predator Qwen rerun", "date": "2026-05-04", + "started_at": "2026-05-04T17:49:07Z", + "git_sha": "f06800254dfd046e154c94f663fb7f9f66c79ae3", "hardware": "predator", + "hardware_label": "Predator \u00b7 gaming laptop \u00b7 GTX 1060 6 GB \u00b7 28 GB RAM", "engine": "llamacpp", "harness": "predator-qwen-rerun-1", "model_family": "qwen3/qwen3.5", @@ -10,12 +13,70 @@ "qwen3.5:9b", "qwen3:14b" ], - "task_kind": null, - "headline": "`Answer chars` is `len(message.content)`. Both averaged over the 3 warm runs.", - "summary_md_path": "runs/fba9d9b1-cc5d-40bc-9e21-beafbb72c65d.md", - "raw_jsonl_path": "runs/fba9d9b1-cc5d-40bc-9e21-beafbb72c65d.jsonl", - "log_path": "runs/fba9d9b1-cc5d-40bc-9e21-beafbb72c65d.log", - "synthesis_doc": "A3B_AND_CPU_OVERNIGHT_2026-05-05.md", + "task_kind": "chat", + "tags": [ + "predator", + "qwen3", + "qwen3.5", + "rerun" + ], + "headline": "36 calls across 3 cell(s); ~10.9 tok/s mean; p50 24.1s", + "janie_blurb_md": null, + "janie_blurb_status": "pending", + "caveat": null, + "caveat_severity": null, + "methodology_ref": "A3B_AND_CPU_OVERNIGHT_2026-05-05", + "methodology_deviations_md": null, + "results_table": [ + { + "label": "qwen3.5:9b-q4km-think500", + "cell_id": "predator:llamacpp:qwen3.5:9b-q4km-think500", + "n_calls": 14, + "n_errors": 0, + "duration_ms_mean": 30474.33, + "duration_ms_p50": 33717.5, + "duration_ms_p95": 52611.55, + "tokens_per_sec_mean": 14.25, + "tokens_per_sec_p50": 14.91, + "tokens_per_sec_p95": 15.3, + "tokens_per_sec_max": 15.32, + "memory_mb": null, + "watts_avg": null, + "notes": null + }, + { + "label": "qwen3.5:9b-q4km-nothink", + "cell_id": "predator:llamacpp:qwen3.5:9b-q4km-nothink", + "n_calls": 14, + "n_errors": 0, + "duration_ms_mean": 10425.75, + "duration_ms_p50": 7200.0, + "duration_ms_p95": 23998.85, + "tokens_per_sec_mean": 12.55, + "tokens_per_sec_p50": 14.37, + "tokens_per_sec_p95": 14.93, + "tokens_per_sec_max": 14.95, + "memory_mb": null, + "watts_avg": null, + "notes": null + }, + { + "label": "qwen3:14b-q4km", + "cell_id": "predator:llamacpp:qwen3:14b-q4km", + "n_calls": 8, + "n_errors": 0, + "duration_ms_mean": 130876.17, + "duration_ms_p50": 78946.0, + "duration_ms_p95": 277160.0, + "tokens_per_sec_mean": 1.06, + "tokens_per_sec_p50": 1.09, + "tokens_per_sec_p95": 1.28, + "tokens_per_sec_max": 1.33, + "memory_mb": null, + "watts_avg": null, + "notes": null + } + ], "cells": [ { "cell_id": "predator:llamacpp:qwen3.5:9b-q4km-think500", @@ -23,7 +84,10 @@ "n_errors": 0, "duration_ms_mean": 30474.33, "duration_ms_p50": 33717.5, + "duration_ms_p95": 52611.55, "tokens_per_sec_mean": 14.25, + "tokens_per_sec_p50": 14.91, + "tokens_per_sec_p95": 15.3, "tokens_per_sec_max": 15.32 }, { @@ -32,7 +96,10 @@ "n_errors": 0, "duration_ms_mean": 10425.75, "duration_ms_p50": 7200.0, + "duration_ms_p95": 23998.85, "tokens_per_sec_mean": 12.55, + "tokens_per_sec_p50": 14.37, + "tokens_per_sec_p95": 14.93, "tokens_per_sec_max": 14.95 }, { @@ -41,19 +108,41 @@ "n_errors": 0, "duration_ms_mean": 130876.17, "duration_ms_p50": 78946.0, + "duration_ms_p95": 277160.0, "tokens_per_sec_mean": 1.06, + "tokens_per_sec_p50": 1.09, + "tokens_per_sec_p95": 1.28, "tokens_per_sec_max": 1.33 } ], "n_calls_total": 36, "n_errors_total": 0, - "started_at": "2026-05-04T17:49:07Z", - "tags": [ - "predator", - "qwen3", - "qwen3.5", - "rerun" + "chart_spec": { + "kind": "bar", + "x": "cell", + "y": "tokens_per_sec_mean", + "series": [ + "qwen3.5:9b-q4km-think500", + "qwen3.5:9b-q4km-nothink", + "qwen3:14b-q4km" + ], + "data_url": "/data/fba9d9b1-cc5d-40bc-9e21-beafbb72c65d/run.jsonl" + }, + "site_grade": "flagship", + "site_grade_reason": null, + "raw_data_urls": { + "jsonl": "/data/fba9d9b1-cc5d-40bc-9e21-beafbb72c65d/run.jsonl", + "log": "/data/fba9d9b1-cc5d-40bc-9e21-beafbb72c65d/run.log", + "md": "/data/fba9d9b1-cc5d-40bc-9e21-beafbb72c65d/run.md", + "metadata": "/data/fba9d9b1-cc5d-40bc-9e21-beafbb72c65d/metadata.json", + "gitea_dir": "https://git.weeyuga.com/slobodanmargetic988/weeyuga-benchmarks-public/src/branch/main/runs/fba9d9b1-cc5d-40bc-9e21-beafbb72c65d/" + }, + "synthesis_doc": "A3B_AND_CPU_OVERNIGHT_2026-05-05.md", + "synthesis_docs_all": [ + "A3B_AND_CPU_OVERNIGHT_2026-05-05.md", + "A3B_CROSS_MACHINE_2026-05-05.md" ], + "related_ids": [], "status": "complete", "visibility": "draft", "cells_full": [ @@ -63,7 +152,10 @@ "n_errors": 0, "duration_ms_mean": 30474.33, "duration_ms_p50": 33717.5, + "duration_ms_p95": 52611.55, "tokens_per_sec_mean": 14.25, + "tokens_per_sec_p50": 14.91, + "tokens_per_sec_p95": 15.3, "tokens_per_sec_max": 15.32, "calls": [ { @@ -278,7 +370,10 @@ "n_errors": 0, "duration_ms_mean": 10425.75, "duration_ms_p50": 7200.0, + "duration_ms_p95": 23998.85, "tokens_per_sec_mean": 12.55, + "tokens_per_sec_p50": 14.37, + "tokens_per_sec_p95": 14.93, "tokens_per_sec_max": 14.95, "calls": [ { @@ -493,7 +588,10 @@ "n_errors": 0, "duration_ms_mean": 130876.17, "duration_ms_p50": 78946.0, + "duration_ms_p95": 277160.0, "tokens_per_sec_mean": 1.06, + "tokens_per_sec_p50": 1.09, + "tokens_per_sec_p95": 1.28, "tokens_per_sec_max": 1.33, "calls": [ { @@ -637,5 +735,5 @@ } } }, - "packaged_at": "2026-05-05T17:43:34Z" + "packaged_at": "2026-05-06T08:04:27Z" } diff --git a/runs/fe4fa289-67a9-48e8-8a80-d5d88c875208/metadata.json b/runs/fe4fa289-67a9-48e8-8a80-d5d88c875208/metadata.json index 97b1b04..aaf0871 100644 --- a/runs/fe4fa289-67a9-48e8-8a80-d5d88c875208/metadata.json +++ b/runs/fe4fa289-67a9-48e8-8a80-d5d88c875208/metadata.json @@ -2,19 +2,47 @@ "id": "fe4fa289-67a9-48e8-8a80-d5d88c875208", "title": "predator-a3b-1 \u2014 qwen3 on predator", "date": "2026-05-04", + "started_at": "2026-05-04T18:50:19Z", + "git_sha": "f06800254dfd046e154c94f663fb7f9f66c79ae3", "hardware": "predator", + "hardware_label": "Predator \u00b7 gaming laptop \u00b7 GTX 1060 6 GB \u00b7 28 GB RAM", "engine": "llamacpp", "harness": "predator-a3b-1", "model_family": "qwen3", "model_sizes": [ "qwen3:30b-a3b" ], - "task_kind": null, + "task_kind": "chat", + "tags": [ + "a3b", + "predator", + "qwen3" + ], "headline": "2 calls across 1 cell(s)", - "summary_md_path": null, - "raw_jsonl_path": "runs/fe4fa289-67a9-48e8-8a80-d5d88c875208.jsonl", - "log_path": "runs/fe4fa289-67a9-48e8-8a80-d5d88c875208.log", - "synthesis_doc": null, + "janie_blurb_md": null, + "janie_blurb_status": "pending", + "caveat": null, + "caveat_severity": null, + "methodology_ref": "HARNESS#predator-a3b-1", + "methodology_deviations_md": null, + "results_table": [ + { + "label": "qwen3:30b-a3b-q4km-think500", + "cell_id": "predator:llamacpp:qwen3:30b-a3b-q4km-think500", + "n_calls": 2, + "n_errors": 0, + "duration_ms_mean": null, + "duration_ms_p50": null, + "duration_ms_p95": null, + "tokens_per_sec_mean": null, + "tokens_per_sec_p50": null, + "tokens_per_sec_p95": null, + "tokens_per_sec_max": null, + "memory_mb": null, + "watts_avg": null, + "notes": null + } + ], "cells": [ { "cell_id": "predator:llamacpp:qwen3:30b-a3b-q4km-think500", @@ -22,18 +50,36 @@ "n_errors": 0, "duration_ms_mean": null, "duration_ms_p50": null, + "duration_ms_p95": null, "tokens_per_sec_mean": null, + "tokens_per_sec_p50": null, + "tokens_per_sec_p95": null, "tokens_per_sec_max": null } ], "n_calls_total": 2, "n_errors_total": 0, - "started_at": "2026-05-04T18:50:19Z", - "tags": [ - "a3b", - "predator", - "qwen3" - ], + "chart_spec": { + "kind": "bar", + "x": "cell", + "y": "tokens_per_sec_mean", + "series": [ + "qwen3:30b-a3b-q4km-think500" + ], + "data_url": "/data/fe4fa289-67a9-48e8-8a80-d5d88c875208/run.jsonl" + }, + "site_grade": "archive-only", + "site_grade_reason": "no documented method (no synthesis doc and no run.md)", + "raw_data_urls": { + "jsonl": "/data/fe4fa289-67a9-48e8-8a80-d5d88c875208/run.jsonl", + "log": "/data/fe4fa289-67a9-48e8-8a80-d5d88c875208/run.log", + "md": null, + "metadata": "/data/fe4fa289-67a9-48e8-8a80-d5d88c875208/metadata.json", + "gitea_dir": "https://git.weeyuga.com/slobodanmargetic988/weeyuga-benchmarks-public/src/branch/main/runs/fe4fa289-67a9-48e8-8a80-d5d88c875208/" + }, + "synthesis_doc": null, + "synthesis_docs_all": [], + "related_ids": [], "status": "complete", "visibility": "draft", "cells_full": [ @@ -43,7 +89,10 @@ "n_errors": 0, "duration_ms_mean": null, "duration_ms_p50": null, + "duration_ms_p95": null, "tokens_per_sec_mean": null, + "tokens_per_sec_p50": null, + "tokens_per_sec_p95": null, "tokens_per_sec_max": null, "calls": [ { @@ -80,5 +129,5 @@ } } }, - "packaged_at": "2026-05-05T17:43:34Z" + "packaged_at": "2026-05-06T08:04:27Z" } diff --git a/runs/ff1131ca-d021-4e06-8616-4b4cdb54e97e/metadata.json b/runs/ff1131ca-d021-4e06-8616-4b4cdb54e97e/metadata.json index 4a27306..be9ffb1 100644 --- a/runs/ff1131ca-d021-4e06-8616-4b4cdb54e97e/metadata.json +++ b/runs/ff1131ca-d021-4e06-8616-4b4cdb54e97e/metadata.json @@ -2,13 +2,16 @@ "id": "ff1131ca-d021-4e06-8616-4b4cdb54e97e", "title": "pavilion-weeyuga-v1 \u2014 qwen2.5/qwen2.5-coder/qwen3/qwen3.5 on pavilion", "date": "2026-04-28", + "started_at": "2026-04-28T21:03:46Z", + "git_sha": "9934892784228748586130d8abbacd82a919aee2", "hardware": "pavilion", + "hardware_label": "Pavilion \u00b7 HP laptop \u00b7 GTX 1050 4 GB \u00b7 16 GB RAM \u00b7 i7-9750H", "engine": "weeyuga", "harness": "pavilion-weeyuga-v1", "model_family": "qwen2.5/qwen2.5-coder/qwen3/qwen3.5", "model_sizes": [ "qwen3.5:4b", - "qwen3.5:35b-a3b-uncensored-iq1m", + "qwen3.5:35b-a3b-uncensored", "qwen3.5:35b-a3b-iq2s", "qwen3.5:9b-q6k", "qwen3.5:9b", @@ -23,161 +26,7 @@ "qwen2.5-coder:1.5b", "qwen2.5-coder:0.5b" ], - "task_kind": null, - "headline": "17 calls across 16 cell(s); 3 errors", - "summary_md_path": null, - "raw_jsonl_path": "runs/ff1131ca-d021-4e06-8616-4b4cdb54e97e.jsonl", - "log_path": null, - "synthesis_doc": "PAVILION_WEEYUGA_v1.md", - "cells": [ - { - "cell_id": "pavilion:weeyuga:qwen3.5:4b", - "n_calls": 2, - "n_errors": 0, - "duration_ms_mean": null, - "duration_ms_p50": null, - "tokens_per_sec_mean": null, - "tokens_per_sec_max": null - }, - { - "cell_id": "pavilion:weeyuga:qwen3.5:35b-a3b-uncensored-iq1m", - "n_calls": 1, - "n_errors": 0, - "duration_ms_mean": null, - "duration_ms_p50": null, - "tokens_per_sec_mean": null, - "tokens_per_sec_max": null - }, - { - "cell_id": "pavilion:weeyuga:qwen3.5:35b-a3b-iq2s", - "n_calls": 1, - "n_errors": 0, - "duration_ms_mean": null, - "duration_ms_p50": null, - "tokens_per_sec_mean": null, - "tokens_per_sec_max": null - }, - { - "cell_id": "pavilion:weeyuga:qwen3.5:9b-q6k", - "n_calls": 1, - "n_errors": 0, - "duration_ms_mean": null, - "duration_ms_p50": null, - "tokens_per_sec_mean": null, - "tokens_per_sec_max": null - }, - { - "cell_id": "pavilion:weeyuga:qwen3.5:9b-q4km", - "n_calls": 1, - "n_errors": 0, - "duration_ms_mean": null, - "duration_ms_p50": null, - "tokens_per_sec_mean": null, - "tokens_per_sec_max": null - }, - { - "cell_id": "pavilion:weeyuga:qwen3.5:2b", - "n_calls": 1, - "n_errors": 0, - "duration_ms_mean": null, - "duration_ms_p50": null, - "tokens_per_sec_mean": null, - "tokens_per_sec_max": null - }, - { - "cell_id": "pavilion:weeyuga:qwen3.5:0.8b", - "n_calls": 1, - "n_errors": 0, - "duration_ms_mean": null, - "duration_ms_p50": null, - "tokens_per_sec_mean": null, - "tokens_per_sec_max": null - }, - { - "cell_id": "pavilion:weeyuga:qwen3.5:9b", - "n_calls": 1, - "n_errors": 0, - "duration_ms_mean": null, - "duration_ms_p50": null, - "tokens_per_sec_mean": null, - "tokens_per_sec_max": null - }, - { - "cell_id": "pavilion:weeyuga:qwen2.5-coder:14b", - "n_calls": 1, - "n_errors": 1, - "duration_ms_mean": null, - "duration_ms_p50": null, - "tokens_per_sec_mean": null, - "tokens_per_sec_max": null - }, - { - "cell_id": "pavilion:weeyuga:qwen2.5-coder:3b", - "n_calls": 1, - "n_errors": 0, - "duration_ms_mean": null, - "duration_ms_p50": null, - "tokens_per_sec_mean": null, - "tokens_per_sec_max": null - }, - { - "cell_id": "pavilion:weeyuga:qwen3:14b", - "n_calls": 1, - "n_errors": 1, - "duration_ms_mean": null, - "duration_ms_p50": null, - "tokens_per_sec_mean": null, - "tokens_per_sec_max": null - }, - { - "cell_id": "pavilion:weeyuga:qwen3:8b", - "n_calls": 1, - "n_errors": 1, - "duration_ms_mean": null, - "duration_ms_p50": null, - "tokens_per_sec_mean": null, - "tokens_per_sec_max": null - }, - { - "cell_id": "pavilion:weeyuga:qwen3:4b", - "n_calls": 1, - "n_errors": 0, - "duration_ms_mean": null, - "duration_ms_p50": null, - "tokens_per_sec_mean": null, - "tokens_per_sec_max": null - }, - { - "cell_id": "pavilion:weeyuga:qwen2.5:3b", - "n_calls": 1, - "n_errors": 0, - "duration_ms_mean": null, - "duration_ms_p50": null, - "tokens_per_sec_mean": null, - "tokens_per_sec_max": null - }, - { - "cell_id": "pavilion:weeyuga:qwen2.5-coder:1.5b", - "n_calls": 1, - "n_errors": 0, - "duration_ms_mean": null, - "duration_ms_p50": null, - "tokens_per_sec_mean": null, - "tokens_per_sec_max": null - }, - { - "cell_id": "pavilion:weeyuga:qwen2.5-coder:0.5b", - "n_calls": 1, - "n_errors": 0, - "duration_ms_mean": null, - "duration_ms_p50": null, - "tokens_per_sec_mean": null, - "tokens_per_sec_max": null - } - ], - "n_calls_total": 17, - "n_errors_total": 3, - "started_at": "2026-04-28T21:03:46Z", + "task_kind": "chat", "tags": [ "a3b", "iq2", @@ -187,6 +36,506 @@ "qwen3.5", "weeyuga" ], + "headline": "17 calls across 16 cell(s); 3 errors", + "janie_blurb_md": null, + "janie_blurb_status": "pending", + "caveat": null, + "caveat_severity": null, + "methodology_ref": "PAVILION_WEEYUGA_v1", + "methodology_deviations_md": null, + "results_table": [ + { + "label": "qwen3.5:4b", + "cell_id": "pavilion:weeyuga:qwen3.5:4b", + "n_calls": 2, + "n_errors": 0, + "duration_ms_mean": null, + "duration_ms_p50": null, + "duration_ms_p95": null, + "tokens_per_sec_mean": null, + "tokens_per_sec_p50": null, + "tokens_per_sec_p95": null, + "tokens_per_sec_max": null, + "memory_mb": null, + "watts_avg": null, + "notes": null + }, + { + "label": "qwen3.5:35b-a3b-uncensored-iq1m", + "cell_id": "pavilion:weeyuga:qwen3.5:35b-a3b-uncensored-iq1m", + "n_calls": 1, + "n_errors": 0, + "duration_ms_mean": null, + "duration_ms_p50": null, + "duration_ms_p95": null, + "tokens_per_sec_mean": null, + "tokens_per_sec_p50": null, + "tokens_per_sec_p95": null, + "tokens_per_sec_max": null, + "memory_mb": null, + "watts_avg": null, + "notes": null + }, + { + "label": "qwen3.5:35b-a3b-iq2s", + "cell_id": "pavilion:weeyuga:qwen3.5:35b-a3b-iq2s", + "n_calls": 1, + "n_errors": 0, + "duration_ms_mean": null, + "duration_ms_p50": null, + "duration_ms_p95": null, + "tokens_per_sec_mean": null, + "tokens_per_sec_p50": null, + "tokens_per_sec_p95": null, + "tokens_per_sec_max": null, + "memory_mb": null, + "watts_avg": null, + "notes": null + }, + { + "label": "qwen3.5:9b-q6k", + "cell_id": "pavilion:weeyuga:qwen3.5:9b-q6k", + "n_calls": 1, + "n_errors": 0, + "duration_ms_mean": null, + "duration_ms_p50": null, + "duration_ms_p95": null, + "tokens_per_sec_mean": null, + "tokens_per_sec_p50": null, + "tokens_per_sec_p95": null, + "tokens_per_sec_max": null, + "memory_mb": null, + "watts_avg": null, + "notes": null + }, + { + "label": "qwen3.5:9b-q4km", + "cell_id": "pavilion:weeyuga:qwen3.5:9b-q4km", + "n_calls": 1, + "n_errors": 0, + "duration_ms_mean": null, + "duration_ms_p50": null, + "duration_ms_p95": null, + "tokens_per_sec_mean": null, + "tokens_per_sec_p50": null, + "tokens_per_sec_p95": null, + "tokens_per_sec_max": null, + "memory_mb": null, + "watts_avg": null, + "notes": null + }, + { + "label": "qwen3.5:2b", + "cell_id": "pavilion:weeyuga:qwen3.5:2b", + "n_calls": 1, + "n_errors": 0, + "duration_ms_mean": null, + "duration_ms_p50": null, + "duration_ms_p95": null, + "tokens_per_sec_mean": null, + "tokens_per_sec_p50": null, + "tokens_per_sec_p95": null, + "tokens_per_sec_max": null, + "memory_mb": null, + "watts_avg": null, + "notes": null + }, + { + "label": "qwen3.5:0.8b", + "cell_id": "pavilion:weeyuga:qwen3.5:0.8b", + "n_calls": 1, + "n_errors": 0, + "duration_ms_mean": null, + "duration_ms_p50": null, + "duration_ms_p95": null, + "tokens_per_sec_mean": null, + "tokens_per_sec_p50": null, + "tokens_per_sec_p95": null, + "tokens_per_sec_max": null, + "memory_mb": null, + "watts_avg": null, + "notes": null + }, + { + "label": "qwen3.5:9b", + "cell_id": "pavilion:weeyuga:qwen3.5:9b", + "n_calls": 1, + "n_errors": 0, + "duration_ms_mean": null, + "duration_ms_p50": null, + "duration_ms_p95": null, + "tokens_per_sec_mean": null, + "tokens_per_sec_p50": null, + "tokens_per_sec_p95": null, + "tokens_per_sec_max": null, + "memory_mb": null, + "watts_avg": null, + "notes": null + }, + { + "label": "qwen2.5-coder:14b", + "cell_id": "pavilion:weeyuga:qwen2.5-coder:14b", + "n_calls": 1, + "n_errors": 1, + "duration_ms_mean": null, + "duration_ms_p50": null, + "duration_ms_p95": null, + "tokens_per_sec_mean": null, + "tokens_per_sec_p50": null, + "tokens_per_sec_p95": null, + "tokens_per_sec_max": null, + "memory_mb": null, + "watts_avg": null, + "notes": null + }, + { + "label": "qwen2.5-coder:3b", + "cell_id": "pavilion:weeyuga:qwen2.5-coder:3b", + "n_calls": 1, + "n_errors": 0, + "duration_ms_mean": null, + "duration_ms_p50": null, + "duration_ms_p95": null, + "tokens_per_sec_mean": null, + "tokens_per_sec_p50": null, + "tokens_per_sec_p95": null, + "tokens_per_sec_max": null, + "memory_mb": null, + "watts_avg": null, + "notes": null + }, + { + "label": "qwen3:14b", + "cell_id": "pavilion:weeyuga:qwen3:14b", + "n_calls": 1, + "n_errors": 1, + "duration_ms_mean": null, + "duration_ms_p50": null, + "duration_ms_p95": null, + "tokens_per_sec_mean": null, + "tokens_per_sec_p50": null, + "tokens_per_sec_p95": null, + "tokens_per_sec_max": null, + "memory_mb": null, + "watts_avg": null, + "notes": null + }, + { + "label": "qwen3:8b", + "cell_id": "pavilion:weeyuga:qwen3:8b", + "n_calls": 1, + "n_errors": 1, + "duration_ms_mean": null, + "duration_ms_p50": null, + "duration_ms_p95": null, + "tokens_per_sec_mean": null, + "tokens_per_sec_p50": null, + "tokens_per_sec_p95": null, + "tokens_per_sec_max": null, + "memory_mb": null, + "watts_avg": null, + "notes": null + }, + { + "label": "qwen3:4b", + "cell_id": "pavilion:weeyuga:qwen3:4b", + "n_calls": 1, + "n_errors": 0, + "duration_ms_mean": null, + "duration_ms_p50": null, + "duration_ms_p95": null, + "tokens_per_sec_mean": null, + "tokens_per_sec_p50": null, + "tokens_per_sec_p95": null, + "tokens_per_sec_max": null, + "memory_mb": null, + "watts_avg": null, + "notes": null + }, + { + "label": "qwen2.5:3b", + "cell_id": "pavilion:weeyuga:qwen2.5:3b", + "n_calls": 1, + "n_errors": 0, + "duration_ms_mean": null, + "duration_ms_p50": null, + "duration_ms_p95": null, + "tokens_per_sec_mean": null, + "tokens_per_sec_p50": null, + "tokens_per_sec_p95": null, + "tokens_per_sec_max": null, + "memory_mb": null, + "watts_avg": null, + "notes": null + }, + { + "label": "qwen2.5-coder:1.5b", + "cell_id": "pavilion:weeyuga:qwen2.5-coder:1.5b", + "n_calls": 1, + "n_errors": 0, + "duration_ms_mean": null, + "duration_ms_p50": null, + "duration_ms_p95": null, + "tokens_per_sec_mean": null, + "tokens_per_sec_p50": null, + "tokens_per_sec_p95": null, + "tokens_per_sec_max": null, + "memory_mb": null, + "watts_avg": null, + "notes": null + }, + { + "label": "qwen2.5-coder:0.5b", + "cell_id": "pavilion:weeyuga:qwen2.5-coder:0.5b", + "n_calls": 1, + "n_errors": 0, + "duration_ms_mean": null, + "duration_ms_p50": null, + "duration_ms_p95": null, + "tokens_per_sec_mean": null, + "tokens_per_sec_p50": null, + "tokens_per_sec_p95": null, + "tokens_per_sec_max": null, + "memory_mb": null, + "watts_avg": null, + "notes": null + } + ], + "cells": [ + { + "cell_id": "pavilion:weeyuga:qwen3.5:4b", + "n_calls": 2, + "n_errors": 0, + "duration_ms_mean": null, + "duration_ms_p50": null, + "duration_ms_p95": null, + "tokens_per_sec_mean": null, + "tokens_per_sec_p50": null, + "tokens_per_sec_p95": null, + "tokens_per_sec_max": null + }, + { + "cell_id": "pavilion:weeyuga:qwen3.5:35b-a3b-uncensored-iq1m", + "n_calls": 1, + "n_errors": 0, + "duration_ms_mean": null, + "duration_ms_p50": null, + "duration_ms_p95": null, + "tokens_per_sec_mean": null, + "tokens_per_sec_p50": null, + "tokens_per_sec_p95": null, + "tokens_per_sec_max": null + }, + { + "cell_id": "pavilion:weeyuga:qwen3.5:35b-a3b-iq2s", + "n_calls": 1, + "n_errors": 0, + "duration_ms_mean": null, + "duration_ms_p50": null, + "duration_ms_p95": null, + "tokens_per_sec_mean": null, + "tokens_per_sec_p50": null, + "tokens_per_sec_p95": null, + "tokens_per_sec_max": null + }, + { + "cell_id": "pavilion:weeyuga:qwen3.5:9b-q6k", + "n_calls": 1, + "n_errors": 0, + "duration_ms_mean": null, + "duration_ms_p50": null, + "duration_ms_p95": null, + "tokens_per_sec_mean": null, + "tokens_per_sec_p50": null, + "tokens_per_sec_p95": null, + "tokens_per_sec_max": null + }, + { + "cell_id": "pavilion:weeyuga:qwen3.5:9b-q4km", + "n_calls": 1, + "n_errors": 0, + "duration_ms_mean": null, + "duration_ms_p50": null, + "duration_ms_p95": null, + "tokens_per_sec_mean": null, + "tokens_per_sec_p50": null, + "tokens_per_sec_p95": null, + "tokens_per_sec_max": null + }, + { + "cell_id": "pavilion:weeyuga:qwen3.5:2b", + "n_calls": 1, + "n_errors": 0, + "duration_ms_mean": null, + "duration_ms_p50": null, + "duration_ms_p95": null, + "tokens_per_sec_mean": null, + "tokens_per_sec_p50": null, + "tokens_per_sec_p95": null, + "tokens_per_sec_max": null + }, + { + "cell_id": "pavilion:weeyuga:qwen3.5:0.8b", + "n_calls": 1, + "n_errors": 0, + "duration_ms_mean": null, + "duration_ms_p50": null, + "duration_ms_p95": null, + "tokens_per_sec_mean": null, + "tokens_per_sec_p50": null, + "tokens_per_sec_p95": null, + "tokens_per_sec_max": null + }, + { + "cell_id": "pavilion:weeyuga:qwen3.5:9b", + "n_calls": 1, + "n_errors": 0, + "duration_ms_mean": null, + "duration_ms_p50": null, + "duration_ms_p95": null, + "tokens_per_sec_mean": null, + "tokens_per_sec_p50": null, + "tokens_per_sec_p95": null, + "tokens_per_sec_max": null + }, + { + "cell_id": "pavilion:weeyuga:qwen2.5-coder:14b", + "n_calls": 1, + "n_errors": 1, + "duration_ms_mean": null, + "duration_ms_p50": null, + "duration_ms_p95": null, + "tokens_per_sec_mean": null, + "tokens_per_sec_p50": null, + "tokens_per_sec_p95": null, + "tokens_per_sec_max": null + }, + { + "cell_id": "pavilion:weeyuga:qwen2.5-coder:3b", + "n_calls": 1, + "n_errors": 0, + "duration_ms_mean": null, + "duration_ms_p50": null, + "duration_ms_p95": null, + "tokens_per_sec_mean": null, + "tokens_per_sec_p50": null, + "tokens_per_sec_p95": null, + "tokens_per_sec_max": null + }, + { + "cell_id": "pavilion:weeyuga:qwen3:14b", + "n_calls": 1, + "n_errors": 1, + "duration_ms_mean": null, + "duration_ms_p50": null, + "duration_ms_p95": null, + "tokens_per_sec_mean": null, + "tokens_per_sec_p50": null, + "tokens_per_sec_p95": null, + "tokens_per_sec_max": null + }, + { + "cell_id": "pavilion:weeyuga:qwen3:8b", + "n_calls": 1, + "n_errors": 1, + "duration_ms_mean": null, + "duration_ms_p50": null, + "duration_ms_p95": null, + "tokens_per_sec_mean": null, + "tokens_per_sec_p50": null, + "tokens_per_sec_p95": null, + "tokens_per_sec_max": null + }, + { + "cell_id": "pavilion:weeyuga:qwen3:4b", + "n_calls": 1, + "n_errors": 0, + "duration_ms_mean": null, + "duration_ms_p50": null, + "duration_ms_p95": null, + "tokens_per_sec_mean": null, + "tokens_per_sec_p50": null, + "tokens_per_sec_p95": null, + "tokens_per_sec_max": null + }, + { + "cell_id": "pavilion:weeyuga:qwen2.5:3b", + "n_calls": 1, + "n_errors": 0, + "duration_ms_mean": null, + "duration_ms_p50": null, + "duration_ms_p95": null, + "tokens_per_sec_mean": null, + "tokens_per_sec_p50": null, + "tokens_per_sec_p95": null, + "tokens_per_sec_max": null + }, + { + "cell_id": "pavilion:weeyuga:qwen2.5-coder:1.5b", + "n_calls": 1, + "n_errors": 0, + "duration_ms_mean": null, + "duration_ms_p50": null, + "duration_ms_p95": null, + "tokens_per_sec_mean": null, + "tokens_per_sec_p50": null, + "tokens_per_sec_p95": null, + "tokens_per_sec_max": null + }, + { + "cell_id": "pavilion:weeyuga:qwen2.5-coder:0.5b", + "n_calls": 1, + "n_errors": 0, + "duration_ms_mean": null, + "duration_ms_p50": null, + "duration_ms_p95": null, + "tokens_per_sec_mean": null, + "tokens_per_sec_p50": null, + "tokens_per_sec_p95": null, + "tokens_per_sec_max": null + } + ], + "n_calls_total": 17, + "n_errors_total": 3, + "chart_spec": { + "kind": "bar", + "x": "cell", + "y": "tokens_per_sec_mean", + "series": [ + "qwen3.5:4b", + "qwen3.5:35b-a3b-uncensored-iq1m", + "qwen3.5:35b-a3b-iq2s", + "qwen3.5:9b-q6k", + "qwen3.5:9b-q4km", + "qwen3.5:2b", + "qwen3.5:0.8b", + "qwen3.5:9b", + "qwen2.5-coder:14b", + "qwen2.5-coder:3b", + "qwen3:14b", + "qwen3:8b", + "qwen3:4b", + "qwen2.5:3b", + "qwen2.5-coder:1.5b", + "qwen2.5-coder:0.5b" + ], + "data_url": "/data/ff1131ca-d021-4e06-8616-4b4cdb54e97e/run.jsonl" + }, + "site_grade": "archive-only", + "site_grade_reason": "error rate 3/17", + "raw_data_urls": { + "jsonl": "/data/ff1131ca-d021-4e06-8616-4b4cdb54e97e/run.jsonl", + "log": null, + "md": null, + "metadata": "/data/ff1131ca-d021-4e06-8616-4b4cdb54e97e/metadata.json", + "gitea_dir": "https://git.weeyuga.com/slobodanmargetic988/weeyuga-benchmarks-public/src/branch/main/runs/ff1131ca-d021-4e06-8616-4b4cdb54e97e/" + }, + "synthesis_doc": "PAVILION_WEEYUGA_v1.md", + "synthesis_docs_all": [ + "PAVILION_WEEYUGA_v1.md", + "PAVILION_LLAMACPP_VS_OLLAMA_v0_INCOMPLETE.md" + ], + "related_ids": [], "status": "complete", "visibility": "draft", "cells_full": [ @@ -196,7 +545,10 @@ "n_errors": 0, "duration_ms_mean": null, "duration_ms_p50": null, + "duration_ms_p95": null, "tokens_per_sec_mean": null, + "tokens_per_sec_p50": null, + "tokens_per_sec_p95": null, "tokens_per_sec_max": null, "calls": [ { @@ -266,7 +618,10 @@ "n_errors": 0, "duration_ms_mean": null, "duration_ms_p50": null, + "duration_ms_p95": null, "tokens_per_sec_mean": null, + "tokens_per_sec_p50": null, + "tokens_per_sec_p95": null, "tokens_per_sec_max": null, "calls": [ { @@ -302,7 +657,10 @@ "n_errors": 0, "duration_ms_mean": null, "duration_ms_p50": null, + "duration_ms_p95": null, "tokens_per_sec_mean": null, + "tokens_per_sec_p50": null, + "tokens_per_sec_p95": null, "tokens_per_sec_max": null, "calls": [ { @@ -338,7 +696,10 @@ "n_errors": 0, "duration_ms_mean": null, "duration_ms_p50": null, + "duration_ms_p95": null, "tokens_per_sec_mean": null, + "tokens_per_sec_p50": null, + "tokens_per_sec_p95": null, "tokens_per_sec_max": null, "calls": [ { @@ -374,7 +735,10 @@ "n_errors": 0, "duration_ms_mean": null, "duration_ms_p50": null, + "duration_ms_p95": null, "tokens_per_sec_mean": null, + "tokens_per_sec_p50": null, + "tokens_per_sec_p95": null, "tokens_per_sec_max": null, "calls": [ { @@ -410,7 +774,10 @@ "n_errors": 0, "duration_ms_mean": null, "duration_ms_p50": null, + "duration_ms_p95": null, "tokens_per_sec_mean": null, + "tokens_per_sec_p50": null, + "tokens_per_sec_p95": null, "tokens_per_sec_max": null, "calls": [ { @@ -446,7 +813,10 @@ "n_errors": 0, "duration_ms_mean": null, "duration_ms_p50": null, + "duration_ms_p95": null, "tokens_per_sec_mean": null, + "tokens_per_sec_p50": null, + "tokens_per_sec_p95": null, "tokens_per_sec_max": null, "calls": [ { @@ -482,7 +852,10 @@ "n_errors": 0, "duration_ms_mean": null, "duration_ms_p50": null, + "duration_ms_p95": null, "tokens_per_sec_mean": null, + "tokens_per_sec_p50": null, + "tokens_per_sec_p95": null, "tokens_per_sec_max": null, "calls": [ { @@ -518,7 +891,10 @@ "n_errors": 1, "duration_ms_mean": null, "duration_ms_p50": null, + "duration_ms_p95": null, "tokens_per_sec_mean": null, + "tokens_per_sec_p50": null, + "tokens_per_sec_p95": null, "tokens_per_sec_max": null, "calls": [ { @@ -554,7 +930,10 @@ "n_errors": 0, "duration_ms_mean": null, "duration_ms_p50": null, + "duration_ms_p95": null, "tokens_per_sec_mean": null, + "tokens_per_sec_p50": null, + "tokens_per_sec_p95": null, "tokens_per_sec_max": null, "calls": [ { @@ -590,7 +969,10 @@ "n_errors": 1, "duration_ms_mean": null, "duration_ms_p50": null, + "duration_ms_p95": null, "tokens_per_sec_mean": null, + "tokens_per_sec_p50": null, + "tokens_per_sec_p95": null, "tokens_per_sec_max": null, "calls": [ { @@ -626,7 +1008,10 @@ "n_errors": 1, "duration_ms_mean": null, "duration_ms_p50": null, + "duration_ms_p95": null, "tokens_per_sec_mean": null, + "tokens_per_sec_p50": null, + "tokens_per_sec_p95": null, "tokens_per_sec_max": null, "calls": [ { @@ -662,7 +1047,10 @@ "n_errors": 0, "duration_ms_mean": null, "duration_ms_p50": null, + "duration_ms_p95": null, "tokens_per_sec_mean": null, + "tokens_per_sec_p50": null, + "tokens_per_sec_p95": null, "tokens_per_sec_max": null, "calls": [ { @@ -698,7 +1086,10 @@ "n_errors": 0, "duration_ms_mean": null, "duration_ms_p50": null, + "duration_ms_p95": null, "tokens_per_sec_mean": null, + "tokens_per_sec_p50": null, + "tokens_per_sec_p95": null, "tokens_per_sec_max": null, "calls": [ { @@ -734,7 +1125,10 @@ "n_errors": 0, "duration_ms_mean": null, "duration_ms_p50": null, + "duration_ms_p95": null, "tokens_per_sec_mean": null, + "tokens_per_sec_p50": null, + "tokens_per_sec_p95": null, "tokens_per_sec_max": null, "calls": [ { @@ -770,7 +1164,10 @@ "n_errors": 0, "duration_ms_mean": null, "duration_ms_p50": null, + "duration_ms_p95": null, "tokens_per_sec_mean": null, + "tokens_per_sec_p50": null, + "tokens_per_sec_p95": null, "tokens_per_sec_max": null, "calls": [ { @@ -846,5 +1243,5 @@ "env_inference_route": null, "env_llamacpp_url": null }, - "packaged_at": "2026-05-05T17:43:34Z" + "packaged_at": "2026-05-06T08:04:27Z" }