Files
weeyuga-benchmarks-public/runs/ad057f5b-ed3f-4a95-a38e-361be310ffd6/metadata.json

6370 lines
188 KiB
JSON

{
"id": "ad057f5b-ed3f-4a95-a38e-361be310ffd6",
"title": "pavilion-weeyuga-v3 \u2014 qwen2.5/qwen2.5-coder/qwen3/qwen3.5 on pavilion",
"date": "2026-04-29",
"started_at": "2026-04-29T09:51:46Z",
"git_sha": "371ce70c8708ce0aedbe4f3bf096f8e8f8b11d43",
"hardware": "pavilion",
"hardware_label": "Pavilion \u00b7 HP laptop \u00b7 GTX 1050 4 GB \u00b7 16 GB RAM \u00b7 i7-9750H",
"engine": "weeyuga",
"harness": "pavilion-weeyuga-v3",
"model_family": "qwen2.5/qwen2.5-coder/qwen3/qwen3.5",
"model_sizes": [
"qwen3.5:4b",
"qwen3.5:35b-a3b-uncensored",
"qwen3.5:35b-a3b-iq2s",
"qwen3.5:9b-q6k",
"qwen3.5:9b",
"qwen3.5:2b",
"qwen3.5:0.8b",
"qwen2.5-coder:14b",
"qwen2.5-coder:3b",
"qwen3:14b",
"qwen3:8b",
"qwen3:4b",
"qwen2.5:3b",
"qwen2.5-coder:1.5b",
"qwen2.5-coder:0.5b"
],
"task_kind": "chat",
"tags": [
"a3b",
"iq2",
"pavilion",
"qwen2.5",
"qwen3",
"qwen3.5",
"weeyuga"
],
"headline": "96 calls across 16 cell(s); 12 errors",
"janie_blurb_md": null,
"janie_blurb_status": "pending",
"caveat": null,
"caveat_severity": null,
"methodology_ref": "SITE_DATA_AUDIT_AND_MIGRATION_PLAN_2026-05-06",
"methodology_deviations_md": null,
"results_table": [
{
"label": "qwen3.5:4b",
"cell_id": "pavilion:weeyuga:qwen3.5:4b",
"n_calls": 6,
"n_errors": 0,
"duration_ms_mean": null,
"duration_ms_p50": null,
"duration_ms_p95": null,
"tokens_per_sec_mean": null,
"tokens_per_sec_p50": null,
"tokens_per_sec_p95": null,
"tokens_per_sec_max": null,
"memory_mb": null,
"watts_avg": null,
"notes": null
},
{
"label": "qwen3.5:35b-a3b-uncensored-iq1m",
"cell_id": "pavilion:weeyuga:qwen3.5:35b-a3b-uncensored-iq1m",
"n_calls": 6,
"n_errors": 0,
"duration_ms_mean": null,
"duration_ms_p50": null,
"duration_ms_p95": null,
"tokens_per_sec_mean": null,
"tokens_per_sec_p50": null,
"tokens_per_sec_p95": null,
"tokens_per_sec_max": null,
"memory_mb": null,
"watts_avg": null,
"notes": null
},
{
"label": "qwen3.5:35b-a3b-iq2s",
"cell_id": "pavilion:weeyuga:qwen3.5:35b-a3b-iq2s",
"n_calls": 6,
"n_errors": 0,
"duration_ms_mean": null,
"duration_ms_p50": null,
"duration_ms_p95": null,
"tokens_per_sec_mean": null,
"tokens_per_sec_p50": null,
"tokens_per_sec_p95": null,
"tokens_per_sec_max": null,
"memory_mb": null,
"watts_avg": null,
"notes": null
},
{
"label": "qwen3.5:9b-q6k",
"cell_id": "pavilion:weeyuga:qwen3.5:9b-q6k",
"n_calls": 6,
"n_errors": 0,
"duration_ms_mean": null,
"duration_ms_p50": null,
"duration_ms_p95": null,
"tokens_per_sec_mean": null,
"tokens_per_sec_p50": null,
"tokens_per_sec_p95": null,
"tokens_per_sec_max": null,
"memory_mb": null,
"watts_avg": null,
"notes": null
},
{
"label": "qwen3.5:9b-q4km",
"cell_id": "pavilion:weeyuga:qwen3.5:9b-q4km",
"n_calls": 6,
"n_errors": 0,
"duration_ms_mean": null,
"duration_ms_p50": null,
"duration_ms_p95": null,
"tokens_per_sec_mean": null,
"tokens_per_sec_p50": null,
"tokens_per_sec_p95": null,
"tokens_per_sec_max": null,
"memory_mb": null,
"watts_avg": null,
"notes": null
},
{
"label": "qwen3.5:2b",
"cell_id": "pavilion:weeyuga:qwen3.5:2b",
"n_calls": 6,
"n_errors": 0,
"duration_ms_mean": null,
"duration_ms_p50": null,
"duration_ms_p95": null,
"tokens_per_sec_mean": null,
"tokens_per_sec_p50": null,
"tokens_per_sec_p95": null,
"tokens_per_sec_max": null,
"memory_mb": null,
"watts_avg": null,
"notes": null
},
{
"label": "qwen3.5:0.8b",
"cell_id": "pavilion:weeyuga:qwen3.5:0.8b",
"n_calls": 6,
"n_errors": 0,
"duration_ms_mean": null,
"duration_ms_p50": null,
"duration_ms_p95": null,
"tokens_per_sec_mean": null,
"tokens_per_sec_p50": null,
"tokens_per_sec_p95": null,
"tokens_per_sec_max": null,
"memory_mb": null,
"watts_avg": null,
"notes": null
},
{
"label": "qwen3.5:9b",
"cell_id": "pavilion:weeyuga:qwen3.5:9b",
"n_calls": 6,
"n_errors": 0,
"duration_ms_mean": null,
"duration_ms_p50": null,
"duration_ms_p95": null,
"tokens_per_sec_mean": null,
"tokens_per_sec_p50": null,
"tokens_per_sec_p95": null,
"tokens_per_sec_max": null,
"memory_mb": null,
"watts_avg": null,
"notes": null
},
{
"label": "qwen2.5-coder:14b",
"cell_id": "pavilion:weeyuga:qwen2.5-coder:14b",
"n_calls": 6,
"n_errors": 6,
"duration_ms_mean": null,
"duration_ms_p50": null,
"duration_ms_p95": null,
"tokens_per_sec_mean": null,
"tokens_per_sec_p50": null,
"tokens_per_sec_p95": null,
"tokens_per_sec_max": null,
"memory_mb": null,
"watts_avg": null,
"notes": null
},
{
"label": "qwen2.5-coder:3b",
"cell_id": "pavilion:weeyuga:qwen2.5-coder:3b",
"n_calls": 6,
"n_errors": 0,
"duration_ms_mean": null,
"duration_ms_p50": null,
"duration_ms_p95": null,
"tokens_per_sec_mean": null,
"tokens_per_sec_p50": null,
"tokens_per_sec_p95": null,
"tokens_per_sec_max": null,
"memory_mb": null,
"watts_avg": null,
"notes": null
},
{
"label": "qwen3:14b",
"cell_id": "pavilion:weeyuga:qwen3:14b",
"n_calls": 6,
"n_errors": 4,
"duration_ms_mean": null,
"duration_ms_p50": null,
"duration_ms_p95": null,
"tokens_per_sec_mean": null,
"tokens_per_sec_p50": null,
"tokens_per_sec_p95": null,
"tokens_per_sec_max": null,
"memory_mb": null,
"watts_avg": null,
"notes": null
},
{
"label": "qwen3:8b",
"cell_id": "pavilion:weeyuga:qwen3:8b",
"n_calls": 6,
"n_errors": 2,
"duration_ms_mean": null,
"duration_ms_p50": null,
"duration_ms_p95": null,
"tokens_per_sec_mean": null,
"tokens_per_sec_p50": null,
"tokens_per_sec_p95": null,
"tokens_per_sec_max": null,
"memory_mb": null,
"watts_avg": null,
"notes": null
},
{
"label": "qwen3:4b",
"cell_id": "pavilion:weeyuga:qwen3:4b",
"n_calls": 6,
"n_errors": 0,
"duration_ms_mean": null,
"duration_ms_p50": null,
"duration_ms_p95": null,
"tokens_per_sec_mean": null,
"tokens_per_sec_p50": null,
"tokens_per_sec_p95": null,
"tokens_per_sec_max": null,
"memory_mb": null,
"watts_avg": null,
"notes": null
},
{
"label": "qwen2.5:3b",
"cell_id": "pavilion:weeyuga:qwen2.5:3b",
"n_calls": 6,
"n_errors": 0,
"duration_ms_mean": null,
"duration_ms_p50": null,
"duration_ms_p95": null,
"tokens_per_sec_mean": null,
"tokens_per_sec_p50": null,
"tokens_per_sec_p95": null,
"tokens_per_sec_max": null,
"memory_mb": null,
"watts_avg": null,
"notes": null
},
{
"label": "qwen2.5-coder:1.5b",
"cell_id": "pavilion:weeyuga:qwen2.5-coder:1.5b",
"n_calls": 6,
"n_errors": 0,
"duration_ms_mean": null,
"duration_ms_p50": null,
"duration_ms_p95": null,
"tokens_per_sec_mean": null,
"tokens_per_sec_p50": null,
"tokens_per_sec_p95": null,
"tokens_per_sec_max": null,
"memory_mb": null,
"watts_avg": null,
"notes": null
},
{
"label": "qwen2.5-coder:0.5b",
"cell_id": "pavilion:weeyuga:qwen2.5-coder:0.5b",
"n_calls": 6,
"n_errors": 0,
"duration_ms_mean": null,
"duration_ms_p50": null,
"duration_ms_p95": null,
"tokens_per_sec_mean": null,
"tokens_per_sec_p50": null,
"tokens_per_sec_p95": null,
"tokens_per_sec_max": null,
"memory_mb": null,
"watts_avg": null,
"notes": null
}
],
"cells": [
{
"cell_id": "pavilion:weeyuga:qwen3.5:4b",
"n_calls": 6,
"n_errors": 0,
"duration_ms_mean": null,
"duration_ms_p50": null,
"duration_ms_p95": null,
"tokens_per_sec_mean": null,
"tokens_per_sec_p50": null,
"tokens_per_sec_p95": null,
"tokens_per_sec_max": null,
"cold_warm_split": {
"cold": {
"n_calls": 2,
"duration_ms_mean": null,
"duration_ms_p50": null,
"tokens_per_sec_mean": null
},
"warm": {
"n_calls": 4,
"duration_ms_mean": null,
"duration_ms_p50": null,
"tokens_per_sec_mean": null
}
},
"per_prompt": {
"unknown": {
"n_calls": 6,
"n_errors": 0,
"duration_ms_mean": null,
"duration_ms_p50": null,
"tokens_per_sec_mean": null,
"tokens_per_sec_p50": null
}
},
"chars_split": null,
"timeline": [
{
"prompt_id": null,
"run_idx": 0,
"phase": "hello",
"duration_ms": null,
"tokens_per_sec": null,
"error": null
},
{
"prompt_id": null,
"run_idx": 0,
"phase": "5q",
"duration_ms": null,
"tokens_per_sec": null,
"error": null
},
{
"prompt_id": null,
"run_idx": 1,
"phase": "5q",
"duration_ms": null,
"tokens_per_sec": null,
"error": null
},
{
"prompt_id": null,
"run_idx": 2,
"phase": "5q",
"duration_ms": null,
"tokens_per_sec": null,
"error": null
},
{
"prompt_id": null,
"run_idx": 3,
"phase": "5q",
"duration_ms": null,
"tokens_per_sec": null,
"error": null
},
{
"prompt_id": null,
"run_idx": 4,
"phase": "5q",
"duration_ms": null,
"tokens_per_sec": null,
"error": null
}
]
},
{
"cell_id": "pavilion:weeyuga:qwen3.5:35b-a3b-uncensored-iq1m",
"n_calls": 6,
"n_errors": 0,
"duration_ms_mean": null,
"duration_ms_p50": null,
"duration_ms_p95": null,
"tokens_per_sec_mean": null,
"tokens_per_sec_p50": null,
"tokens_per_sec_p95": null,
"tokens_per_sec_max": null,
"cold_warm_split": {
"cold": {
"n_calls": 2,
"duration_ms_mean": null,
"duration_ms_p50": null,
"tokens_per_sec_mean": null
},
"warm": {
"n_calls": 4,
"duration_ms_mean": null,
"duration_ms_p50": null,
"tokens_per_sec_mean": null
}
},
"per_prompt": {
"unknown": {
"n_calls": 6,
"n_errors": 0,
"duration_ms_mean": null,
"duration_ms_p50": null,
"tokens_per_sec_mean": null,
"tokens_per_sec_p50": null
}
},
"chars_split": null,
"timeline": [
{
"prompt_id": null,
"run_idx": 0,
"phase": "hello",
"duration_ms": null,
"tokens_per_sec": null,
"error": null
},
{
"prompt_id": null,
"run_idx": 0,
"phase": "5q",
"duration_ms": null,
"tokens_per_sec": null,
"error": null
},
{
"prompt_id": null,
"run_idx": 1,
"phase": "5q",
"duration_ms": null,
"tokens_per_sec": null,
"error": null
},
{
"prompt_id": null,
"run_idx": 2,
"phase": "5q",
"duration_ms": null,
"tokens_per_sec": null,
"error": null
},
{
"prompt_id": null,
"run_idx": 3,
"phase": "5q",
"duration_ms": null,
"tokens_per_sec": null,
"error": null
},
{
"prompt_id": null,
"run_idx": 4,
"phase": "5q",
"duration_ms": null,
"tokens_per_sec": null,
"error": null
}
]
},
{
"cell_id": "pavilion:weeyuga:qwen3.5:35b-a3b-iq2s",
"n_calls": 6,
"n_errors": 0,
"duration_ms_mean": null,
"duration_ms_p50": null,
"duration_ms_p95": null,
"tokens_per_sec_mean": null,
"tokens_per_sec_p50": null,
"tokens_per_sec_p95": null,
"tokens_per_sec_max": null,
"cold_warm_split": {
"cold": {
"n_calls": 2,
"duration_ms_mean": null,
"duration_ms_p50": null,
"tokens_per_sec_mean": null
},
"warm": {
"n_calls": 4,
"duration_ms_mean": null,
"duration_ms_p50": null,
"tokens_per_sec_mean": null
}
},
"per_prompt": {
"unknown": {
"n_calls": 6,
"n_errors": 0,
"duration_ms_mean": null,
"duration_ms_p50": null,
"tokens_per_sec_mean": null,
"tokens_per_sec_p50": null
}
},
"chars_split": null,
"timeline": [
{
"prompt_id": null,
"run_idx": 0,
"phase": "hello",
"duration_ms": null,
"tokens_per_sec": null,
"error": null
},
{
"prompt_id": null,
"run_idx": 0,
"phase": "5q",
"duration_ms": null,
"tokens_per_sec": null,
"error": null
},
{
"prompt_id": null,
"run_idx": 1,
"phase": "5q",
"duration_ms": null,
"tokens_per_sec": null,
"error": null
},
{
"prompt_id": null,
"run_idx": 2,
"phase": "5q",
"duration_ms": null,
"tokens_per_sec": null,
"error": null
},
{
"prompt_id": null,
"run_idx": 3,
"phase": "5q",
"duration_ms": null,
"tokens_per_sec": null,
"error": null
},
{
"prompt_id": null,
"run_idx": 4,
"phase": "5q",
"duration_ms": null,
"tokens_per_sec": null,
"error": null
}
]
},
{
"cell_id": "pavilion:weeyuga:qwen3.5:9b-q6k",
"n_calls": 6,
"n_errors": 0,
"duration_ms_mean": null,
"duration_ms_p50": null,
"duration_ms_p95": null,
"tokens_per_sec_mean": null,
"tokens_per_sec_p50": null,
"tokens_per_sec_p95": null,
"tokens_per_sec_max": null,
"cold_warm_split": {
"cold": {
"n_calls": 2,
"duration_ms_mean": null,
"duration_ms_p50": null,
"tokens_per_sec_mean": null
},
"warm": {
"n_calls": 4,
"duration_ms_mean": null,
"duration_ms_p50": null,
"tokens_per_sec_mean": null
}
},
"per_prompt": {
"unknown": {
"n_calls": 6,
"n_errors": 0,
"duration_ms_mean": null,
"duration_ms_p50": null,
"tokens_per_sec_mean": null,
"tokens_per_sec_p50": null
}
},
"chars_split": null,
"timeline": [
{
"prompt_id": null,
"run_idx": 0,
"phase": "hello",
"duration_ms": null,
"tokens_per_sec": null,
"error": null
},
{
"prompt_id": null,
"run_idx": 0,
"phase": "5q",
"duration_ms": null,
"tokens_per_sec": null,
"error": null
},
{
"prompt_id": null,
"run_idx": 1,
"phase": "5q",
"duration_ms": null,
"tokens_per_sec": null,
"error": null
},
{
"prompt_id": null,
"run_idx": 2,
"phase": "5q",
"duration_ms": null,
"tokens_per_sec": null,
"error": null
},
{
"prompt_id": null,
"run_idx": 3,
"phase": "5q",
"duration_ms": null,
"tokens_per_sec": null,
"error": null
},
{
"prompt_id": null,
"run_idx": 4,
"phase": "5q",
"duration_ms": null,
"tokens_per_sec": null,
"error": null
}
]
},
{
"cell_id": "pavilion:weeyuga:qwen3.5:9b-q4km",
"n_calls": 6,
"n_errors": 0,
"duration_ms_mean": null,
"duration_ms_p50": null,
"duration_ms_p95": null,
"tokens_per_sec_mean": null,
"tokens_per_sec_p50": null,
"tokens_per_sec_p95": null,
"tokens_per_sec_max": null,
"cold_warm_split": {
"cold": {
"n_calls": 2,
"duration_ms_mean": null,
"duration_ms_p50": null,
"tokens_per_sec_mean": null
},
"warm": {
"n_calls": 4,
"duration_ms_mean": null,
"duration_ms_p50": null,
"tokens_per_sec_mean": null
}
},
"per_prompt": {
"unknown": {
"n_calls": 6,
"n_errors": 0,
"duration_ms_mean": null,
"duration_ms_p50": null,
"tokens_per_sec_mean": null,
"tokens_per_sec_p50": null
}
},
"chars_split": null,
"timeline": [
{
"prompt_id": null,
"run_idx": 0,
"phase": "hello",
"duration_ms": null,
"tokens_per_sec": null,
"error": null
},
{
"prompt_id": null,
"run_idx": 0,
"phase": "5q",
"duration_ms": null,
"tokens_per_sec": null,
"error": null
},
{
"prompt_id": null,
"run_idx": 1,
"phase": "5q",
"duration_ms": null,
"tokens_per_sec": null,
"error": null
},
{
"prompt_id": null,
"run_idx": 2,
"phase": "5q",
"duration_ms": null,
"tokens_per_sec": null,
"error": null
},
{
"prompt_id": null,
"run_idx": 3,
"phase": "5q",
"duration_ms": null,
"tokens_per_sec": null,
"error": null
},
{
"prompt_id": null,
"run_idx": 4,
"phase": "5q",
"duration_ms": null,
"tokens_per_sec": null,
"error": null
}
]
},
{
"cell_id": "pavilion:weeyuga:qwen3.5:2b",
"n_calls": 6,
"n_errors": 0,
"duration_ms_mean": null,
"duration_ms_p50": null,
"duration_ms_p95": null,
"tokens_per_sec_mean": null,
"tokens_per_sec_p50": null,
"tokens_per_sec_p95": null,
"tokens_per_sec_max": null,
"cold_warm_split": {
"cold": {
"n_calls": 2,
"duration_ms_mean": null,
"duration_ms_p50": null,
"tokens_per_sec_mean": null
},
"warm": {
"n_calls": 4,
"duration_ms_mean": null,
"duration_ms_p50": null,
"tokens_per_sec_mean": null
}
},
"per_prompt": {
"unknown": {
"n_calls": 6,
"n_errors": 0,
"duration_ms_mean": null,
"duration_ms_p50": null,
"tokens_per_sec_mean": null,
"tokens_per_sec_p50": null
}
},
"chars_split": null,
"timeline": [
{
"prompt_id": null,
"run_idx": 0,
"phase": "hello",
"duration_ms": null,
"tokens_per_sec": null,
"error": null
},
{
"prompt_id": null,
"run_idx": 0,
"phase": "5q",
"duration_ms": null,
"tokens_per_sec": null,
"error": null
},
{
"prompt_id": null,
"run_idx": 1,
"phase": "5q",
"duration_ms": null,
"tokens_per_sec": null,
"error": null
},
{
"prompt_id": null,
"run_idx": 2,
"phase": "5q",
"duration_ms": null,
"tokens_per_sec": null,
"error": null
},
{
"prompt_id": null,
"run_idx": 3,
"phase": "5q",
"duration_ms": null,
"tokens_per_sec": null,
"error": null
},
{
"prompt_id": null,
"run_idx": 4,
"phase": "5q",
"duration_ms": null,
"tokens_per_sec": null,
"error": null
}
]
},
{
"cell_id": "pavilion:weeyuga:qwen3.5:0.8b",
"n_calls": 6,
"n_errors": 0,
"duration_ms_mean": null,
"duration_ms_p50": null,
"duration_ms_p95": null,
"tokens_per_sec_mean": null,
"tokens_per_sec_p50": null,
"tokens_per_sec_p95": null,
"tokens_per_sec_max": null,
"cold_warm_split": {
"cold": {
"n_calls": 2,
"duration_ms_mean": null,
"duration_ms_p50": null,
"tokens_per_sec_mean": null
},
"warm": {
"n_calls": 4,
"duration_ms_mean": null,
"duration_ms_p50": null,
"tokens_per_sec_mean": null
}
},
"per_prompt": {
"unknown": {
"n_calls": 6,
"n_errors": 0,
"duration_ms_mean": null,
"duration_ms_p50": null,
"tokens_per_sec_mean": null,
"tokens_per_sec_p50": null
}
},
"chars_split": null,
"timeline": [
{
"prompt_id": null,
"run_idx": 0,
"phase": "hello",
"duration_ms": null,
"tokens_per_sec": null,
"error": null
},
{
"prompt_id": null,
"run_idx": 0,
"phase": "5q",
"duration_ms": null,
"tokens_per_sec": null,
"error": null
},
{
"prompt_id": null,
"run_idx": 1,
"phase": "5q",
"duration_ms": null,
"tokens_per_sec": null,
"error": null
},
{
"prompt_id": null,
"run_idx": 2,
"phase": "5q",
"duration_ms": null,
"tokens_per_sec": null,
"error": null
},
{
"prompt_id": null,
"run_idx": 3,
"phase": "5q",
"duration_ms": null,
"tokens_per_sec": null,
"error": null
},
{
"prompt_id": null,
"run_idx": 4,
"phase": "5q",
"duration_ms": null,
"tokens_per_sec": null,
"error": null
}
]
},
{
"cell_id": "pavilion:weeyuga:qwen3.5:9b",
"n_calls": 6,
"n_errors": 0,
"duration_ms_mean": null,
"duration_ms_p50": null,
"duration_ms_p95": null,
"tokens_per_sec_mean": null,
"tokens_per_sec_p50": null,
"tokens_per_sec_p95": null,
"tokens_per_sec_max": null,
"cold_warm_split": {
"cold": {
"n_calls": 2,
"duration_ms_mean": null,
"duration_ms_p50": null,
"tokens_per_sec_mean": null
},
"warm": {
"n_calls": 4,
"duration_ms_mean": null,
"duration_ms_p50": null,
"tokens_per_sec_mean": null
}
},
"per_prompt": {
"unknown": {
"n_calls": 6,
"n_errors": 0,
"duration_ms_mean": null,
"duration_ms_p50": null,
"tokens_per_sec_mean": null,
"tokens_per_sec_p50": null
}
},
"chars_split": null,
"timeline": [
{
"prompt_id": null,
"run_idx": 0,
"phase": "hello",
"duration_ms": null,
"tokens_per_sec": null,
"error": null
},
{
"prompt_id": null,
"run_idx": 0,
"phase": "5q",
"duration_ms": null,
"tokens_per_sec": null,
"error": null
},
{
"prompt_id": null,
"run_idx": 1,
"phase": "5q",
"duration_ms": null,
"tokens_per_sec": null,
"error": null
},
{
"prompt_id": null,
"run_idx": 2,
"phase": "5q",
"duration_ms": null,
"tokens_per_sec": null,
"error": null
},
{
"prompt_id": null,
"run_idx": 3,
"phase": "5q",
"duration_ms": null,
"tokens_per_sec": null,
"error": null
},
{
"prompt_id": null,
"run_idx": 4,
"phase": "5q",
"duration_ms": null,
"tokens_per_sec": null,
"error": null
}
]
},
{
"cell_id": "pavilion:weeyuga:qwen2.5-coder:14b",
"n_calls": 6,
"n_errors": 6,
"duration_ms_mean": null,
"duration_ms_p50": null,
"duration_ms_p95": null,
"tokens_per_sec_mean": null,
"tokens_per_sec_p50": null,
"tokens_per_sec_p95": null,
"tokens_per_sec_max": null,
"cold_warm_split": {
"cold": {
"n_calls": 2,
"duration_ms_mean": null,
"duration_ms_p50": null,
"tokens_per_sec_mean": null
},
"warm": {
"n_calls": 4,
"duration_ms_mean": null,
"duration_ms_p50": null,
"tokens_per_sec_mean": null
}
},
"per_prompt": {
"unknown": {
"n_calls": 6,
"n_errors": 6,
"duration_ms_mean": null,
"duration_ms_p50": null,
"tokens_per_sec_mean": null,
"tokens_per_sec_p50": null
}
},
"chars_split": null,
"timeline": [
{
"prompt_id": null,
"run_idx": 0,
"phase": "hello",
"duration_ms": null,
"tokens_per_sec": null,
"error": "<HTTPError 500: 'Internal Server Error'>"
},
{
"prompt_id": null,
"run_idx": 0,
"phase": "5q",
"duration_ms": null,
"tokens_per_sec": null,
"error": "<HTTPError 500: 'Internal Server Error'>"
},
{
"prompt_id": null,
"run_idx": 1,
"phase": "5q",
"duration_ms": null,
"tokens_per_sec": null,
"error": "<HTTPError 500: 'Internal Server Error'>"
},
{
"prompt_id": null,
"run_idx": 2,
"phase": "5q",
"duration_ms": null,
"tokens_per_sec": null,
"error": "<HTTPError 500: 'Internal Server Error'>"
},
{
"prompt_id": null,
"run_idx": 3,
"phase": "5q",
"duration_ms": null,
"tokens_per_sec": null,
"error": "<HTTPError 500: 'Internal Server Error'>"
},
{
"prompt_id": null,
"run_idx": 4,
"phase": "5q",
"duration_ms": null,
"tokens_per_sec": null,
"error": "<HTTPError 500: 'Internal Server Error'>"
}
]
},
{
"cell_id": "pavilion:weeyuga:qwen2.5-coder:3b",
"n_calls": 6,
"n_errors": 0,
"duration_ms_mean": null,
"duration_ms_p50": null,
"duration_ms_p95": null,
"tokens_per_sec_mean": null,
"tokens_per_sec_p50": null,
"tokens_per_sec_p95": null,
"tokens_per_sec_max": null,
"cold_warm_split": {
"cold": {
"n_calls": 2,
"duration_ms_mean": null,
"duration_ms_p50": null,
"tokens_per_sec_mean": null
},
"warm": {
"n_calls": 4,
"duration_ms_mean": null,
"duration_ms_p50": null,
"tokens_per_sec_mean": null
}
},
"per_prompt": {
"unknown": {
"n_calls": 6,
"n_errors": 0,
"duration_ms_mean": null,
"duration_ms_p50": null,
"tokens_per_sec_mean": null,
"tokens_per_sec_p50": null
}
},
"chars_split": null,
"timeline": [
{
"prompt_id": null,
"run_idx": 0,
"phase": "hello",
"duration_ms": null,
"tokens_per_sec": null,
"error": null
},
{
"prompt_id": null,
"run_idx": 0,
"phase": "5q",
"duration_ms": null,
"tokens_per_sec": null,
"error": null
},
{
"prompt_id": null,
"run_idx": 1,
"phase": "5q",
"duration_ms": null,
"tokens_per_sec": null,
"error": null
},
{
"prompt_id": null,
"run_idx": 2,
"phase": "5q",
"duration_ms": null,
"tokens_per_sec": null,
"error": null
},
{
"prompt_id": null,
"run_idx": 3,
"phase": "5q",
"duration_ms": null,
"tokens_per_sec": null,
"error": null
},
{
"prompt_id": null,
"run_idx": 4,
"phase": "5q",
"duration_ms": null,
"tokens_per_sec": null,
"error": null
}
]
},
{
"cell_id": "pavilion:weeyuga:qwen3:14b",
"n_calls": 6,
"n_errors": 4,
"duration_ms_mean": null,
"duration_ms_p50": null,
"duration_ms_p95": null,
"tokens_per_sec_mean": null,
"tokens_per_sec_p50": null,
"tokens_per_sec_p95": null,
"tokens_per_sec_max": null,
"cold_warm_split": {
"cold": {
"n_calls": 2,
"duration_ms_mean": null,
"duration_ms_p50": null,
"tokens_per_sec_mean": null
},
"warm": {
"n_calls": 4,
"duration_ms_mean": null,
"duration_ms_p50": null,
"tokens_per_sec_mean": null
}
},
"per_prompt": {
"unknown": {
"n_calls": 6,
"n_errors": 4,
"duration_ms_mean": null,
"duration_ms_p50": null,
"tokens_per_sec_mean": null,
"tokens_per_sec_p50": null
}
},
"chars_split": null,
"timeline": [
{
"prompt_id": null,
"run_idx": 0,
"phase": "hello",
"duration_ms": null,
"tokens_per_sec": null,
"error": "TimeoutError('timed out')"
},
{
"prompt_id": null,
"run_idx": 0,
"phase": "5q",
"duration_ms": null,
"tokens_per_sec": null,
"error": "TimeoutError('timed out')"
},
{
"prompt_id": null,
"run_idx": 1,
"phase": "5q",
"duration_ms": null,
"tokens_per_sec": null,
"error": "TimeoutError('timed out')"
},
{
"prompt_id": null,
"run_idx": 2,
"phase": "5q",
"duration_ms": null,
"tokens_per_sec": null,
"error": "TimeoutError('timed out')"
},
{
"prompt_id": null,
"run_idx": 3,
"phase": "5q",
"duration_ms": null,
"tokens_per_sec": null,
"error": null
},
{
"prompt_id": null,
"run_idx": 4,
"phase": "5q",
"duration_ms": null,
"tokens_per_sec": null,
"error": null
}
]
},
{
"cell_id": "pavilion:weeyuga:qwen3:8b",
"n_calls": 6,
"n_errors": 2,
"duration_ms_mean": null,
"duration_ms_p50": null,
"duration_ms_p95": null,
"tokens_per_sec_mean": null,
"tokens_per_sec_p50": null,
"tokens_per_sec_p95": null,
"tokens_per_sec_max": null,
"cold_warm_split": {
"cold": {
"n_calls": 2,
"duration_ms_mean": null,
"duration_ms_p50": null,
"tokens_per_sec_mean": null
},
"warm": {
"n_calls": 4,
"duration_ms_mean": null,
"duration_ms_p50": null,
"tokens_per_sec_mean": null
}
},
"per_prompt": {
"unknown": {
"n_calls": 6,
"n_errors": 2,
"duration_ms_mean": null,
"duration_ms_p50": null,
"tokens_per_sec_mean": null,
"tokens_per_sec_p50": null
}
},
"chars_split": null,
"timeline": [
{
"prompt_id": null,
"run_idx": 0,
"phase": "hello",
"duration_ms": null,
"tokens_per_sec": null,
"error": "TimeoutError('timed out')"
},
{
"prompt_id": null,
"run_idx": 0,
"phase": "5q",
"duration_ms": null,
"tokens_per_sec": null,
"error": null
},
{
"prompt_id": null,
"run_idx": 1,
"phase": "5q",
"duration_ms": null,
"tokens_per_sec": null,
"error": "TimeoutError('timed out')"
},
{
"prompt_id": null,
"run_idx": 2,
"phase": "5q",
"duration_ms": null,
"tokens_per_sec": null,
"error": null
},
{
"prompt_id": null,
"run_idx": 3,
"phase": "5q",
"duration_ms": null,
"tokens_per_sec": null,
"error": null
},
{
"prompt_id": null,
"run_idx": 4,
"phase": "5q",
"duration_ms": null,
"tokens_per_sec": null,
"error": null
}
]
},
{
"cell_id": "pavilion:weeyuga:qwen3:4b",
"n_calls": 6,
"n_errors": 0,
"duration_ms_mean": null,
"duration_ms_p50": null,
"duration_ms_p95": null,
"tokens_per_sec_mean": null,
"tokens_per_sec_p50": null,
"tokens_per_sec_p95": null,
"tokens_per_sec_max": null,
"cold_warm_split": {
"cold": {
"n_calls": 2,
"duration_ms_mean": null,
"duration_ms_p50": null,
"tokens_per_sec_mean": null
},
"warm": {
"n_calls": 4,
"duration_ms_mean": null,
"duration_ms_p50": null,
"tokens_per_sec_mean": null
}
},
"per_prompt": {
"unknown": {
"n_calls": 6,
"n_errors": 0,
"duration_ms_mean": null,
"duration_ms_p50": null,
"tokens_per_sec_mean": null,
"tokens_per_sec_p50": null
}
},
"chars_split": null,
"timeline": [
{
"prompt_id": null,
"run_idx": 0,
"phase": "hello",
"duration_ms": null,
"tokens_per_sec": null,
"error": null
},
{
"prompt_id": null,
"run_idx": 0,
"phase": "5q",
"duration_ms": null,
"tokens_per_sec": null,
"error": null
},
{
"prompt_id": null,
"run_idx": 1,
"phase": "5q",
"duration_ms": null,
"tokens_per_sec": null,
"error": null
},
{
"prompt_id": null,
"run_idx": 2,
"phase": "5q",
"duration_ms": null,
"tokens_per_sec": null,
"error": null
},
{
"prompt_id": null,
"run_idx": 3,
"phase": "5q",
"duration_ms": null,
"tokens_per_sec": null,
"error": null
},
{
"prompt_id": null,
"run_idx": 4,
"phase": "5q",
"duration_ms": null,
"tokens_per_sec": null,
"error": null
}
]
},
{
"cell_id": "pavilion:weeyuga:qwen2.5:3b",
"n_calls": 6,
"n_errors": 0,
"duration_ms_mean": null,
"duration_ms_p50": null,
"duration_ms_p95": null,
"tokens_per_sec_mean": null,
"tokens_per_sec_p50": null,
"tokens_per_sec_p95": null,
"tokens_per_sec_max": null,
"cold_warm_split": {
"cold": {
"n_calls": 2,
"duration_ms_mean": null,
"duration_ms_p50": null,
"tokens_per_sec_mean": null
},
"warm": {
"n_calls": 4,
"duration_ms_mean": null,
"duration_ms_p50": null,
"tokens_per_sec_mean": null
}
},
"per_prompt": {
"unknown": {
"n_calls": 6,
"n_errors": 0,
"duration_ms_mean": null,
"duration_ms_p50": null,
"tokens_per_sec_mean": null,
"tokens_per_sec_p50": null
}
},
"chars_split": null,
"timeline": [
{
"prompt_id": null,
"run_idx": 0,
"phase": "hello",
"duration_ms": null,
"tokens_per_sec": null,
"error": null
},
{
"prompt_id": null,
"run_idx": 0,
"phase": "5q",
"duration_ms": null,
"tokens_per_sec": null,
"error": null
},
{
"prompt_id": null,
"run_idx": 1,
"phase": "5q",
"duration_ms": null,
"tokens_per_sec": null,
"error": null
},
{
"prompt_id": null,
"run_idx": 2,
"phase": "5q",
"duration_ms": null,
"tokens_per_sec": null,
"error": null
},
{
"prompt_id": null,
"run_idx": 3,
"phase": "5q",
"duration_ms": null,
"tokens_per_sec": null,
"error": null
},
{
"prompt_id": null,
"run_idx": 4,
"phase": "5q",
"duration_ms": null,
"tokens_per_sec": null,
"error": null
}
]
},
{
"cell_id": "pavilion:weeyuga:qwen2.5-coder:1.5b",
"n_calls": 6,
"n_errors": 0,
"duration_ms_mean": null,
"duration_ms_p50": null,
"duration_ms_p95": null,
"tokens_per_sec_mean": null,
"tokens_per_sec_p50": null,
"tokens_per_sec_p95": null,
"tokens_per_sec_max": null,
"cold_warm_split": {
"cold": {
"n_calls": 2,
"duration_ms_mean": null,
"duration_ms_p50": null,
"tokens_per_sec_mean": null
},
"warm": {
"n_calls": 4,
"duration_ms_mean": null,
"duration_ms_p50": null,
"tokens_per_sec_mean": null
}
},
"per_prompt": {
"unknown": {
"n_calls": 6,
"n_errors": 0,
"duration_ms_mean": null,
"duration_ms_p50": null,
"tokens_per_sec_mean": null,
"tokens_per_sec_p50": null
}
},
"chars_split": null,
"timeline": [
{
"prompt_id": null,
"run_idx": 0,
"phase": "hello",
"duration_ms": null,
"tokens_per_sec": null,
"error": null
},
{
"prompt_id": null,
"run_idx": 0,
"phase": "5q",
"duration_ms": null,
"tokens_per_sec": null,
"error": null
},
{
"prompt_id": null,
"run_idx": 1,
"phase": "5q",
"duration_ms": null,
"tokens_per_sec": null,
"error": null
},
{
"prompt_id": null,
"run_idx": 2,
"phase": "5q",
"duration_ms": null,
"tokens_per_sec": null,
"error": null
},
{
"prompt_id": null,
"run_idx": 3,
"phase": "5q",
"duration_ms": null,
"tokens_per_sec": null,
"error": null
},
{
"prompt_id": null,
"run_idx": 4,
"phase": "5q",
"duration_ms": null,
"tokens_per_sec": null,
"error": null
}
]
},
{
"cell_id": "pavilion:weeyuga:qwen2.5-coder:0.5b",
"n_calls": 6,
"n_errors": 0,
"duration_ms_mean": null,
"duration_ms_p50": null,
"duration_ms_p95": null,
"tokens_per_sec_mean": null,
"tokens_per_sec_p50": null,
"tokens_per_sec_p95": null,
"tokens_per_sec_max": null,
"cold_warm_split": {
"cold": {
"n_calls": 2,
"duration_ms_mean": null,
"duration_ms_p50": null,
"tokens_per_sec_mean": null
},
"warm": {
"n_calls": 4,
"duration_ms_mean": null,
"duration_ms_p50": null,
"tokens_per_sec_mean": null
}
},
"per_prompt": {
"unknown": {
"n_calls": 6,
"n_errors": 0,
"duration_ms_mean": null,
"duration_ms_p50": null,
"tokens_per_sec_mean": null,
"tokens_per_sec_p50": null
}
},
"chars_split": null,
"timeline": [
{
"prompt_id": null,
"run_idx": 0,
"phase": "hello",
"duration_ms": null,
"tokens_per_sec": null,
"error": null
},
{
"prompt_id": null,
"run_idx": 0,
"phase": "5q",
"duration_ms": null,
"tokens_per_sec": null,
"error": null
},
{
"prompt_id": null,
"run_idx": 1,
"phase": "5q",
"duration_ms": null,
"tokens_per_sec": null,
"error": null
},
{
"prompt_id": null,
"run_idx": 2,
"phase": "5q",
"duration_ms": null,
"tokens_per_sec": null,
"error": null
},
{
"prompt_id": null,
"run_idx": 3,
"phase": "5q",
"duration_ms": null,
"tokens_per_sec": null,
"error": null
},
{
"prompt_id": null,
"run_idx": 4,
"phase": "5q",
"duration_ms": null,
"tokens_per_sec": null,
"error": null
}
]
}
],
"n_calls_total": 96,
"n_errors_total": 12,
"chart_spec": {
"kind": "bar",
"x": "cell",
"y": "tokens_per_sec_mean",
"series": [
"qwen3.5:4b",
"qwen3.5:35b-a3b-uncensored-iq1m",
"qwen3.5:35b-a3b-iq2s",
"qwen3.5:9b-q6k",
"qwen3.5:9b-q4km",
"qwen3.5:2b",
"qwen3.5:0.8b",
"qwen3.5:9b",
"qwen2.5-coder:14b",
"qwen2.5-coder:3b",
"qwen3:14b",
"qwen3:8b",
"qwen3:4b",
"qwen2.5:3b",
"qwen2.5-coder:1.5b",
"qwen2.5-coder:0.5b"
],
"data_url": "/data/ad057f5b-ed3f-4a95-a38e-361be310ffd6/run.jsonl"
},
"site_grade": "archive-only",
"site_grade_reason": "error rate 12/96",
"raw_data_urls": {
"jsonl": "/data/ad057f5b-ed3f-4a95-a38e-361be310ffd6/run.jsonl",
"log": null,
"md": null,
"metadata": "/data/ad057f5b-ed3f-4a95-a38e-361be310ffd6/metadata.json",
"gitea_dir": "https://git.weeyuga.com/slobodanmargetic988/weeyuga-benchmarks-public/src/branch/main/runs/ad057f5b-ed3f-4a95-a38e-361be310ffd6/"
},
"synthesis_doc": "SITE_DATA_AUDIT_AND_MIGRATION_PLAN_2026-05-06.md",
"synthesis_docs_all": [
"SITE_DATA_AUDIT_AND_MIGRATION_PLAN_2026-05-06.md",
"PAVILION_LLAMACPP_VS_OLLAMA_v0_INCOMPLETE.md"
],
"related_ids": [],
"status": "complete",
"visibility": "draft",
"cells_full": [
{
"cell_id": "pavilion:weeyuga:qwen3.5:4b",
"n_calls": 6,
"n_errors": 0,
"duration_ms_mean": null,
"duration_ms_p50": null,
"duration_ms_p95": null,
"tokens_per_sec_mean": null,
"tokens_per_sec_p50": null,
"tokens_per_sec_p95": null,
"tokens_per_sec_max": null,
"cold_warm_split": {
"cold": {
"n_calls": 2,
"duration_ms_mean": null,
"duration_ms_p50": null,
"tokens_per_sec_mean": null
},
"warm": {
"n_calls": 4,
"duration_ms_mean": null,
"duration_ms_p50": null,
"tokens_per_sec_mean": null
}
},
"per_prompt": {
"unknown": {
"n_calls": 6,
"n_errors": 0,
"duration_ms_mean": null,
"duration_ms_p50": null,
"tokens_per_sec_mean": null,
"tokens_per_sec_p50": null
}
},
"chars_split": null,
"timeline": [
{
"prompt_id": null,
"run_idx": 0,
"phase": "hello",
"duration_ms": null,
"tokens_per_sec": null,
"error": null
},
{
"prompt_id": null,
"run_idx": 0,
"phase": "5q",
"duration_ms": null,
"tokens_per_sec": null,
"error": null
},
{
"prompt_id": null,
"run_idx": 1,
"phase": "5q",
"duration_ms": null,
"tokens_per_sec": null,
"error": null
},
{
"prompt_id": null,
"run_idx": 2,
"phase": "5q",
"duration_ms": null,
"tokens_per_sec": null,
"error": null
},
{
"prompt_id": null,
"run_idx": 3,
"phase": "5q",
"duration_ms": null,
"tokens_per_sec": null,
"error": null
},
{
"prompt_id": null,
"run_idx": 4,
"phase": "5q",
"duration_ms": null,
"tokens_per_sec": null,
"error": null
}
],
"calls": [
{
"type": "call",
"ts_utc": "2026-04-29T09:51:53Z",
"cell_id": "pavilion:weeyuga:qwen3.5:4b",
"model": "qwen3.5:4b",
"phase": "hello",
"question_id": "hello_check",
"run_idx": 0,
"duration_seconds": 7.103,
"prompt_tokens": 16,
"completion_tokens": 251,
"tokens_per_second": 35.34,
"finish_reason": "stop",
"weeyuga_meta": null,
"status_code": 200,
"response_chars": 62,
"response_preview": "Hi there! I'm glad to help! What would you like to do today? \ud83d\ude0a",
"required_markers": [],
"markers_hit": [],
"marker_hit_rate": null,
"format_rule": "",
"format_ok": null,
"usable_answer": true,
"error": null
},
{
"type": "call",
"ts_utc": "2026-04-29T09:52:21Z",
"cell_id": "pavilion:weeyuga:qwen3.5:4b",
"model": "qwen3.5:4b",
"phase": "5q",
"question_id": "disk_guard_bash",
"run_idx": 0,
"duration_seconds": 28.241,
"prompt_tokens": 71,
"completion_tokens": 1124,
"tokens_per_second": 39.8,
"finish_reason": "stop",
"weeyuga_meta": null,
"status_code": 200,
"response_chars": 246,
"response_preview": "```bash\n#!/bin/bash\n\n# Check disk usage for /\nif [ -d \"/\" ]; then\n usage=$(df -P / | awk '{print $5}')\n if [ \"$usage\" -gt 85 ]; then\n echo \"WARNING: Disk usage for / is at ${usage}% (above 85% threshold)\"\n exit 1\n fi\n",
"required_markers": [
"#!/usr/bin/env bash",
"df -P /",
"85",
"exit 1"
],
"markers_hit": [
"df -P /",
"85",
"exit 1"
],
"marker_hit_rate": 0.75,
"format_rule": "bash_code",
"format_ok": false,
"usable_answer": true,
"error": null
},
{
"type": "call",
"ts_utc": "2026-04-29T09:53:11Z",
"cell_id": "pavilion:weeyuga:qwen3.5:4b",
"model": "qwen3.5:4b",
"phase": "5q",
"question_id": "ipv4_python_tests",
"run_idx": 1,
"duration_seconds": 49.606,
"prompt_tokens": 55,
"completion_tokens": 2048,
"tokens_per_second": 41.29,
"finish_reason": "length",
"weeyuga_meta": null,
"status_code": 200,
"response_chars": 1630,
"response_preview": "```python\ndef is_valid_ipv4(value: str) -> bool:\n \"\"\"\n Check if a string is a valid IPv4 address.\n \"\"\"\n if not isinstance(value, str):\n return False\n parts = value.split('.')\n if len(parts) != 4:\n return Fals",
"required_markers": [
"def is_valid_ipv4",
"def test_",
"assert",
"split('.')"
],
"markers_hit": [
"def is_valid_ipv4",
"split('.')"
],
"marker_hit_rate": 0.5,
"format_rule": "python_code",
"format_ok": false,
"usable_answer": true,
"error": null
},
{
"type": "call",
"ts_utc": "2026-04-29T09:53:38Z",
"cell_id": "pavilion:weeyuga:qwen3.5:4b",
"model": "qwen3.5:4b",
"phase": "5q",
"question_id": "nginx_safe_reload",
"run_idx": 2,
"duration_seconds": 26.721,
"prompt_tokens": 39,
"completion_tokens": 1076,
"tokens_per_second": 40.27,
"finish_reason": "stop",
"weeyuga_meta": null,
"status_code": 200,
"response_chars": 116,
"response_preview": "cp /etc/nginx/nginx.conf /etc/nginx/nginx.conf.bak\nnginx -t\nif [ $? -eq 0 ]; then nginx -s reload; else nginx -t; fi",
"required_markers": [
"cp /etc/nginx/nginx.conf",
"nginx -t",
"systemctl reload nginx",
"&&"
],
"markers_hit": [
"cp /etc/nginx/nginx.conf",
"nginx -t"
],
"marker_hit_rate": 0.5,
"format_rule": "shell_lines",
"format_ok": true,
"usable_answer": true,
"error": null
},
{
"type": "call",
"ts_utc": "2026-04-29T09:54:27Z",
"cell_id": "pavilion:weeyuga:qwen3.5:4b",
"model": "qwen3.5:4b",
"phase": "5q",
"question_id": "yaml_cli_plan",
"run_idx": 3,
"duration_seconds": 49.969,
"prompt_tokens": 43,
"completion_tokens": 2048,
"tokens_per_second": 40.99,
"finish_reason": "length",
"weeyuga_meta": null,
"status_code": 200,
"response_chars": 7628,
"response_preview": "# Python CLI for Git-Scanned YAML Schema Validation\n\n```python\n#!/usr/bin/env python3\n\"\"\"\nYAML Schema Validator CLI\nScans git repos for changed YAML files and validates against JSON schemas.\n\"\"\"\n\nimport sys\nimport os\nimport json\nimport yaml",
"required_markers": [
"1.",
"2.",
"3.",
"4.",
"JSON schema",
"git"
],
"markers_hit": [
"JSON schema",
"git"
],
"marker_hit_rate": 0.333,
"format_rule": "four_numbered_steps",
"format_ok": false,
"usable_answer": true,
"error": null
},
{
"type": "call",
"ts_utc": "2026-04-29T09:54:56Z",
"cell_id": "pavilion:weeyuga:qwen3.5:4b",
"model": "qwen3.5:4b",
"phase": "5q",
"question_id": "ssh_lockout_triage",
"run_idx": 4,
"duration_seconds": 28.774,
"prompt_tokens": 56,
"completion_tokens": 1124,
"tokens_per_second": 39.06,
"finish_reason": "stop",
"weeyuga_meta": null,
"status_code": 200,
"response_chars": 468,
"response_preview": "- Review `sshd_config` for misconfigurations that might cause permission denied errors.\n- Review `authorized_keys` permissions to ensure only trusted keys are allowed to connect.\n- Review `PasswordAuthentication` settings to ensure password",
"required_markers": [
"sshd_config",
"authorized_keys",
"journalctl",
"rollback",
"PasswordAuthentication"
],
"markers_hit": [
"sshd_config",
"authorized_keys",
"journalctl",
"rollback",
"PasswordAuthentication"
],
"marker_hit_rate": 1.0,
"format_rule": "five_bullets",
"format_ok": true,
"usable_answer": true,
"error": null
}
]
},
{
"cell_id": "pavilion:weeyuga:qwen3.5:35b-a3b-uncensored-iq1m",
"n_calls": 6,
"n_errors": 0,
"duration_ms_mean": null,
"duration_ms_p50": null,
"duration_ms_p95": null,
"tokens_per_sec_mean": null,
"tokens_per_sec_p50": null,
"tokens_per_sec_p95": null,
"tokens_per_sec_max": null,
"cold_warm_split": {
"cold": {
"n_calls": 2,
"duration_ms_mean": null,
"duration_ms_p50": null,
"tokens_per_sec_mean": null
},
"warm": {
"n_calls": 4,
"duration_ms_mean": null,
"duration_ms_p50": null,
"tokens_per_sec_mean": null
}
},
"per_prompt": {
"unknown": {
"n_calls": 6,
"n_errors": 0,
"duration_ms_mean": null,
"duration_ms_p50": null,
"tokens_per_sec_mean": null,
"tokens_per_sec_p50": null
}
},
"chars_split": null,
"timeline": [
{
"prompt_id": null,
"run_idx": 0,
"phase": "hello",
"duration_ms": null,
"tokens_per_sec": null,
"error": null
},
{
"prompt_id": null,
"run_idx": 0,
"phase": "5q",
"duration_ms": null,
"tokens_per_sec": null,
"error": null
},
{
"prompt_id": null,
"run_idx": 1,
"phase": "5q",
"duration_ms": null,
"tokens_per_sec": null,
"error": null
},
{
"prompt_id": null,
"run_idx": 2,
"phase": "5q",
"duration_ms": null,
"tokens_per_sec": null,
"error": null
},
{
"prompt_id": null,
"run_idx": 3,
"phase": "5q",
"duration_ms": null,
"tokens_per_sec": null,
"error": null
},
{
"prompt_id": null,
"run_idx": 4,
"phase": "5q",
"duration_ms": null,
"tokens_per_sec": null,
"error": null
}
],
"calls": [
{
"type": "call",
"ts_utc": "2026-04-29T09:55:15Z",
"cell_id": "pavilion:weeyuga:qwen3.5:35b-a3b-uncensored-iq1m",
"model": "qwen3.5:35b-a3b-uncensored-iq1m",
"phase": "hello",
"question_id": "hello_check",
"run_idx": 0,
"duration_seconds": 18.944,
"prompt_tokens": 16,
"completion_tokens": 750,
"tokens_per_second": 39.59,
"finish_reason": "stop",
"weeyuga_meta": null,
"status_code": 200,
"response_chars": 185,
"response_preview": "Hello! I'm glad to help you. What would you like to do today? Whether you need assistance with a specific task, need help with something, or just want to chat, I'm here to assist you! \ud83d\ude0a",
"required_markers": [],
"markers_hit": [],
"marker_hit_rate": null,
"format_rule": "",
"format_ok": null,
"usable_answer": true,
"error": null
},
{
"type": "call",
"ts_utc": "2026-04-29T09:55:42Z",
"cell_id": "pavilion:weeyuga:qwen3.5:35b-a3b-uncensored-iq1m",
"model": "qwen3.5:35b-a3b-uncensored-iq1m",
"phase": "5q",
"question_id": "disk_guard_bash",
"run_idx": 0,
"duration_seconds": 27.028,
"prompt_tokens": 71,
"completion_tokens": 1123,
"tokens_per_second": 41.55,
"finish_reason": "stop",
"weeyuga_meta": null,
"status_code": 200,
"response_chars": 242,
"response_preview": "#!/bin/bash\n\n# Check disk usage for /\nif [ -d \"/\" ]; then\n usage=$(df -P / | awk '{print $5}')\n if [ \"$usage\" -gt 85 ]; then\n echo \"WARNING: Disk usage for / is at ${usage}% (above 85% threshold)\"\n exit 1\n fi\nfi\n\nexit",
"required_markers": [
"#!/usr/bin/env bash",
"df -P /",
"85",
"exit 1"
],
"markers_hit": [
"df -P /",
"85",
"exit 1"
],
"marker_hit_rate": 0.75,
"format_rule": "bash_code",
"format_ok": false,
"usable_answer": true,
"error": null
},
{
"type": "call",
"ts_utc": "2026-04-29T09:56:02Z",
"cell_id": "pavilion:weeyuga:qwen3.5:35b-a3b-uncensored-iq1m",
"model": "qwen3.5:35b-a3b-uncensored-iq1m",
"phase": "5q",
"question_id": "ipv4_python_tests",
"run_idx": 1,
"duration_seconds": 19.353,
"prompt_tokens": 55,
"completion_tokens": 776,
"tokens_per_second": 40.1,
"finish_reason": "stop",
"weeyuga_meta": null,
"status_code": 200,
"response_chars": 665,
"response_preview": "```python\ndef is_valid_ipv4(value: str) -> bool:\n \"\"\"\n Check if a string represents a valid IPv4 address.\n \"\"\"\n if not isinstance(value, str):\n return False\n parts = value.split('.')\n if len(parts) != 4:\n ret",
"required_markers": [
"def is_valid_ipv4",
"def test_",
"assert",
"split('.')"
],
"markers_hit": [
"def is_valid_ipv4",
"def test_",
"assert",
"split('.')"
],
"marker_hit_rate": 1.0,
"format_rule": "python_code",
"format_ok": true,
"usable_answer": true,
"error": null
},
{
"type": "call",
"ts_utc": "2026-04-29T09:56:27Z",
"cell_id": "pavilion:weeyuga:qwen3.5:35b-a3b-uncensored-iq1m",
"model": "qwen3.5:35b-a3b-uncensored-iq1m",
"phase": "5q",
"question_id": "nginx_safe_reload",
"run_idx": 2,
"duration_seconds": 25.905,
"prompt_tokens": 39,
"completion_tokens": 1073,
"tokens_per_second": 41.42,
"finish_reason": "stop",
"weeyuga_meta": null,
"status_code": 200,
"response_chars": 104,
"response_preview": "cp /etc/nginx/nginx.conf /etc/nginx/nginx.conf.bak\nnginx -t\nif [ $? -eq 0 ]; then\n nginx -s reload\nfi",
"required_markers": [
"cp /etc/nginx/nginx.conf",
"nginx -t",
"systemctl reload nginx",
"&&"
],
"markers_hit": [
"cp /etc/nginx/nginx.conf",
"nginx -t"
],
"marker_hit_rate": 0.5,
"format_rule": "shell_lines",
"format_ok": true,
"usable_answer": true,
"error": null
},
{
"type": "call",
"ts_utc": "2026-04-29T09:57:17Z",
"cell_id": "pavilion:weeyuga:qwen3.5:35b-a3b-uncensored-iq1m",
"model": "qwen3.5:35b-a3b-uncensored-iq1m",
"phase": "5q",
"question_id": "yaml_cli_plan",
"run_idx": 3,
"duration_seconds": 49.427,
"prompt_tokens": 43,
"completion_tokens": 2048,
"tokens_per_second": 41.43,
"finish_reason": "length",
"weeyuga_meta": null,
"status_code": 200,
"response_chars": 9007,
"response_preview": "# Python CLI for Git-Scanned YAML Schema Validation\n\n```python\n#!/usr/bin/env python3\n\"\"\"\nCLI for scanning git repos for changed YAML files and validating against JSON schemas.\n\"\"\"\n\nimport argparse\nimport json\nimport os\nimport sys\nfrom path",
"required_markers": [
"1.",
"2.",
"3.",
"4.",
"JSON schema",
"git"
],
"markers_hit": [
"JSON schema",
"git"
],
"marker_hit_rate": 0.333,
"format_rule": "four_numbered_steps",
"format_ok": false,
"usable_answer": true,
"error": null
},
{
"type": "call",
"ts_utc": "2026-04-29T09:57:44Z",
"cell_id": "pavilion:weeyuga:qwen3.5:35b-a3b-uncensored-iq1m",
"model": "qwen3.5:35b-a3b-uncensored-iq1m",
"phase": "5q",
"question_id": "ssh_lockout_triage",
"run_idx": 4,
"duration_seconds": 27.269,
"prompt_tokens": 56,
"completion_tokens": 1108,
"tokens_per_second": 40.63,
"finish_reason": "stop",
"weeyuga_meta": null,
"status_code": 200,
"response_chars": 354,
"response_preview": "- Review `sshd_config` to ensure security settings are properly configured.\n- Verify `authorized_keys` permissions to prevent unauthorized access.\n- Check `PasswordAuthentication` settings to ensure secure password handling.\n- Review `journ",
"required_markers": [
"sshd_config",
"authorized_keys",
"journalctl",
"rollback",
"PasswordAuthentication"
],
"markers_hit": [
"sshd_config",
"authorized_keys",
"journalctl",
"rollback",
"PasswordAuthentication"
],
"marker_hit_rate": 1.0,
"format_rule": "five_bullets",
"format_ok": true,
"usable_answer": true,
"error": null
}
]
},
{
"cell_id": "pavilion:weeyuga:qwen3.5:35b-a3b-iq2s",
"n_calls": 6,
"n_errors": 0,
"duration_ms_mean": null,
"duration_ms_p50": null,
"duration_ms_p95": null,
"tokens_per_sec_mean": null,
"tokens_per_sec_p50": null,
"tokens_per_sec_p95": null,
"tokens_per_sec_max": null,
"cold_warm_split": {
"cold": {
"n_calls": 2,
"duration_ms_mean": null,
"duration_ms_p50": null,
"tokens_per_sec_mean": null
},
"warm": {
"n_calls": 4,
"duration_ms_mean": null,
"duration_ms_p50": null,
"tokens_per_sec_mean": null
}
},
"per_prompt": {
"unknown": {
"n_calls": 6,
"n_errors": 0,
"duration_ms_mean": null,
"duration_ms_p50": null,
"tokens_per_sec_mean": null,
"tokens_per_sec_p50": null
}
},
"chars_split": null,
"timeline": [
{
"prompt_id": null,
"run_idx": 0,
"phase": "hello",
"duration_ms": null,
"tokens_per_sec": null,
"error": null
},
{
"prompt_id": null,
"run_idx": 0,
"phase": "5q",
"duration_ms": null,
"tokens_per_sec": null,
"error": null
},
{
"prompt_id": null,
"run_idx": 1,
"phase": "5q",
"duration_ms": null,
"tokens_per_sec": null,
"error": null
},
{
"prompt_id": null,
"run_idx": 2,
"phase": "5q",
"duration_ms": null,
"tokens_per_sec": null,
"error": null
},
{
"prompt_id": null,
"run_idx": 3,
"phase": "5q",
"duration_ms": null,
"tokens_per_sec": null,
"error": null
},
{
"prompt_id": null,
"run_idx": 4,
"phase": "5q",
"duration_ms": null,
"tokens_per_sec": null,
"error": null
}
],
"calls": [
{
"type": "call",
"ts_utc": "2026-04-29T09:57:48Z",
"cell_id": "pavilion:weeyuga:qwen3.5:35b-a3b-iq2s",
"model": "qwen3.5:35b-a3b-iq2s",
"phase": "hello",
"question_id": "hello_check",
"run_idx": 0,
"duration_seconds": 3.684,
"prompt_tokens": 16,
"completion_tokens": 134,
"tokens_per_second": 36.37,
"finish_reason": "stop",
"weeyuga_meta": null,
"status_code": 200,
"response_chars": 147,
"response_preview": "Hello! I'd love to help you. What would you like to work on? Whether it's writing, coding, problem-solving, or just chatting, I'm here to assist! \ud83d\ude0a",
"required_markers": [],
"markers_hit": [],
"marker_hit_rate": null,
"format_rule": "",
"format_ok": null,
"usable_answer": true,
"error": null
},
{
"type": "call",
"ts_utc": "2026-04-29T09:58:15Z",
"cell_id": "pavilion:weeyuga:qwen3.5:35b-a3b-iq2s",
"model": "qwen3.5:35b-a3b-iq2s",
"phase": "5q",
"question_id": "disk_guard_bash",
"run_idx": 0,
"duration_seconds": 27.061,
"prompt_tokens": 71,
"completion_tokens": 1121,
"tokens_per_second": 41.42,
"finish_reason": "stop",
"weeyuga_meta": null,
"status_code": 200,
"response_chars": 239,
"response_preview": "#!/bin/bash\n\n# Check disk usage for /\nif [ -d \"/\" ]; then\n disk_usage=$(df -P / | awk '{print $5}')\n if [ \"$disk_usage\" -gt 85 ]; then\n echo \"WARNING: Disk usage for / is at ${disk_usage}% (above 85%)\"\n exit 1\n fi\nfi",
"required_markers": [
"#!/usr/bin/env bash",
"df -P /",
"85",
"exit 1"
],
"markers_hit": [
"df -P /",
"85",
"exit 1"
],
"marker_hit_rate": 0.75,
"format_rule": "bash_code",
"format_ok": false,
"usable_answer": true,
"error": null
},
{
"type": "call",
"ts_utc": "2026-04-29T09:58:45Z",
"cell_id": "pavilion:weeyuga:qwen3.5:35b-a3b-iq2s",
"model": "qwen3.5:35b-a3b-iq2s",
"phase": "5q",
"question_id": "ipv4_python_tests",
"run_idx": 1,
"duration_seconds": 29.886,
"prompt_tokens": 55,
"completion_tokens": 1232,
"tokens_per_second": 41.22,
"finish_reason": "stop",
"weeyuga_meta": null,
"status_code": 200,
"response_chars": 613,
"response_preview": "```python\ndef is_valid_ipv4(value: str) -> bool:\n \"\"\"Check if a string is a valid IPv4 address.\"\"\"\n if not isinstance(value, str):\n return False\n parts = value.split('.')\n if len(parts) != 4:\n return False\n for ",
"required_markers": [
"def is_valid_ipv4",
"def test_",
"assert",
"split('.')"
],
"markers_hit": [
"def is_valid_ipv4",
"def test_",
"assert",
"split('.')"
],
"marker_hit_rate": 1.0,
"format_rule": "python_code",
"format_ok": true,
"usable_answer": true,
"error": null
},
{
"type": "call",
"ts_utc": "2026-04-29T09:59:11Z",
"cell_id": "pavilion:weeyuga:qwen3.5:35b-a3b-iq2s",
"model": "qwen3.5:35b-a3b-iq2s",
"phase": "5q",
"question_id": "nginx_safe_reload",
"run_idx": 2,
"duration_seconds": 25.688,
"prompt_tokens": 39,
"completion_tokens": 1060,
"tokens_per_second": 41.26,
"finish_reason": "stop",
"weeyuga_meta": null,
"status_code": 200,
"response_chars": 75,
"response_preview": "cp /etc/nginx/nginx.conf /etc/nginx/nginx.conf.bak\nnginx -t\nnginx -s reload",
"required_markers": [
"cp /etc/nginx/nginx.conf",
"nginx -t",
"systemctl reload nginx",
"&&"
],
"markers_hit": [
"cp /etc/nginx/nginx.conf",
"nginx -t"
],
"marker_hit_rate": 0.5,
"format_rule": "shell_lines",
"format_ok": true,
"usable_answer": true,
"error": null
},
{
"type": "call",
"ts_utc": "2026-04-29T10:00:01Z",
"cell_id": "pavilion:weeyuga:qwen3.5:35b-a3b-iq2s",
"model": "qwen3.5:35b-a3b-iq2s",
"phase": "5q",
"question_id": "yaml_cli_plan",
"run_idx": 3,
"duration_seconds": 50.264,
"prompt_tokens": 43,
"completion_tokens": 2048,
"tokens_per_second": 40.74,
"finish_reason": "length",
"weeyuga_meta": null,
"status_code": 200,
"response_chars": 7205,
"response_preview": "```python\n#!/usr/bin/env python3\n\"\"\"\nYAML Schema Validator CLI\nScans git repos for changed YAML files and validates against JSON schemas.\n\"\"\"\n\nimport argparse\nimport json\nimport os\nimport sys\nfrom pathlib import Path\nfrom typing import List",
"required_markers": [
"1.",
"2.",
"3.",
"4.",
"JSON schema",
"git"
],
"markers_hit": [
"JSON schema",
"git"
],
"marker_hit_rate": 0.333,
"format_rule": "four_numbered_steps",
"format_ok": false,
"usable_answer": true,
"error": null
},
{
"type": "call",
"ts_utc": "2026-04-29T10:00:28Z",
"cell_id": "pavilion:weeyuga:qwen3.5:35b-a3b-iq2s",
"model": "qwen3.5:35b-a3b-iq2s",
"phase": "5q",
"question_id": "ssh_lockout_triage",
"run_idx": 4,
"duration_seconds": 27.352,
"prompt_tokens": 56,
"completion_tokens": 1116,
"tokens_per_second": 40.8,
"finish_reason": "stop",
"weeyuga_meta": null,
"status_code": 200,
"response_chars": 409,
"response_preview": "- Review `sshd_config` for hardening settings before making changes.\n- Review `authorized_keys` permissions to ensure only trusted keys are allowed.\n- Review `PasswordAuthentication` settings to prevent unauthorized password usage.\n- Review",
"required_markers": [
"sshd_config",
"authorized_keys",
"journalctl",
"rollback",
"PasswordAuthentication"
],
"markers_hit": [
"sshd_config",
"authorized_keys",
"journalctl",
"rollback",
"PasswordAuthentication"
],
"marker_hit_rate": 1.0,
"format_rule": "five_bullets",
"format_ok": true,
"usable_answer": true,
"error": null
}
]
},
{
"cell_id": "pavilion:weeyuga:qwen3.5:9b-q6k",
"n_calls": 6,
"n_errors": 0,
"duration_ms_mean": null,
"duration_ms_p50": null,
"duration_ms_p95": null,
"tokens_per_sec_mean": null,
"tokens_per_sec_p50": null,
"tokens_per_sec_p95": null,
"tokens_per_sec_max": null,
"cold_warm_split": {
"cold": {
"n_calls": 2,
"duration_ms_mean": null,
"duration_ms_p50": null,
"tokens_per_sec_mean": null
},
"warm": {
"n_calls": 4,
"duration_ms_mean": null,
"duration_ms_p50": null,
"tokens_per_sec_mean": null
}
},
"per_prompt": {
"unknown": {
"n_calls": 6,
"n_errors": 0,
"duration_ms_mean": null,
"duration_ms_p50": null,
"tokens_per_sec_mean": null,
"tokens_per_sec_p50": null
}
},
"chars_split": null,
"timeline": [
{
"prompt_id": null,
"run_idx": 0,
"phase": "hello",
"duration_ms": null,
"tokens_per_sec": null,
"error": null
},
{
"prompt_id": null,
"run_idx": 0,
"phase": "5q",
"duration_ms": null,
"tokens_per_sec": null,
"error": null
},
{
"prompt_id": null,
"run_idx": 1,
"phase": "5q",
"duration_ms": null,
"tokens_per_sec": null,
"error": null
},
{
"prompt_id": null,
"run_idx": 2,
"phase": "5q",
"duration_ms": null,
"tokens_per_sec": null,
"error": null
},
{
"prompt_id": null,
"run_idx": 3,
"phase": "5q",
"duration_ms": null,
"tokens_per_sec": null,
"error": null
},
{
"prompt_id": null,
"run_idx": 4,
"phase": "5q",
"duration_ms": null,
"tokens_per_sec": null,
"error": null
}
],
"calls": [
{
"type": "call",
"ts_utc": "2026-04-29T10:00:33Z",
"cell_id": "pavilion:weeyuga:qwen3.5:9b-q6k",
"model": "qwen3.5:9b-q6k",
"phase": "hello",
"question_id": "hello_check",
"run_idx": 0,
"duration_seconds": 4.401,
"prompt_tokens": 16,
"completion_tokens": 163,
"tokens_per_second": 37.04,
"finish_reason": "stop",
"weeyuga_meta": null,
"status_code": 200,
"response_chars": 202,
"response_preview": "Hello! I'm glad to help! \ud83d\ude0a What would you like to do today? Whether it's answering questions, solving problems, writing content, or just chatting, I'm here to assist you. Just let me know what you need!",
"required_markers": [],
"markers_hit": [],
"marker_hit_rate": null,
"format_rule": "",
"format_ok": null,
"usable_answer": true,
"error": null
},
{
"type": "call",
"ts_utc": "2026-04-29T10:01:00Z",
"cell_id": "pavilion:weeyuga:qwen3.5:9b-q6k",
"model": "qwen3.5:9b-q6k",
"phase": "5q",
"question_id": "disk_guard_bash",
"run_idx": 0,
"duration_seconds": 27.133,
"prompt_tokens": 71,
"completion_tokens": 1124,
"tokens_per_second": 41.43,
"finish_reason": "stop",
"weeyuga_meta": null,
"status_code": 200,
"response_chars": 246,
"response_preview": "```bash\n#!/bin/bash\n\n# Check disk usage for /\nif [ -d \"/\" ]; then\n usage=$(df -P / | awk '{print $5}')\n if [ \"$usage\" -gt 85 ]; then\n echo \"WARNING: Disk usage for / is at ${usage}% (above 85% threshold)\"\n exit 1\n fi\n",
"required_markers": [
"#!/usr/bin/env bash",
"df -P /",
"85",
"exit 1"
],
"markers_hit": [
"df -P /",
"85",
"exit 1"
],
"marker_hit_rate": 0.75,
"format_rule": "bash_code",
"format_ok": false,
"usable_answer": true,
"error": null
},
{
"type": "call",
"ts_utc": "2026-04-29T10:01:20Z",
"cell_id": "pavilion:weeyuga:qwen3.5:9b-q6k",
"model": "qwen3.5:9b-q6k",
"phase": "5q",
"question_id": "ipv4_python_tests",
"run_idx": 1,
"duration_seconds": 20.684,
"prompt_tokens": 55,
"completion_tokens": 838,
"tokens_per_second": 40.51,
"finish_reason": "stop",
"weeyuga_meta": null,
"status_code": 200,
"response_chars": 1012,
"response_preview": "```python\ndef is_valid_ipv4(value: str) -> bool:\n \"\"\"\n Check if a string is a valid IPv4 address.\n \n Args:\n value (str): The string to validate as an IPv4 address.\n \n Returns:\n bool: True if the string is",
"required_markers": [
"def is_valid_ipv4",
"def test_",
"assert",
"split('.')"
],
"markers_hit": [
"def is_valid_ipv4",
"def test_",
"assert",
"split('.')"
],
"marker_hit_rate": 1.0,
"format_rule": "python_code",
"format_ok": true,
"usable_answer": true,
"error": null
},
{
"type": "call",
"ts_utc": "2026-04-29T10:01:27Z",
"cell_id": "pavilion:weeyuga:qwen3.5:9b-q6k",
"model": "qwen3.5:9b-q6k",
"phase": "5q",
"question_id": "nginx_safe_reload",
"run_idx": 2,
"duration_seconds": 6.142,
"prompt_tokens": 39,
"completion_tokens": 241,
"tokens_per_second": 39.24,
"finish_reason": "stop",
"weeyuga_meta": null,
"status_code": 200,
"response_chars": 75,
"response_preview": "cp /etc/nginx/nginx.conf /etc/nginx/nginx.conf.bak\nnginx -t\nnginx -s reload",
"required_markers": [
"cp /etc/nginx/nginx.conf",
"nginx -t",
"systemctl reload nginx",
"&&"
],
"markers_hit": [
"cp /etc/nginx/nginx.conf",
"nginx -t"
],
"marker_hit_rate": 0.5,
"format_rule": "shell_lines",
"format_ok": true,
"usable_answer": true,
"error": null
},
{
"type": "call",
"ts_utc": "2026-04-29T10:02:16Z",
"cell_id": "pavilion:weeyuga:qwen3.5:9b-q6k",
"model": "qwen3.5:9b-q6k",
"phase": "5q",
"question_id": "yaml_cli_plan",
"run_idx": 3,
"duration_seconds": 49.257,
"prompt_tokens": 43,
"completion_tokens": 2048,
"tokens_per_second": 41.58,
"finish_reason": "length",
"weeyuga_meta": null,
"status_code": 200,
"response_chars": 7775,
"response_preview": "# Python CLI for Git-Scanned YAML Schema Validation\n\n## Step 1: Project Setup and Imports\n\n```python\nimport os\nimport sys\nimport json\nimport yaml\nfrom pathlib import Path\nfrom typing import List, Dict, Any, Optional\nimport argparse\n\n# Requi",
"required_markers": [
"1.",
"2.",
"3.",
"4.",
"JSON schema",
"git"
],
"markers_hit": [
"git"
],
"marker_hit_rate": 0.167,
"format_rule": "four_numbered_steps",
"format_ok": false,
"usable_answer": true,
"error": null
},
{
"type": "call",
"ts_utc": "2026-04-29T10:02:45Z",
"cell_id": "pavilion:weeyuga:qwen3.5:9b-q6k",
"model": "qwen3.5:9b-q6k",
"phase": "5q",
"question_id": "ssh_lockout_triage",
"run_idx": 4,
"duration_seconds": 29.306,
"prompt_tokens": 56,
"completion_tokens": 1165,
"tokens_per_second": 39.75,
"finish_reason": "stop",
"weeyuga_meta": null,
"status_code": 200,
"response_chars": 574,
"response_preview": "- Review your `sshd_config` file to ensure the `PasswordAuthentication` setting is enabled and `PermitRootLogin` is set to `yes` for security.\n- Verify that your `authorized_keys` file contains only valid public keys and is not empty.\n-",
"required_markers": [
"sshd_config",
"authorized_keys",
"journalctl",
"rollback",
"PasswordAuthentication"
],
"markers_hit": [
"sshd_config",
"authorized_keys",
"journalctl",
"rollback",
"PasswordAuthentication"
],
"marker_hit_rate": 1.0,
"format_rule": "five_bullets",
"format_ok": true,
"usable_answer": true,
"error": null
}
]
},
{
"cell_id": "pavilion:weeyuga:qwen3.5:9b-q4km",
"n_calls": 6,
"n_errors": 0,
"duration_ms_mean": null,
"duration_ms_p50": null,
"duration_ms_p95": null,
"tokens_per_sec_mean": null,
"tokens_per_sec_p50": null,
"tokens_per_sec_p95": null,
"tokens_per_sec_max": null,
"cold_warm_split": {
"cold": {
"n_calls": 2,
"duration_ms_mean": null,
"duration_ms_p50": null,
"tokens_per_sec_mean": null
},
"warm": {
"n_calls": 4,
"duration_ms_mean": null,
"duration_ms_p50": null,
"tokens_per_sec_mean": null
}
},
"per_prompt": {
"unknown": {
"n_calls": 6,
"n_errors": 0,
"duration_ms_mean": null,
"duration_ms_p50": null,
"tokens_per_sec_mean": null,
"tokens_per_sec_p50": null
}
},
"chars_split": null,
"timeline": [
{
"prompt_id": null,
"run_idx": 0,
"phase": "hello",
"duration_ms": null,
"tokens_per_sec": null,
"error": null
},
{
"prompt_id": null,
"run_idx": 0,
"phase": "5q",
"duration_ms": null,
"tokens_per_sec": null,
"error": null
},
{
"prompt_id": null,
"run_idx": 1,
"phase": "5q",
"duration_ms": null,
"tokens_per_sec": null,
"error": null
},
{
"prompt_id": null,
"run_idx": 2,
"phase": "5q",
"duration_ms": null,
"tokens_per_sec": null,
"error": null
},
{
"prompt_id": null,
"run_idx": 3,
"phase": "5q",
"duration_ms": null,
"tokens_per_sec": null,
"error": null
},
{
"prompt_id": null,
"run_idx": 4,
"phase": "5q",
"duration_ms": null,
"tokens_per_sec": null,
"error": null
}
],
"calls": [
{
"type": "call",
"ts_utc": "2026-04-29T10:02:51Z",
"cell_id": "pavilion:weeyuga:qwen3.5:9b-q4km",
"model": "qwen3.5:9b-q4km",
"phase": "hello",
"question_id": "hello_check",
"run_idx": 0,
"duration_seconds": 5.912,
"prompt_tokens": 16,
"completion_tokens": 220,
"tokens_per_second": 37.21,
"finish_reason": "stop",
"weeyuga_meta": null,
"status_code": 200,
"response_chars": 186,
"response_preview": "Hello! I'm glad to help! \ud83d\ude0a \nWhat would you like to do today? For example: \n- Ask a question? \n- Solve a problem? \n- Get information? \n- Or just chat? \n\nLet me know what you need! \ud83d\ude4c",
"required_markers": [],
"markers_hit": [],
"marker_hit_rate": null,
"format_rule": "",
"format_ok": null,
"usable_answer": true,
"error": null
},
{
"type": "call",
"ts_utc": "2026-04-29T10:03:07Z",
"cell_id": "pavilion:weeyuga:qwen3.5:9b-q4km",
"model": "qwen3.5:9b-q4km",
"phase": "5q",
"question_id": "disk_guard_bash",
"run_idx": 0,
"duration_seconds": 15.618,
"prompt_tokens": 71,
"completion_tokens": 636,
"tokens_per_second": 40.72,
"finish_reason": "stop",
"weeyuga_meta": null,
"status_code": 200,
"response_chars": 263,
"response_preview": "```bash\n#!/bin/bash\n\n# Check disk usage for /\nif [ -d \"/\" ]; then\n usage=$(df -P / 2>&1 | tail -n 1 | awk '{print $5}')\n if [ \"$usage\" -gt 85 ]; then\n echo \"WARNING: Disk usage for / is at ${usage}% (above 85% threshold)\"\n ",
"required_markers": [
"#!/usr/bin/env bash",
"df -P /",
"85",
"exit 1"
],
"markers_hit": [
"df -P /",
"85",
"exit 1"
],
"marker_hit_rate": 0.75,
"format_rule": "bash_code",
"format_ok": false,
"usable_answer": true,
"error": null
},
{
"type": "call",
"ts_utc": "2026-04-29T10:03:31Z",
"cell_id": "pavilion:weeyuga:qwen3.5:9b-q4km",
"model": "qwen3.5:9b-q4km",
"phase": "5q",
"question_id": "ipv4_python_tests",
"run_idx": 1,
"duration_seconds": 24.316,
"prompt_tokens": 55,
"completion_tokens": 947,
"tokens_per_second": 38.95,
"finish_reason": "stop",
"weeyuga_meta": null,
"status_code": 200,
"response_chars": 493,
"response_preview": "```python\ndef is_valid_ipv4(value: str) -> bool:\n if not isinstance(value, str):\n return False\n if len(value) != 4:\n return False\n for i in range(4):\n if not value[i].isdigit():\n return False\n ret",
"required_markers": [
"def is_valid_ipv4",
"def test_",
"assert",
"split('.')"
],
"markers_hit": [
"def is_valid_ipv4",
"def test_",
"assert"
],
"marker_hit_rate": 0.75,
"format_rule": "python_code",
"format_ok": true,
"usable_answer": true,
"error": null
},
{
"type": "call",
"ts_utc": "2026-04-29T10:03:56Z",
"cell_id": "pavilion:weeyuga:qwen3.5:9b-q4km",
"model": "qwen3.5:9b-q4km",
"phase": "5q",
"question_id": "nginx_safe_reload",
"run_idx": 2,
"duration_seconds": 25.495,
"prompt_tokens": 39,
"completion_tokens": 1060,
"tokens_per_second": 41.58,
"finish_reason": "stop",
"weeyuga_meta": null,
"status_code": 200,
"response_chars": 75,
"response_preview": "cp /etc/nginx/nginx.conf /etc/nginx/nginx.conf.bak\nnginx -t\nnginx -s reload",
"required_markers": [
"cp /etc/nginx/nginx.conf",
"nginx -t",
"systemctl reload nginx",
"&&"
],
"markers_hit": [
"cp /etc/nginx/nginx.conf",
"nginx -t"
],
"marker_hit_rate": 0.5,
"format_rule": "shell_lines",
"format_ok": true,
"usable_answer": true,
"error": null
},
{
"type": "call",
"ts_utc": "2026-04-29T10:04:46Z",
"cell_id": "pavilion:weeyuga:qwen3.5:9b-q4km",
"model": "qwen3.5:9b-q4km",
"phase": "5q",
"question_id": "yaml_cli_plan",
"run_idx": 3,
"duration_seconds": 49.664,
"prompt_tokens": 43,
"completion_tokens": 2048,
"tokens_per_second": 41.24,
"finish_reason": "length",
"weeyuga_meta": null,
"status_code": 200,
"response_chars": 9006,
"response_preview": "```python\n#!/usr/bin/env python3\n\"\"\"\nCLI: git-yaml-validator\nScans a git repo for changed YAML files, validates them against a JSON schema, and exits nonzero on failure.\n\"\"\"\n\nimport argparse\nimport json\nimport os\nimport sys\nfrom pathlib imp",
"required_markers": [
"1.",
"2.",
"3.",
"4.",
"JSON schema",
"git"
],
"markers_hit": [
"JSON schema",
"git"
],
"marker_hit_rate": 0.333,
"format_rule": "four_numbered_steps",
"format_ok": false,
"usable_answer": true,
"error": null
},
{
"type": "call",
"ts_utc": "2026-04-29T10:05:14Z",
"cell_id": "pavilion:weeyuga:qwen3.5:9b-q4km",
"model": "qwen3.5:9b-q4km",
"phase": "5q",
"question_id": "ssh_lockout_triage",
"run_idx": 4,
"duration_seconds": 27.576,
"prompt_tokens": 56,
"completion_tokens": 1128,
"tokens_per_second": 40.91,
"finish_reason": "stop",
"weeyuga_meta": null,
"status_code": 200,
"response_chars": 401,
"response_preview": "- Review `sshd_config` to ensure `PermitRootLogin no` and `PasswordAuthentication no` are set correctly.\n- Review `authorized_keys` to ensure no sensitive keys are stored or misconfigured.\n- Run `journalctl` to check for recent security iss",
"required_markers": [
"sshd_config",
"authorized_keys",
"journalctl",
"rollback",
"PasswordAuthentication"
],
"markers_hit": [
"sshd_config",
"authorized_keys",
"journalctl",
"rollback",
"PasswordAuthentication"
],
"marker_hit_rate": 1.0,
"format_rule": "five_bullets",
"format_ok": true,
"usable_answer": true,
"error": null
}
]
},
{
"cell_id": "pavilion:weeyuga:qwen3.5:2b",
"n_calls": 6,
"n_errors": 0,
"duration_ms_mean": null,
"duration_ms_p50": null,
"duration_ms_p95": null,
"tokens_per_sec_mean": null,
"tokens_per_sec_p50": null,
"tokens_per_sec_p95": null,
"tokens_per_sec_max": null,
"cold_warm_split": {
"cold": {
"n_calls": 2,
"duration_ms_mean": null,
"duration_ms_p50": null,
"tokens_per_sec_mean": null
},
"warm": {
"n_calls": 4,
"duration_ms_mean": null,
"duration_ms_p50": null,
"tokens_per_sec_mean": null
}
},
"per_prompt": {
"unknown": {
"n_calls": 6,
"n_errors": 0,
"duration_ms_mean": null,
"duration_ms_p50": null,
"tokens_per_sec_mean": null,
"tokens_per_sec_p50": null
}
},
"chars_split": null,
"timeline": [
{
"prompt_id": null,
"run_idx": 0,
"phase": "hello",
"duration_ms": null,
"tokens_per_sec": null,
"error": null
},
{
"prompt_id": null,
"run_idx": 0,
"phase": "5q",
"duration_ms": null,
"tokens_per_sec": null,
"error": null
},
{
"prompt_id": null,
"run_idx": 1,
"phase": "5q",
"duration_ms": null,
"tokens_per_sec": null,
"error": null
},
{
"prompt_id": null,
"run_idx": 2,
"phase": "5q",
"duration_ms": null,
"tokens_per_sec": null,
"error": null
},
{
"prompt_id": null,
"run_idx": 3,
"phase": "5q",
"duration_ms": null,
"tokens_per_sec": null,
"error": null
},
{
"prompt_id": null,
"run_idx": 4,
"phase": "5q",
"duration_ms": null,
"tokens_per_sec": null,
"error": null
}
],
"calls": [
{
"type": "call",
"ts_utc": "2026-04-29T10:05:18Z",
"cell_id": "pavilion:weeyuga:qwen3.5:2b",
"model": "qwen3.5:2b",
"phase": "hello",
"question_id": "hello_check",
"run_idx": 0,
"duration_seconds": 4.366,
"prompt_tokens": 16,
"completion_tokens": 157,
"tokens_per_second": 35.96,
"finish_reason": "stop",
"weeyuga_meta": null,
"status_code": 200,
"response_chars": 91,
"response_preview": "Hello! I'd be happy to help you with anything you need. What can I assist you with today? \ud83d\ude0a",
"required_markers": [],
"markers_hit": [],
"marker_hit_rate": null,
"format_rule": "",
"format_ok": null,
"usable_answer": true,
"error": null
},
{
"type": "call",
"ts_utc": "2026-04-29T10:05:36Z",
"cell_id": "pavilion:weeyuga:qwen3.5:2b",
"model": "qwen3.5:2b",
"phase": "5q",
"question_id": "disk_guard_bash",
"run_idx": 0,
"duration_seconds": 18.333,
"prompt_tokens": 71,
"completion_tokens": 751,
"tokens_per_second": 40.96,
"finish_reason": "stop",
"weeyuga_meta": null,
"status_code": 200,
"response_chars": 246,
"response_preview": "```bash\n#!/bin/bash\n\n# Check disk usage for /\nif [ -d \"/\" ]; then\n usage=$(df -P / | awk '{print $5}')\n if [ \"$usage\" -gt 85 ]; then\n echo \"WARNING: Disk usage for / is at ${usage}% (above 85% threshold)\"\n exit 1\n fi\n",
"required_markers": [
"#!/usr/bin/env bash",
"df -P /",
"85",
"exit 1"
],
"markers_hit": [
"df -P /",
"85",
"exit 1"
],
"marker_hit_rate": 0.75,
"format_rule": "bash_code",
"format_ok": false,
"usable_answer": true,
"error": null
},
{
"type": "call",
"ts_utc": "2026-04-29T10:05:56Z",
"cell_id": "pavilion:weeyuga:qwen3.5:2b",
"model": "qwen3.5:2b",
"phase": "5q",
"question_id": "ipv4_python_tests",
"run_idx": 1,
"duration_seconds": 19.861,
"prompt_tokens": 55,
"completion_tokens": 820,
"tokens_per_second": 41.29,
"finish_reason": "stop",
"weeyuga_meta": null,
"status_code": 200,
"response_chars": 771,
"response_preview": "```python\ndef is_valid_ipv4(value: str) -> bool:\n \"\"\"Check if a string is a valid IPv4 address.\"\"\"\n if not isinstance(value, str):\n return False\n parts = value.split('.')\n if len(parts) != 4:\n return False\n for ",
"required_markers": [
"def is_valid_ipv4",
"def test_",
"assert",
"split('.')"
],
"markers_hit": [
"def is_valid_ipv4",
"def test_",
"assert",
"split('.')"
],
"marker_hit_rate": 1.0,
"format_rule": "python_code",
"format_ok": true,
"usable_answer": true,
"error": null
},
{
"type": "call",
"ts_utc": "2026-04-29T10:06:22Z",
"cell_id": "pavilion:weeyuga:qwen3.5:2b",
"model": "qwen3.5:2b",
"phase": "5q",
"question_id": "nginx_safe_reload",
"run_idx": 2,
"duration_seconds": 25.77,
"prompt_tokens": 39,
"completion_tokens": 1060,
"tokens_per_second": 41.13,
"finish_reason": "stop",
"weeyuga_meta": null,
"status_code": 200,
"response_chars": 75,
"response_preview": "cp /etc/nginx/nginx.conf /etc/nginx/nginx.conf.bak\nnginx -t\nnginx -s reload",
"required_markers": [
"cp /etc/nginx/nginx.conf",
"nginx -t",
"systemctl reload nginx",
"&&"
],
"markers_hit": [
"cp /etc/nginx/nginx.conf",
"nginx -t"
],
"marker_hit_rate": 0.5,
"format_rule": "shell_lines",
"format_ok": true,
"usable_answer": true,
"error": null
},
{
"type": "call",
"ts_utc": "2026-04-29T10:07:12Z",
"cell_id": "pavilion:weeyuga:qwen3.5:2b",
"model": "qwen3.5:2b",
"phase": "5q",
"question_id": "yaml_cli_plan",
"run_idx": 3,
"duration_seconds": 50.003,
"prompt_tokens": 43,
"completion_tokens": 2048,
"tokens_per_second": 40.96,
"finish_reason": "length",
"weeyuga_meta": null,
"status_code": 200,
"response_chars": 11731,
"response_preview": "1. Initialize a Python project structure with a `main.py` script and a `schema.json` file for validation.\n2. Create a `git` command-line interface that uses `git diff` to identify changed files in the repository.\n3. Implement a YAML parser ",
"required_markers": [
"1.",
"2.",
"3.",
"4.",
"JSON schema",
"git"
],
"markers_hit": [
"1.",
"2.",
"3.",
"4.",
"JSON schema",
"git"
],
"marker_hit_rate": 1.0,
"format_rule": "four_numbered_steps",
"format_ok": true,
"usable_answer": true,
"error": null
},
{
"type": "call",
"ts_utc": "2026-04-29T10:07:39Z",
"cell_id": "pavilion:weeyuga:qwen3.5:2b",
"model": "qwen3.5:2b",
"phase": "5q",
"question_id": "ssh_lockout_triage",
"run_idx": 4,
"duration_seconds": 27.148,
"prompt_tokens": 56,
"completion_tokens": 1113,
"tokens_per_second": 41.0,
"finish_reason": "stop",
"weeyuga_meta": null,
"status_code": 200,
"response_chars": 378,
"response_preview": "- Review `sshd_config` for hardening settings before making changes.\n- Review `authorized_keys` permissions to ensure only trusted keys are allowed.\n- Review `PasswordAuthentication` settings to prevent unauthorized password usage.\n- Review",
"required_markers": [
"sshd_config",
"authorized_keys",
"journalctl",
"rollback",
"PasswordAuthentication"
],
"markers_hit": [
"sshd_config",
"authorized_keys",
"journalctl",
"rollback",
"PasswordAuthentication"
],
"marker_hit_rate": 1.0,
"format_rule": "five_bullets",
"format_ok": true,
"usable_answer": true,
"error": null
}
]
},
{
"cell_id": "pavilion:weeyuga:qwen3.5:0.8b",
"n_calls": 6,
"n_errors": 0,
"duration_ms_mean": null,
"duration_ms_p50": null,
"duration_ms_p95": null,
"tokens_per_sec_mean": null,
"tokens_per_sec_p50": null,
"tokens_per_sec_p95": null,
"tokens_per_sec_max": null,
"cold_warm_split": {
"cold": {
"n_calls": 2,
"duration_ms_mean": null,
"duration_ms_p50": null,
"tokens_per_sec_mean": null
},
"warm": {
"n_calls": 4,
"duration_ms_mean": null,
"duration_ms_p50": null,
"tokens_per_sec_mean": null
}
},
"per_prompt": {
"unknown": {
"n_calls": 6,
"n_errors": 0,
"duration_ms_mean": null,
"duration_ms_p50": null,
"tokens_per_sec_mean": null,
"tokens_per_sec_p50": null
}
},
"chars_split": null,
"timeline": [
{
"prompt_id": null,
"run_idx": 0,
"phase": "hello",
"duration_ms": null,
"tokens_per_sec": null,
"error": null
},
{
"prompt_id": null,
"run_idx": 0,
"phase": "5q",
"duration_ms": null,
"tokens_per_sec": null,
"error": null
},
{
"prompt_id": null,
"run_idx": 1,
"phase": "5q",
"duration_ms": null,
"tokens_per_sec": null,
"error": null
},
{
"prompt_id": null,
"run_idx": 2,
"phase": "5q",
"duration_ms": null,
"tokens_per_sec": null,
"error": null
},
{
"prompt_id": null,
"run_idx": 3,
"phase": "5q",
"duration_ms": null,
"tokens_per_sec": null,
"error": null
},
{
"prompt_id": null,
"run_idx": 4,
"phase": "5q",
"duration_ms": null,
"tokens_per_sec": null,
"error": null
}
],
"calls": [
{
"type": "call",
"ts_utc": "2026-04-29T10:07:43Z",
"cell_id": "pavilion:weeyuga:qwen3.5:0.8b",
"model": "qwen3.5:0.8b",
"phase": "hello",
"question_id": "hello_check",
"run_idx": 0,
"duration_seconds": 3.869,
"prompt_tokens": 16,
"completion_tokens": 141,
"tokens_per_second": 36.44,
"finish_reason": "stop",
"weeyuga_meta": null,
"status_code": 200,
"response_chars": 57,
"response_preview": "Hello! I'm glad to help you. What would you like to do? \ud83d\ude0a",
"required_markers": [],
"markers_hit": [],
"marker_hit_rate": null,
"format_rule": "",
"format_ok": null,
"usable_answer": true,
"error": null
},
{
"type": "call",
"ts_utc": "2026-04-29T10:08:00Z",
"cell_id": "pavilion:weeyuga:qwen3.5:0.8b",
"model": "qwen3.5:0.8b",
"phase": "5q",
"question_id": "disk_guard_bash",
"run_idx": 0,
"duration_seconds": 16.794,
"prompt_tokens": 71,
"completion_tokens": 688,
"tokens_per_second": 40.97,
"finish_reason": "stop",
"weeyuga_meta": null,
"status_code": 200,
"response_chars": 217,
"response_preview": "#!/bin/bash\n\n# Check disk usage for /\nif [ -d \"/\" ]; then\n usage=$(df -P / 2>&1 | awk '{print $5}')\n if [ \"$usage\" -gt 85 ]; then\n echo \"WARNING: Disk usage for / is at ${usage}%\"\n exit 1\n fi\nfi",
"required_markers": [
"#!/usr/bin/env bash",
"df -P /",
"85",
"exit 1"
],
"markers_hit": [
"df -P /",
"85",
"exit 1"
],
"marker_hit_rate": 0.75,
"format_rule": "bash_code",
"format_ok": false,
"usable_answer": true,
"error": null
},
{
"type": "call",
"ts_utc": "2026-04-29T10:08:18Z",
"cell_id": "pavilion:weeyuga:qwen3.5:0.8b",
"model": "qwen3.5:0.8b",
"phase": "5q",
"question_id": "ipv4_python_tests",
"run_idx": 1,
"duration_seconds": 17.714,
"prompt_tokens": 55,
"completion_tokens": 722,
"tokens_per_second": 40.76,
"finish_reason": "stop",
"weeyuga_meta": null,
"status_code": 200,
"response_chars": 644,
"response_preview": "```python\ndef is_valid_ipv4(value: str) -> bool:\n \"\"\"Check if a string is a valid IPv4 address.\"\"\"\n if not isinstance(value, str):\n return False\n parts = value.split('.')\n if len(parts) != 4:\n return False\n for ",
"required_markers": [
"def is_valid_ipv4",
"def test_",
"assert",
"split('.')"
],
"markers_hit": [
"def is_valid_ipv4",
"def test_",
"assert",
"split('.')"
],
"marker_hit_rate": 1.0,
"format_rule": "python_code",
"format_ok": true,
"usable_answer": true,
"error": null
},
{
"type": "call",
"ts_utc": "2026-04-29T10:08:44Z",
"cell_id": "pavilion:weeyuga:qwen3.5:0.8b",
"model": "qwen3.5:0.8b",
"phase": "5q",
"question_id": "nginx_safe_reload",
"run_idx": 2,
"duration_seconds": 26.52,
"prompt_tokens": 39,
"completion_tokens": 1087,
"tokens_per_second": 40.99,
"finish_reason": "stop",
"weeyuga_meta": null,
"status_code": 200,
"response_chars": 177,
"response_preview": "cp /etc/nginx/nginx.conf /etc/nginx/nginx.conf.backup\nnginx -t\nif nginx -t; then\n nginx -s reload\nelse\n echo \"Validation failed, reloading nginx...\"\n nginx -s reload\nfi",
"required_markers": [
"cp /etc/nginx/nginx.conf",
"nginx -t",
"systemctl reload nginx",
"&&"
],
"markers_hit": [
"cp /etc/nginx/nginx.conf",
"nginx -t"
],
"marker_hit_rate": 0.5,
"format_rule": "shell_lines",
"format_ok": true,
"usable_answer": true,
"error": null
},
{
"type": "call",
"ts_utc": "2026-04-29T10:09:35Z",
"cell_id": "pavilion:weeyuga:qwen3.5:0.8b",
"model": "qwen3.5:0.8b",
"phase": "5q",
"question_id": "yaml_cli_plan",
"run_idx": 3,
"duration_seconds": 50.97,
"prompt_tokens": 43,
"completion_tokens": 2048,
"tokens_per_second": 40.18,
"finish_reason": "length",
"weeyuga_meta": null,
"status_code": 200,
"response_chars": 7262,
"response_preview": "# Python CLI for Git-Scanned YAML Schema Validation\n\n```python\n#!/usr/bin/env python3\n\"\"\"\nYAML Schema Validator CLI\nScans git repos for changed YAML files and validates against JSON schemas.\n\"\"\"\n\nimport argparse\nimport json\nimport os\nimport",
"required_markers": [
"1.",
"2.",
"3.",
"4.",
"JSON schema",
"git"
],
"markers_hit": [
"1.",
"2.",
"3.",
"4.",
"JSON schema",
"git"
],
"marker_hit_rate": 1.0,
"format_rule": "four_numbered_steps",
"format_ok": true,
"usable_answer": true,
"error": null
},
{
"type": "call",
"ts_utc": "2026-04-29T10:10:03Z",
"cell_id": "pavilion:weeyuga:qwen3.5:0.8b",
"model": "qwen3.5:0.8b",
"phase": "5q",
"question_id": "ssh_lockout_triage",
"run_idx": 4,
"duration_seconds": 27.569,
"prompt_tokens": 56,
"completion_tokens": 1125,
"tokens_per_second": 40.81,
"finish_reason": "stop",
"weeyuga_meta": null,
"status_code": 200,
"response_chars": 428,
"response_preview": "- Review `sshd_config` for security settings like `PermitRootLogin no` and `PasswordAuthentication no`.\n- Review `authorized_keys` permissions and ownership to ensure only trusted users can access.\n- Review `PasswordAuthentication` settings",
"required_markers": [
"sshd_config",
"authorized_keys",
"journalctl",
"rollback",
"PasswordAuthentication"
],
"markers_hit": [
"sshd_config",
"authorized_keys",
"journalctl",
"rollback",
"PasswordAuthentication"
],
"marker_hit_rate": 1.0,
"format_rule": "five_bullets",
"format_ok": true,
"usable_answer": true,
"error": null
}
]
},
{
"cell_id": "pavilion:weeyuga:qwen3.5:9b",
"n_calls": 6,
"n_errors": 0,
"duration_ms_mean": null,
"duration_ms_p50": null,
"duration_ms_p95": null,
"tokens_per_sec_mean": null,
"tokens_per_sec_p50": null,
"tokens_per_sec_p95": null,
"tokens_per_sec_max": null,
"cold_warm_split": {
"cold": {
"n_calls": 2,
"duration_ms_mean": null,
"duration_ms_p50": null,
"tokens_per_sec_mean": null
},
"warm": {
"n_calls": 4,
"duration_ms_mean": null,
"duration_ms_p50": null,
"tokens_per_sec_mean": null
}
},
"per_prompt": {
"unknown": {
"n_calls": 6,
"n_errors": 0,
"duration_ms_mean": null,
"duration_ms_p50": null,
"tokens_per_sec_mean": null,
"tokens_per_sec_p50": null
}
},
"chars_split": null,
"timeline": [
{
"prompt_id": null,
"run_idx": 0,
"phase": "hello",
"duration_ms": null,
"tokens_per_sec": null,
"error": null
},
{
"prompt_id": null,
"run_idx": 0,
"phase": "5q",
"duration_ms": null,
"tokens_per_sec": null,
"error": null
},
{
"prompt_id": null,
"run_idx": 1,
"phase": "5q",
"duration_ms": null,
"tokens_per_sec": null,
"error": null
},
{
"prompt_id": null,
"run_idx": 2,
"phase": "5q",
"duration_ms": null,
"tokens_per_sec": null,
"error": null
},
{
"prompt_id": null,
"run_idx": 3,
"phase": "5q",
"duration_ms": null,
"tokens_per_sec": null,
"error": null
},
{
"prompt_id": null,
"run_idx": 4,
"phase": "5q",
"duration_ms": null,
"tokens_per_sec": null,
"error": null
}
],
"calls": [
{
"type": "call",
"ts_utc": "2026-04-29T10:10:13Z",
"cell_id": "pavilion:weeyuga:qwen3.5:9b",
"model": "qwen3.5:9b",
"phase": "hello",
"question_id": "hello_check",
"run_idx": 0,
"duration_seconds": 10.852,
"prompt_tokens": 16,
"completion_tokens": 416,
"tokens_per_second": 38.33,
"finish_reason": "stop",
"weeyuga_meta": null,
"status_code": 200,
"response_chars": 157,
"response_preview": "Hello! I'm glad to help you. What would you like to do today? Whether it's answering questions, solving problems, or just chatting, I'm here to assist you! \ud83d\ude0a",
"required_markers": [],
"markers_hit": [],
"marker_hit_rate": null,
"format_rule": "",
"format_ok": null,
"usable_answer": true,
"error": null
},
{
"type": "call",
"ts_utc": "2026-04-29T10:10:40Z",
"cell_id": "pavilion:weeyuga:qwen3.5:9b",
"model": "qwen3.5:9b",
"phase": "5q",
"question_id": "disk_guard_bash",
"run_idx": 0,
"duration_seconds": 26.865,
"prompt_tokens": 71,
"completion_tokens": 1127,
"tokens_per_second": 41.95,
"finish_reason": "stop",
"weeyuga_meta": null,
"status_code": 200,
"response_chars": 247,
"response_preview": "#!/bin/bash\n\n# Check disk usage for /\nif [ -d \"/\" ]; then\n usage=$(df -P / 2>&1 | awk '{print $5}')\n if [ \"$usage\" -gt 85 ]; then\n echo \"WARNING: Disk usage for / is at ${usage}% (above 85% threshold)\"\n exit 1\n fi\nfi\n",
"required_markers": [
"#!/usr/bin/env bash",
"df -P /",
"85",
"exit 1"
],
"markers_hit": [
"df -P /",
"85",
"exit 1"
],
"marker_hit_rate": 0.75,
"format_rule": "bash_code",
"format_ok": false,
"usable_answer": true,
"error": null
},
{
"type": "call",
"ts_utc": "2026-04-29T10:10:58Z",
"cell_id": "pavilion:weeyuga:qwen3.5:9b",
"model": "qwen3.5:9b",
"phase": "5q",
"question_id": "ipv4_python_tests",
"run_idx": 1,
"duration_seconds": 17.892,
"prompt_tokens": 55,
"completion_tokens": 732,
"tokens_per_second": 40.91,
"finish_reason": "stop",
"weeyuga_meta": null,
"status_code": 200,
"response_chars": 574,
"response_preview": "```python\ndef is_valid_ipv4(value: str) -> bool:\n \"\"\"Check if a string is a valid IPv4 address.\"\"\"\n if not isinstance(value, str):\n return False\n parts = value.split('.')\n if len(parts) != 4:\n return False\n for ",
"required_markers": [
"def is_valid_ipv4",
"def test_",
"assert",
"split('.')"
],
"markers_hit": [
"def is_valid_ipv4",
"def test_",
"assert",
"split('.')"
],
"marker_hit_rate": 1.0,
"format_rule": "python_code",
"format_ok": false,
"usable_answer": true,
"error": null
},
{
"type": "call",
"ts_utc": "2026-04-29T10:11:25Z",
"cell_id": "pavilion:weeyuga:qwen3.5:9b",
"model": "qwen3.5:9b",
"phase": "5q",
"question_id": "nginx_safe_reload",
"run_idx": 2,
"duration_seconds": 26.612,
"prompt_tokens": 39,
"completion_tokens": 1073,
"tokens_per_second": 40.32,
"finish_reason": "stop",
"weeyuga_meta": null,
"status_code": 200,
"response_chars": 104,
"response_preview": "cp /etc/nginx/nginx.conf /etc/nginx/nginx.conf.bak\nnginx -t\nif [ $? -eq 0 ]; then\n nginx -s reload\nfi",
"required_markers": [
"cp /etc/nginx/nginx.conf",
"nginx -t",
"systemctl reload nginx",
"&&"
],
"markers_hit": [
"cp /etc/nginx/nginx.conf",
"nginx -t"
],
"marker_hit_rate": 0.5,
"format_rule": "shell_lines",
"format_ok": true,
"usable_answer": true,
"error": null
},
{
"type": "call",
"ts_utc": "2026-04-29T10:12:14Z",
"cell_id": "pavilion:weeyuga:qwen3.5:9b",
"model": "qwen3.5:9b",
"phase": "5q",
"question_id": "yaml_cli_plan",
"run_idx": 3,
"duration_seconds": 49.467,
"prompt_tokens": 43,
"completion_tokens": 2048,
"tokens_per_second": 41.4,
"finish_reason": "length",
"weeyuga_meta": null,
"status_code": 200,
"response_chars": 9329,
"response_preview": "```python\n#!/usr/bin/env python3\n\"\"\"\nYAML Schema Validator CLI\nScans git repos for changed YAML files and validates against a JSON schema.\n\"\"\"\n\nimport argparse\nimport json\nimport os\nimport sys\nfrom pathlib import Path\nfrom typing import Lis",
"required_markers": [
"1.",
"2.",
"3.",
"4.",
"JSON schema",
"git"
],
"markers_hit": [
"JSON schema",
"git"
],
"marker_hit_rate": 0.333,
"format_rule": "four_numbered_steps",
"format_ok": false,
"usable_answer": true,
"error": null
},
{
"type": "call",
"ts_utc": "2026-04-29T10:12:42Z",
"cell_id": "pavilion:weeyuga:qwen3.5:9b",
"model": "qwen3.5:9b",
"phase": "5q",
"question_id": "ssh_lockout_triage",
"run_idx": 4,
"duration_seconds": 27.33,
"prompt_tokens": 56,
"completion_tokens": 1104,
"tokens_per_second": 40.4,
"finish_reason": "stop",
"weeyuga_meta": null,
"status_code": 200,
"response_chars": 279,
"response_preview": "- Review `sshd_config` to ensure `PasswordAuthentication` is enabled.\n- Verify `authorized_keys` contains the correct public key.\n- Run `journalctl` to check for recent errors.\n- Review `rollback` to ensure the current state is safe.\n- Ensu",
"required_markers": [
"sshd_config",
"authorized_keys",
"journalctl",
"rollback",
"PasswordAuthentication"
],
"markers_hit": [
"sshd_config",
"authorized_keys",
"journalctl",
"rollback",
"PasswordAuthentication"
],
"marker_hit_rate": 1.0,
"format_rule": "five_bullets",
"format_ok": true,
"usable_answer": true,
"error": null
}
]
},
{
"cell_id": "pavilion:weeyuga:qwen2.5-coder:14b",
"n_calls": 6,
"n_errors": 6,
"duration_ms_mean": null,
"duration_ms_p50": null,
"duration_ms_p95": null,
"tokens_per_sec_mean": null,
"tokens_per_sec_p50": null,
"tokens_per_sec_p95": null,
"tokens_per_sec_max": null,
"cold_warm_split": {
"cold": {
"n_calls": 2,
"duration_ms_mean": null,
"duration_ms_p50": null,
"tokens_per_sec_mean": null
},
"warm": {
"n_calls": 4,
"duration_ms_mean": null,
"duration_ms_p50": null,
"tokens_per_sec_mean": null
}
},
"per_prompt": {
"unknown": {
"n_calls": 6,
"n_errors": 6,
"duration_ms_mean": null,
"duration_ms_p50": null,
"tokens_per_sec_mean": null,
"tokens_per_sec_p50": null
}
},
"chars_split": null,
"timeline": [
{
"prompt_id": null,
"run_idx": 0,
"phase": "hello",
"duration_ms": null,
"tokens_per_sec": null,
"error": "<HTTPError 500: 'Internal Server Error'>"
},
{
"prompt_id": null,
"run_idx": 0,
"phase": "5q",
"duration_ms": null,
"tokens_per_sec": null,
"error": "<HTTPError 500: 'Internal Server Error'>"
},
{
"prompt_id": null,
"run_idx": 1,
"phase": "5q",
"duration_ms": null,
"tokens_per_sec": null,
"error": "<HTTPError 500: 'Internal Server Error'>"
},
{
"prompt_id": null,
"run_idx": 2,
"phase": "5q",
"duration_ms": null,
"tokens_per_sec": null,
"error": "<HTTPError 500: 'Internal Server Error'>"
},
{
"prompt_id": null,
"run_idx": 3,
"phase": "5q",
"duration_ms": null,
"tokens_per_sec": null,
"error": "<HTTPError 500: 'Internal Server Error'>"
},
{
"prompt_id": null,
"run_idx": 4,
"phase": "5q",
"duration_ms": null,
"tokens_per_sec": null,
"error": "<HTTPError 500: 'Internal Server Error'>"
}
],
"calls": [
{
"type": "call",
"ts_utc": "2026-04-29T10:12:49Z",
"cell_id": "pavilion:weeyuga:qwen2.5-coder:14b",
"model": "qwen2.5-coder:14b",
"phase": "hello",
"question_id": "hello_check",
"run_idx": 0,
"duration_seconds": 7.472,
"prompt_tokens": null,
"completion_tokens": null,
"tokens_per_second": null,
"finish_reason": null,
"weeyuga_meta": null,
"status_code": null,
"response_chars": 0,
"response_preview": "",
"required_markers": [],
"markers_hit": [],
"marker_hit_rate": null,
"format_rule": "",
"format_ok": null,
"usable_answer": false,
"error": "<HTTPError 500: 'Internal Server Error'>"
},
{
"type": "call",
"ts_utc": "2026-04-29T10:12:50Z",
"cell_id": "pavilion:weeyuga:qwen2.5-coder:14b",
"model": "qwen2.5-coder:14b",
"phase": "5q",
"question_id": "disk_guard_bash",
"run_idx": 0,
"duration_seconds": 0.882,
"prompt_tokens": null,
"completion_tokens": null,
"tokens_per_second": null,
"finish_reason": null,
"weeyuga_meta": null,
"status_code": null,
"response_chars": 0,
"response_preview": "",
"required_markers": [
"#!/usr/bin/env bash",
"df -P /",
"85",
"exit 1"
],
"markers_hit": [],
"marker_hit_rate": 0.0,
"format_rule": "bash_code",
"format_ok": false,
"usable_answer": false,
"error": "<HTTPError 500: 'Internal Server Error'>"
},
{
"type": "call",
"ts_utc": "2026-04-29T10:12:51Z",
"cell_id": "pavilion:weeyuga:qwen2.5-coder:14b",
"model": "qwen2.5-coder:14b",
"phase": "5q",
"question_id": "ipv4_python_tests",
"run_idx": 1,
"duration_seconds": 1.059,
"prompt_tokens": null,
"completion_tokens": null,
"tokens_per_second": null,
"finish_reason": null,
"weeyuga_meta": null,
"status_code": null,
"response_chars": 0,
"response_preview": "",
"required_markers": [
"def is_valid_ipv4",
"def test_",
"assert",
"split('.')"
],
"markers_hit": [],
"marker_hit_rate": 0.0,
"format_rule": "python_code",
"format_ok": false,
"usable_answer": false,
"error": "<HTTPError 500: 'Internal Server Error'>"
},
{
"type": "call",
"ts_utc": "2026-04-29T10:12:52Z",
"cell_id": "pavilion:weeyuga:qwen2.5-coder:14b",
"model": "qwen2.5-coder:14b",
"phase": "5q",
"question_id": "nginx_safe_reload",
"run_idx": 2,
"duration_seconds": 0.922,
"prompt_tokens": null,
"completion_tokens": null,
"tokens_per_second": null,
"finish_reason": null,
"weeyuga_meta": null,
"status_code": null,
"response_chars": 0,
"response_preview": "",
"required_markers": [
"cp /etc/nginx/nginx.conf",
"nginx -t",
"systemctl reload nginx",
"&&"
],
"markers_hit": [],
"marker_hit_rate": 0.0,
"format_rule": "shell_lines",
"format_ok": false,
"usable_answer": false,
"error": "<HTTPError 500: 'Internal Server Error'>"
},
{
"type": "call",
"ts_utc": "2026-04-29T10:12:53Z",
"cell_id": "pavilion:weeyuga:qwen2.5-coder:14b",
"model": "qwen2.5-coder:14b",
"phase": "5q",
"question_id": "yaml_cli_plan",
"run_idx": 3,
"duration_seconds": 0.991,
"prompt_tokens": null,
"completion_tokens": null,
"tokens_per_second": null,
"finish_reason": null,
"weeyuga_meta": null,
"status_code": null,
"response_chars": 0,
"response_preview": "",
"required_markers": [
"1.",
"2.",
"3.",
"4.",
"JSON schema",
"git"
],
"markers_hit": [],
"marker_hit_rate": 0.0,
"format_rule": "four_numbered_steps",
"format_ok": false,
"usable_answer": false,
"error": "<HTTPError 500: 'Internal Server Error'>"
},
{
"type": "call",
"ts_utc": "2026-04-29T10:12:54Z",
"cell_id": "pavilion:weeyuga:qwen2.5-coder:14b",
"model": "qwen2.5-coder:14b",
"phase": "5q",
"question_id": "ssh_lockout_triage",
"run_idx": 4,
"duration_seconds": 1.054,
"prompt_tokens": null,
"completion_tokens": null,
"tokens_per_second": null,
"finish_reason": null,
"weeyuga_meta": null,
"status_code": null,
"response_chars": 0,
"response_preview": "",
"required_markers": [
"sshd_config",
"authorized_keys",
"journalctl",
"rollback",
"PasswordAuthentication"
],
"markers_hit": [],
"marker_hit_rate": 0.0,
"format_rule": "five_bullets",
"format_ok": false,
"usable_answer": false,
"error": "<HTTPError 500: 'Internal Server Error'>"
}
]
},
{
"cell_id": "pavilion:weeyuga:qwen2.5-coder:3b",
"n_calls": 6,
"n_errors": 0,
"duration_ms_mean": null,
"duration_ms_p50": null,
"duration_ms_p95": null,
"tokens_per_sec_mean": null,
"tokens_per_sec_p50": null,
"tokens_per_sec_p95": null,
"tokens_per_sec_max": null,
"cold_warm_split": {
"cold": {
"n_calls": 2,
"duration_ms_mean": null,
"duration_ms_p50": null,
"tokens_per_sec_mean": null
},
"warm": {
"n_calls": 4,
"duration_ms_mean": null,
"duration_ms_p50": null,
"tokens_per_sec_mean": null
}
},
"per_prompt": {
"unknown": {
"n_calls": 6,
"n_errors": 0,
"duration_ms_mean": null,
"duration_ms_p50": null,
"tokens_per_sec_mean": null,
"tokens_per_sec_p50": null
}
},
"chars_split": null,
"timeline": [
{
"prompt_id": null,
"run_idx": 0,
"phase": "hello",
"duration_ms": null,
"tokens_per_sec": null,
"error": null
},
{
"prompt_id": null,
"run_idx": 0,
"phase": "5q",
"duration_ms": null,
"tokens_per_sec": null,
"error": null
},
{
"prompt_id": null,
"run_idx": 1,
"phase": "5q",
"duration_ms": null,
"tokens_per_sec": null,
"error": null
},
{
"prompt_id": null,
"run_idx": 2,
"phase": "5q",
"duration_ms": null,
"tokens_per_sec": null,
"error": null
},
{
"prompt_id": null,
"run_idx": 3,
"phase": "5q",
"duration_ms": null,
"tokens_per_sec": null,
"error": null
},
{
"prompt_id": null,
"run_idx": 4,
"phase": "5q",
"duration_ms": null,
"tokens_per_sec": null,
"error": null
}
],
"calls": [
{
"type": "call",
"ts_utc": "2026-04-29T10:13:22Z",
"cell_id": "pavilion:weeyuga:qwen2.5-coder:3b",
"model": "qwen2.5-coder:3b",
"phase": "hello",
"question_id": "hello_check",
"run_idx": 0,
"duration_seconds": 28.318,
"prompt_tokens": 35,
"completion_tokens": 17,
"tokens_per_second": 0.6,
"finish_reason": "stop",
"weeyuga_meta": null,
"status_code": 200,
"response_chars": 62,
"response_preview": "Of course! I'm here to help. What do you need assistance with?",
"required_markers": [],
"markers_hit": [],
"marker_hit_rate": null,
"format_rule": "",
"format_ok": null,
"usable_answer": true,
"error": null
},
{
"type": "call",
"ts_utc": "2026-04-29T10:13:50Z",
"cell_id": "pavilion:weeyuga:qwen2.5-coder:3b",
"model": "qwen2.5-coder:3b",
"phase": "5q",
"question_id": "disk_guard_bash",
"run_idx": 0,
"duration_seconds": 27.993,
"prompt_tokens": 90,
"completion_tokens": 378,
"tokens_per_second": 13.5,
"finish_reason": "stop",
"weeyuga_meta": null,
"status_code": 200,
"response_chars": 1474,
"response_preview": "Certainly! Below is a Bash script that checks disk usage for `/`, prints a human-readable warning, and exits with status 1 when usage is above 85%.\n\n```bash\n#!/bin/bash\n\n# Check if df -P / command exists\nif ! command -v df &> /dev/null; the",
"required_markers": [
"#!/usr/bin/env bash",
"df -P /",
"85",
"exit 1"
],
"markers_hit": [
"df -P /",
"85",
"exit 1"
],
"marker_hit_rate": 0.75,
"format_rule": "bash_code",
"format_ok": false,
"usable_answer": true,
"error": null
},
{
"type": "call",
"ts_utc": "2026-04-29T10:14:15Z",
"cell_id": "pavilion:weeyuga:qwen2.5-coder:3b",
"model": "qwen2.5-coder:3b",
"phase": "5q",
"question_id": "ipv4_python_tests",
"run_idx": 1,
"duration_seconds": 24.785,
"prompt_tokens": 74,
"completion_tokens": 305,
"tokens_per_second": 12.31,
"finish_reason": "stop",
"weeyuga_meta": null,
"status_code": 200,
"response_chars": 1075,
"response_preview": "```python\nimport pytest\n\ndef is_valid_ipv4(value: str) -> bool:\n parts = value.split('.')\n if len(parts) != 4:\n return False\n \n for part in parts:\n try:\n num = int(part)\n if not (0 <= num <= 2",
"required_markers": [
"def is_valid_ipv4",
"def test_",
"assert",
"split('.')"
],
"markers_hit": [
"def is_valid_ipv4",
"def test_",
"assert",
"split('.')"
],
"marker_hit_rate": 1.0,
"format_rule": "python_code",
"format_ok": true,
"usable_answer": true,
"error": null
},
{
"type": "call",
"ts_utc": "2026-04-29T10:14:19Z",
"cell_id": "pavilion:weeyuga:qwen2.5-coder:3b",
"model": "qwen2.5-coder:3b",
"phase": "5q",
"question_id": "nginx_safe_reload",
"run_idx": 2,
"duration_seconds": 3.778,
"prompt_tokens": 58,
"completion_tokens": 43,
"tokens_per_second": 11.38,
"finish_reason": "stop",
"weeyuga_meta": null,
"status_code": 200,
"response_chars": 140,
"response_preview": "```bash\nsudo cp /etc/nginx/nginx.conf /etc/nginx/nginx.conf.bak\n\nsudo nginx -t\n\nif [ $? -eq 0 ]; then\n sudo systemctl reload nginx\nfi\n```",
"required_markers": [
"cp /etc/nginx/nginx.conf",
"nginx -t",
"systemctl reload nginx",
"&&"
],
"markers_hit": [
"cp /etc/nginx/nginx.conf",
"nginx -t",
"systemctl reload nginx"
],
"marker_hit_rate": 0.75,
"format_rule": "shell_lines",
"format_ok": true,
"usable_answer": true,
"error": null
},
{
"type": "call",
"ts_utc": "2026-04-29T10:14:56Z",
"cell_id": "pavilion:weeyuga:qwen2.5-coder:3b",
"model": "qwen2.5-coder:3b",
"phase": "5q",
"question_id": "yaml_cli_plan",
"run_idx": 3,
"duration_seconds": 36.655,
"prompt_tokens": 62,
"completion_tokens": 501,
"tokens_per_second": 13.67,
"finish_reason": "stop",
"weeyuga_meta": null,
"status_code": 200,
"response_chars": 2192,
"response_preview": "1. **Install Required Packages**: Ensure you have `git`, `pyyaml`, and `jsonschema` installed. You can install these using pip:\n ```bash\n pip install gitpython pyyaml jsonschema\n ```\n\n2. **Create the Python Script**: Create a new file",
"required_markers": [
"1.",
"2.",
"3.",
"4.",
"JSON schema",
"git"
],
"markers_hit": [
"1.",
"2.",
"3.",
"4.",
"JSON schema",
"git"
],
"marker_hit_rate": 1.0,
"format_rule": "four_numbered_steps",
"format_ok": true,
"usable_answer": true,
"error": null
},
{
"type": "call",
"ts_utc": "2026-04-29T10:15:08Z",
"cell_id": "pavilion:weeyuga:qwen2.5-coder:3b",
"model": "qwen2.5-coder:3b",
"phase": "5q",
"question_id": "ssh_lockout_triage",
"run_idx": 4,
"duration_seconds": 11.989,
"prompt_tokens": 75,
"completion_tokens": 154,
"tokens_per_second": 12.85,
"finish_reason": "stop",
"weeyuga_meta": null,
"status_code": 200,
"response_chars": 699,
"response_preview": "1. **Check `sshd_config`**: Ensure that `PasswordAuthentication` is set to `no`. This prevents password-based authentication.\n2. **Review `authorized_keys`**: Verify that the public keys in `~/.ssh/authorized_keys` are correct and match tho",
"required_markers": [
"sshd_config",
"authorized_keys",
"journalctl",
"rollback",
"PasswordAuthentication"
],
"markers_hit": [
"sshd_config",
"authorized_keys",
"journalctl",
"rollback",
"PasswordAuthentication"
],
"marker_hit_rate": 1.0,
"format_rule": "five_bullets",
"format_ok": false,
"usable_answer": true,
"error": null
}
]
},
{
"cell_id": "pavilion:weeyuga:qwen3:14b",
"n_calls": 6,
"n_errors": 4,
"duration_ms_mean": null,
"duration_ms_p50": null,
"duration_ms_p95": null,
"tokens_per_sec_mean": null,
"tokens_per_sec_p50": null,
"tokens_per_sec_p95": null,
"tokens_per_sec_max": null,
"cold_warm_split": {
"cold": {
"n_calls": 2,
"duration_ms_mean": null,
"duration_ms_p50": null,
"tokens_per_sec_mean": null
},
"warm": {
"n_calls": 4,
"duration_ms_mean": null,
"duration_ms_p50": null,
"tokens_per_sec_mean": null
}
},
"per_prompt": {
"unknown": {
"n_calls": 6,
"n_errors": 4,
"duration_ms_mean": null,
"duration_ms_p50": null,
"tokens_per_sec_mean": null,
"tokens_per_sec_p50": null
}
},
"chars_split": null,
"timeline": [
{
"prompt_id": null,
"run_idx": 0,
"phase": "hello",
"duration_ms": null,
"tokens_per_sec": null,
"error": "TimeoutError('timed out')"
},
{
"prompt_id": null,
"run_idx": 0,
"phase": "5q",
"duration_ms": null,
"tokens_per_sec": null,
"error": "TimeoutError('timed out')"
},
{
"prompt_id": null,
"run_idx": 1,
"phase": "5q",
"duration_ms": null,
"tokens_per_sec": null,
"error": "TimeoutError('timed out')"
},
{
"prompt_id": null,
"run_idx": 2,
"phase": "5q",
"duration_ms": null,
"tokens_per_sec": null,
"error": "TimeoutError('timed out')"
},
{
"prompt_id": null,
"run_idx": 3,
"phase": "5q",
"duration_ms": null,
"tokens_per_sec": null,
"error": null
},
{
"prompt_id": null,
"run_idx": 4,
"phase": "5q",
"duration_ms": null,
"tokens_per_sec": null,
"error": null
}
],
"calls": [
{
"type": "call",
"ts_utc": "2026-04-29T10:21:08Z",
"cell_id": "pavilion:weeyuga:qwen3:14b",
"model": "qwen3:14b",
"phase": "hello",
"question_id": "hello_check",
"run_idx": 0,
"duration_seconds": 360.058,
"prompt_tokens": null,
"completion_tokens": null,
"tokens_per_second": null,
"finish_reason": null,
"weeyuga_meta": null,
"status_code": null,
"response_chars": 0,
"response_preview": "",
"required_markers": [],
"markers_hit": [],
"marker_hit_rate": null,
"format_rule": "",
"format_ok": null,
"usable_answer": false,
"error": "TimeoutError('timed out')"
},
{
"type": "call",
"ts_utc": "2026-04-29T10:27:08Z",
"cell_id": "pavilion:weeyuga:qwen3:14b",
"model": "qwen3:14b",
"phase": "5q",
"question_id": "disk_guard_bash",
"run_idx": 0,
"duration_seconds": 360.069,
"prompt_tokens": null,
"completion_tokens": null,
"tokens_per_second": null,
"finish_reason": null,
"weeyuga_meta": null,
"status_code": null,
"response_chars": 0,
"response_preview": "",
"required_markers": [
"#!/usr/bin/env bash",
"df -P /",
"85",
"exit 1"
],
"markers_hit": [],
"marker_hit_rate": 0.0,
"format_rule": "bash_code",
"format_ok": false,
"usable_answer": false,
"error": "TimeoutError('timed out')"
},
{
"type": "call",
"ts_utc": "2026-04-29T10:33:08Z",
"cell_id": "pavilion:weeyuga:qwen3:14b",
"model": "qwen3:14b",
"phase": "5q",
"question_id": "ipv4_python_tests",
"run_idx": 1,
"duration_seconds": 360.075,
"prompt_tokens": null,
"completion_tokens": null,
"tokens_per_second": null,
"finish_reason": null,
"weeyuga_meta": null,
"status_code": null,
"response_chars": 0,
"response_preview": "",
"required_markers": [
"def is_valid_ipv4",
"def test_",
"assert",
"split('.')"
],
"markers_hit": [],
"marker_hit_rate": 0.0,
"format_rule": "python_code",
"format_ok": false,
"usable_answer": false,
"error": "TimeoutError('timed out')"
},
{
"type": "call",
"ts_utc": "2026-04-29T10:39:08Z",
"cell_id": "pavilion:weeyuga:qwen3:14b",
"model": "qwen3:14b",
"phase": "5q",
"question_id": "nginx_safe_reload",
"run_idx": 2,
"duration_seconds": 360.065,
"prompt_tokens": null,
"completion_tokens": null,
"tokens_per_second": null,
"finish_reason": null,
"weeyuga_meta": null,
"status_code": null,
"response_chars": 0,
"response_preview": "",
"required_markers": [
"cp /etc/nginx/nginx.conf",
"nginx -t",
"systemctl reload nginx",
"&&"
],
"markers_hit": [],
"marker_hit_rate": 0.0,
"format_rule": "shell_lines",
"format_ok": false,
"usable_answer": false,
"error": "TimeoutError('timed out')"
},
{
"type": "call",
"ts_utc": "2026-04-29T10:44:37Z",
"cell_id": "pavilion:weeyuga:qwen3:14b",
"model": "qwen3:14b",
"phase": "5q",
"question_id": "yaml_cli_plan",
"run_idx": 3,
"duration_seconds": 328.959,
"prompt_tokens": 43,
"completion_tokens": 903,
"tokens_per_second": 2.75,
"finish_reason": "stop",
"weeyuga_meta": null,
"status_code": 200,
"response_chars": 751,
"response_preview": "1. **Use `git diff` to identify changed YAML files**: Execute `git diff --name-only` to list modified files, then filter for `.yaml` or `.yml` extensions using a regex or `grep` to isolate relevant files. \n\n2. **Parse YAML files into Pytho",
"required_markers": [
"1.",
"2.",
"3.",
"4.",
"JSON schema",
"git"
],
"markers_hit": [
"1.",
"2.",
"3.",
"4.",
"JSON schema",
"git"
],
"marker_hit_rate": 1.0,
"format_rule": "four_numbered_steps",
"format_ok": true,
"usable_answer": true,
"error": null
},
{
"type": "call",
"ts_utc": "2026-04-29T10:48:48Z",
"cell_id": "pavilion:weeyuga:qwen3:14b",
"model": "qwen3:14b",
"phase": "5q",
"question_id": "ssh_lockout_triage",
"run_idx": 4,
"duration_seconds": 251.383,
"prompt_tokens": 56,
"completion_tokens": 787,
"tokens_per_second": 3.13,
"finish_reason": "stop",
"weeyuga_meta": null,
"status_code": 200,
"response_chars": 794,
"response_preview": "- **Check `sshd_config` for `PasswordAuthentication` setting**: Ensure it's enabled if password login is required, or confirm public key authentication is properly configured. \n- **Verify `authorized_keys` file permissions and content**: C",
"required_markers": [
"sshd_config",
"authorized_keys",
"journalctl",
"rollback",
"PasswordAuthentication"
],
"markers_hit": [
"sshd_config",
"authorized_keys",
"journalctl",
"rollback",
"PasswordAuthentication"
],
"marker_hit_rate": 1.0,
"format_rule": "five_bullets",
"format_ok": true,
"usable_answer": true,
"error": null
}
]
},
{
"cell_id": "pavilion:weeyuga:qwen3:8b",
"n_calls": 6,
"n_errors": 2,
"duration_ms_mean": null,
"duration_ms_p50": null,
"duration_ms_p95": null,
"tokens_per_sec_mean": null,
"tokens_per_sec_p50": null,
"tokens_per_sec_p95": null,
"tokens_per_sec_max": null,
"cold_warm_split": {
"cold": {
"n_calls": 2,
"duration_ms_mean": null,
"duration_ms_p50": null,
"tokens_per_sec_mean": null
},
"warm": {
"n_calls": 4,
"duration_ms_mean": null,
"duration_ms_p50": null,
"tokens_per_sec_mean": null
}
},
"per_prompt": {
"unknown": {
"n_calls": 6,
"n_errors": 2,
"duration_ms_mean": null,
"duration_ms_p50": null,
"tokens_per_sec_mean": null,
"tokens_per_sec_p50": null
}
},
"chars_split": null,
"timeline": [
{
"prompt_id": null,
"run_idx": 0,
"phase": "hello",
"duration_ms": null,
"tokens_per_sec": null,
"error": "TimeoutError('timed out')"
},
{
"prompt_id": null,
"run_idx": 0,
"phase": "5q",
"duration_ms": null,
"tokens_per_sec": null,
"error": null
},
{
"prompt_id": null,
"run_idx": 1,
"phase": "5q",
"duration_ms": null,
"tokens_per_sec": null,
"error": "TimeoutError('timed out')"
},
{
"prompt_id": null,
"run_idx": 2,
"phase": "5q",
"duration_ms": null,
"tokens_per_sec": null,
"error": null
},
{
"prompt_id": null,
"run_idx": 3,
"phase": "5q",
"duration_ms": null,
"tokens_per_sec": null,
"error": null
},
{
"prompt_id": null,
"run_idx": 4,
"phase": "5q",
"duration_ms": null,
"tokens_per_sec": null,
"error": null
}
],
"calls": [
{
"type": "call",
"ts_utc": "2026-04-29T11:20:47Z",
"cell_id": "pavilion:weeyuga:qwen3:8b",
"model": "qwen3:8b",
"phase": "hello",
"question_id": "hello_check",
"run_idx": 0,
"duration_seconds": 360.092,
"prompt_tokens": null,
"completion_tokens": null,
"tokens_per_second": null,
"finish_reason": null,
"weeyuga_meta": null,
"status_code": null,
"response_chars": 0,
"response_preview": "",
"required_markers": [],
"markers_hit": [],
"marker_hit_rate": null,
"format_rule": "",
"format_ok": null,
"usable_answer": false,
"error": "TimeoutError('timed out')"
},
{
"type": "call",
"ts_utc": "2026-04-29T11:25:53Z",
"cell_id": "pavilion:weeyuga:qwen3:8b",
"model": "qwen3:8b",
"phase": "5q",
"question_id": "disk_guard_bash",
"run_idx": 0,
"duration_seconds": 305.673,
"prompt_tokens": 71,
"completion_tokens": 1427,
"tokens_per_second": 4.67,
"finish_reason": "stop",
"weeyuga_meta": null,
"status_code": 200,
"response_chars": 214,
"response_preview": "#!/bin/bash\n\nusage_percent=$(df -P / | grep '/$' | awk '{print $5}' | cut -d'%' -f1)\n\nif (( usage_percent > 85 )); then\n echo \"Warning: / is at $usage_percent% usage. Please free up space.\"\n exit 1\nfi\n\nexit 0",
"required_markers": [
"#!/usr/bin/env bash",
"df -P /",
"85",
"exit 1"
],
"markers_hit": [
"df -P /",
"85",
"exit 1"
],
"marker_hit_rate": 0.75,
"format_rule": "bash_code",
"format_ok": false,
"usable_answer": true,
"error": null
},
{
"type": "call",
"ts_utc": "2026-04-29T11:31:53Z",
"cell_id": "pavilion:weeyuga:qwen3:8b",
"model": "qwen3:8b",
"phase": "5q",
"question_id": "ipv4_python_tests",
"run_idx": 1,
"duration_seconds": 360.074,
"prompt_tokens": null,
"completion_tokens": null,
"tokens_per_second": null,
"finish_reason": null,
"weeyuga_meta": null,
"status_code": null,
"response_chars": 0,
"response_preview": "",
"required_markers": [
"def is_valid_ipv4",
"def test_",
"assert",
"split('.')"
],
"markers_hit": [],
"marker_hit_rate": 0.0,
"format_rule": "python_code",
"format_ok": false,
"usable_answer": false,
"error": "TimeoutError('timed out')"
},
{
"type": "call",
"ts_utc": "2026-04-29T11:36:36Z",
"cell_id": "pavilion:weeyuga:qwen3:8b",
"model": "qwen3:8b",
"phase": "5q",
"question_id": "nginx_safe_reload",
"run_idx": 2,
"duration_seconds": 283.312,
"prompt_tokens": 39,
"completion_tokens": 933,
"tokens_per_second": 3.29,
"finish_reason": "stop",
"weeyuga_meta": null,
"status_code": 200,
"response_chars": 114,
"response_preview": "cp /etc/nginx/nginx.conf /etc/nginx/nginx.conf.$(date +\\%F-\\%T)\nnginx -t\nif [ $? -eq 0 ]; then nginx -s reload; fi",
"required_markers": [
"cp /etc/nginx/nginx.conf",
"nginx -t",
"systemctl reload nginx",
"&&"
],
"markers_hit": [
"cp /etc/nginx/nginx.conf",
"nginx -t"
],
"marker_hit_rate": 0.5,
"format_rule": "shell_lines",
"format_ok": true,
"usable_answer": true,
"error": null
},
{
"type": "call",
"ts_utc": "2026-04-29T11:39:10Z",
"cell_id": "pavilion:weeyuga:qwen3:8b",
"model": "qwen3:8b",
"phase": "5q",
"question_id": "yaml_cli_plan",
"run_idx": 3,
"duration_seconds": 154.227,
"prompt_tokens": 43,
"completion_tokens": 776,
"tokens_per_second": 5.03,
"finish_reason": "stop",
"weeyuga_meta": null,
"status_code": 200,
"response_chars": 363,
"response_preview": "1. Use `git diff --name-only` to list changed files, filter for `.yml`/`.yaml` extensions. \n2. Parse each YAML file into JSON and load the provided JSON schema using `jsonschema`. \n3. Validate each parsed YAML against the schema, collecti",
"required_markers": [
"1.",
"2.",
"3.",
"4.",
"JSON schema",
"git"
],
"markers_hit": [
"1.",
"2.",
"3.",
"4.",
"JSON schema",
"git"
],
"marker_hit_rate": 1.0,
"format_rule": "four_numbered_steps",
"format_ok": true,
"usable_answer": true,
"error": null
},
{
"type": "call",
"ts_utc": "2026-04-29T11:41:15Z",
"cell_id": "pavilion:weeyuga:qwen3:8b",
"model": "qwen3:8b",
"phase": "5q",
"question_id": "ssh_lockout_triage",
"run_idx": 4,
"duration_seconds": 124.742,
"prompt_tokens": 56,
"completion_tokens": 629,
"tokens_per_second": 5.04,
"finish_reason": "stop",
"weeyuga_meta": null,
"status_code": 200,
"response_chars": 921,
"response_preview": "- **Verify `sshd_config` settings**: Ensure `PasswordAuthentication` is set to `no` (if disabling passwords) and `PermitRootLogin` is configured securely. Check for syntax errors using `sshd_config_test`. \n- **Inspect `authorized_keys` fil",
"required_markers": [
"sshd_config",
"authorized_keys",
"journalctl",
"rollback",
"PasswordAuthentication"
],
"markers_hit": [
"sshd_config",
"authorized_keys",
"journalctl",
"rollback",
"PasswordAuthentication"
],
"marker_hit_rate": 1.0,
"format_rule": "five_bullets",
"format_ok": true,
"usable_answer": true,
"error": null
}
]
},
{
"cell_id": "pavilion:weeyuga:qwen3:4b",
"n_calls": 6,
"n_errors": 0,
"duration_ms_mean": null,
"duration_ms_p50": null,
"duration_ms_p95": null,
"tokens_per_sec_mean": null,
"tokens_per_sec_p50": null,
"tokens_per_sec_p95": null,
"tokens_per_sec_max": null,
"cold_warm_split": {
"cold": {
"n_calls": 2,
"duration_ms_mean": null,
"duration_ms_p50": null,
"tokens_per_sec_mean": null
},
"warm": {
"n_calls": 4,
"duration_ms_mean": null,
"duration_ms_p50": null,
"tokens_per_sec_mean": null
}
},
"per_prompt": {
"unknown": {
"n_calls": 6,
"n_errors": 0,
"duration_ms_mean": null,
"duration_ms_p50": null,
"tokens_per_sec_mean": null,
"tokens_per_sec_p50": null
}
},
"chars_split": null,
"timeline": [
{
"prompt_id": null,
"run_idx": 0,
"phase": "hello",
"duration_ms": null,
"tokens_per_sec": null,
"error": null
},
{
"prompt_id": null,
"run_idx": 0,
"phase": "5q",
"duration_ms": null,
"tokens_per_sec": null,
"error": null
},
{
"prompt_id": null,
"run_idx": 1,
"phase": "5q",
"duration_ms": null,
"tokens_per_sec": null,
"error": null
},
{
"prompt_id": null,
"run_idx": 2,
"phase": "5q",
"duration_ms": null,
"tokens_per_sec": null,
"error": null
},
{
"prompt_id": null,
"run_idx": 3,
"phase": "5q",
"duration_ms": null,
"tokens_per_sec": null,
"error": null
},
{
"prompt_id": null,
"run_idx": 4,
"phase": "5q",
"duration_ms": null,
"tokens_per_sec": null,
"error": null
}
],
"calls": [
{
"type": "call",
"ts_utc": "2026-04-29T11:43:45Z",
"cell_id": "pavilion:weeyuga:qwen3:4b",
"model": "qwen3:4b",
"phase": "hello",
"question_id": "hello_check",
"run_idx": 0,
"duration_seconds": 150.036,
"prompt_tokens": 16,
"completion_tokens": 411,
"tokens_per_second": 2.74,
"finish_reason": "stop",
"weeyuga_meta": null,
"status_code": 200,
"response_chars": 248,
"response_preview": "Hi! \ud83d\udc4b Absolutely \u2014 I'm here to help. \ud83d\ude0a \n**What would you like assistance with?** \n(Whether it's a question, problem, creative idea, or just need a quick explanation \u2014 I'm all ears!)\n\nJust share what you need, and I'll do my best to suppor",
"required_markers": [],
"markers_hit": [],
"marker_hit_rate": null,
"format_rule": "",
"format_ok": null,
"usable_answer": true,
"error": null
},
{
"type": "call",
"ts_utc": "2026-04-29T11:47:43Z",
"cell_id": "pavilion:weeyuga:qwen3:4b",
"model": "qwen3:4b",
"phase": "5q",
"question_id": "disk_guard_bash",
"run_idx": 0,
"duration_seconds": 238.435,
"prompt_tokens": 71,
"completion_tokens": 2048,
"tokens_per_second": 8.59,
"finish_reason": "length",
"weeyuga_meta": null,
"status_code": 200,
"response_chars": 0,
"response_preview": "",
"required_markers": [
"#!/usr/bin/env bash",
"df -P /",
"85",
"exit 1"
],
"markers_hit": [],
"marker_hit_rate": 0.0,
"format_rule": "bash_code",
"format_ok": false,
"usable_answer": false,
"error": null
},
{
"type": "call",
"ts_utc": "2026-04-29T11:51:39Z",
"cell_id": "pavilion:weeyuga:qwen3:4b",
"model": "qwen3:4b",
"phase": "5q",
"question_id": "ipv4_python_tests",
"run_idx": 1,
"duration_seconds": 235.851,
"prompt_tokens": 55,
"completion_tokens": 2048,
"tokens_per_second": 8.68,
"finish_reason": "length",
"weeyuga_meta": null,
"status_code": 200,
"response_chars": 0,
"response_preview": "",
"required_markers": [
"def is_valid_ipv4",
"def test_",
"assert",
"split('.')"
],
"markers_hit": [],
"marker_hit_rate": 0.0,
"format_rule": "python_code",
"format_ok": false,
"usable_answer": false,
"error": null
},
{
"type": "call",
"ts_utc": "2026-04-29T11:55:33Z",
"cell_id": "pavilion:weeyuga:qwen3:4b",
"model": "qwen3:4b",
"phase": "5q",
"question_id": "nginx_safe_reload",
"run_idx": 2,
"duration_seconds": 233.393,
"prompt_tokens": 39,
"completion_tokens": 2048,
"tokens_per_second": 8.77,
"finish_reason": "length",
"weeyuga_meta": null,
"status_code": 200,
"response_chars": 0,
"response_preview": "",
"required_markers": [
"cp /etc/nginx/nginx.conf",
"nginx -t",
"systemctl reload nginx",
"&&"
],
"markers_hit": [],
"marker_hit_rate": 0.0,
"format_rule": "shell_lines",
"format_ok": false,
"usable_answer": false,
"error": null
},
{
"type": "call",
"ts_utc": "2026-04-29T11:59:33Z",
"cell_id": "pavilion:weeyuga:qwen3:4b",
"model": "qwen3:4b",
"phase": "5q",
"question_id": "yaml_cli_plan",
"run_idx": 3,
"duration_seconds": 240.036,
"prompt_tokens": 43,
"completion_tokens": 2048,
"tokens_per_second": 8.53,
"finish_reason": "length",
"weeyuga_meta": null,
"status_code": 200,
"response_chars": 0,
"response_preview": "",
"required_markers": [
"1.",
"2.",
"3.",
"4.",
"JSON schema",
"git"
],
"markers_hit": [],
"marker_hit_rate": 0.0,
"format_rule": "four_numbered_steps",
"format_ok": false,
"usable_answer": false,
"error": null
},
{
"type": "call",
"ts_utc": "2026-04-29T12:03:30Z",
"cell_id": "pavilion:weeyuga:qwen3:4b",
"model": "qwen3:4b",
"phase": "5q",
"question_id": "ssh_lockout_triage",
"run_idx": 4,
"duration_seconds": 237.376,
"prompt_tokens": 56,
"completion_tokens": 2048,
"tokens_per_second": 8.63,
"finish_reason": "length",
"weeyuga_meta": null,
"status_code": 200,
"response_chars": 0,
"response_preview": "",
"required_markers": [
"sshd_config",
"authorized_keys",
"journalctl",
"rollback",
"PasswordAuthentication"
],
"markers_hit": [],
"marker_hit_rate": 0.0,
"format_rule": "five_bullets",
"format_ok": false,
"usable_answer": false,
"error": null
}
]
},
{
"cell_id": "pavilion:weeyuga:qwen2.5:3b",
"n_calls": 6,
"n_errors": 0,
"duration_ms_mean": null,
"duration_ms_p50": null,
"duration_ms_p95": null,
"tokens_per_sec_mean": null,
"tokens_per_sec_p50": null,
"tokens_per_sec_p95": null,
"tokens_per_sec_max": null,
"cold_warm_split": {
"cold": {
"n_calls": 2,
"duration_ms_mean": null,
"duration_ms_p50": null,
"tokens_per_sec_mean": null
},
"warm": {
"n_calls": 4,
"duration_ms_mean": null,
"duration_ms_p50": null,
"tokens_per_sec_mean": null
}
},
"per_prompt": {
"unknown": {
"n_calls": 6,
"n_errors": 0,
"duration_ms_mean": null,
"duration_ms_p50": null,
"tokens_per_sec_mean": null,
"tokens_per_sec_p50": null
}
},
"chars_split": null,
"timeline": [
{
"prompt_id": null,
"run_idx": 0,
"phase": "hello",
"duration_ms": null,
"tokens_per_sec": null,
"error": null
},
{
"prompt_id": null,
"run_idx": 0,
"phase": "5q",
"duration_ms": null,
"tokens_per_sec": null,
"error": null
},
{
"prompt_id": null,
"run_idx": 1,
"phase": "5q",
"duration_ms": null,
"tokens_per_sec": null,
"error": null
},
{
"prompt_id": null,
"run_idx": 2,
"phase": "5q",
"duration_ms": null,
"tokens_per_sec": null,
"error": null
},
{
"prompt_id": null,
"run_idx": 3,
"phase": "5q",
"duration_ms": null,
"tokens_per_sec": null,
"error": null
},
{
"prompt_id": null,
"run_idx": 4,
"phase": "5q",
"duration_ms": null,
"tokens_per_sec": null,
"error": null
}
],
"calls": [
{
"type": "call",
"ts_utc": "2026-04-29T12:03:59Z",
"cell_id": "pavilion:weeyuga:qwen2.5:3b",
"model": "qwen2.5:3b",
"phase": "hello",
"question_id": "hello_check",
"run_idx": 0,
"duration_seconds": 28.558,
"prompt_tokens": 35,
"completion_tokens": 19,
"tokens_per_second": 0.67,
"finish_reason": "stop",
"weeyuga_meta": null,
"status_code": 200,
"response_chars": 72,
"response_preview": "Of course! I'd be happy to help. What do you need assistance with today?",
"required_markers": [],
"markers_hit": [],
"marker_hit_rate": null,
"format_rule": "",
"format_ok": null,
"usable_answer": true,
"error": null
},
{
"type": "call",
"ts_utc": "2026-04-29T12:04:07Z",
"cell_id": "pavilion:weeyuga:qwen2.5:3b",
"model": "qwen2.5:3b",
"phase": "5q",
"question_id": "disk_guard_bash",
"run_idx": 0,
"duration_seconds": 7.986,
"prompt_tokens": 90,
"completion_tokens": 97,
"tokens_per_second": 12.15,
"finish_reason": "stop",
"weeyuga_meta": null,
"status_code": 200,
"response_chars": 284,
"response_preview": "```bash\n#!/bin/bash\n\n# Check disk usage for /\nusage=$(df -Ph / | awk 'NR==2 {print $5}')\n\n# Convert to percentage and check if it's above 85%\nif (( $(echo \"$usage > 85\" | bc -l) )); then\n echo \"Warning: Disk usage on / is over 85%. Curre",
"required_markers": [
"#!/usr/bin/env bash",
"df -P /",
"85",
"exit 1"
],
"markers_hit": [
"85",
"exit 1"
],
"marker_hit_rate": 0.5,
"format_rule": "bash_code",
"format_ok": false,
"usable_answer": true,
"error": null
},
{
"type": "call",
"ts_utc": "2026-04-29T12:04:36Z",
"cell_id": "pavilion:weeyuga:qwen2.5:3b",
"model": "qwen2.5:3b",
"phase": "5q",
"question_id": "ipv4_python_tests",
"run_idx": 1,
"duration_seconds": 28.968,
"prompt_tokens": 74,
"completion_tokens": 378,
"tokens_per_second": 13.05,
"finish_reason": "stop",
"weeyuga_meta": null,
"status_code": 200,
"response_chars": 1393,
"response_preview": "```python\ndef is_valid_ipv4(value: str) -> bool:\n \"\"\"\n Checks if the given value is a valid IPv4 address.\n \n :param value: A string representing the IP address to validate.\n :return: True if the value is a valid IPv4 address,",
"required_markers": [
"def is_valid_ipv4",
"def test_",
"assert",
"split('.')"
],
"markers_hit": [
"def is_valid_ipv4",
"def test_",
"assert",
"split('.')"
],
"marker_hit_rate": 1.0,
"format_rule": "python_code",
"format_ok": false,
"usable_answer": true,
"error": null
},
{
"type": "call",
"ts_utc": "2026-04-29T12:04:39Z",
"cell_id": "pavilion:weeyuga:qwen2.5:3b",
"model": "qwen2.5:3b",
"phase": "5q",
"question_id": "nginx_safe_reload",
"run_idx": 2,
"duration_seconds": 3.395,
"prompt_tokens": 58,
"completion_tokens": 37,
"tokens_per_second": 10.9,
"finish_reason": "stop",
"weeyuga_meta": null,
"status_code": 200,
"response_chars": 123,
"response_preview": "sudo cp /etc/nginx/nginx.conf /etc/nginx/nginx.conf.bak\nsudo nginx -t\nif [ $? -eq 0 ]; then\n sudo systemctl reload nginx\nfi",
"required_markers": [
"cp /etc/nginx/nginx.conf",
"nginx -t",
"systemctl reload nginx",
"&&"
],
"markers_hit": [
"cp /etc/nginx/nginx.conf",
"nginx -t",
"systemctl reload nginx"
],
"marker_hit_rate": 0.75,
"format_rule": "shell_lines",
"format_ok": true,
"usable_answer": true,
"error": null
},
{
"type": "call",
"ts_utc": "2026-04-29T12:05:26Z",
"cell_id": "pavilion:weeyuga:qwen2.5:3b",
"model": "qwen2.5:3b",
"phase": "5q",
"question_id": "yaml_cli_plan",
"run_idx": 3,
"duration_seconds": 46.998,
"prompt_tokens": 62,
"completion_tokens": 606,
"tokens_per_second": 12.89,
"finish_reason": "stop",
"weeyuga_meta": null,
"status_code": 200,
"response_chars": 2760,
"response_preview": "Certainly! Below are the four numbered steps to plan your Python CLI tool that scans a Git repository for changed YAML files, validates them against a JSON schema, and exits with a non-zero status if any validation fails.\n\n1. **Initialize t",
"required_markers": [
"1.",
"2.",
"3.",
"4.",
"JSON schema",
"git"
],
"markers_hit": [
"1.",
"2.",
"3.",
"4.",
"JSON schema",
"git"
],
"marker_hit_rate": 1.0,
"format_rule": "four_numbered_steps",
"format_ok": true,
"usable_answer": true,
"error": null
},
{
"type": "call",
"ts_utc": "2026-04-29T12:05:40Z",
"cell_id": "pavilion:weeyuga:qwen2.5:3b",
"model": "qwen2.5:3b",
"phase": "5q",
"question_id": "ssh_lockout_triage",
"run_idx": 4,
"duration_seconds": 14.325,
"prompt_tokens": 75,
"completion_tokens": 181,
"tokens_per_second": 12.64,
"finish_reason": "stop",
"weeyuga_meta": null,
"status_code": 200,
"response_chars": 871,
"response_preview": "- Ensure that `PasswordAuthentication` is set to `no` in `/etc/ssh/sshd_config`. This prevents SSH from accepting password authentication.\n- Verify that your public key is correctly added to the user's authorized keys file located at `~/.ss",
"required_markers": [
"sshd_config",
"authorized_keys",
"journalctl",
"rollback",
"PasswordAuthentication"
],
"markers_hit": [
"sshd_config",
"authorized_keys",
"journalctl",
"rollback",
"PasswordAuthentication"
],
"marker_hit_rate": 1.0,
"format_rule": "five_bullets",
"format_ok": true,
"usable_answer": true,
"error": null
}
]
},
{
"cell_id": "pavilion:weeyuga:qwen2.5-coder:1.5b",
"n_calls": 6,
"n_errors": 0,
"duration_ms_mean": null,
"duration_ms_p50": null,
"duration_ms_p95": null,
"tokens_per_sec_mean": null,
"tokens_per_sec_p50": null,
"tokens_per_sec_p95": null,
"tokens_per_sec_max": null,
"cold_warm_split": {
"cold": {
"n_calls": 2,
"duration_ms_mean": null,
"duration_ms_p50": null,
"tokens_per_sec_mean": null
},
"warm": {
"n_calls": 4,
"duration_ms_mean": null,
"duration_ms_p50": null,
"tokens_per_sec_mean": null
}
},
"per_prompt": {
"unknown": {
"n_calls": 6,
"n_errors": 0,
"duration_ms_mean": null,
"duration_ms_p50": null,
"tokens_per_sec_mean": null,
"tokens_per_sec_p50": null
}
},
"chars_split": null,
"timeline": [
{
"prompt_id": null,
"run_idx": 0,
"phase": "hello",
"duration_ms": null,
"tokens_per_sec": null,
"error": null
},
{
"prompt_id": null,
"run_idx": 0,
"phase": "5q",
"duration_ms": null,
"tokens_per_sec": null,
"error": null
},
{
"prompt_id": null,
"run_idx": 1,
"phase": "5q",
"duration_ms": null,
"tokens_per_sec": null,
"error": null
},
{
"prompt_id": null,
"run_idx": 2,
"phase": "5q",
"duration_ms": null,
"tokens_per_sec": null,
"error": null
},
{
"prompt_id": null,
"run_idx": 3,
"phase": "5q",
"duration_ms": null,
"tokens_per_sec": null,
"error": null
},
{
"prompt_id": null,
"run_idx": 4,
"phase": "5q",
"duration_ms": null,
"tokens_per_sec": null,
"error": null
}
],
"calls": [
{
"type": "call",
"ts_utc": "2026-04-29T12:06:01Z",
"cell_id": "pavilion:weeyuga:qwen2.5-coder:1.5b",
"model": "qwen2.5-coder:1.5b",
"phase": "hello",
"question_id": "hello_check",
"run_idx": 0,
"duration_seconds": 20.837,
"prompt_tokens": 35,
"completion_tokens": 11,
"tokens_per_second": 0.53,
"finish_reason": "stop",
"weeyuga_meta": null,
"status_code": 200,
"response_chars": 38,
"response_preview": "Of course! How may I assist you today?",
"required_markers": [],
"markers_hit": [],
"marker_hit_rate": null,
"format_rule": "",
"format_ok": null,
"usable_answer": true,
"error": null
},
{
"type": "call",
"ts_utc": "2026-04-29T12:06:11Z",
"cell_id": "pavilion:weeyuga:qwen2.5-coder:1.5b",
"model": "qwen2.5-coder:1.5b",
"phase": "5q",
"question_id": "disk_guard_bash",
"run_idx": 0,
"duration_seconds": 9.55,
"prompt_tokens": 90,
"completion_tokens": 243,
"tokens_per_second": 25.45,
"finish_reason": "stop",
"weeyuga_meta": null,
"status_code": 200,
"response_chars": 861,
"response_preview": "```bash\n#!/bin/bash\n\n# Check disk usage for /\ndf -P / | awk '{print $5}' | grep -E '[0-9]+' > /tmp/disk_usage.txt\n\n# Read the percentage from the file\npercentage=$(cat /tmp/disk_usage.txt)\n\n# Exit with status 1 if usage is above 85 percent\n",
"required_markers": [
"#!/usr/bin/env bash",
"df -P /",
"85",
"exit 1"
],
"markers_hit": [
"df -P /",
"85",
"exit 1"
],
"marker_hit_rate": 0.75,
"format_rule": "bash_code",
"format_ok": false,
"usable_answer": true,
"error": null
},
{
"type": "call",
"ts_utc": "2026-04-29T12:06:23Z",
"cell_id": "pavilion:weeyuga:qwen2.5-coder:1.5b",
"model": "qwen2.5-coder:1.5b",
"phase": "5q",
"question_id": "ipv4_python_tests",
"run_idx": 1,
"duration_seconds": 12.454,
"prompt_tokens": 74,
"completion_tokens": 303,
"tokens_per_second": 24.33,
"finish_reason": "stop",
"weeyuga_meta": null,
"status_code": 200,
"response_chars": 1101,
"response_preview": "```python\nimport pytest\n\ndef is_valid_ipv4(value: str) -> bool:\n \"\"\"\n Check if the provided string is a valid IPv4 address.\n \n Args:\n value (str): The string to check.\n \n Returns:\n bool: True if the string is a valid",
"required_markers": [
"def is_valid_ipv4",
"def test_",
"assert",
"split('.')"
],
"markers_hit": [
"def is_valid_ipv4",
"def test_",
"assert",
"split('.')"
],
"marker_hit_rate": 1.0,
"format_rule": "python_code",
"format_ok": false,
"usable_answer": true,
"error": null
},
{
"type": "call",
"ts_utc": "2026-04-29T12:06:26Z",
"cell_id": "pavilion:weeyuga:qwen2.5-coder:1.5b",
"model": "qwen2.5-coder:1.5b",
"phase": "5q",
"question_id": "nginx_safe_reload",
"run_idx": 2,
"duration_seconds": 2.926,
"prompt_tokens": 58,
"completion_tokens": 64,
"tokens_per_second": 21.87,
"finish_reason": "stop",
"weeyuga_meta": null,
"status_code": 200,
"response_chars": 257,
"response_preview": "```bash\n# Backup the original nginx configuration file\ncp /etc/nginx/nginx.conf /etc/nginx/nginx.conf.bak\n\n# Validate the nginx configuration\nnginx -t\n\n# Reload nginx if the configuration is valid\nif [ $? -eq 0 ]; then\n sudo systemctl re",
"required_markers": [
"cp /etc/nginx/nginx.conf",
"nginx -t",
"systemctl reload nginx",
"&&"
],
"markers_hit": [
"cp /etc/nginx/nginx.conf",
"nginx -t",
"systemctl reload nginx"
],
"marker_hit_rate": 0.75,
"format_rule": "shell_lines",
"format_ok": true,
"usable_answer": true,
"error": null
},
{
"type": "call",
"ts_utc": "2026-04-29T12:06:44Z",
"cell_id": "pavilion:weeyuga:qwen2.5-coder:1.5b",
"model": "qwen2.5-coder:1.5b",
"phase": "5q",
"question_id": "yaml_cli_plan",
"run_idx": 3,
"duration_seconds": 18.124,
"prompt_tokens": 62,
"completion_tokens": 473,
"tokens_per_second": 26.1,
"finish_reason": "stop",
"weeyuga_meta": null,
"status_code": 200,
"response_chars": 2139,
"response_preview": "1. Install the required packages:\n```bash\npip install gitpython jsonschema\n```\n\n2. Create a new Python file named `validate_yaml.py`:\n\n3. Add the following code to the `validate_yaml.py` file:\n\n```python\nimport os\nimport json\nfrom git impor",
"required_markers": [
"1.",
"2.",
"3.",
"4.",
"JSON schema",
"git"
],
"markers_hit": [
"1.",
"2.",
"3.",
"4.",
"JSON schema",
"git"
],
"marker_hit_rate": 1.0,
"format_rule": "four_numbered_steps",
"format_ok": true,
"usable_answer": true,
"error": null
},
{
"type": "call",
"ts_utc": "2026-04-29T12:06:55Z",
"cell_id": "pavilion:weeyuga:qwen2.5-coder:1.5b",
"model": "qwen2.5-coder:1.5b",
"phase": "5q",
"question_id": "ssh_lockout_triage",
"run_idx": 4,
"duration_seconds": 10.595,
"prompt_tokens": 75,
"completion_tokens": 271,
"tokens_per_second": 25.58,
"finish_reason": "stop",
"weeyuga_meta": null,
"status_code": 200,
"response_chars": 1086,
"response_preview": "- **Check for Permissions**: Ensure that the `authorized_keys` file has the correct permissions (`600`) to restrict access.\n ```sh\n chmod 600 ~/.ssh/authorized_keys\n ```\n\n- **Review SSHD Configuration**: Verify that the `PasswordAuthenti",
"required_markers": [
"sshd_config",
"authorized_keys",
"journalctl",
"rollback",
"PasswordAuthentication"
],
"markers_hit": [
"sshd_config",
"authorized_keys",
"journalctl",
"rollback",
"PasswordAuthentication"
],
"marker_hit_rate": 1.0,
"format_rule": "five_bullets",
"format_ok": true,
"usable_answer": true,
"error": null
}
]
},
{
"cell_id": "pavilion:weeyuga:qwen2.5-coder:0.5b",
"n_calls": 6,
"n_errors": 0,
"duration_ms_mean": null,
"duration_ms_p50": null,
"duration_ms_p95": null,
"tokens_per_sec_mean": null,
"tokens_per_sec_p50": null,
"tokens_per_sec_p95": null,
"tokens_per_sec_max": null,
"cold_warm_split": {
"cold": {
"n_calls": 2,
"duration_ms_mean": null,
"duration_ms_p50": null,
"tokens_per_sec_mean": null
},
"warm": {
"n_calls": 4,
"duration_ms_mean": null,
"duration_ms_p50": null,
"tokens_per_sec_mean": null
}
},
"per_prompt": {
"unknown": {
"n_calls": 6,
"n_errors": 0,
"duration_ms_mean": null,
"duration_ms_p50": null,
"tokens_per_sec_mean": null,
"tokens_per_sec_p50": null
}
},
"chars_split": null,
"timeline": [
{
"prompt_id": null,
"run_idx": 0,
"phase": "hello",
"duration_ms": null,
"tokens_per_sec": null,
"error": null
},
{
"prompt_id": null,
"run_idx": 0,
"phase": "5q",
"duration_ms": null,
"tokens_per_sec": null,
"error": null
},
{
"prompt_id": null,
"run_idx": 1,
"phase": "5q",
"duration_ms": null,
"tokens_per_sec": null,
"error": null
},
{
"prompt_id": null,
"run_idx": 2,
"phase": "5q",
"duration_ms": null,
"tokens_per_sec": null,
"error": null
},
{
"prompt_id": null,
"run_idx": 3,
"phase": "5q",
"duration_ms": null,
"tokens_per_sec": null,
"error": null
},
{
"prompt_id": null,
"run_idx": 4,
"phase": "5q",
"duration_ms": null,
"tokens_per_sec": null,
"error": null
}
],
"calls": [
{
"type": "call",
"ts_utc": "2026-04-29T12:07:09Z",
"cell_id": "pavilion:weeyuga:qwen2.5-coder:0.5b",
"model": "qwen2.5-coder:0.5b",
"phase": "hello",
"question_id": "hello_check",
"run_idx": 0,
"duration_seconds": 13.774,
"prompt_tokens": 35,
"completion_tokens": 11,
"tokens_per_second": 0.8,
"finish_reason": "stop",
"weeyuga_meta": null,
"status_code": 200,
"response_chars": 38,
"response_preview": "Of course! How can I assist you today?",
"required_markers": [],
"markers_hit": [],
"marker_hit_rate": null,
"format_rule": "",
"format_ok": null,
"usable_answer": true,
"error": null
},
{
"type": "call",
"ts_utc": "2026-04-29T12:07:17Z",
"cell_id": "pavilion:weeyuga:qwen2.5-coder:0.5b",
"model": "qwen2.5-coder:0.5b",
"phase": "5q",
"question_id": "disk_guard_bash",
"run_idx": 0,
"duration_seconds": 8.292,
"prompt_tokens": 90,
"completion_tokens": 418,
"tokens_per_second": 50.41,
"finish_reason": "stop",
"weeyuga_meta": null,
"status_code": 200,
"response_chars": 1654,
"response_preview": "Certainly! Below is a Bash script that checks disk usage for `/`, prints a human-readable warning if usage is above 85 percent, and exits with status 1 when usage is above 85 percent.\n\n```bash\n#!/bin/bash\n\n# Check if df -P / exists\nif ! com",
"required_markers": [
"#!/usr/bin/env bash",
"df -P /",
"85",
"exit 1"
],
"markers_hit": [
"df -P /",
"85",
"exit 1"
],
"marker_hit_rate": 0.75,
"format_rule": "bash_code",
"format_ok": false,
"usable_answer": true,
"error": null
},
{
"type": "call",
"ts_utc": "2026-04-29T12:07:27Z",
"cell_id": "pavilion:weeyuga:qwen2.5-coder:0.5b",
"model": "qwen2.5-coder:0.5b",
"phase": "5q",
"question_id": "ipv4_python_tests",
"run_idx": 1,
"duration_seconds": 10.239,
"prompt_tokens": 74,
"completion_tokens": 520,
"tokens_per_second": 50.79,
"finish_reason": "stop",
"weeyuga_meta": null,
"status_code": 200,
"response_chars": 1505,
"response_preview": "```python\nimport re\n\ndef is_valid_ipv4(value: str) -> bool:\n \"\"\"\n Check if the provided string is a valid IPv4 address.\n \n Args:\n value (str): The string to check.\n \n Returns:\n bool: True if the string is a valid IPv",
"required_markers": [
"def is_valid_ipv4",
"def test_",
"assert",
"split('.')"
],
"markers_hit": [
"def is_valid_ipv4",
"def test_",
"assert"
],
"marker_hit_rate": 0.75,
"format_rule": "python_code",
"format_ok": false,
"usable_answer": true,
"error": null
},
{
"type": "call",
"ts_utc": "2026-04-29T12:07:30Z",
"cell_id": "pavilion:weeyuga:qwen2.5-coder:0.5b",
"model": "qwen2.5-coder:0.5b",
"phase": "5q",
"question_id": "nginx_safe_reload",
"run_idx": 2,
"duration_seconds": 3.174,
"prompt_tokens": 58,
"completion_tokens": 149,
"tokens_per_second": 46.94,
"finish_reason": "stop",
"weeyuga_meta": null,
"status_code": 200,
"response_chars": 576,
"response_preview": "```bash\n# Backup /etc/nginx/nginx.conf\nsudo cp -p /etc/nginx/nginx.conf /etc/nginx/nginx.conf.bak\n\n# Validate nginx config\nsudo nginx -t\n\n# Reload nginx only if validation passes\nif [ $? -eq 0 ]; then\n sudo systemctl reload nginx\nfi\n```\n",
"required_markers": [
"cp /etc/nginx/nginx.conf",
"nginx -t",
"systemctl reload nginx",
"&&"
],
"markers_hit": [
"nginx -t",
"systemctl reload nginx"
],
"marker_hit_rate": 0.5,
"format_rule": "shell_lines",
"format_ok": true,
"usable_answer": true,
"error": null
},
{
"type": "call",
"ts_utc": "2026-04-29T12:07:42Z",
"cell_id": "pavilion:weeyuga:qwen2.5-coder:0.5b",
"model": "qwen2.5-coder:0.5b",
"phase": "5q",
"question_id": "yaml_cli_plan",
"run_idx": 3,
"duration_seconds": 12.01,
"prompt_tokens": 62,
"completion_tokens": 613,
"tokens_per_second": 51.04,
"finish_reason": "stop",
"weeyuga_meta": null,
"status_code": 200,
"response_chars": 2834,
"response_preview": "To create a Python CLI that scans a Git repository for changed YAML files, validates them against a JSON schema, and exits nonzero on failure, you can use the `git` command-line tool along with some Python libraries like `argparse`, `jsonsc",
"required_markers": [
"1.",
"2.",
"3.",
"4.",
"JSON schema",
"git"
],
"markers_hit": [
"1.",
"2.",
"3.",
"JSON schema",
"git"
],
"marker_hit_rate": 0.833,
"format_rule": "four_numbered_steps",
"format_ok": true,
"usable_answer": true,
"error": null
},
{
"type": "call",
"ts_utc": "2026-04-29T12:07:51Z",
"cell_id": "pavilion:weeyuga:qwen2.5-coder:0.5b",
"model": "qwen2.5-coder:0.5b",
"phase": "5q",
"question_id": "ssh_lockout_triage",
"run_idx": 4,
"duration_seconds": 8.951,
"prompt_tokens": 75,
"completion_tokens": 451,
"tokens_per_second": 50.39,
"finish_reason": "stop",
"weeyuga_meta": null,
"status_code": 200,
"response_chars": 1588,
"response_preview": "- **Hardening**: Ensure that your SSH server is secure by using strong passwords, enabling two-factor authentication, and updating software to the latest version.\n- **Safest First Checks**:\n - **Authorized Keys**: Verify that your public k",
"required_markers": [
"sshd_config",
"authorized_keys",
"journalctl",
"rollback",
"PasswordAuthentication"
],
"markers_hit": [
"sshd_config",
"authorized_keys",
"journalctl",
"rollback",
"PasswordAuthentication"
],
"marker_hit_rate": 1.0,
"format_rule": "five_bullets",
"format_ok": true,
"usable_answer": true,
"error": null
}
]
}
],
"meta_record": {
"type": "meta",
"benchmark_run_id": "ad057f5b-ed3f-4a95-a38e-361be310ffd6",
"harness_version": "3",
"harness_path": "scripts/benchmarks/run_pavilion_weeyuga.py",
"git_sha": "371ce70c8708ce0aedbe4f3bf096f8e8f8b11d43",
"git_dirty": true,
"started_at_utc": "2026-04-29T09:51:46Z",
"host": "Slobodans-MacBook-Air.local",
"load_avg_start": [
1.60205078125,
1.9716796875,
1.95751953125
],
"target_url": "http://10.8.0.3:11435",
"weeyuga_url": "http://10.8.0.3:11435",
"cell_id_prefix": "pavilion:weeyuga",
"execution_shape": "per-model-block",
"phase_plan": "hello+5q",
"models_planned": [
"qwen3.5:4b",
"qwen3.5:35b-a3b-uncensored-iq1m",
"qwen3.5:35b-a3b-iq2s",
"qwen3.5:9b-q6k",
"qwen3.5:9b-q4km",
"qwen3.5:2b",
"qwen3.5:0.8b",
"qwen3.5:9b",
"qwen2.5-coder:14b",
"qwen2.5-coder:3b",
"qwen3:14b",
"qwen3:8b",
"qwen3:4b",
"qwen2.5:3b",
"qwen2.5-coder:1.5b",
"qwen2.5-coder:0.5b"
],
"canonical_options": {
"temperature": 0.1,
"num_ctx": 4096,
"num_predict": 2048
},
"timeout_seconds": 360,
"suite_5q_path": "/Users/slobodan/Documents/MyServers/instances/vps-81-17-99-14/telemetry/small_model_eval_questions.json",
"suite_20q_path": "/Users/slobodan/Documents/MyServers/instances/vps-81-17-99-14/telemetry/python_task_suite_questions.json",
"env_inference_route": null,
"env_llamacpp_url": null
},
"packaged_at": "2026-05-06T12:28:24Z"
}