6370 lines
188 KiB
JSON
6370 lines
188 KiB
JSON
{
|
|
"id": "ad057f5b-ed3f-4a95-a38e-361be310ffd6",
|
|
"title": "pavilion-weeyuga-v3 \u2014 qwen2.5/qwen2.5-coder/qwen3/qwen3.5 on pavilion",
|
|
"date": "2026-04-29",
|
|
"started_at": "2026-04-29T09:51:46Z",
|
|
"git_sha": "371ce70c8708ce0aedbe4f3bf096f8e8f8b11d43",
|
|
"hardware": "pavilion",
|
|
"hardware_label": "Pavilion \u00b7 HP laptop \u00b7 GTX 1050 4 GB \u00b7 16 GB RAM \u00b7 i7-9750H",
|
|
"engine": "weeyuga",
|
|
"harness": "pavilion-weeyuga-v3",
|
|
"model_family": "qwen2.5/qwen2.5-coder/qwen3/qwen3.5",
|
|
"model_sizes": [
|
|
"qwen3.5:4b",
|
|
"qwen3.5:35b-a3b-uncensored",
|
|
"qwen3.5:35b-a3b-iq2s",
|
|
"qwen3.5:9b-q6k",
|
|
"qwen3.5:9b",
|
|
"qwen3.5:2b",
|
|
"qwen3.5:0.8b",
|
|
"qwen2.5-coder:14b",
|
|
"qwen2.5-coder:3b",
|
|
"qwen3:14b",
|
|
"qwen3:8b",
|
|
"qwen3:4b",
|
|
"qwen2.5:3b",
|
|
"qwen2.5-coder:1.5b",
|
|
"qwen2.5-coder:0.5b"
|
|
],
|
|
"task_kind": "chat",
|
|
"tags": [
|
|
"a3b",
|
|
"iq2",
|
|
"pavilion",
|
|
"qwen2.5",
|
|
"qwen3",
|
|
"qwen3.5",
|
|
"weeyuga"
|
|
],
|
|
"headline": "96 calls across 16 cell(s); 12 errors",
|
|
"janie_blurb_md": null,
|
|
"janie_blurb_status": "pending",
|
|
"caveat": null,
|
|
"caveat_severity": null,
|
|
"methodology_ref": "SITE_DATA_AUDIT_AND_MIGRATION_PLAN_2026-05-06",
|
|
"methodology_deviations_md": null,
|
|
"results_table": [
|
|
{
|
|
"label": "qwen3.5:4b",
|
|
"cell_id": "pavilion:weeyuga:qwen3.5:4b",
|
|
"n_calls": 6,
|
|
"n_errors": 0,
|
|
"duration_ms_mean": null,
|
|
"duration_ms_p50": null,
|
|
"duration_ms_p95": null,
|
|
"tokens_per_sec_mean": null,
|
|
"tokens_per_sec_p50": null,
|
|
"tokens_per_sec_p95": null,
|
|
"tokens_per_sec_max": null,
|
|
"memory_mb": null,
|
|
"watts_avg": null,
|
|
"notes": null
|
|
},
|
|
{
|
|
"label": "qwen3.5:35b-a3b-uncensored-iq1m",
|
|
"cell_id": "pavilion:weeyuga:qwen3.5:35b-a3b-uncensored-iq1m",
|
|
"n_calls": 6,
|
|
"n_errors": 0,
|
|
"duration_ms_mean": null,
|
|
"duration_ms_p50": null,
|
|
"duration_ms_p95": null,
|
|
"tokens_per_sec_mean": null,
|
|
"tokens_per_sec_p50": null,
|
|
"tokens_per_sec_p95": null,
|
|
"tokens_per_sec_max": null,
|
|
"memory_mb": null,
|
|
"watts_avg": null,
|
|
"notes": null
|
|
},
|
|
{
|
|
"label": "qwen3.5:35b-a3b-iq2s",
|
|
"cell_id": "pavilion:weeyuga:qwen3.5:35b-a3b-iq2s",
|
|
"n_calls": 6,
|
|
"n_errors": 0,
|
|
"duration_ms_mean": null,
|
|
"duration_ms_p50": null,
|
|
"duration_ms_p95": null,
|
|
"tokens_per_sec_mean": null,
|
|
"tokens_per_sec_p50": null,
|
|
"tokens_per_sec_p95": null,
|
|
"tokens_per_sec_max": null,
|
|
"memory_mb": null,
|
|
"watts_avg": null,
|
|
"notes": null
|
|
},
|
|
{
|
|
"label": "qwen3.5:9b-q6k",
|
|
"cell_id": "pavilion:weeyuga:qwen3.5:9b-q6k",
|
|
"n_calls": 6,
|
|
"n_errors": 0,
|
|
"duration_ms_mean": null,
|
|
"duration_ms_p50": null,
|
|
"duration_ms_p95": null,
|
|
"tokens_per_sec_mean": null,
|
|
"tokens_per_sec_p50": null,
|
|
"tokens_per_sec_p95": null,
|
|
"tokens_per_sec_max": null,
|
|
"memory_mb": null,
|
|
"watts_avg": null,
|
|
"notes": null
|
|
},
|
|
{
|
|
"label": "qwen3.5:9b-q4km",
|
|
"cell_id": "pavilion:weeyuga:qwen3.5:9b-q4km",
|
|
"n_calls": 6,
|
|
"n_errors": 0,
|
|
"duration_ms_mean": null,
|
|
"duration_ms_p50": null,
|
|
"duration_ms_p95": null,
|
|
"tokens_per_sec_mean": null,
|
|
"tokens_per_sec_p50": null,
|
|
"tokens_per_sec_p95": null,
|
|
"tokens_per_sec_max": null,
|
|
"memory_mb": null,
|
|
"watts_avg": null,
|
|
"notes": null
|
|
},
|
|
{
|
|
"label": "qwen3.5:2b",
|
|
"cell_id": "pavilion:weeyuga:qwen3.5:2b",
|
|
"n_calls": 6,
|
|
"n_errors": 0,
|
|
"duration_ms_mean": null,
|
|
"duration_ms_p50": null,
|
|
"duration_ms_p95": null,
|
|
"tokens_per_sec_mean": null,
|
|
"tokens_per_sec_p50": null,
|
|
"tokens_per_sec_p95": null,
|
|
"tokens_per_sec_max": null,
|
|
"memory_mb": null,
|
|
"watts_avg": null,
|
|
"notes": null
|
|
},
|
|
{
|
|
"label": "qwen3.5:0.8b",
|
|
"cell_id": "pavilion:weeyuga:qwen3.5:0.8b",
|
|
"n_calls": 6,
|
|
"n_errors": 0,
|
|
"duration_ms_mean": null,
|
|
"duration_ms_p50": null,
|
|
"duration_ms_p95": null,
|
|
"tokens_per_sec_mean": null,
|
|
"tokens_per_sec_p50": null,
|
|
"tokens_per_sec_p95": null,
|
|
"tokens_per_sec_max": null,
|
|
"memory_mb": null,
|
|
"watts_avg": null,
|
|
"notes": null
|
|
},
|
|
{
|
|
"label": "qwen3.5:9b",
|
|
"cell_id": "pavilion:weeyuga:qwen3.5:9b",
|
|
"n_calls": 6,
|
|
"n_errors": 0,
|
|
"duration_ms_mean": null,
|
|
"duration_ms_p50": null,
|
|
"duration_ms_p95": null,
|
|
"tokens_per_sec_mean": null,
|
|
"tokens_per_sec_p50": null,
|
|
"tokens_per_sec_p95": null,
|
|
"tokens_per_sec_max": null,
|
|
"memory_mb": null,
|
|
"watts_avg": null,
|
|
"notes": null
|
|
},
|
|
{
|
|
"label": "qwen2.5-coder:14b",
|
|
"cell_id": "pavilion:weeyuga:qwen2.5-coder:14b",
|
|
"n_calls": 6,
|
|
"n_errors": 6,
|
|
"duration_ms_mean": null,
|
|
"duration_ms_p50": null,
|
|
"duration_ms_p95": null,
|
|
"tokens_per_sec_mean": null,
|
|
"tokens_per_sec_p50": null,
|
|
"tokens_per_sec_p95": null,
|
|
"tokens_per_sec_max": null,
|
|
"memory_mb": null,
|
|
"watts_avg": null,
|
|
"notes": null
|
|
},
|
|
{
|
|
"label": "qwen2.5-coder:3b",
|
|
"cell_id": "pavilion:weeyuga:qwen2.5-coder:3b",
|
|
"n_calls": 6,
|
|
"n_errors": 0,
|
|
"duration_ms_mean": null,
|
|
"duration_ms_p50": null,
|
|
"duration_ms_p95": null,
|
|
"tokens_per_sec_mean": null,
|
|
"tokens_per_sec_p50": null,
|
|
"tokens_per_sec_p95": null,
|
|
"tokens_per_sec_max": null,
|
|
"memory_mb": null,
|
|
"watts_avg": null,
|
|
"notes": null
|
|
},
|
|
{
|
|
"label": "qwen3:14b",
|
|
"cell_id": "pavilion:weeyuga:qwen3:14b",
|
|
"n_calls": 6,
|
|
"n_errors": 4,
|
|
"duration_ms_mean": null,
|
|
"duration_ms_p50": null,
|
|
"duration_ms_p95": null,
|
|
"tokens_per_sec_mean": null,
|
|
"tokens_per_sec_p50": null,
|
|
"tokens_per_sec_p95": null,
|
|
"tokens_per_sec_max": null,
|
|
"memory_mb": null,
|
|
"watts_avg": null,
|
|
"notes": null
|
|
},
|
|
{
|
|
"label": "qwen3:8b",
|
|
"cell_id": "pavilion:weeyuga:qwen3:8b",
|
|
"n_calls": 6,
|
|
"n_errors": 2,
|
|
"duration_ms_mean": null,
|
|
"duration_ms_p50": null,
|
|
"duration_ms_p95": null,
|
|
"tokens_per_sec_mean": null,
|
|
"tokens_per_sec_p50": null,
|
|
"tokens_per_sec_p95": null,
|
|
"tokens_per_sec_max": null,
|
|
"memory_mb": null,
|
|
"watts_avg": null,
|
|
"notes": null
|
|
},
|
|
{
|
|
"label": "qwen3:4b",
|
|
"cell_id": "pavilion:weeyuga:qwen3:4b",
|
|
"n_calls": 6,
|
|
"n_errors": 0,
|
|
"duration_ms_mean": null,
|
|
"duration_ms_p50": null,
|
|
"duration_ms_p95": null,
|
|
"tokens_per_sec_mean": null,
|
|
"tokens_per_sec_p50": null,
|
|
"tokens_per_sec_p95": null,
|
|
"tokens_per_sec_max": null,
|
|
"memory_mb": null,
|
|
"watts_avg": null,
|
|
"notes": null
|
|
},
|
|
{
|
|
"label": "qwen2.5:3b",
|
|
"cell_id": "pavilion:weeyuga:qwen2.5:3b",
|
|
"n_calls": 6,
|
|
"n_errors": 0,
|
|
"duration_ms_mean": null,
|
|
"duration_ms_p50": null,
|
|
"duration_ms_p95": null,
|
|
"tokens_per_sec_mean": null,
|
|
"tokens_per_sec_p50": null,
|
|
"tokens_per_sec_p95": null,
|
|
"tokens_per_sec_max": null,
|
|
"memory_mb": null,
|
|
"watts_avg": null,
|
|
"notes": null
|
|
},
|
|
{
|
|
"label": "qwen2.5-coder:1.5b",
|
|
"cell_id": "pavilion:weeyuga:qwen2.5-coder:1.5b",
|
|
"n_calls": 6,
|
|
"n_errors": 0,
|
|
"duration_ms_mean": null,
|
|
"duration_ms_p50": null,
|
|
"duration_ms_p95": null,
|
|
"tokens_per_sec_mean": null,
|
|
"tokens_per_sec_p50": null,
|
|
"tokens_per_sec_p95": null,
|
|
"tokens_per_sec_max": null,
|
|
"memory_mb": null,
|
|
"watts_avg": null,
|
|
"notes": null
|
|
},
|
|
{
|
|
"label": "qwen2.5-coder:0.5b",
|
|
"cell_id": "pavilion:weeyuga:qwen2.5-coder:0.5b",
|
|
"n_calls": 6,
|
|
"n_errors": 0,
|
|
"duration_ms_mean": null,
|
|
"duration_ms_p50": null,
|
|
"duration_ms_p95": null,
|
|
"tokens_per_sec_mean": null,
|
|
"tokens_per_sec_p50": null,
|
|
"tokens_per_sec_p95": null,
|
|
"tokens_per_sec_max": null,
|
|
"memory_mb": null,
|
|
"watts_avg": null,
|
|
"notes": null
|
|
}
|
|
],
|
|
"cells": [
|
|
{
|
|
"cell_id": "pavilion:weeyuga:qwen3.5:4b",
|
|
"n_calls": 6,
|
|
"n_errors": 0,
|
|
"duration_ms_mean": null,
|
|
"duration_ms_p50": null,
|
|
"duration_ms_p95": null,
|
|
"tokens_per_sec_mean": null,
|
|
"tokens_per_sec_p50": null,
|
|
"tokens_per_sec_p95": null,
|
|
"tokens_per_sec_max": null,
|
|
"cold_warm_split": {
|
|
"cold": {
|
|
"n_calls": 2,
|
|
"duration_ms_mean": null,
|
|
"duration_ms_p50": null,
|
|
"tokens_per_sec_mean": null
|
|
},
|
|
"warm": {
|
|
"n_calls": 4,
|
|
"duration_ms_mean": null,
|
|
"duration_ms_p50": null,
|
|
"tokens_per_sec_mean": null
|
|
}
|
|
},
|
|
"per_prompt": {
|
|
"unknown": {
|
|
"n_calls": 6,
|
|
"n_errors": 0,
|
|
"duration_ms_mean": null,
|
|
"duration_ms_p50": null,
|
|
"tokens_per_sec_mean": null,
|
|
"tokens_per_sec_p50": null
|
|
}
|
|
},
|
|
"chars_split": null,
|
|
"timeline": [
|
|
{
|
|
"prompt_id": null,
|
|
"run_idx": 0,
|
|
"phase": "hello",
|
|
"duration_ms": null,
|
|
"tokens_per_sec": null,
|
|
"error": null
|
|
},
|
|
{
|
|
"prompt_id": null,
|
|
"run_idx": 0,
|
|
"phase": "5q",
|
|
"duration_ms": null,
|
|
"tokens_per_sec": null,
|
|
"error": null
|
|
},
|
|
{
|
|
"prompt_id": null,
|
|
"run_idx": 1,
|
|
"phase": "5q",
|
|
"duration_ms": null,
|
|
"tokens_per_sec": null,
|
|
"error": null
|
|
},
|
|
{
|
|
"prompt_id": null,
|
|
"run_idx": 2,
|
|
"phase": "5q",
|
|
"duration_ms": null,
|
|
"tokens_per_sec": null,
|
|
"error": null
|
|
},
|
|
{
|
|
"prompt_id": null,
|
|
"run_idx": 3,
|
|
"phase": "5q",
|
|
"duration_ms": null,
|
|
"tokens_per_sec": null,
|
|
"error": null
|
|
},
|
|
{
|
|
"prompt_id": null,
|
|
"run_idx": 4,
|
|
"phase": "5q",
|
|
"duration_ms": null,
|
|
"tokens_per_sec": null,
|
|
"error": null
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"cell_id": "pavilion:weeyuga:qwen3.5:35b-a3b-uncensored-iq1m",
|
|
"n_calls": 6,
|
|
"n_errors": 0,
|
|
"duration_ms_mean": null,
|
|
"duration_ms_p50": null,
|
|
"duration_ms_p95": null,
|
|
"tokens_per_sec_mean": null,
|
|
"tokens_per_sec_p50": null,
|
|
"tokens_per_sec_p95": null,
|
|
"tokens_per_sec_max": null,
|
|
"cold_warm_split": {
|
|
"cold": {
|
|
"n_calls": 2,
|
|
"duration_ms_mean": null,
|
|
"duration_ms_p50": null,
|
|
"tokens_per_sec_mean": null
|
|
},
|
|
"warm": {
|
|
"n_calls": 4,
|
|
"duration_ms_mean": null,
|
|
"duration_ms_p50": null,
|
|
"tokens_per_sec_mean": null
|
|
}
|
|
},
|
|
"per_prompt": {
|
|
"unknown": {
|
|
"n_calls": 6,
|
|
"n_errors": 0,
|
|
"duration_ms_mean": null,
|
|
"duration_ms_p50": null,
|
|
"tokens_per_sec_mean": null,
|
|
"tokens_per_sec_p50": null
|
|
}
|
|
},
|
|
"chars_split": null,
|
|
"timeline": [
|
|
{
|
|
"prompt_id": null,
|
|
"run_idx": 0,
|
|
"phase": "hello",
|
|
"duration_ms": null,
|
|
"tokens_per_sec": null,
|
|
"error": null
|
|
},
|
|
{
|
|
"prompt_id": null,
|
|
"run_idx": 0,
|
|
"phase": "5q",
|
|
"duration_ms": null,
|
|
"tokens_per_sec": null,
|
|
"error": null
|
|
},
|
|
{
|
|
"prompt_id": null,
|
|
"run_idx": 1,
|
|
"phase": "5q",
|
|
"duration_ms": null,
|
|
"tokens_per_sec": null,
|
|
"error": null
|
|
},
|
|
{
|
|
"prompt_id": null,
|
|
"run_idx": 2,
|
|
"phase": "5q",
|
|
"duration_ms": null,
|
|
"tokens_per_sec": null,
|
|
"error": null
|
|
},
|
|
{
|
|
"prompt_id": null,
|
|
"run_idx": 3,
|
|
"phase": "5q",
|
|
"duration_ms": null,
|
|
"tokens_per_sec": null,
|
|
"error": null
|
|
},
|
|
{
|
|
"prompt_id": null,
|
|
"run_idx": 4,
|
|
"phase": "5q",
|
|
"duration_ms": null,
|
|
"tokens_per_sec": null,
|
|
"error": null
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"cell_id": "pavilion:weeyuga:qwen3.5:35b-a3b-iq2s",
|
|
"n_calls": 6,
|
|
"n_errors": 0,
|
|
"duration_ms_mean": null,
|
|
"duration_ms_p50": null,
|
|
"duration_ms_p95": null,
|
|
"tokens_per_sec_mean": null,
|
|
"tokens_per_sec_p50": null,
|
|
"tokens_per_sec_p95": null,
|
|
"tokens_per_sec_max": null,
|
|
"cold_warm_split": {
|
|
"cold": {
|
|
"n_calls": 2,
|
|
"duration_ms_mean": null,
|
|
"duration_ms_p50": null,
|
|
"tokens_per_sec_mean": null
|
|
},
|
|
"warm": {
|
|
"n_calls": 4,
|
|
"duration_ms_mean": null,
|
|
"duration_ms_p50": null,
|
|
"tokens_per_sec_mean": null
|
|
}
|
|
},
|
|
"per_prompt": {
|
|
"unknown": {
|
|
"n_calls": 6,
|
|
"n_errors": 0,
|
|
"duration_ms_mean": null,
|
|
"duration_ms_p50": null,
|
|
"tokens_per_sec_mean": null,
|
|
"tokens_per_sec_p50": null
|
|
}
|
|
},
|
|
"chars_split": null,
|
|
"timeline": [
|
|
{
|
|
"prompt_id": null,
|
|
"run_idx": 0,
|
|
"phase": "hello",
|
|
"duration_ms": null,
|
|
"tokens_per_sec": null,
|
|
"error": null
|
|
},
|
|
{
|
|
"prompt_id": null,
|
|
"run_idx": 0,
|
|
"phase": "5q",
|
|
"duration_ms": null,
|
|
"tokens_per_sec": null,
|
|
"error": null
|
|
},
|
|
{
|
|
"prompt_id": null,
|
|
"run_idx": 1,
|
|
"phase": "5q",
|
|
"duration_ms": null,
|
|
"tokens_per_sec": null,
|
|
"error": null
|
|
},
|
|
{
|
|
"prompt_id": null,
|
|
"run_idx": 2,
|
|
"phase": "5q",
|
|
"duration_ms": null,
|
|
"tokens_per_sec": null,
|
|
"error": null
|
|
},
|
|
{
|
|
"prompt_id": null,
|
|
"run_idx": 3,
|
|
"phase": "5q",
|
|
"duration_ms": null,
|
|
"tokens_per_sec": null,
|
|
"error": null
|
|
},
|
|
{
|
|
"prompt_id": null,
|
|
"run_idx": 4,
|
|
"phase": "5q",
|
|
"duration_ms": null,
|
|
"tokens_per_sec": null,
|
|
"error": null
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"cell_id": "pavilion:weeyuga:qwen3.5:9b-q6k",
|
|
"n_calls": 6,
|
|
"n_errors": 0,
|
|
"duration_ms_mean": null,
|
|
"duration_ms_p50": null,
|
|
"duration_ms_p95": null,
|
|
"tokens_per_sec_mean": null,
|
|
"tokens_per_sec_p50": null,
|
|
"tokens_per_sec_p95": null,
|
|
"tokens_per_sec_max": null,
|
|
"cold_warm_split": {
|
|
"cold": {
|
|
"n_calls": 2,
|
|
"duration_ms_mean": null,
|
|
"duration_ms_p50": null,
|
|
"tokens_per_sec_mean": null
|
|
},
|
|
"warm": {
|
|
"n_calls": 4,
|
|
"duration_ms_mean": null,
|
|
"duration_ms_p50": null,
|
|
"tokens_per_sec_mean": null
|
|
}
|
|
},
|
|
"per_prompt": {
|
|
"unknown": {
|
|
"n_calls": 6,
|
|
"n_errors": 0,
|
|
"duration_ms_mean": null,
|
|
"duration_ms_p50": null,
|
|
"tokens_per_sec_mean": null,
|
|
"tokens_per_sec_p50": null
|
|
}
|
|
},
|
|
"chars_split": null,
|
|
"timeline": [
|
|
{
|
|
"prompt_id": null,
|
|
"run_idx": 0,
|
|
"phase": "hello",
|
|
"duration_ms": null,
|
|
"tokens_per_sec": null,
|
|
"error": null
|
|
},
|
|
{
|
|
"prompt_id": null,
|
|
"run_idx": 0,
|
|
"phase": "5q",
|
|
"duration_ms": null,
|
|
"tokens_per_sec": null,
|
|
"error": null
|
|
},
|
|
{
|
|
"prompt_id": null,
|
|
"run_idx": 1,
|
|
"phase": "5q",
|
|
"duration_ms": null,
|
|
"tokens_per_sec": null,
|
|
"error": null
|
|
},
|
|
{
|
|
"prompt_id": null,
|
|
"run_idx": 2,
|
|
"phase": "5q",
|
|
"duration_ms": null,
|
|
"tokens_per_sec": null,
|
|
"error": null
|
|
},
|
|
{
|
|
"prompt_id": null,
|
|
"run_idx": 3,
|
|
"phase": "5q",
|
|
"duration_ms": null,
|
|
"tokens_per_sec": null,
|
|
"error": null
|
|
},
|
|
{
|
|
"prompt_id": null,
|
|
"run_idx": 4,
|
|
"phase": "5q",
|
|
"duration_ms": null,
|
|
"tokens_per_sec": null,
|
|
"error": null
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"cell_id": "pavilion:weeyuga:qwen3.5:9b-q4km",
|
|
"n_calls": 6,
|
|
"n_errors": 0,
|
|
"duration_ms_mean": null,
|
|
"duration_ms_p50": null,
|
|
"duration_ms_p95": null,
|
|
"tokens_per_sec_mean": null,
|
|
"tokens_per_sec_p50": null,
|
|
"tokens_per_sec_p95": null,
|
|
"tokens_per_sec_max": null,
|
|
"cold_warm_split": {
|
|
"cold": {
|
|
"n_calls": 2,
|
|
"duration_ms_mean": null,
|
|
"duration_ms_p50": null,
|
|
"tokens_per_sec_mean": null
|
|
},
|
|
"warm": {
|
|
"n_calls": 4,
|
|
"duration_ms_mean": null,
|
|
"duration_ms_p50": null,
|
|
"tokens_per_sec_mean": null
|
|
}
|
|
},
|
|
"per_prompt": {
|
|
"unknown": {
|
|
"n_calls": 6,
|
|
"n_errors": 0,
|
|
"duration_ms_mean": null,
|
|
"duration_ms_p50": null,
|
|
"tokens_per_sec_mean": null,
|
|
"tokens_per_sec_p50": null
|
|
}
|
|
},
|
|
"chars_split": null,
|
|
"timeline": [
|
|
{
|
|
"prompt_id": null,
|
|
"run_idx": 0,
|
|
"phase": "hello",
|
|
"duration_ms": null,
|
|
"tokens_per_sec": null,
|
|
"error": null
|
|
},
|
|
{
|
|
"prompt_id": null,
|
|
"run_idx": 0,
|
|
"phase": "5q",
|
|
"duration_ms": null,
|
|
"tokens_per_sec": null,
|
|
"error": null
|
|
},
|
|
{
|
|
"prompt_id": null,
|
|
"run_idx": 1,
|
|
"phase": "5q",
|
|
"duration_ms": null,
|
|
"tokens_per_sec": null,
|
|
"error": null
|
|
},
|
|
{
|
|
"prompt_id": null,
|
|
"run_idx": 2,
|
|
"phase": "5q",
|
|
"duration_ms": null,
|
|
"tokens_per_sec": null,
|
|
"error": null
|
|
},
|
|
{
|
|
"prompt_id": null,
|
|
"run_idx": 3,
|
|
"phase": "5q",
|
|
"duration_ms": null,
|
|
"tokens_per_sec": null,
|
|
"error": null
|
|
},
|
|
{
|
|
"prompt_id": null,
|
|
"run_idx": 4,
|
|
"phase": "5q",
|
|
"duration_ms": null,
|
|
"tokens_per_sec": null,
|
|
"error": null
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"cell_id": "pavilion:weeyuga:qwen3.5:2b",
|
|
"n_calls": 6,
|
|
"n_errors": 0,
|
|
"duration_ms_mean": null,
|
|
"duration_ms_p50": null,
|
|
"duration_ms_p95": null,
|
|
"tokens_per_sec_mean": null,
|
|
"tokens_per_sec_p50": null,
|
|
"tokens_per_sec_p95": null,
|
|
"tokens_per_sec_max": null,
|
|
"cold_warm_split": {
|
|
"cold": {
|
|
"n_calls": 2,
|
|
"duration_ms_mean": null,
|
|
"duration_ms_p50": null,
|
|
"tokens_per_sec_mean": null
|
|
},
|
|
"warm": {
|
|
"n_calls": 4,
|
|
"duration_ms_mean": null,
|
|
"duration_ms_p50": null,
|
|
"tokens_per_sec_mean": null
|
|
}
|
|
},
|
|
"per_prompt": {
|
|
"unknown": {
|
|
"n_calls": 6,
|
|
"n_errors": 0,
|
|
"duration_ms_mean": null,
|
|
"duration_ms_p50": null,
|
|
"tokens_per_sec_mean": null,
|
|
"tokens_per_sec_p50": null
|
|
}
|
|
},
|
|
"chars_split": null,
|
|
"timeline": [
|
|
{
|
|
"prompt_id": null,
|
|
"run_idx": 0,
|
|
"phase": "hello",
|
|
"duration_ms": null,
|
|
"tokens_per_sec": null,
|
|
"error": null
|
|
},
|
|
{
|
|
"prompt_id": null,
|
|
"run_idx": 0,
|
|
"phase": "5q",
|
|
"duration_ms": null,
|
|
"tokens_per_sec": null,
|
|
"error": null
|
|
},
|
|
{
|
|
"prompt_id": null,
|
|
"run_idx": 1,
|
|
"phase": "5q",
|
|
"duration_ms": null,
|
|
"tokens_per_sec": null,
|
|
"error": null
|
|
},
|
|
{
|
|
"prompt_id": null,
|
|
"run_idx": 2,
|
|
"phase": "5q",
|
|
"duration_ms": null,
|
|
"tokens_per_sec": null,
|
|
"error": null
|
|
},
|
|
{
|
|
"prompt_id": null,
|
|
"run_idx": 3,
|
|
"phase": "5q",
|
|
"duration_ms": null,
|
|
"tokens_per_sec": null,
|
|
"error": null
|
|
},
|
|
{
|
|
"prompt_id": null,
|
|
"run_idx": 4,
|
|
"phase": "5q",
|
|
"duration_ms": null,
|
|
"tokens_per_sec": null,
|
|
"error": null
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"cell_id": "pavilion:weeyuga:qwen3.5:0.8b",
|
|
"n_calls": 6,
|
|
"n_errors": 0,
|
|
"duration_ms_mean": null,
|
|
"duration_ms_p50": null,
|
|
"duration_ms_p95": null,
|
|
"tokens_per_sec_mean": null,
|
|
"tokens_per_sec_p50": null,
|
|
"tokens_per_sec_p95": null,
|
|
"tokens_per_sec_max": null,
|
|
"cold_warm_split": {
|
|
"cold": {
|
|
"n_calls": 2,
|
|
"duration_ms_mean": null,
|
|
"duration_ms_p50": null,
|
|
"tokens_per_sec_mean": null
|
|
},
|
|
"warm": {
|
|
"n_calls": 4,
|
|
"duration_ms_mean": null,
|
|
"duration_ms_p50": null,
|
|
"tokens_per_sec_mean": null
|
|
}
|
|
},
|
|
"per_prompt": {
|
|
"unknown": {
|
|
"n_calls": 6,
|
|
"n_errors": 0,
|
|
"duration_ms_mean": null,
|
|
"duration_ms_p50": null,
|
|
"tokens_per_sec_mean": null,
|
|
"tokens_per_sec_p50": null
|
|
}
|
|
},
|
|
"chars_split": null,
|
|
"timeline": [
|
|
{
|
|
"prompt_id": null,
|
|
"run_idx": 0,
|
|
"phase": "hello",
|
|
"duration_ms": null,
|
|
"tokens_per_sec": null,
|
|
"error": null
|
|
},
|
|
{
|
|
"prompt_id": null,
|
|
"run_idx": 0,
|
|
"phase": "5q",
|
|
"duration_ms": null,
|
|
"tokens_per_sec": null,
|
|
"error": null
|
|
},
|
|
{
|
|
"prompt_id": null,
|
|
"run_idx": 1,
|
|
"phase": "5q",
|
|
"duration_ms": null,
|
|
"tokens_per_sec": null,
|
|
"error": null
|
|
},
|
|
{
|
|
"prompt_id": null,
|
|
"run_idx": 2,
|
|
"phase": "5q",
|
|
"duration_ms": null,
|
|
"tokens_per_sec": null,
|
|
"error": null
|
|
},
|
|
{
|
|
"prompt_id": null,
|
|
"run_idx": 3,
|
|
"phase": "5q",
|
|
"duration_ms": null,
|
|
"tokens_per_sec": null,
|
|
"error": null
|
|
},
|
|
{
|
|
"prompt_id": null,
|
|
"run_idx": 4,
|
|
"phase": "5q",
|
|
"duration_ms": null,
|
|
"tokens_per_sec": null,
|
|
"error": null
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"cell_id": "pavilion:weeyuga:qwen3.5:9b",
|
|
"n_calls": 6,
|
|
"n_errors": 0,
|
|
"duration_ms_mean": null,
|
|
"duration_ms_p50": null,
|
|
"duration_ms_p95": null,
|
|
"tokens_per_sec_mean": null,
|
|
"tokens_per_sec_p50": null,
|
|
"tokens_per_sec_p95": null,
|
|
"tokens_per_sec_max": null,
|
|
"cold_warm_split": {
|
|
"cold": {
|
|
"n_calls": 2,
|
|
"duration_ms_mean": null,
|
|
"duration_ms_p50": null,
|
|
"tokens_per_sec_mean": null
|
|
},
|
|
"warm": {
|
|
"n_calls": 4,
|
|
"duration_ms_mean": null,
|
|
"duration_ms_p50": null,
|
|
"tokens_per_sec_mean": null
|
|
}
|
|
},
|
|
"per_prompt": {
|
|
"unknown": {
|
|
"n_calls": 6,
|
|
"n_errors": 0,
|
|
"duration_ms_mean": null,
|
|
"duration_ms_p50": null,
|
|
"tokens_per_sec_mean": null,
|
|
"tokens_per_sec_p50": null
|
|
}
|
|
},
|
|
"chars_split": null,
|
|
"timeline": [
|
|
{
|
|
"prompt_id": null,
|
|
"run_idx": 0,
|
|
"phase": "hello",
|
|
"duration_ms": null,
|
|
"tokens_per_sec": null,
|
|
"error": null
|
|
},
|
|
{
|
|
"prompt_id": null,
|
|
"run_idx": 0,
|
|
"phase": "5q",
|
|
"duration_ms": null,
|
|
"tokens_per_sec": null,
|
|
"error": null
|
|
},
|
|
{
|
|
"prompt_id": null,
|
|
"run_idx": 1,
|
|
"phase": "5q",
|
|
"duration_ms": null,
|
|
"tokens_per_sec": null,
|
|
"error": null
|
|
},
|
|
{
|
|
"prompt_id": null,
|
|
"run_idx": 2,
|
|
"phase": "5q",
|
|
"duration_ms": null,
|
|
"tokens_per_sec": null,
|
|
"error": null
|
|
},
|
|
{
|
|
"prompt_id": null,
|
|
"run_idx": 3,
|
|
"phase": "5q",
|
|
"duration_ms": null,
|
|
"tokens_per_sec": null,
|
|
"error": null
|
|
},
|
|
{
|
|
"prompt_id": null,
|
|
"run_idx": 4,
|
|
"phase": "5q",
|
|
"duration_ms": null,
|
|
"tokens_per_sec": null,
|
|
"error": null
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"cell_id": "pavilion:weeyuga:qwen2.5-coder:14b",
|
|
"n_calls": 6,
|
|
"n_errors": 6,
|
|
"duration_ms_mean": null,
|
|
"duration_ms_p50": null,
|
|
"duration_ms_p95": null,
|
|
"tokens_per_sec_mean": null,
|
|
"tokens_per_sec_p50": null,
|
|
"tokens_per_sec_p95": null,
|
|
"tokens_per_sec_max": null,
|
|
"cold_warm_split": {
|
|
"cold": {
|
|
"n_calls": 2,
|
|
"duration_ms_mean": null,
|
|
"duration_ms_p50": null,
|
|
"tokens_per_sec_mean": null
|
|
},
|
|
"warm": {
|
|
"n_calls": 4,
|
|
"duration_ms_mean": null,
|
|
"duration_ms_p50": null,
|
|
"tokens_per_sec_mean": null
|
|
}
|
|
},
|
|
"per_prompt": {
|
|
"unknown": {
|
|
"n_calls": 6,
|
|
"n_errors": 6,
|
|
"duration_ms_mean": null,
|
|
"duration_ms_p50": null,
|
|
"tokens_per_sec_mean": null,
|
|
"tokens_per_sec_p50": null
|
|
}
|
|
},
|
|
"chars_split": null,
|
|
"timeline": [
|
|
{
|
|
"prompt_id": null,
|
|
"run_idx": 0,
|
|
"phase": "hello",
|
|
"duration_ms": null,
|
|
"tokens_per_sec": null,
|
|
"error": "<HTTPError 500: 'Internal Server Error'>"
|
|
},
|
|
{
|
|
"prompt_id": null,
|
|
"run_idx": 0,
|
|
"phase": "5q",
|
|
"duration_ms": null,
|
|
"tokens_per_sec": null,
|
|
"error": "<HTTPError 500: 'Internal Server Error'>"
|
|
},
|
|
{
|
|
"prompt_id": null,
|
|
"run_idx": 1,
|
|
"phase": "5q",
|
|
"duration_ms": null,
|
|
"tokens_per_sec": null,
|
|
"error": "<HTTPError 500: 'Internal Server Error'>"
|
|
},
|
|
{
|
|
"prompt_id": null,
|
|
"run_idx": 2,
|
|
"phase": "5q",
|
|
"duration_ms": null,
|
|
"tokens_per_sec": null,
|
|
"error": "<HTTPError 500: 'Internal Server Error'>"
|
|
},
|
|
{
|
|
"prompt_id": null,
|
|
"run_idx": 3,
|
|
"phase": "5q",
|
|
"duration_ms": null,
|
|
"tokens_per_sec": null,
|
|
"error": "<HTTPError 500: 'Internal Server Error'>"
|
|
},
|
|
{
|
|
"prompt_id": null,
|
|
"run_idx": 4,
|
|
"phase": "5q",
|
|
"duration_ms": null,
|
|
"tokens_per_sec": null,
|
|
"error": "<HTTPError 500: 'Internal Server Error'>"
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"cell_id": "pavilion:weeyuga:qwen2.5-coder:3b",
|
|
"n_calls": 6,
|
|
"n_errors": 0,
|
|
"duration_ms_mean": null,
|
|
"duration_ms_p50": null,
|
|
"duration_ms_p95": null,
|
|
"tokens_per_sec_mean": null,
|
|
"tokens_per_sec_p50": null,
|
|
"tokens_per_sec_p95": null,
|
|
"tokens_per_sec_max": null,
|
|
"cold_warm_split": {
|
|
"cold": {
|
|
"n_calls": 2,
|
|
"duration_ms_mean": null,
|
|
"duration_ms_p50": null,
|
|
"tokens_per_sec_mean": null
|
|
},
|
|
"warm": {
|
|
"n_calls": 4,
|
|
"duration_ms_mean": null,
|
|
"duration_ms_p50": null,
|
|
"tokens_per_sec_mean": null
|
|
}
|
|
},
|
|
"per_prompt": {
|
|
"unknown": {
|
|
"n_calls": 6,
|
|
"n_errors": 0,
|
|
"duration_ms_mean": null,
|
|
"duration_ms_p50": null,
|
|
"tokens_per_sec_mean": null,
|
|
"tokens_per_sec_p50": null
|
|
}
|
|
},
|
|
"chars_split": null,
|
|
"timeline": [
|
|
{
|
|
"prompt_id": null,
|
|
"run_idx": 0,
|
|
"phase": "hello",
|
|
"duration_ms": null,
|
|
"tokens_per_sec": null,
|
|
"error": null
|
|
},
|
|
{
|
|
"prompt_id": null,
|
|
"run_idx": 0,
|
|
"phase": "5q",
|
|
"duration_ms": null,
|
|
"tokens_per_sec": null,
|
|
"error": null
|
|
},
|
|
{
|
|
"prompt_id": null,
|
|
"run_idx": 1,
|
|
"phase": "5q",
|
|
"duration_ms": null,
|
|
"tokens_per_sec": null,
|
|
"error": null
|
|
},
|
|
{
|
|
"prompt_id": null,
|
|
"run_idx": 2,
|
|
"phase": "5q",
|
|
"duration_ms": null,
|
|
"tokens_per_sec": null,
|
|
"error": null
|
|
},
|
|
{
|
|
"prompt_id": null,
|
|
"run_idx": 3,
|
|
"phase": "5q",
|
|
"duration_ms": null,
|
|
"tokens_per_sec": null,
|
|
"error": null
|
|
},
|
|
{
|
|
"prompt_id": null,
|
|
"run_idx": 4,
|
|
"phase": "5q",
|
|
"duration_ms": null,
|
|
"tokens_per_sec": null,
|
|
"error": null
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"cell_id": "pavilion:weeyuga:qwen3:14b",
|
|
"n_calls": 6,
|
|
"n_errors": 4,
|
|
"duration_ms_mean": null,
|
|
"duration_ms_p50": null,
|
|
"duration_ms_p95": null,
|
|
"tokens_per_sec_mean": null,
|
|
"tokens_per_sec_p50": null,
|
|
"tokens_per_sec_p95": null,
|
|
"tokens_per_sec_max": null,
|
|
"cold_warm_split": {
|
|
"cold": {
|
|
"n_calls": 2,
|
|
"duration_ms_mean": null,
|
|
"duration_ms_p50": null,
|
|
"tokens_per_sec_mean": null
|
|
},
|
|
"warm": {
|
|
"n_calls": 4,
|
|
"duration_ms_mean": null,
|
|
"duration_ms_p50": null,
|
|
"tokens_per_sec_mean": null
|
|
}
|
|
},
|
|
"per_prompt": {
|
|
"unknown": {
|
|
"n_calls": 6,
|
|
"n_errors": 4,
|
|
"duration_ms_mean": null,
|
|
"duration_ms_p50": null,
|
|
"tokens_per_sec_mean": null,
|
|
"tokens_per_sec_p50": null
|
|
}
|
|
},
|
|
"chars_split": null,
|
|
"timeline": [
|
|
{
|
|
"prompt_id": null,
|
|
"run_idx": 0,
|
|
"phase": "hello",
|
|
"duration_ms": null,
|
|
"tokens_per_sec": null,
|
|
"error": "TimeoutError('timed out')"
|
|
},
|
|
{
|
|
"prompt_id": null,
|
|
"run_idx": 0,
|
|
"phase": "5q",
|
|
"duration_ms": null,
|
|
"tokens_per_sec": null,
|
|
"error": "TimeoutError('timed out')"
|
|
},
|
|
{
|
|
"prompt_id": null,
|
|
"run_idx": 1,
|
|
"phase": "5q",
|
|
"duration_ms": null,
|
|
"tokens_per_sec": null,
|
|
"error": "TimeoutError('timed out')"
|
|
},
|
|
{
|
|
"prompt_id": null,
|
|
"run_idx": 2,
|
|
"phase": "5q",
|
|
"duration_ms": null,
|
|
"tokens_per_sec": null,
|
|
"error": "TimeoutError('timed out')"
|
|
},
|
|
{
|
|
"prompt_id": null,
|
|
"run_idx": 3,
|
|
"phase": "5q",
|
|
"duration_ms": null,
|
|
"tokens_per_sec": null,
|
|
"error": null
|
|
},
|
|
{
|
|
"prompt_id": null,
|
|
"run_idx": 4,
|
|
"phase": "5q",
|
|
"duration_ms": null,
|
|
"tokens_per_sec": null,
|
|
"error": null
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"cell_id": "pavilion:weeyuga:qwen3:8b",
|
|
"n_calls": 6,
|
|
"n_errors": 2,
|
|
"duration_ms_mean": null,
|
|
"duration_ms_p50": null,
|
|
"duration_ms_p95": null,
|
|
"tokens_per_sec_mean": null,
|
|
"tokens_per_sec_p50": null,
|
|
"tokens_per_sec_p95": null,
|
|
"tokens_per_sec_max": null,
|
|
"cold_warm_split": {
|
|
"cold": {
|
|
"n_calls": 2,
|
|
"duration_ms_mean": null,
|
|
"duration_ms_p50": null,
|
|
"tokens_per_sec_mean": null
|
|
},
|
|
"warm": {
|
|
"n_calls": 4,
|
|
"duration_ms_mean": null,
|
|
"duration_ms_p50": null,
|
|
"tokens_per_sec_mean": null
|
|
}
|
|
},
|
|
"per_prompt": {
|
|
"unknown": {
|
|
"n_calls": 6,
|
|
"n_errors": 2,
|
|
"duration_ms_mean": null,
|
|
"duration_ms_p50": null,
|
|
"tokens_per_sec_mean": null,
|
|
"tokens_per_sec_p50": null
|
|
}
|
|
},
|
|
"chars_split": null,
|
|
"timeline": [
|
|
{
|
|
"prompt_id": null,
|
|
"run_idx": 0,
|
|
"phase": "hello",
|
|
"duration_ms": null,
|
|
"tokens_per_sec": null,
|
|
"error": "TimeoutError('timed out')"
|
|
},
|
|
{
|
|
"prompt_id": null,
|
|
"run_idx": 0,
|
|
"phase": "5q",
|
|
"duration_ms": null,
|
|
"tokens_per_sec": null,
|
|
"error": null
|
|
},
|
|
{
|
|
"prompt_id": null,
|
|
"run_idx": 1,
|
|
"phase": "5q",
|
|
"duration_ms": null,
|
|
"tokens_per_sec": null,
|
|
"error": "TimeoutError('timed out')"
|
|
},
|
|
{
|
|
"prompt_id": null,
|
|
"run_idx": 2,
|
|
"phase": "5q",
|
|
"duration_ms": null,
|
|
"tokens_per_sec": null,
|
|
"error": null
|
|
},
|
|
{
|
|
"prompt_id": null,
|
|
"run_idx": 3,
|
|
"phase": "5q",
|
|
"duration_ms": null,
|
|
"tokens_per_sec": null,
|
|
"error": null
|
|
},
|
|
{
|
|
"prompt_id": null,
|
|
"run_idx": 4,
|
|
"phase": "5q",
|
|
"duration_ms": null,
|
|
"tokens_per_sec": null,
|
|
"error": null
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"cell_id": "pavilion:weeyuga:qwen3:4b",
|
|
"n_calls": 6,
|
|
"n_errors": 0,
|
|
"duration_ms_mean": null,
|
|
"duration_ms_p50": null,
|
|
"duration_ms_p95": null,
|
|
"tokens_per_sec_mean": null,
|
|
"tokens_per_sec_p50": null,
|
|
"tokens_per_sec_p95": null,
|
|
"tokens_per_sec_max": null,
|
|
"cold_warm_split": {
|
|
"cold": {
|
|
"n_calls": 2,
|
|
"duration_ms_mean": null,
|
|
"duration_ms_p50": null,
|
|
"tokens_per_sec_mean": null
|
|
},
|
|
"warm": {
|
|
"n_calls": 4,
|
|
"duration_ms_mean": null,
|
|
"duration_ms_p50": null,
|
|
"tokens_per_sec_mean": null
|
|
}
|
|
},
|
|
"per_prompt": {
|
|
"unknown": {
|
|
"n_calls": 6,
|
|
"n_errors": 0,
|
|
"duration_ms_mean": null,
|
|
"duration_ms_p50": null,
|
|
"tokens_per_sec_mean": null,
|
|
"tokens_per_sec_p50": null
|
|
}
|
|
},
|
|
"chars_split": null,
|
|
"timeline": [
|
|
{
|
|
"prompt_id": null,
|
|
"run_idx": 0,
|
|
"phase": "hello",
|
|
"duration_ms": null,
|
|
"tokens_per_sec": null,
|
|
"error": null
|
|
},
|
|
{
|
|
"prompt_id": null,
|
|
"run_idx": 0,
|
|
"phase": "5q",
|
|
"duration_ms": null,
|
|
"tokens_per_sec": null,
|
|
"error": null
|
|
},
|
|
{
|
|
"prompt_id": null,
|
|
"run_idx": 1,
|
|
"phase": "5q",
|
|
"duration_ms": null,
|
|
"tokens_per_sec": null,
|
|
"error": null
|
|
},
|
|
{
|
|
"prompt_id": null,
|
|
"run_idx": 2,
|
|
"phase": "5q",
|
|
"duration_ms": null,
|
|
"tokens_per_sec": null,
|
|
"error": null
|
|
},
|
|
{
|
|
"prompt_id": null,
|
|
"run_idx": 3,
|
|
"phase": "5q",
|
|
"duration_ms": null,
|
|
"tokens_per_sec": null,
|
|
"error": null
|
|
},
|
|
{
|
|
"prompt_id": null,
|
|
"run_idx": 4,
|
|
"phase": "5q",
|
|
"duration_ms": null,
|
|
"tokens_per_sec": null,
|
|
"error": null
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"cell_id": "pavilion:weeyuga:qwen2.5:3b",
|
|
"n_calls": 6,
|
|
"n_errors": 0,
|
|
"duration_ms_mean": null,
|
|
"duration_ms_p50": null,
|
|
"duration_ms_p95": null,
|
|
"tokens_per_sec_mean": null,
|
|
"tokens_per_sec_p50": null,
|
|
"tokens_per_sec_p95": null,
|
|
"tokens_per_sec_max": null,
|
|
"cold_warm_split": {
|
|
"cold": {
|
|
"n_calls": 2,
|
|
"duration_ms_mean": null,
|
|
"duration_ms_p50": null,
|
|
"tokens_per_sec_mean": null
|
|
},
|
|
"warm": {
|
|
"n_calls": 4,
|
|
"duration_ms_mean": null,
|
|
"duration_ms_p50": null,
|
|
"tokens_per_sec_mean": null
|
|
}
|
|
},
|
|
"per_prompt": {
|
|
"unknown": {
|
|
"n_calls": 6,
|
|
"n_errors": 0,
|
|
"duration_ms_mean": null,
|
|
"duration_ms_p50": null,
|
|
"tokens_per_sec_mean": null,
|
|
"tokens_per_sec_p50": null
|
|
}
|
|
},
|
|
"chars_split": null,
|
|
"timeline": [
|
|
{
|
|
"prompt_id": null,
|
|
"run_idx": 0,
|
|
"phase": "hello",
|
|
"duration_ms": null,
|
|
"tokens_per_sec": null,
|
|
"error": null
|
|
},
|
|
{
|
|
"prompt_id": null,
|
|
"run_idx": 0,
|
|
"phase": "5q",
|
|
"duration_ms": null,
|
|
"tokens_per_sec": null,
|
|
"error": null
|
|
},
|
|
{
|
|
"prompt_id": null,
|
|
"run_idx": 1,
|
|
"phase": "5q",
|
|
"duration_ms": null,
|
|
"tokens_per_sec": null,
|
|
"error": null
|
|
},
|
|
{
|
|
"prompt_id": null,
|
|
"run_idx": 2,
|
|
"phase": "5q",
|
|
"duration_ms": null,
|
|
"tokens_per_sec": null,
|
|
"error": null
|
|
},
|
|
{
|
|
"prompt_id": null,
|
|
"run_idx": 3,
|
|
"phase": "5q",
|
|
"duration_ms": null,
|
|
"tokens_per_sec": null,
|
|
"error": null
|
|
},
|
|
{
|
|
"prompt_id": null,
|
|
"run_idx": 4,
|
|
"phase": "5q",
|
|
"duration_ms": null,
|
|
"tokens_per_sec": null,
|
|
"error": null
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"cell_id": "pavilion:weeyuga:qwen2.5-coder:1.5b",
|
|
"n_calls": 6,
|
|
"n_errors": 0,
|
|
"duration_ms_mean": null,
|
|
"duration_ms_p50": null,
|
|
"duration_ms_p95": null,
|
|
"tokens_per_sec_mean": null,
|
|
"tokens_per_sec_p50": null,
|
|
"tokens_per_sec_p95": null,
|
|
"tokens_per_sec_max": null,
|
|
"cold_warm_split": {
|
|
"cold": {
|
|
"n_calls": 2,
|
|
"duration_ms_mean": null,
|
|
"duration_ms_p50": null,
|
|
"tokens_per_sec_mean": null
|
|
},
|
|
"warm": {
|
|
"n_calls": 4,
|
|
"duration_ms_mean": null,
|
|
"duration_ms_p50": null,
|
|
"tokens_per_sec_mean": null
|
|
}
|
|
},
|
|
"per_prompt": {
|
|
"unknown": {
|
|
"n_calls": 6,
|
|
"n_errors": 0,
|
|
"duration_ms_mean": null,
|
|
"duration_ms_p50": null,
|
|
"tokens_per_sec_mean": null,
|
|
"tokens_per_sec_p50": null
|
|
}
|
|
},
|
|
"chars_split": null,
|
|
"timeline": [
|
|
{
|
|
"prompt_id": null,
|
|
"run_idx": 0,
|
|
"phase": "hello",
|
|
"duration_ms": null,
|
|
"tokens_per_sec": null,
|
|
"error": null
|
|
},
|
|
{
|
|
"prompt_id": null,
|
|
"run_idx": 0,
|
|
"phase": "5q",
|
|
"duration_ms": null,
|
|
"tokens_per_sec": null,
|
|
"error": null
|
|
},
|
|
{
|
|
"prompt_id": null,
|
|
"run_idx": 1,
|
|
"phase": "5q",
|
|
"duration_ms": null,
|
|
"tokens_per_sec": null,
|
|
"error": null
|
|
},
|
|
{
|
|
"prompt_id": null,
|
|
"run_idx": 2,
|
|
"phase": "5q",
|
|
"duration_ms": null,
|
|
"tokens_per_sec": null,
|
|
"error": null
|
|
},
|
|
{
|
|
"prompt_id": null,
|
|
"run_idx": 3,
|
|
"phase": "5q",
|
|
"duration_ms": null,
|
|
"tokens_per_sec": null,
|
|
"error": null
|
|
},
|
|
{
|
|
"prompt_id": null,
|
|
"run_idx": 4,
|
|
"phase": "5q",
|
|
"duration_ms": null,
|
|
"tokens_per_sec": null,
|
|
"error": null
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"cell_id": "pavilion:weeyuga:qwen2.5-coder:0.5b",
|
|
"n_calls": 6,
|
|
"n_errors": 0,
|
|
"duration_ms_mean": null,
|
|
"duration_ms_p50": null,
|
|
"duration_ms_p95": null,
|
|
"tokens_per_sec_mean": null,
|
|
"tokens_per_sec_p50": null,
|
|
"tokens_per_sec_p95": null,
|
|
"tokens_per_sec_max": null,
|
|
"cold_warm_split": {
|
|
"cold": {
|
|
"n_calls": 2,
|
|
"duration_ms_mean": null,
|
|
"duration_ms_p50": null,
|
|
"tokens_per_sec_mean": null
|
|
},
|
|
"warm": {
|
|
"n_calls": 4,
|
|
"duration_ms_mean": null,
|
|
"duration_ms_p50": null,
|
|
"tokens_per_sec_mean": null
|
|
}
|
|
},
|
|
"per_prompt": {
|
|
"unknown": {
|
|
"n_calls": 6,
|
|
"n_errors": 0,
|
|
"duration_ms_mean": null,
|
|
"duration_ms_p50": null,
|
|
"tokens_per_sec_mean": null,
|
|
"tokens_per_sec_p50": null
|
|
}
|
|
},
|
|
"chars_split": null,
|
|
"timeline": [
|
|
{
|
|
"prompt_id": null,
|
|
"run_idx": 0,
|
|
"phase": "hello",
|
|
"duration_ms": null,
|
|
"tokens_per_sec": null,
|
|
"error": null
|
|
},
|
|
{
|
|
"prompt_id": null,
|
|
"run_idx": 0,
|
|
"phase": "5q",
|
|
"duration_ms": null,
|
|
"tokens_per_sec": null,
|
|
"error": null
|
|
},
|
|
{
|
|
"prompt_id": null,
|
|
"run_idx": 1,
|
|
"phase": "5q",
|
|
"duration_ms": null,
|
|
"tokens_per_sec": null,
|
|
"error": null
|
|
},
|
|
{
|
|
"prompt_id": null,
|
|
"run_idx": 2,
|
|
"phase": "5q",
|
|
"duration_ms": null,
|
|
"tokens_per_sec": null,
|
|
"error": null
|
|
},
|
|
{
|
|
"prompt_id": null,
|
|
"run_idx": 3,
|
|
"phase": "5q",
|
|
"duration_ms": null,
|
|
"tokens_per_sec": null,
|
|
"error": null
|
|
},
|
|
{
|
|
"prompt_id": null,
|
|
"run_idx": 4,
|
|
"phase": "5q",
|
|
"duration_ms": null,
|
|
"tokens_per_sec": null,
|
|
"error": null
|
|
}
|
|
]
|
|
}
|
|
],
|
|
"n_calls_total": 96,
|
|
"n_errors_total": 12,
|
|
"chart_spec": {
|
|
"kind": "bar",
|
|
"x": "cell",
|
|
"y": "tokens_per_sec_mean",
|
|
"series": [
|
|
"qwen3.5:4b",
|
|
"qwen3.5:35b-a3b-uncensored-iq1m",
|
|
"qwen3.5:35b-a3b-iq2s",
|
|
"qwen3.5:9b-q6k",
|
|
"qwen3.5:9b-q4km",
|
|
"qwen3.5:2b",
|
|
"qwen3.5:0.8b",
|
|
"qwen3.5:9b",
|
|
"qwen2.5-coder:14b",
|
|
"qwen2.5-coder:3b",
|
|
"qwen3:14b",
|
|
"qwen3:8b",
|
|
"qwen3:4b",
|
|
"qwen2.5:3b",
|
|
"qwen2.5-coder:1.5b",
|
|
"qwen2.5-coder:0.5b"
|
|
],
|
|
"data_url": "/data/ad057f5b-ed3f-4a95-a38e-361be310ffd6/run.jsonl"
|
|
},
|
|
"site_grade": "archive-only",
|
|
"site_grade_reason": "error rate 12/96",
|
|
"raw_data_urls": {
|
|
"jsonl": "/data/ad057f5b-ed3f-4a95-a38e-361be310ffd6/run.jsonl",
|
|
"log": null,
|
|
"md": null,
|
|
"metadata": "/data/ad057f5b-ed3f-4a95-a38e-361be310ffd6/metadata.json",
|
|
"gitea_dir": "https://git.weeyuga.com/slobodanmargetic988/weeyuga-benchmarks-public/src/branch/main/runs/ad057f5b-ed3f-4a95-a38e-361be310ffd6/"
|
|
},
|
|
"synthesis_doc": "SITE_DATA_AUDIT_AND_MIGRATION_PLAN_2026-05-06.md",
|
|
"synthesis_docs_all": [
|
|
"SITE_DATA_AUDIT_AND_MIGRATION_PLAN_2026-05-06.md",
|
|
"PAVILION_LLAMACPP_VS_OLLAMA_v0_INCOMPLETE.md"
|
|
],
|
|
"related_ids": [],
|
|
"status": "complete",
|
|
"visibility": "draft",
|
|
"cells_full": [
|
|
{
|
|
"cell_id": "pavilion:weeyuga:qwen3.5:4b",
|
|
"n_calls": 6,
|
|
"n_errors": 0,
|
|
"duration_ms_mean": null,
|
|
"duration_ms_p50": null,
|
|
"duration_ms_p95": null,
|
|
"tokens_per_sec_mean": null,
|
|
"tokens_per_sec_p50": null,
|
|
"tokens_per_sec_p95": null,
|
|
"tokens_per_sec_max": null,
|
|
"cold_warm_split": {
|
|
"cold": {
|
|
"n_calls": 2,
|
|
"duration_ms_mean": null,
|
|
"duration_ms_p50": null,
|
|
"tokens_per_sec_mean": null
|
|
},
|
|
"warm": {
|
|
"n_calls": 4,
|
|
"duration_ms_mean": null,
|
|
"duration_ms_p50": null,
|
|
"tokens_per_sec_mean": null
|
|
}
|
|
},
|
|
"per_prompt": {
|
|
"unknown": {
|
|
"n_calls": 6,
|
|
"n_errors": 0,
|
|
"duration_ms_mean": null,
|
|
"duration_ms_p50": null,
|
|
"tokens_per_sec_mean": null,
|
|
"tokens_per_sec_p50": null
|
|
}
|
|
},
|
|
"chars_split": null,
|
|
"timeline": [
|
|
{
|
|
"prompt_id": null,
|
|
"run_idx": 0,
|
|
"phase": "hello",
|
|
"duration_ms": null,
|
|
"tokens_per_sec": null,
|
|
"error": null
|
|
},
|
|
{
|
|
"prompt_id": null,
|
|
"run_idx": 0,
|
|
"phase": "5q",
|
|
"duration_ms": null,
|
|
"tokens_per_sec": null,
|
|
"error": null
|
|
},
|
|
{
|
|
"prompt_id": null,
|
|
"run_idx": 1,
|
|
"phase": "5q",
|
|
"duration_ms": null,
|
|
"tokens_per_sec": null,
|
|
"error": null
|
|
},
|
|
{
|
|
"prompt_id": null,
|
|
"run_idx": 2,
|
|
"phase": "5q",
|
|
"duration_ms": null,
|
|
"tokens_per_sec": null,
|
|
"error": null
|
|
},
|
|
{
|
|
"prompt_id": null,
|
|
"run_idx": 3,
|
|
"phase": "5q",
|
|
"duration_ms": null,
|
|
"tokens_per_sec": null,
|
|
"error": null
|
|
},
|
|
{
|
|
"prompt_id": null,
|
|
"run_idx": 4,
|
|
"phase": "5q",
|
|
"duration_ms": null,
|
|
"tokens_per_sec": null,
|
|
"error": null
|
|
}
|
|
],
|
|
"calls": [
|
|
{
|
|
"type": "call",
|
|
"ts_utc": "2026-04-29T09:51:53Z",
|
|
"cell_id": "pavilion:weeyuga:qwen3.5:4b",
|
|
"model": "qwen3.5:4b",
|
|
"phase": "hello",
|
|
"question_id": "hello_check",
|
|
"run_idx": 0,
|
|
"duration_seconds": 7.103,
|
|
"prompt_tokens": 16,
|
|
"completion_tokens": 251,
|
|
"tokens_per_second": 35.34,
|
|
"finish_reason": "stop",
|
|
"weeyuga_meta": null,
|
|
"status_code": 200,
|
|
"response_chars": 62,
|
|
"response_preview": "Hi there! I'm glad to help! What would you like to do today? \ud83d\ude0a",
|
|
"required_markers": [],
|
|
"markers_hit": [],
|
|
"marker_hit_rate": null,
|
|
"format_rule": "",
|
|
"format_ok": null,
|
|
"usable_answer": true,
|
|
"error": null
|
|
},
|
|
{
|
|
"type": "call",
|
|
"ts_utc": "2026-04-29T09:52:21Z",
|
|
"cell_id": "pavilion:weeyuga:qwen3.5:4b",
|
|
"model": "qwen3.5:4b",
|
|
"phase": "5q",
|
|
"question_id": "disk_guard_bash",
|
|
"run_idx": 0,
|
|
"duration_seconds": 28.241,
|
|
"prompt_tokens": 71,
|
|
"completion_tokens": 1124,
|
|
"tokens_per_second": 39.8,
|
|
"finish_reason": "stop",
|
|
"weeyuga_meta": null,
|
|
"status_code": 200,
|
|
"response_chars": 246,
|
|
"response_preview": "```bash\n#!/bin/bash\n\n# Check disk usage for /\nif [ -d \"/\" ]; then\n usage=$(df -P / | awk '{print $5}')\n if [ \"$usage\" -gt 85 ]; then\n echo \"WARNING: Disk usage for / is at ${usage}% (above 85% threshold)\"\n exit 1\n fi\n",
|
|
"required_markers": [
|
|
"#!/usr/bin/env bash",
|
|
"df -P /",
|
|
"85",
|
|
"exit 1"
|
|
],
|
|
"markers_hit": [
|
|
"df -P /",
|
|
"85",
|
|
"exit 1"
|
|
],
|
|
"marker_hit_rate": 0.75,
|
|
"format_rule": "bash_code",
|
|
"format_ok": false,
|
|
"usable_answer": true,
|
|
"error": null
|
|
},
|
|
{
|
|
"type": "call",
|
|
"ts_utc": "2026-04-29T09:53:11Z",
|
|
"cell_id": "pavilion:weeyuga:qwen3.5:4b",
|
|
"model": "qwen3.5:4b",
|
|
"phase": "5q",
|
|
"question_id": "ipv4_python_tests",
|
|
"run_idx": 1,
|
|
"duration_seconds": 49.606,
|
|
"prompt_tokens": 55,
|
|
"completion_tokens": 2048,
|
|
"tokens_per_second": 41.29,
|
|
"finish_reason": "length",
|
|
"weeyuga_meta": null,
|
|
"status_code": 200,
|
|
"response_chars": 1630,
|
|
"response_preview": "```python\ndef is_valid_ipv4(value: str) -> bool:\n \"\"\"\n Check if a string is a valid IPv4 address.\n \"\"\"\n if not isinstance(value, str):\n return False\n parts = value.split('.')\n if len(parts) != 4:\n return Fals",
|
|
"required_markers": [
|
|
"def is_valid_ipv4",
|
|
"def test_",
|
|
"assert",
|
|
"split('.')"
|
|
],
|
|
"markers_hit": [
|
|
"def is_valid_ipv4",
|
|
"split('.')"
|
|
],
|
|
"marker_hit_rate": 0.5,
|
|
"format_rule": "python_code",
|
|
"format_ok": false,
|
|
"usable_answer": true,
|
|
"error": null
|
|
},
|
|
{
|
|
"type": "call",
|
|
"ts_utc": "2026-04-29T09:53:38Z",
|
|
"cell_id": "pavilion:weeyuga:qwen3.5:4b",
|
|
"model": "qwen3.5:4b",
|
|
"phase": "5q",
|
|
"question_id": "nginx_safe_reload",
|
|
"run_idx": 2,
|
|
"duration_seconds": 26.721,
|
|
"prompt_tokens": 39,
|
|
"completion_tokens": 1076,
|
|
"tokens_per_second": 40.27,
|
|
"finish_reason": "stop",
|
|
"weeyuga_meta": null,
|
|
"status_code": 200,
|
|
"response_chars": 116,
|
|
"response_preview": "cp /etc/nginx/nginx.conf /etc/nginx/nginx.conf.bak\nnginx -t\nif [ $? -eq 0 ]; then nginx -s reload; else nginx -t; fi",
|
|
"required_markers": [
|
|
"cp /etc/nginx/nginx.conf",
|
|
"nginx -t",
|
|
"systemctl reload nginx",
|
|
"&&"
|
|
],
|
|
"markers_hit": [
|
|
"cp /etc/nginx/nginx.conf",
|
|
"nginx -t"
|
|
],
|
|
"marker_hit_rate": 0.5,
|
|
"format_rule": "shell_lines",
|
|
"format_ok": true,
|
|
"usable_answer": true,
|
|
"error": null
|
|
},
|
|
{
|
|
"type": "call",
|
|
"ts_utc": "2026-04-29T09:54:27Z",
|
|
"cell_id": "pavilion:weeyuga:qwen3.5:4b",
|
|
"model": "qwen3.5:4b",
|
|
"phase": "5q",
|
|
"question_id": "yaml_cli_plan",
|
|
"run_idx": 3,
|
|
"duration_seconds": 49.969,
|
|
"prompt_tokens": 43,
|
|
"completion_tokens": 2048,
|
|
"tokens_per_second": 40.99,
|
|
"finish_reason": "length",
|
|
"weeyuga_meta": null,
|
|
"status_code": 200,
|
|
"response_chars": 7628,
|
|
"response_preview": "# Python CLI for Git-Scanned YAML Schema Validation\n\n```python\n#!/usr/bin/env python3\n\"\"\"\nYAML Schema Validator CLI\nScans git repos for changed YAML files and validates against JSON schemas.\n\"\"\"\n\nimport sys\nimport os\nimport json\nimport yaml",
|
|
"required_markers": [
|
|
"1.",
|
|
"2.",
|
|
"3.",
|
|
"4.",
|
|
"JSON schema",
|
|
"git"
|
|
],
|
|
"markers_hit": [
|
|
"JSON schema",
|
|
"git"
|
|
],
|
|
"marker_hit_rate": 0.333,
|
|
"format_rule": "four_numbered_steps",
|
|
"format_ok": false,
|
|
"usable_answer": true,
|
|
"error": null
|
|
},
|
|
{
|
|
"type": "call",
|
|
"ts_utc": "2026-04-29T09:54:56Z",
|
|
"cell_id": "pavilion:weeyuga:qwen3.5:4b",
|
|
"model": "qwen3.5:4b",
|
|
"phase": "5q",
|
|
"question_id": "ssh_lockout_triage",
|
|
"run_idx": 4,
|
|
"duration_seconds": 28.774,
|
|
"prompt_tokens": 56,
|
|
"completion_tokens": 1124,
|
|
"tokens_per_second": 39.06,
|
|
"finish_reason": "stop",
|
|
"weeyuga_meta": null,
|
|
"status_code": 200,
|
|
"response_chars": 468,
|
|
"response_preview": "- Review `sshd_config` for misconfigurations that might cause permission denied errors.\n- Review `authorized_keys` permissions to ensure only trusted keys are allowed to connect.\n- Review `PasswordAuthentication` settings to ensure password",
|
|
"required_markers": [
|
|
"sshd_config",
|
|
"authorized_keys",
|
|
"journalctl",
|
|
"rollback",
|
|
"PasswordAuthentication"
|
|
],
|
|
"markers_hit": [
|
|
"sshd_config",
|
|
"authorized_keys",
|
|
"journalctl",
|
|
"rollback",
|
|
"PasswordAuthentication"
|
|
],
|
|
"marker_hit_rate": 1.0,
|
|
"format_rule": "five_bullets",
|
|
"format_ok": true,
|
|
"usable_answer": true,
|
|
"error": null
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"cell_id": "pavilion:weeyuga:qwen3.5:35b-a3b-uncensored-iq1m",
|
|
"n_calls": 6,
|
|
"n_errors": 0,
|
|
"duration_ms_mean": null,
|
|
"duration_ms_p50": null,
|
|
"duration_ms_p95": null,
|
|
"tokens_per_sec_mean": null,
|
|
"tokens_per_sec_p50": null,
|
|
"tokens_per_sec_p95": null,
|
|
"tokens_per_sec_max": null,
|
|
"cold_warm_split": {
|
|
"cold": {
|
|
"n_calls": 2,
|
|
"duration_ms_mean": null,
|
|
"duration_ms_p50": null,
|
|
"tokens_per_sec_mean": null
|
|
},
|
|
"warm": {
|
|
"n_calls": 4,
|
|
"duration_ms_mean": null,
|
|
"duration_ms_p50": null,
|
|
"tokens_per_sec_mean": null
|
|
}
|
|
},
|
|
"per_prompt": {
|
|
"unknown": {
|
|
"n_calls": 6,
|
|
"n_errors": 0,
|
|
"duration_ms_mean": null,
|
|
"duration_ms_p50": null,
|
|
"tokens_per_sec_mean": null,
|
|
"tokens_per_sec_p50": null
|
|
}
|
|
},
|
|
"chars_split": null,
|
|
"timeline": [
|
|
{
|
|
"prompt_id": null,
|
|
"run_idx": 0,
|
|
"phase": "hello",
|
|
"duration_ms": null,
|
|
"tokens_per_sec": null,
|
|
"error": null
|
|
},
|
|
{
|
|
"prompt_id": null,
|
|
"run_idx": 0,
|
|
"phase": "5q",
|
|
"duration_ms": null,
|
|
"tokens_per_sec": null,
|
|
"error": null
|
|
},
|
|
{
|
|
"prompt_id": null,
|
|
"run_idx": 1,
|
|
"phase": "5q",
|
|
"duration_ms": null,
|
|
"tokens_per_sec": null,
|
|
"error": null
|
|
},
|
|
{
|
|
"prompt_id": null,
|
|
"run_idx": 2,
|
|
"phase": "5q",
|
|
"duration_ms": null,
|
|
"tokens_per_sec": null,
|
|
"error": null
|
|
},
|
|
{
|
|
"prompt_id": null,
|
|
"run_idx": 3,
|
|
"phase": "5q",
|
|
"duration_ms": null,
|
|
"tokens_per_sec": null,
|
|
"error": null
|
|
},
|
|
{
|
|
"prompt_id": null,
|
|
"run_idx": 4,
|
|
"phase": "5q",
|
|
"duration_ms": null,
|
|
"tokens_per_sec": null,
|
|
"error": null
|
|
}
|
|
],
|
|
"calls": [
|
|
{
|
|
"type": "call",
|
|
"ts_utc": "2026-04-29T09:55:15Z",
|
|
"cell_id": "pavilion:weeyuga:qwen3.5:35b-a3b-uncensored-iq1m",
|
|
"model": "qwen3.5:35b-a3b-uncensored-iq1m",
|
|
"phase": "hello",
|
|
"question_id": "hello_check",
|
|
"run_idx": 0,
|
|
"duration_seconds": 18.944,
|
|
"prompt_tokens": 16,
|
|
"completion_tokens": 750,
|
|
"tokens_per_second": 39.59,
|
|
"finish_reason": "stop",
|
|
"weeyuga_meta": null,
|
|
"status_code": 200,
|
|
"response_chars": 185,
|
|
"response_preview": "Hello! I'm glad to help you. What would you like to do today? Whether you need assistance with a specific task, need help with something, or just want to chat, I'm here to assist you! \ud83d\ude0a",
|
|
"required_markers": [],
|
|
"markers_hit": [],
|
|
"marker_hit_rate": null,
|
|
"format_rule": "",
|
|
"format_ok": null,
|
|
"usable_answer": true,
|
|
"error": null
|
|
},
|
|
{
|
|
"type": "call",
|
|
"ts_utc": "2026-04-29T09:55:42Z",
|
|
"cell_id": "pavilion:weeyuga:qwen3.5:35b-a3b-uncensored-iq1m",
|
|
"model": "qwen3.5:35b-a3b-uncensored-iq1m",
|
|
"phase": "5q",
|
|
"question_id": "disk_guard_bash",
|
|
"run_idx": 0,
|
|
"duration_seconds": 27.028,
|
|
"prompt_tokens": 71,
|
|
"completion_tokens": 1123,
|
|
"tokens_per_second": 41.55,
|
|
"finish_reason": "stop",
|
|
"weeyuga_meta": null,
|
|
"status_code": 200,
|
|
"response_chars": 242,
|
|
"response_preview": "#!/bin/bash\n\n# Check disk usage for /\nif [ -d \"/\" ]; then\n usage=$(df -P / | awk '{print $5}')\n if [ \"$usage\" -gt 85 ]; then\n echo \"WARNING: Disk usage for / is at ${usage}% (above 85% threshold)\"\n exit 1\n fi\nfi\n\nexit",
|
|
"required_markers": [
|
|
"#!/usr/bin/env bash",
|
|
"df -P /",
|
|
"85",
|
|
"exit 1"
|
|
],
|
|
"markers_hit": [
|
|
"df -P /",
|
|
"85",
|
|
"exit 1"
|
|
],
|
|
"marker_hit_rate": 0.75,
|
|
"format_rule": "bash_code",
|
|
"format_ok": false,
|
|
"usable_answer": true,
|
|
"error": null
|
|
},
|
|
{
|
|
"type": "call",
|
|
"ts_utc": "2026-04-29T09:56:02Z",
|
|
"cell_id": "pavilion:weeyuga:qwen3.5:35b-a3b-uncensored-iq1m",
|
|
"model": "qwen3.5:35b-a3b-uncensored-iq1m",
|
|
"phase": "5q",
|
|
"question_id": "ipv4_python_tests",
|
|
"run_idx": 1,
|
|
"duration_seconds": 19.353,
|
|
"prompt_tokens": 55,
|
|
"completion_tokens": 776,
|
|
"tokens_per_second": 40.1,
|
|
"finish_reason": "stop",
|
|
"weeyuga_meta": null,
|
|
"status_code": 200,
|
|
"response_chars": 665,
|
|
"response_preview": "```python\ndef is_valid_ipv4(value: str) -> bool:\n \"\"\"\n Check if a string represents a valid IPv4 address.\n \"\"\"\n if not isinstance(value, str):\n return False\n parts = value.split('.')\n if len(parts) != 4:\n ret",
|
|
"required_markers": [
|
|
"def is_valid_ipv4",
|
|
"def test_",
|
|
"assert",
|
|
"split('.')"
|
|
],
|
|
"markers_hit": [
|
|
"def is_valid_ipv4",
|
|
"def test_",
|
|
"assert",
|
|
"split('.')"
|
|
],
|
|
"marker_hit_rate": 1.0,
|
|
"format_rule": "python_code",
|
|
"format_ok": true,
|
|
"usable_answer": true,
|
|
"error": null
|
|
},
|
|
{
|
|
"type": "call",
|
|
"ts_utc": "2026-04-29T09:56:27Z",
|
|
"cell_id": "pavilion:weeyuga:qwen3.5:35b-a3b-uncensored-iq1m",
|
|
"model": "qwen3.5:35b-a3b-uncensored-iq1m",
|
|
"phase": "5q",
|
|
"question_id": "nginx_safe_reload",
|
|
"run_idx": 2,
|
|
"duration_seconds": 25.905,
|
|
"prompt_tokens": 39,
|
|
"completion_tokens": 1073,
|
|
"tokens_per_second": 41.42,
|
|
"finish_reason": "stop",
|
|
"weeyuga_meta": null,
|
|
"status_code": 200,
|
|
"response_chars": 104,
|
|
"response_preview": "cp /etc/nginx/nginx.conf /etc/nginx/nginx.conf.bak\nnginx -t\nif [ $? -eq 0 ]; then\n nginx -s reload\nfi",
|
|
"required_markers": [
|
|
"cp /etc/nginx/nginx.conf",
|
|
"nginx -t",
|
|
"systemctl reload nginx",
|
|
"&&"
|
|
],
|
|
"markers_hit": [
|
|
"cp /etc/nginx/nginx.conf",
|
|
"nginx -t"
|
|
],
|
|
"marker_hit_rate": 0.5,
|
|
"format_rule": "shell_lines",
|
|
"format_ok": true,
|
|
"usable_answer": true,
|
|
"error": null
|
|
},
|
|
{
|
|
"type": "call",
|
|
"ts_utc": "2026-04-29T09:57:17Z",
|
|
"cell_id": "pavilion:weeyuga:qwen3.5:35b-a3b-uncensored-iq1m",
|
|
"model": "qwen3.5:35b-a3b-uncensored-iq1m",
|
|
"phase": "5q",
|
|
"question_id": "yaml_cli_plan",
|
|
"run_idx": 3,
|
|
"duration_seconds": 49.427,
|
|
"prompt_tokens": 43,
|
|
"completion_tokens": 2048,
|
|
"tokens_per_second": 41.43,
|
|
"finish_reason": "length",
|
|
"weeyuga_meta": null,
|
|
"status_code": 200,
|
|
"response_chars": 9007,
|
|
"response_preview": "# Python CLI for Git-Scanned YAML Schema Validation\n\n```python\n#!/usr/bin/env python3\n\"\"\"\nCLI for scanning git repos for changed YAML files and validating against JSON schemas.\n\"\"\"\n\nimport argparse\nimport json\nimport os\nimport sys\nfrom path",
|
|
"required_markers": [
|
|
"1.",
|
|
"2.",
|
|
"3.",
|
|
"4.",
|
|
"JSON schema",
|
|
"git"
|
|
],
|
|
"markers_hit": [
|
|
"JSON schema",
|
|
"git"
|
|
],
|
|
"marker_hit_rate": 0.333,
|
|
"format_rule": "four_numbered_steps",
|
|
"format_ok": false,
|
|
"usable_answer": true,
|
|
"error": null
|
|
},
|
|
{
|
|
"type": "call",
|
|
"ts_utc": "2026-04-29T09:57:44Z",
|
|
"cell_id": "pavilion:weeyuga:qwen3.5:35b-a3b-uncensored-iq1m",
|
|
"model": "qwen3.5:35b-a3b-uncensored-iq1m",
|
|
"phase": "5q",
|
|
"question_id": "ssh_lockout_triage",
|
|
"run_idx": 4,
|
|
"duration_seconds": 27.269,
|
|
"prompt_tokens": 56,
|
|
"completion_tokens": 1108,
|
|
"tokens_per_second": 40.63,
|
|
"finish_reason": "stop",
|
|
"weeyuga_meta": null,
|
|
"status_code": 200,
|
|
"response_chars": 354,
|
|
"response_preview": "- Review `sshd_config` to ensure security settings are properly configured.\n- Verify `authorized_keys` permissions to prevent unauthorized access.\n- Check `PasswordAuthentication` settings to ensure secure password handling.\n- Review `journ",
|
|
"required_markers": [
|
|
"sshd_config",
|
|
"authorized_keys",
|
|
"journalctl",
|
|
"rollback",
|
|
"PasswordAuthentication"
|
|
],
|
|
"markers_hit": [
|
|
"sshd_config",
|
|
"authorized_keys",
|
|
"journalctl",
|
|
"rollback",
|
|
"PasswordAuthentication"
|
|
],
|
|
"marker_hit_rate": 1.0,
|
|
"format_rule": "five_bullets",
|
|
"format_ok": true,
|
|
"usable_answer": true,
|
|
"error": null
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"cell_id": "pavilion:weeyuga:qwen3.5:35b-a3b-iq2s",
|
|
"n_calls": 6,
|
|
"n_errors": 0,
|
|
"duration_ms_mean": null,
|
|
"duration_ms_p50": null,
|
|
"duration_ms_p95": null,
|
|
"tokens_per_sec_mean": null,
|
|
"tokens_per_sec_p50": null,
|
|
"tokens_per_sec_p95": null,
|
|
"tokens_per_sec_max": null,
|
|
"cold_warm_split": {
|
|
"cold": {
|
|
"n_calls": 2,
|
|
"duration_ms_mean": null,
|
|
"duration_ms_p50": null,
|
|
"tokens_per_sec_mean": null
|
|
},
|
|
"warm": {
|
|
"n_calls": 4,
|
|
"duration_ms_mean": null,
|
|
"duration_ms_p50": null,
|
|
"tokens_per_sec_mean": null
|
|
}
|
|
},
|
|
"per_prompt": {
|
|
"unknown": {
|
|
"n_calls": 6,
|
|
"n_errors": 0,
|
|
"duration_ms_mean": null,
|
|
"duration_ms_p50": null,
|
|
"tokens_per_sec_mean": null,
|
|
"tokens_per_sec_p50": null
|
|
}
|
|
},
|
|
"chars_split": null,
|
|
"timeline": [
|
|
{
|
|
"prompt_id": null,
|
|
"run_idx": 0,
|
|
"phase": "hello",
|
|
"duration_ms": null,
|
|
"tokens_per_sec": null,
|
|
"error": null
|
|
},
|
|
{
|
|
"prompt_id": null,
|
|
"run_idx": 0,
|
|
"phase": "5q",
|
|
"duration_ms": null,
|
|
"tokens_per_sec": null,
|
|
"error": null
|
|
},
|
|
{
|
|
"prompt_id": null,
|
|
"run_idx": 1,
|
|
"phase": "5q",
|
|
"duration_ms": null,
|
|
"tokens_per_sec": null,
|
|
"error": null
|
|
},
|
|
{
|
|
"prompt_id": null,
|
|
"run_idx": 2,
|
|
"phase": "5q",
|
|
"duration_ms": null,
|
|
"tokens_per_sec": null,
|
|
"error": null
|
|
},
|
|
{
|
|
"prompt_id": null,
|
|
"run_idx": 3,
|
|
"phase": "5q",
|
|
"duration_ms": null,
|
|
"tokens_per_sec": null,
|
|
"error": null
|
|
},
|
|
{
|
|
"prompt_id": null,
|
|
"run_idx": 4,
|
|
"phase": "5q",
|
|
"duration_ms": null,
|
|
"tokens_per_sec": null,
|
|
"error": null
|
|
}
|
|
],
|
|
"calls": [
|
|
{
|
|
"type": "call",
|
|
"ts_utc": "2026-04-29T09:57:48Z",
|
|
"cell_id": "pavilion:weeyuga:qwen3.5:35b-a3b-iq2s",
|
|
"model": "qwen3.5:35b-a3b-iq2s",
|
|
"phase": "hello",
|
|
"question_id": "hello_check",
|
|
"run_idx": 0,
|
|
"duration_seconds": 3.684,
|
|
"prompt_tokens": 16,
|
|
"completion_tokens": 134,
|
|
"tokens_per_second": 36.37,
|
|
"finish_reason": "stop",
|
|
"weeyuga_meta": null,
|
|
"status_code": 200,
|
|
"response_chars": 147,
|
|
"response_preview": "Hello! I'd love to help you. What would you like to work on? Whether it's writing, coding, problem-solving, or just chatting, I'm here to assist! \ud83d\ude0a",
|
|
"required_markers": [],
|
|
"markers_hit": [],
|
|
"marker_hit_rate": null,
|
|
"format_rule": "",
|
|
"format_ok": null,
|
|
"usable_answer": true,
|
|
"error": null
|
|
},
|
|
{
|
|
"type": "call",
|
|
"ts_utc": "2026-04-29T09:58:15Z",
|
|
"cell_id": "pavilion:weeyuga:qwen3.5:35b-a3b-iq2s",
|
|
"model": "qwen3.5:35b-a3b-iq2s",
|
|
"phase": "5q",
|
|
"question_id": "disk_guard_bash",
|
|
"run_idx": 0,
|
|
"duration_seconds": 27.061,
|
|
"prompt_tokens": 71,
|
|
"completion_tokens": 1121,
|
|
"tokens_per_second": 41.42,
|
|
"finish_reason": "stop",
|
|
"weeyuga_meta": null,
|
|
"status_code": 200,
|
|
"response_chars": 239,
|
|
"response_preview": "#!/bin/bash\n\n# Check disk usage for /\nif [ -d \"/\" ]; then\n disk_usage=$(df -P / | awk '{print $5}')\n if [ \"$disk_usage\" -gt 85 ]; then\n echo \"WARNING: Disk usage for / is at ${disk_usage}% (above 85%)\"\n exit 1\n fi\nfi",
|
|
"required_markers": [
|
|
"#!/usr/bin/env bash",
|
|
"df -P /",
|
|
"85",
|
|
"exit 1"
|
|
],
|
|
"markers_hit": [
|
|
"df -P /",
|
|
"85",
|
|
"exit 1"
|
|
],
|
|
"marker_hit_rate": 0.75,
|
|
"format_rule": "bash_code",
|
|
"format_ok": false,
|
|
"usable_answer": true,
|
|
"error": null
|
|
},
|
|
{
|
|
"type": "call",
|
|
"ts_utc": "2026-04-29T09:58:45Z",
|
|
"cell_id": "pavilion:weeyuga:qwen3.5:35b-a3b-iq2s",
|
|
"model": "qwen3.5:35b-a3b-iq2s",
|
|
"phase": "5q",
|
|
"question_id": "ipv4_python_tests",
|
|
"run_idx": 1,
|
|
"duration_seconds": 29.886,
|
|
"prompt_tokens": 55,
|
|
"completion_tokens": 1232,
|
|
"tokens_per_second": 41.22,
|
|
"finish_reason": "stop",
|
|
"weeyuga_meta": null,
|
|
"status_code": 200,
|
|
"response_chars": 613,
|
|
"response_preview": "```python\ndef is_valid_ipv4(value: str) -> bool:\n \"\"\"Check if a string is a valid IPv4 address.\"\"\"\n if not isinstance(value, str):\n return False\n parts = value.split('.')\n if len(parts) != 4:\n return False\n for ",
|
|
"required_markers": [
|
|
"def is_valid_ipv4",
|
|
"def test_",
|
|
"assert",
|
|
"split('.')"
|
|
],
|
|
"markers_hit": [
|
|
"def is_valid_ipv4",
|
|
"def test_",
|
|
"assert",
|
|
"split('.')"
|
|
],
|
|
"marker_hit_rate": 1.0,
|
|
"format_rule": "python_code",
|
|
"format_ok": true,
|
|
"usable_answer": true,
|
|
"error": null
|
|
},
|
|
{
|
|
"type": "call",
|
|
"ts_utc": "2026-04-29T09:59:11Z",
|
|
"cell_id": "pavilion:weeyuga:qwen3.5:35b-a3b-iq2s",
|
|
"model": "qwen3.5:35b-a3b-iq2s",
|
|
"phase": "5q",
|
|
"question_id": "nginx_safe_reload",
|
|
"run_idx": 2,
|
|
"duration_seconds": 25.688,
|
|
"prompt_tokens": 39,
|
|
"completion_tokens": 1060,
|
|
"tokens_per_second": 41.26,
|
|
"finish_reason": "stop",
|
|
"weeyuga_meta": null,
|
|
"status_code": 200,
|
|
"response_chars": 75,
|
|
"response_preview": "cp /etc/nginx/nginx.conf /etc/nginx/nginx.conf.bak\nnginx -t\nnginx -s reload",
|
|
"required_markers": [
|
|
"cp /etc/nginx/nginx.conf",
|
|
"nginx -t",
|
|
"systemctl reload nginx",
|
|
"&&"
|
|
],
|
|
"markers_hit": [
|
|
"cp /etc/nginx/nginx.conf",
|
|
"nginx -t"
|
|
],
|
|
"marker_hit_rate": 0.5,
|
|
"format_rule": "shell_lines",
|
|
"format_ok": true,
|
|
"usable_answer": true,
|
|
"error": null
|
|
},
|
|
{
|
|
"type": "call",
|
|
"ts_utc": "2026-04-29T10:00:01Z",
|
|
"cell_id": "pavilion:weeyuga:qwen3.5:35b-a3b-iq2s",
|
|
"model": "qwen3.5:35b-a3b-iq2s",
|
|
"phase": "5q",
|
|
"question_id": "yaml_cli_plan",
|
|
"run_idx": 3,
|
|
"duration_seconds": 50.264,
|
|
"prompt_tokens": 43,
|
|
"completion_tokens": 2048,
|
|
"tokens_per_second": 40.74,
|
|
"finish_reason": "length",
|
|
"weeyuga_meta": null,
|
|
"status_code": 200,
|
|
"response_chars": 7205,
|
|
"response_preview": "```python\n#!/usr/bin/env python3\n\"\"\"\nYAML Schema Validator CLI\nScans git repos for changed YAML files and validates against JSON schemas.\n\"\"\"\n\nimport argparse\nimport json\nimport os\nimport sys\nfrom pathlib import Path\nfrom typing import List",
|
|
"required_markers": [
|
|
"1.",
|
|
"2.",
|
|
"3.",
|
|
"4.",
|
|
"JSON schema",
|
|
"git"
|
|
],
|
|
"markers_hit": [
|
|
"JSON schema",
|
|
"git"
|
|
],
|
|
"marker_hit_rate": 0.333,
|
|
"format_rule": "four_numbered_steps",
|
|
"format_ok": false,
|
|
"usable_answer": true,
|
|
"error": null
|
|
},
|
|
{
|
|
"type": "call",
|
|
"ts_utc": "2026-04-29T10:00:28Z",
|
|
"cell_id": "pavilion:weeyuga:qwen3.5:35b-a3b-iq2s",
|
|
"model": "qwen3.5:35b-a3b-iq2s",
|
|
"phase": "5q",
|
|
"question_id": "ssh_lockout_triage",
|
|
"run_idx": 4,
|
|
"duration_seconds": 27.352,
|
|
"prompt_tokens": 56,
|
|
"completion_tokens": 1116,
|
|
"tokens_per_second": 40.8,
|
|
"finish_reason": "stop",
|
|
"weeyuga_meta": null,
|
|
"status_code": 200,
|
|
"response_chars": 409,
|
|
"response_preview": "- Review `sshd_config` for hardening settings before making changes.\n- Review `authorized_keys` permissions to ensure only trusted keys are allowed.\n- Review `PasswordAuthentication` settings to prevent unauthorized password usage.\n- Review",
|
|
"required_markers": [
|
|
"sshd_config",
|
|
"authorized_keys",
|
|
"journalctl",
|
|
"rollback",
|
|
"PasswordAuthentication"
|
|
],
|
|
"markers_hit": [
|
|
"sshd_config",
|
|
"authorized_keys",
|
|
"journalctl",
|
|
"rollback",
|
|
"PasswordAuthentication"
|
|
],
|
|
"marker_hit_rate": 1.0,
|
|
"format_rule": "five_bullets",
|
|
"format_ok": true,
|
|
"usable_answer": true,
|
|
"error": null
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"cell_id": "pavilion:weeyuga:qwen3.5:9b-q6k",
|
|
"n_calls": 6,
|
|
"n_errors": 0,
|
|
"duration_ms_mean": null,
|
|
"duration_ms_p50": null,
|
|
"duration_ms_p95": null,
|
|
"tokens_per_sec_mean": null,
|
|
"tokens_per_sec_p50": null,
|
|
"tokens_per_sec_p95": null,
|
|
"tokens_per_sec_max": null,
|
|
"cold_warm_split": {
|
|
"cold": {
|
|
"n_calls": 2,
|
|
"duration_ms_mean": null,
|
|
"duration_ms_p50": null,
|
|
"tokens_per_sec_mean": null
|
|
},
|
|
"warm": {
|
|
"n_calls": 4,
|
|
"duration_ms_mean": null,
|
|
"duration_ms_p50": null,
|
|
"tokens_per_sec_mean": null
|
|
}
|
|
},
|
|
"per_prompt": {
|
|
"unknown": {
|
|
"n_calls": 6,
|
|
"n_errors": 0,
|
|
"duration_ms_mean": null,
|
|
"duration_ms_p50": null,
|
|
"tokens_per_sec_mean": null,
|
|
"tokens_per_sec_p50": null
|
|
}
|
|
},
|
|
"chars_split": null,
|
|
"timeline": [
|
|
{
|
|
"prompt_id": null,
|
|
"run_idx": 0,
|
|
"phase": "hello",
|
|
"duration_ms": null,
|
|
"tokens_per_sec": null,
|
|
"error": null
|
|
},
|
|
{
|
|
"prompt_id": null,
|
|
"run_idx": 0,
|
|
"phase": "5q",
|
|
"duration_ms": null,
|
|
"tokens_per_sec": null,
|
|
"error": null
|
|
},
|
|
{
|
|
"prompt_id": null,
|
|
"run_idx": 1,
|
|
"phase": "5q",
|
|
"duration_ms": null,
|
|
"tokens_per_sec": null,
|
|
"error": null
|
|
},
|
|
{
|
|
"prompt_id": null,
|
|
"run_idx": 2,
|
|
"phase": "5q",
|
|
"duration_ms": null,
|
|
"tokens_per_sec": null,
|
|
"error": null
|
|
},
|
|
{
|
|
"prompt_id": null,
|
|
"run_idx": 3,
|
|
"phase": "5q",
|
|
"duration_ms": null,
|
|
"tokens_per_sec": null,
|
|
"error": null
|
|
},
|
|
{
|
|
"prompt_id": null,
|
|
"run_idx": 4,
|
|
"phase": "5q",
|
|
"duration_ms": null,
|
|
"tokens_per_sec": null,
|
|
"error": null
|
|
}
|
|
],
|
|
"calls": [
|
|
{
|
|
"type": "call",
|
|
"ts_utc": "2026-04-29T10:00:33Z",
|
|
"cell_id": "pavilion:weeyuga:qwen3.5:9b-q6k",
|
|
"model": "qwen3.5:9b-q6k",
|
|
"phase": "hello",
|
|
"question_id": "hello_check",
|
|
"run_idx": 0,
|
|
"duration_seconds": 4.401,
|
|
"prompt_tokens": 16,
|
|
"completion_tokens": 163,
|
|
"tokens_per_second": 37.04,
|
|
"finish_reason": "stop",
|
|
"weeyuga_meta": null,
|
|
"status_code": 200,
|
|
"response_chars": 202,
|
|
"response_preview": "Hello! I'm glad to help! \ud83d\ude0a What would you like to do today? Whether it's answering questions, solving problems, writing content, or just chatting, I'm here to assist you. Just let me know what you need!",
|
|
"required_markers": [],
|
|
"markers_hit": [],
|
|
"marker_hit_rate": null,
|
|
"format_rule": "",
|
|
"format_ok": null,
|
|
"usable_answer": true,
|
|
"error": null
|
|
},
|
|
{
|
|
"type": "call",
|
|
"ts_utc": "2026-04-29T10:01:00Z",
|
|
"cell_id": "pavilion:weeyuga:qwen3.5:9b-q6k",
|
|
"model": "qwen3.5:9b-q6k",
|
|
"phase": "5q",
|
|
"question_id": "disk_guard_bash",
|
|
"run_idx": 0,
|
|
"duration_seconds": 27.133,
|
|
"prompt_tokens": 71,
|
|
"completion_tokens": 1124,
|
|
"tokens_per_second": 41.43,
|
|
"finish_reason": "stop",
|
|
"weeyuga_meta": null,
|
|
"status_code": 200,
|
|
"response_chars": 246,
|
|
"response_preview": "```bash\n#!/bin/bash\n\n# Check disk usage for /\nif [ -d \"/\" ]; then\n usage=$(df -P / | awk '{print $5}')\n if [ \"$usage\" -gt 85 ]; then\n echo \"WARNING: Disk usage for / is at ${usage}% (above 85% threshold)\"\n exit 1\n fi\n",
|
|
"required_markers": [
|
|
"#!/usr/bin/env bash",
|
|
"df -P /",
|
|
"85",
|
|
"exit 1"
|
|
],
|
|
"markers_hit": [
|
|
"df -P /",
|
|
"85",
|
|
"exit 1"
|
|
],
|
|
"marker_hit_rate": 0.75,
|
|
"format_rule": "bash_code",
|
|
"format_ok": false,
|
|
"usable_answer": true,
|
|
"error": null
|
|
},
|
|
{
|
|
"type": "call",
|
|
"ts_utc": "2026-04-29T10:01:20Z",
|
|
"cell_id": "pavilion:weeyuga:qwen3.5:9b-q6k",
|
|
"model": "qwen3.5:9b-q6k",
|
|
"phase": "5q",
|
|
"question_id": "ipv4_python_tests",
|
|
"run_idx": 1,
|
|
"duration_seconds": 20.684,
|
|
"prompt_tokens": 55,
|
|
"completion_tokens": 838,
|
|
"tokens_per_second": 40.51,
|
|
"finish_reason": "stop",
|
|
"weeyuga_meta": null,
|
|
"status_code": 200,
|
|
"response_chars": 1012,
|
|
"response_preview": "```python\ndef is_valid_ipv4(value: str) -> bool:\n \"\"\"\n Check if a string is a valid IPv4 address.\n \n Args:\n value (str): The string to validate as an IPv4 address.\n \n Returns:\n bool: True if the string is",
|
|
"required_markers": [
|
|
"def is_valid_ipv4",
|
|
"def test_",
|
|
"assert",
|
|
"split('.')"
|
|
],
|
|
"markers_hit": [
|
|
"def is_valid_ipv4",
|
|
"def test_",
|
|
"assert",
|
|
"split('.')"
|
|
],
|
|
"marker_hit_rate": 1.0,
|
|
"format_rule": "python_code",
|
|
"format_ok": true,
|
|
"usable_answer": true,
|
|
"error": null
|
|
},
|
|
{
|
|
"type": "call",
|
|
"ts_utc": "2026-04-29T10:01:27Z",
|
|
"cell_id": "pavilion:weeyuga:qwen3.5:9b-q6k",
|
|
"model": "qwen3.5:9b-q6k",
|
|
"phase": "5q",
|
|
"question_id": "nginx_safe_reload",
|
|
"run_idx": 2,
|
|
"duration_seconds": 6.142,
|
|
"prompt_tokens": 39,
|
|
"completion_tokens": 241,
|
|
"tokens_per_second": 39.24,
|
|
"finish_reason": "stop",
|
|
"weeyuga_meta": null,
|
|
"status_code": 200,
|
|
"response_chars": 75,
|
|
"response_preview": "cp /etc/nginx/nginx.conf /etc/nginx/nginx.conf.bak\nnginx -t\nnginx -s reload",
|
|
"required_markers": [
|
|
"cp /etc/nginx/nginx.conf",
|
|
"nginx -t",
|
|
"systemctl reload nginx",
|
|
"&&"
|
|
],
|
|
"markers_hit": [
|
|
"cp /etc/nginx/nginx.conf",
|
|
"nginx -t"
|
|
],
|
|
"marker_hit_rate": 0.5,
|
|
"format_rule": "shell_lines",
|
|
"format_ok": true,
|
|
"usable_answer": true,
|
|
"error": null
|
|
},
|
|
{
|
|
"type": "call",
|
|
"ts_utc": "2026-04-29T10:02:16Z",
|
|
"cell_id": "pavilion:weeyuga:qwen3.5:9b-q6k",
|
|
"model": "qwen3.5:9b-q6k",
|
|
"phase": "5q",
|
|
"question_id": "yaml_cli_plan",
|
|
"run_idx": 3,
|
|
"duration_seconds": 49.257,
|
|
"prompt_tokens": 43,
|
|
"completion_tokens": 2048,
|
|
"tokens_per_second": 41.58,
|
|
"finish_reason": "length",
|
|
"weeyuga_meta": null,
|
|
"status_code": 200,
|
|
"response_chars": 7775,
|
|
"response_preview": "# Python CLI for Git-Scanned YAML Schema Validation\n\n## Step 1: Project Setup and Imports\n\n```python\nimport os\nimport sys\nimport json\nimport yaml\nfrom pathlib import Path\nfrom typing import List, Dict, Any, Optional\nimport argparse\n\n# Requi",
|
|
"required_markers": [
|
|
"1.",
|
|
"2.",
|
|
"3.",
|
|
"4.",
|
|
"JSON schema",
|
|
"git"
|
|
],
|
|
"markers_hit": [
|
|
"git"
|
|
],
|
|
"marker_hit_rate": 0.167,
|
|
"format_rule": "four_numbered_steps",
|
|
"format_ok": false,
|
|
"usable_answer": true,
|
|
"error": null
|
|
},
|
|
{
|
|
"type": "call",
|
|
"ts_utc": "2026-04-29T10:02:45Z",
|
|
"cell_id": "pavilion:weeyuga:qwen3.5:9b-q6k",
|
|
"model": "qwen3.5:9b-q6k",
|
|
"phase": "5q",
|
|
"question_id": "ssh_lockout_triage",
|
|
"run_idx": 4,
|
|
"duration_seconds": 29.306,
|
|
"prompt_tokens": 56,
|
|
"completion_tokens": 1165,
|
|
"tokens_per_second": 39.75,
|
|
"finish_reason": "stop",
|
|
"weeyuga_meta": null,
|
|
"status_code": 200,
|
|
"response_chars": 574,
|
|
"response_preview": "- Review your `sshd_config` file to ensure the `PasswordAuthentication` setting is enabled and `PermitRootLogin` is set to `yes` for security.\n- Verify that your `authorized_keys` file contains only valid public keys and is not empty.\n-",
|
|
"required_markers": [
|
|
"sshd_config",
|
|
"authorized_keys",
|
|
"journalctl",
|
|
"rollback",
|
|
"PasswordAuthentication"
|
|
],
|
|
"markers_hit": [
|
|
"sshd_config",
|
|
"authorized_keys",
|
|
"journalctl",
|
|
"rollback",
|
|
"PasswordAuthentication"
|
|
],
|
|
"marker_hit_rate": 1.0,
|
|
"format_rule": "five_bullets",
|
|
"format_ok": true,
|
|
"usable_answer": true,
|
|
"error": null
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"cell_id": "pavilion:weeyuga:qwen3.5:9b-q4km",
|
|
"n_calls": 6,
|
|
"n_errors": 0,
|
|
"duration_ms_mean": null,
|
|
"duration_ms_p50": null,
|
|
"duration_ms_p95": null,
|
|
"tokens_per_sec_mean": null,
|
|
"tokens_per_sec_p50": null,
|
|
"tokens_per_sec_p95": null,
|
|
"tokens_per_sec_max": null,
|
|
"cold_warm_split": {
|
|
"cold": {
|
|
"n_calls": 2,
|
|
"duration_ms_mean": null,
|
|
"duration_ms_p50": null,
|
|
"tokens_per_sec_mean": null
|
|
},
|
|
"warm": {
|
|
"n_calls": 4,
|
|
"duration_ms_mean": null,
|
|
"duration_ms_p50": null,
|
|
"tokens_per_sec_mean": null
|
|
}
|
|
},
|
|
"per_prompt": {
|
|
"unknown": {
|
|
"n_calls": 6,
|
|
"n_errors": 0,
|
|
"duration_ms_mean": null,
|
|
"duration_ms_p50": null,
|
|
"tokens_per_sec_mean": null,
|
|
"tokens_per_sec_p50": null
|
|
}
|
|
},
|
|
"chars_split": null,
|
|
"timeline": [
|
|
{
|
|
"prompt_id": null,
|
|
"run_idx": 0,
|
|
"phase": "hello",
|
|
"duration_ms": null,
|
|
"tokens_per_sec": null,
|
|
"error": null
|
|
},
|
|
{
|
|
"prompt_id": null,
|
|
"run_idx": 0,
|
|
"phase": "5q",
|
|
"duration_ms": null,
|
|
"tokens_per_sec": null,
|
|
"error": null
|
|
},
|
|
{
|
|
"prompt_id": null,
|
|
"run_idx": 1,
|
|
"phase": "5q",
|
|
"duration_ms": null,
|
|
"tokens_per_sec": null,
|
|
"error": null
|
|
},
|
|
{
|
|
"prompt_id": null,
|
|
"run_idx": 2,
|
|
"phase": "5q",
|
|
"duration_ms": null,
|
|
"tokens_per_sec": null,
|
|
"error": null
|
|
},
|
|
{
|
|
"prompt_id": null,
|
|
"run_idx": 3,
|
|
"phase": "5q",
|
|
"duration_ms": null,
|
|
"tokens_per_sec": null,
|
|
"error": null
|
|
},
|
|
{
|
|
"prompt_id": null,
|
|
"run_idx": 4,
|
|
"phase": "5q",
|
|
"duration_ms": null,
|
|
"tokens_per_sec": null,
|
|
"error": null
|
|
}
|
|
],
|
|
"calls": [
|
|
{
|
|
"type": "call",
|
|
"ts_utc": "2026-04-29T10:02:51Z",
|
|
"cell_id": "pavilion:weeyuga:qwen3.5:9b-q4km",
|
|
"model": "qwen3.5:9b-q4km",
|
|
"phase": "hello",
|
|
"question_id": "hello_check",
|
|
"run_idx": 0,
|
|
"duration_seconds": 5.912,
|
|
"prompt_tokens": 16,
|
|
"completion_tokens": 220,
|
|
"tokens_per_second": 37.21,
|
|
"finish_reason": "stop",
|
|
"weeyuga_meta": null,
|
|
"status_code": 200,
|
|
"response_chars": 186,
|
|
"response_preview": "Hello! I'm glad to help! \ud83d\ude0a \nWhat would you like to do today? For example: \n- Ask a question? \n- Solve a problem? \n- Get information? \n- Or just chat? \n\nLet me know what you need! \ud83d\ude4c",
|
|
"required_markers": [],
|
|
"markers_hit": [],
|
|
"marker_hit_rate": null,
|
|
"format_rule": "",
|
|
"format_ok": null,
|
|
"usable_answer": true,
|
|
"error": null
|
|
},
|
|
{
|
|
"type": "call",
|
|
"ts_utc": "2026-04-29T10:03:07Z",
|
|
"cell_id": "pavilion:weeyuga:qwen3.5:9b-q4km",
|
|
"model": "qwen3.5:9b-q4km",
|
|
"phase": "5q",
|
|
"question_id": "disk_guard_bash",
|
|
"run_idx": 0,
|
|
"duration_seconds": 15.618,
|
|
"prompt_tokens": 71,
|
|
"completion_tokens": 636,
|
|
"tokens_per_second": 40.72,
|
|
"finish_reason": "stop",
|
|
"weeyuga_meta": null,
|
|
"status_code": 200,
|
|
"response_chars": 263,
|
|
"response_preview": "```bash\n#!/bin/bash\n\n# Check disk usage for /\nif [ -d \"/\" ]; then\n usage=$(df -P / 2>&1 | tail -n 1 | awk '{print $5}')\n if [ \"$usage\" -gt 85 ]; then\n echo \"WARNING: Disk usage for / is at ${usage}% (above 85% threshold)\"\n ",
|
|
"required_markers": [
|
|
"#!/usr/bin/env bash",
|
|
"df -P /",
|
|
"85",
|
|
"exit 1"
|
|
],
|
|
"markers_hit": [
|
|
"df -P /",
|
|
"85",
|
|
"exit 1"
|
|
],
|
|
"marker_hit_rate": 0.75,
|
|
"format_rule": "bash_code",
|
|
"format_ok": false,
|
|
"usable_answer": true,
|
|
"error": null
|
|
},
|
|
{
|
|
"type": "call",
|
|
"ts_utc": "2026-04-29T10:03:31Z",
|
|
"cell_id": "pavilion:weeyuga:qwen3.5:9b-q4km",
|
|
"model": "qwen3.5:9b-q4km",
|
|
"phase": "5q",
|
|
"question_id": "ipv4_python_tests",
|
|
"run_idx": 1,
|
|
"duration_seconds": 24.316,
|
|
"prompt_tokens": 55,
|
|
"completion_tokens": 947,
|
|
"tokens_per_second": 38.95,
|
|
"finish_reason": "stop",
|
|
"weeyuga_meta": null,
|
|
"status_code": 200,
|
|
"response_chars": 493,
|
|
"response_preview": "```python\ndef is_valid_ipv4(value: str) -> bool:\n if not isinstance(value, str):\n return False\n if len(value) != 4:\n return False\n for i in range(4):\n if not value[i].isdigit():\n return False\n ret",
|
|
"required_markers": [
|
|
"def is_valid_ipv4",
|
|
"def test_",
|
|
"assert",
|
|
"split('.')"
|
|
],
|
|
"markers_hit": [
|
|
"def is_valid_ipv4",
|
|
"def test_",
|
|
"assert"
|
|
],
|
|
"marker_hit_rate": 0.75,
|
|
"format_rule": "python_code",
|
|
"format_ok": true,
|
|
"usable_answer": true,
|
|
"error": null
|
|
},
|
|
{
|
|
"type": "call",
|
|
"ts_utc": "2026-04-29T10:03:56Z",
|
|
"cell_id": "pavilion:weeyuga:qwen3.5:9b-q4km",
|
|
"model": "qwen3.5:9b-q4km",
|
|
"phase": "5q",
|
|
"question_id": "nginx_safe_reload",
|
|
"run_idx": 2,
|
|
"duration_seconds": 25.495,
|
|
"prompt_tokens": 39,
|
|
"completion_tokens": 1060,
|
|
"tokens_per_second": 41.58,
|
|
"finish_reason": "stop",
|
|
"weeyuga_meta": null,
|
|
"status_code": 200,
|
|
"response_chars": 75,
|
|
"response_preview": "cp /etc/nginx/nginx.conf /etc/nginx/nginx.conf.bak\nnginx -t\nnginx -s reload",
|
|
"required_markers": [
|
|
"cp /etc/nginx/nginx.conf",
|
|
"nginx -t",
|
|
"systemctl reload nginx",
|
|
"&&"
|
|
],
|
|
"markers_hit": [
|
|
"cp /etc/nginx/nginx.conf",
|
|
"nginx -t"
|
|
],
|
|
"marker_hit_rate": 0.5,
|
|
"format_rule": "shell_lines",
|
|
"format_ok": true,
|
|
"usable_answer": true,
|
|
"error": null
|
|
},
|
|
{
|
|
"type": "call",
|
|
"ts_utc": "2026-04-29T10:04:46Z",
|
|
"cell_id": "pavilion:weeyuga:qwen3.5:9b-q4km",
|
|
"model": "qwen3.5:9b-q4km",
|
|
"phase": "5q",
|
|
"question_id": "yaml_cli_plan",
|
|
"run_idx": 3,
|
|
"duration_seconds": 49.664,
|
|
"prompt_tokens": 43,
|
|
"completion_tokens": 2048,
|
|
"tokens_per_second": 41.24,
|
|
"finish_reason": "length",
|
|
"weeyuga_meta": null,
|
|
"status_code": 200,
|
|
"response_chars": 9006,
|
|
"response_preview": "```python\n#!/usr/bin/env python3\n\"\"\"\nCLI: git-yaml-validator\nScans a git repo for changed YAML files, validates them against a JSON schema, and exits nonzero on failure.\n\"\"\"\n\nimport argparse\nimport json\nimport os\nimport sys\nfrom pathlib imp",
|
|
"required_markers": [
|
|
"1.",
|
|
"2.",
|
|
"3.",
|
|
"4.",
|
|
"JSON schema",
|
|
"git"
|
|
],
|
|
"markers_hit": [
|
|
"JSON schema",
|
|
"git"
|
|
],
|
|
"marker_hit_rate": 0.333,
|
|
"format_rule": "four_numbered_steps",
|
|
"format_ok": false,
|
|
"usable_answer": true,
|
|
"error": null
|
|
},
|
|
{
|
|
"type": "call",
|
|
"ts_utc": "2026-04-29T10:05:14Z",
|
|
"cell_id": "pavilion:weeyuga:qwen3.5:9b-q4km",
|
|
"model": "qwen3.5:9b-q4km",
|
|
"phase": "5q",
|
|
"question_id": "ssh_lockout_triage",
|
|
"run_idx": 4,
|
|
"duration_seconds": 27.576,
|
|
"prompt_tokens": 56,
|
|
"completion_tokens": 1128,
|
|
"tokens_per_second": 40.91,
|
|
"finish_reason": "stop",
|
|
"weeyuga_meta": null,
|
|
"status_code": 200,
|
|
"response_chars": 401,
|
|
"response_preview": "- Review `sshd_config` to ensure `PermitRootLogin no` and `PasswordAuthentication no` are set correctly.\n- Review `authorized_keys` to ensure no sensitive keys are stored or misconfigured.\n- Run `journalctl` to check for recent security iss",
|
|
"required_markers": [
|
|
"sshd_config",
|
|
"authorized_keys",
|
|
"journalctl",
|
|
"rollback",
|
|
"PasswordAuthentication"
|
|
],
|
|
"markers_hit": [
|
|
"sshd_config",
|
|
"authorized_keys",
|
|
"journalctl",
|
|
"rollback",
|
|
"PasswordAuthentication"
|
|
],
|
|
"marker_hit_rate": 1.0,
|
|
"format_rule": "five_bullets",
|
|
"format_ok": true,
|
|
"usable_answer": true,
|
|
"error": null
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"cell_id": "pavilion:weeyuga:qwen3.5:2b",
|
|
"n_calls": 6,
|
|
"n_errors": 0,
|
|
"duration_ms_mean": null,
|
|
"duration_ms_p50": null,
|
|
"duration_ms_p95": null,
|
|
"tokens_per_sec_mean": null,
|
|
"tokens_per_sec_p50": null,
|
|
"tokens_per_sec_p95": null,
|
|
"tokens_per_sec_max": null,
|
|
"cold_warm_split": {
|
|
"cold": {
|
|
"n_calls": 2,
|
|
"duration_ms_mean": null,
|
|
"duration_ms_p50": null,
|
|
"tokens_per_sec_mean": null
|
|
},
|
|
"warm": {
|
|
"n_calls": 4,
|
|
"duration_ms_mean": null,
|
|
"duration_ms_p50": null,
|
|
"tokens_per_sec_mean": null
|
|
}
|
|
},
|
|
"per_prompt": {
|
|
"unknown": {
|
|
"n_calls": 6,
|
|
"n_errors": 0,
|
|
"duration_ms_mean": null,
|
|
"duration_ms_p50": null,
|
|
"tokens_per_sec_mean": null,
|
|
"tokens_per_sec_p50": null
|
|
}
|
|
},
|
|
"chars_split": null,
|
|
"timeline": [
|
|
{
|
|
"prompt_id": null,
|
|
"run_idx": 0,
|
|
"phase": "hello",
|
|
"duration_ms": null,
|
|
"tokens_per_sec": null,
|
|
"error": null
|
|
},
|
|
{
|
|
"prompt_id": null,
|
|
"run_idx": 0,
|
|
"phase": "5q",
|
|
"duration_ms": null,
|
|
"tokens_per_sec": null,
|
|
"error": null
|
|
},
|
|
{
|
|
"prompt_id": null,
|
|
"run_idx": 1,
|
|
"phase": "5q",
|
|
"duration_ms": null,
|
|
"tokens_per_sec": null,
|
|
"error": null
|
|
},
|
|
{
|
|
"prompt_id": null,
|
|
"run_idx": 2,
|
|
"phase": "5q",
|
|
"duration_ms": null,
|
|
"tokens_per_sec": null,
|
|
"error": null
|
|
},
|
|
{
|
|
"prompt_id": null,
|
|
"run_idx": 3,
|
|
"phase": "5q",
|
|
"duration_ms": null,
|
|
"tokens_per_sec": null,
|
|
"error": null
|
|
},
|
|
{
|
|
"prompt_id": null,
|
|
"run_idx": 4,
|
|
"phase": "5q",
|
|
"duration_ms": null,
|
|
"tokens_per_sec": null,
|
|
"error": null
|
|
}
|
|
],
|
|
"calls": [
|
|
{
|
|
"type": "call",
|
|
"ts_utc": "2026-04-29T10:05:18Z",
|
|
"cell_id": "pavilion:weeyuga:qwen3.5:2b",
|
|
"model": "qwen3.5:2b",
|
|
"phase": "hello",
|
|
"question_id": "hello_check",
|
|
"run_idx": 0,
|
|
"duration_seconds": 4.366,
|
|
"prompt_tokens": 16,
|
|
"completion_tokens": 157,
|
|
"tokens_per_second": 35.96,
|
|
"finish_reason": "stop",
|
|
"weeyuga_meta": null,
|
|
"status_code": 200,
|
|
"response_chars": 91,
|
|
"response_preview": "Hello! I'd be happy to help you with anything you need. What can I assist you with today? \ud83d\ude0a",
|
|
"required_markers": [],
|
|
"markers_hit": [],
|
|
"marker_hit_rate": null,
|
|
"format_rule": "",
|
|
"format_ok": null,
|
|
"usable_answer": true,
|
|
"error": null
|
|
},
|
|
{
|
|
"type": "call",
|
|
"ts_utc": "2026-04-29T10:05:36Z",
|
|
"cell_id": "pavilion:weeyuga:qwen3.5:2b",
|
|
"model": "qwen3.5:2b",
|
|
"phase": "5q",
|
|
"question_id": "disk_guard_bash",
|
|
"run_idx": 0,
|
|
"duration_seconds": 18.333,
|
|
"prompt_tokens": 71,
|
|
"completion_tokens": 751,
|
|
"tokens_per_second": 40.96,
|
|
"finish_reason": "stop",
|
|
"weeyuga_meta": null,
|
|
"status_code": 200,
|
|
"response_chars": 246,
|
|
"response_preview": "```bash\n#!/bin/bash\n\n# Check disk usage for /\nif [ -d \"/\" ]; then\n usage=$(df -P / | awk '{print $5}')\n if [ \"$usage\" -gt 85 ]; then\n echo \"WARNING: Disk usage for / is at ${usage}% (above 85% threshold)\"\n exit 1\n fi\n",
|
|
"required_markers": [
|
|
"#!/usr/bin/env bash",
|
|
"df -P /",
|
|
"85",
|
|
"exit 1"
|
|
],
|
|
"markers_hit": [
|
|
"df -P /",
|
|
"85",
|
|
"exit 1"
|
|
],
|
|
"marker_hit_rate": 0.75,
|
|
"format_rule": "bash_code",
|
|
"format_ok": false,
|
|
"usable_answer": true,
|
|
"error": null
|
|
},
|
|
{
|
|
"type": "call",
|
|
"ts_utc": "2026-04-29T10:05:56Z",
|
|
"cell_id": "pavilion:weeyuga:qwen3.5:2b",
|
|
"model": "qwen3.5:2b",
|
|
"phase": "5q",
|
|
"question_id": "ipv4_python_tests",
|
|
"run_idx": 1,
|
|
"duration_seconds": 19.861,
|
|
"prompt_tokens": 55,
|
|
"completion_tokens": 820,
|
|
"tokens_per_second": 41.29,
|
|
"finish_reason": "stop",
|
|
"weeyuga_meta": null,
|
|
"status_code": 200,
|
|
"response_chars": 771,
|
|
"response_preview": "```python\ndef is_valid_ipv4(value: str) -> bool:\n \"\"\"Check if a string is a valid IPv4 address.\"\"\"\n if not isinstance(value, str):\n return False\n parts = value.split('.')\n if len(parts) != 4:\n return False\n for ",
|
|
"required_markers": [
|
|
"def is_valid_ipv4",
|
|
"def test_",
|
|
"assert",
|
|
"split('.')"
|
|
],
|
|
"markers_hit": [
|
|
"def is_valid_ipv4",
|
|
"def test_",
|
|
"assert",
|
|
"split('.')"
|
|
],
|
|
"marker_hit_rate": 1.0,
|
|
"format_rule": "python_code",
|
|
"format_ok": true,
|
|
"usable_answer": true,
|
|
"error": null
|
|
},
|
|
{
|
|
"type": "call",
|
|
"ts_utc": "2026-04-29T10:06:22Z",
|
|
"cell_id": "pavilion:weeyuga:qwen3.5:2b",
|
|
"model": "qwen3.5:2b",
|
|
"phase": "5q",
|
|
"question_id": "nginx_safe_reload",
|
|
"run_idx": 2,
|
|
"duration_seconds": 25.77,
|
|
"prompt_tokens": 39,
|
|
"completion_tokens": 1060,
|
|
"tokens_per_second": 41.13,
|
|
"finish_reason": "stop",
|
|
"weeyuga_meta": null,
|
|
"status_code": 200,
|
|
"response_chars": 75,
|
|
"response_preview": "cp /etc/nginx/nginx.conf /etc/nginx/nginx.conf.bak\nnginx -t\nnginx -s reload",
|
|
"required_markers": [
|
|
"cp /etc/nginx/nginx.conf",
|
|
"nginx -t",
|
|
"systemctl reload nginx",
|
|
"&&"
|
|
],
|
|
"markers_hit": [
|
|
"cp /etc/nginx/nginx.conf",
|
|
"nginx -t"
|
|
],
|
|
"marker_hit_rate": 0.5,
|
|
"format_rule": "shell_lines",
|
|
"format_ok": true,
|
|
"usable_answer": true,
|
|
"error": null
|
|
},
|
|
{
|
|
"type": "call",
|
|
"ts_utc": "2026-04-29T10:07:12Z",
|
|
"cell_id": "pavilion:weeyuga:qwen3.5:2b",
|
|
"model": "qwen3.5:2b",
|
|
"phase": "5q",
|
|
"question_id": "yaml_cli_plan",
|
|
"run_idx": 3,
|
|
"duration_seconds": 50.003,
|
|
"prompt_tokens": 43,
|
|
"completion_tokens": 2048,
|
|
"tokens_per_second": 40.96,
|
|
"finish_reason": "length",
|
|
"weeyuga_meta": null,
|
|
"status_code": 200,
|
|
"response_chars": 11731,
|
|
"response_preview": "1. Initialize a Python project structure with a `main.py` script and a `schema.json` file for validation.\n2. Create a `git` command-line interface that uses `git diff` to identify changed files in the repository.\n3. Implement a YAML parser ",
|
|
"required_markers": [
|
|
"1.",
|
|
"2.",
|
|
"3.",
|
|
"4.",
|
|
"JSON schema",
|
|
"git"
|
|
],
|
|
"markers_hit": [
|
|
"1.",
|
|
"2.",
|
|
"3.",
|
|
"4.",
|
|
"JSON schema",
|
|
"git"
|
|
],
|
|
"marker_hit_rate": 1.0,
|
|
"format_rule": "four_numbered_steps",
|
|
"format_ok": true,
|
|
"usable_answer": true,
|
|
"error": null
|
|
},
|
|
{
|
|
"type": "call",
|
|
"ts_utc": "2026-04-29T10:07:39Z",
|
|
"cell_id": "pavilion:weeyuga:qwen3.5:2b",
|
|
"model": "qwen3.5:2b",
|
|
"phase": "5q",
|
|
"question_id": "ssh_lockout_triage",
|
|
"run_idx": 4,
|
|
"duration_seconds": 27.148,
|
|
"prompt_tokens": 56,
|
|
"completion_tokens": 1113,
|
|
"tokens_per_second": 41.0,
|
|
"finish_reason": "stop",
|
|
"weeyuga_meta": null,
|
|
"status_code": 200,
|
|
"response_chars": 378,
|
|
"response_preview": "- Review `sshd_config` for hardening settings before making changes.\n- Review `authorized_keys` permissions to ensure only trusted keys are allowed.\n- Review `PasswordAuthentication` settings to prevent unauthorized password usage.\n- Review",
|
|
"required_markers": [
|
|
"sshd_config",
|
|
"authorized_keys",
|
|
"journalctl",
|
|
"rollback",
|
|
"PasswordAuthentication"
|
|
],
|
|
"markers_hit": [
|
|
"sshd_config",
|
|
"authorized_keys",
|
|
"journalctl",
|
|
"rollback",
|
|
"PasswordAuthentication"
|
|
],
|
|
"marker_hit_rate": 1.0,
|
|
"format_rule": "five_bullets",
|
|
"format_ok": true,
|
|
"usable_answer": true,
|
|
"error": null
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"cell_id": "pavilion:weeyuga:qwen3.5:0.8b",
|
|
"n_calls": 6,
|
|
"n_errors": 0,
|
|
"duration_ms_mean": null,
|
|
"duration_ms_p50": null,
|
|
"duration_ms_p95": null,
|
|
"tokens_per_sec_mean": null,
|
|
"tokens_per_sec_p50": null,
|
|
"tokens_per_sec_p95": null,
|
|
"tokens_per_sec_max": null,
|
|
"cold_warm_split": {
|
|
"cold": {
|
|
"n_calls": 2,
|
|
"duration_ms_mean": null,
|
|
"duration_ms_p50": null,
|
|
"tokens_per_sec_mean": null
|
|
},
|
|
"warm": {
|
|
"n_calls": 4,
|
|
"duration_ms_mean": null,
|
|
"duration_ms_p50": null,
|
|
"tokens_per_sec_mean": null
|
|
}
|
|
},
|
|
"per_prompt": {
|
|
"unknown": {
|
|
"n_calls": 6,
|
|
"n_errors": 0,
|
|
"duration_ms_mean": null,
|
|
"duration_ms_p50": null,
|
|
"tokens_per_sec_mean": null,
|
|
"tokens_per_sec_p50": null
|
|
}
|
|
},
|
|
"chars_split": null,
|
|
"timeline": [
|
|
{
|
|
"prompt_id": null,
|
|
"run_idx": 0,
|
|
"phase": "hello",
|
|
"duration_ms": null,
|
|
"tokens_per_sec": null,
|
|
"error": null
|
|
},
|
|
{
|
|
"prompt_id": null,
|
|
"run_idx": 0,
|
|
"phase": "5q",
|
|
"duration_ms": null,
|
|
"tokens_per_sec": null,
|
|
"error": null
|
|
},
|
|
{
|
|
"prompt_id": null,
|
|
"run_idx": 1,
|
|
"phase": "5q",
|
|
"duration_ms": null,
|
|
"tokens_per_sec": null,
|
|
"error": null
|
|
},
|
|
{
|
|
"prompt_id": null,
|
|
"run_idx": 2,
|
|
"phase": "5q",
|
|
"duration_ms": null,
|
|
"tokens_per_sec": null,
|
|
"error": null
|
|
},
|
|
{
|
|
"prompt_id": null,
|
|
"run_idx": 3,
|
|
"phase": "5q",
|
|
"duration_ms": null,
|
|
"tokens_per_sec": null,
|
|
"error": null
|
|
},
|
|
{
|
|
"prompt_id": null,
|
|
"run_idx": 4,
|
|
"phase": "5q",
|
|
"duration_ms": null,
|
|
"tokens_per_sec": null,
|
|
"error": null
|
|
}
|
|
],
|
|
"calls": [
|
|
{
|
|
"type": "call",
|
|
"ts_utc": "2026-04-29T10:07:43Z",
|
|
"cell_id": "pavilion:weeyuga:qwen3.5:0.8b",
|
|
"model": "qwen3.5:0.8b",
|
|
"phase": "hello",
|
|
"question_id": "hello_check",
|
|
"run_idx": 0,
|
|
"duration_seconds": 3.869,
|
|
"prompt_tokens": 16,
|
|
"completion_tokens": 141,
|
|
"tokens_per_second": 36.44,
|
|
"finish_reason": "stop",
|
|
"weeyuga_meta": null,
|
|
"status_code": 200,
|
|
"response_chars": 57,
|
|
"response_preview": "Hello! I'm glad to help you. What would you like to do? \ud83d\ude0a",
|
|
"required_markers": [],
|
|
"markers_hit": [],
|
|
"marker_hit_rate": null,
|
|
"format_rule": "",
|
|
"format_ok": null,
|
|
"usable_answer": true,
|
|
"error": null
|
|
},
|
|
{
|
|
"type": "call",
|
|
"ts_utc": "2026-04-29T10:08:00Z",
|
|
"cell_id": "pavilion:weeyuga:qwen3.5:0.8b",
|
|
"model": "qwen3.5:0.8b",
|
|
"phase": "5q",
|
|
"question_id": "disk_guard_bash",
|
|
"run_idx": 0,
|
|
"duration_seconds": 16.794,
|
|
"prompt_tokens": 71,
|
|
"completion_tokens": 688,
|
|
"tokens_per_second": 40.97,
|
|
"finish_reason": "stop",
|
|
"weeyuga_meta": null,
|
|
"status_code": 200,
|
|
"response_chars": 217,
|
|
"response_preview": "#!/bin/bash\n\n# Check disk usage for /\nif [ -d \"/\" ]; then\n usage=$(df -P / 2>&1 | awk '{print $5}')\n if [ \"$usage\" -gt 85 ]; then\n echo \"WARNING: Disk usage for / is at ${usage}%\"\n exit 1\n fi\nfi",
|
|
"required_markers": [
|
|
"#!/usr/bin/env bash",
|
|
"df -P /",
|
|
"85",
|
|
"exit 1"
|
|
],
|
|
"markers_hit": [
|
|
"df -P /",
|
|
"85",
|
|
"exit 1"
|
|
],
|
|
"marker_hit_rate": 0.75,
|
|
"format_rule": "bash_code",
|
|
"format_ok": false,
|
|
"usable_answer": true,
|
|
"error": null
|
|
},
|
|
{
|
|
"type": "call",
|
|
"ts_utc": "2026-04-29T10:08:18Z",
|
|
"cell_id": "pavilion:weeyuga:qwen3.5:0.8b",
|
|
"model": "qwen3.5:0.8b",
|
|
"phase": "5q",
|
|
"question_id": "ipv4_python_tests",
|
|
"run_idx": 1,
|
|
"duration_seconds": 17.714,
|
|
"prompt_tokens": 55,
|
|
"completion_tokens": 722,
|
|
"tokens_per_second": 40.76,
|
|
"finish_reason": "stop",
|
|
"weeyuga_meta": null,
|
|
"status_code": 200,
|
|
"response_chars": 644,
|
|
"response_preview": "```python\ndef is_valid_ipv4(value: str) -> bool:\n \"\"\"Check if a string is a valid IPv4 address.\"\"\"\n if not isinstance(value, str):\n return False\n parts = value.split('.')\n if len(parts) != 4:\n return False\n for ",
|
|
"required_markers": [
|
|
"def is_valid_ipv4",
|
|
"def test_",
|
|
"assert",
|
|
"split('.')"
|
|
],
|
|
"markers_hit": [
|
|
"def is_valid_ipv4",
|
|
"def test_",
|
|
"assert",
|
|
"split('.')"
|
|
],
|
|
"marker_hit_rate": 1.0,
|
|
"format_rule": "python_code",
|
|
"format_ok": true,
|
|
"usable_answer": true,
|
|
"error": null
|
|
},
|
|
{
|
|
"type": "call",
|
|
"ts_utc": "2026-04-29T10:08:44Z",
|
|
"cell_id": "pavilion:weeyuga:qwen3.5:0.8b",
|
|
"model": "qwen3.5:0.8b",
|
|
"phase": "5q",
|
|
"question_id": "nginx_safe_reload",
|
|
"run_idx": 2,
|
|
"duration_seconds": 26.52,
|
|
"prompt_tokens": 39,
|
|
"completion_tokens": 1087,
|
|
"tokens_per_second": 40.99,
|
|
"finish_reason": "stop",
|
|
"weeyuga_meta": null,
|
|
"status_code": 200,
|
|
"response_chars": 177,
|
|
"response_preview": "cp /etc/nginx/nginx.conf /etc/nginx/nginx.conf.backup\nnginx -t\nif nginx -t; then\n nginx -s reload\nelse\n echo \"Validation failed, reloading nginx...\"\n nginx -s reload\nfi",
|
|
"required_markers": [
|
|
"cp /etc/nginx/nginx.conf",
|
|
"nginx -t",
|
|
"systemctl reload nginx",
|
|
"&&"
|
|
],
|
|
"markers_hit": [
|
|
"cp /etc/nginx/nginx.conf",
|
|
"nginx -t"
|
|
],
|
|
"marker_hit_rate": 0.5,
|
|
"format_rule": "shell_lines",
|
|
"format_ok": true,
|
|
"usable_answer": true,
|
|
"error": null
|
|
},
|
|
{
|
|
"type": "call",
|
|
"ts_utc": "2026-04-29T10:09:35Z",
|
|
"cell_id": "pavilion:weeyuga:qwen3.5:0.8b",
|
|
"model": "qwen3.5:0.8b",
|
|
"phase": "5q",
|
|
"question_id": "yaml_cli_plan",
|
|
"run_idx": 3,
|
|
"duration_seconds": 50.97,
|
|
"prompt_tokens": 43,
|
|
"completion_tokens": 2048,
|
|
"tokens_per_second": 40.18,
|
|
"finish_reason": "length",
|
|
"weeyuga_meta": null,
|
|
"status_code": 200,
|
|
"response_chars": 7262,
|
|
"response_preview": "# Python CLI for Git-Scanned YAML Schema Validation\n\n```python\n#!/usr/bin/env python3\n\"\"\"\nYAML Schema Validator CLI\nScans git repos for changed YAML files and validates against JSON schemas.\n\"\"\"\n\nimport argparse\nimport json\nimport os\nimport",
|
|
"required_markers": [
|
|
"1.",
|
|
"2.",
|
|
"3.",
|
|
"4.",
|
|
"JSON schema",
|
|
"git"
|
|
],
|
|
"markers_hit": [
|
|
"1.",
|
|
"2.",
|
|
"3.",
|
|
"4.",
|
|
"JSON schema",
|
|
"git"
|
|
],
|
|
"marker_hit_rate": 1.0,
|
|
"format_rule": "four_numbered_steps",
|
|
"format_ok": true,
|
|
"usable_answer": true,
|
|
"error": null
|
|
},
|
|
{
|
|
"type": "call",
|
|
"ts_utc": "2026-04-29T10:10:03Z",
|
|
"cell_id": "pavilion:weeyuga:qwen3.5:0.8b",
|
|
"model": "qwen3.5:0.8b",
|
|
"phase": "5q",
|
|
"question_id": "ssh_lockout_triage",
|
|
"run_idx": 4,
|
|
"duration_seconds": 27.569,
|
|
"prompt_tokens": 56,
|
|
"completion_tokens": 1125,
|
|
"tokens_per_second": 40.81,
|
|
"finish_reason": "stop",
|
|
"weeyuga_meta": null,
|
|
"status_code": 200,
|
|
"response_chars": 428,
|
|
"response_preview": "- Review `sshd_config` for security settings like `PermitRootLogin no` and `PasswordAuthentication no`.\n- Review `authorized_keys` permissions and ownership to ensure only trusted users can access.\n- Review `PasswordAuthentication` settings",
|
|
"required_markers": [
|
|
"sshd_config",
|
|
"authorized_keys",
|
|
"journalctl",
|
|
"rollback",
|
|
"PasswordAuthentication"
|
|
],
|
|
"markers_hit": [
|
|
"sshd_config",
|
|
"authorized_keys",
|
|
"journalctl",
|
|
"rollback",
|
|
"PasswordAuthentication"
|
|
],
|
|
"marker_hit_rate": 1.0,
|
|
"format_rule": "five_bullets",
|
|
"format_ok": true,
|
|
"usable_answer": true,
|
|
"error": null
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"cell_id": "pavilion:weeyuga:qwen3.5:9b",
|
|
"n_calls": 6,
|
|
"n_errors": 0,
|
|
"duration_ms_mean": null,
|
|
"duration_ms_p50": null,
|
|
"duration_ms_p95": null,
|
|
"tokens_per_sec_mean": null,
|
|
"tokens_per_sec_p50": null,
|
|
"tokens_per_sec_p95": null,
|
|
"tokens_per_sec_max": null,
|
|
"cold_warm_split": {
|
|
"cold": {
|
|
"n_calls": 2,
|
|
"duration_ms_mean": null,
|
|
"duration_ms_p50": null,
|
|
"tokens_per_sec_mean": null
|
|
},
|
|
"warm": {
|
|
"n_calls": 4,
|
|
"duration_ms_mean": null,
|
|
"duration_ms_p50": null,
|
|
"tokens_per_sec_mean": null
|
|
}
|
|
},
|
|
"per_prompt": {
|
|
"unknown": {
|
|
"n_calls": 6,
|
|
"n_errors": 0,
|
|
"duration_ms_mean": null,
|
|
"duration_ms_p50": null,
|
|
"tokens_per_sec_mean": null,
|
|
"tokens_per_sec_p50": null
|
|
}
|
|
},
|
|
"chars_split": null,
|
|
"timeline": [
|
|
{
|
|
"prompt_id": null,
|
|
"run_idx": 0,
|
|
"phase": "hello",
|
|
"duration_ms": null,
|
|
"tokens_per_sec": null,
|
|
"error": null
|
|
},
|
|
{
|
|
"prompt_id": null,
|
|
"run_idx": 0,
|
|
"phase": "5q",
|
|
"duration_ms": null,
|
|
"tokens_per_sec": null,
|
|
"error": null
|
|
},
|
|
{
|
|
"prompt_id": null,
|
|
"run_idx": 1,
|
|
"phase": "5q",
|
|
"duration_ms": null,
|
|
"tokens_per_sec": null,
|
|
"error": null
|
|
},
|
|
{
|
|
"prompt_id": null,
|
|
"run_idx": 2,
|
|
"phase": "5q",
|
|
"duration_ms": null,
|
|
"tokens_per_sec": null,
|
|
"error": null
|
|
},
|
|
{
|
|
"prompt_id": null,
|
|
"run_idx": 3,
|
|
"phase": "5q",
|
|
"duration_ms": null,
|
|
"tokens_per_sec": null,
|
|
"error": null
|
|
},
|
|
{
|
|
"prompt_id": null,
|
|
"run_idx": 4,
|
|
"phase": "5q",
|
|
"duration_ms": null,
|
|
"tokens_per_sec": null,
|
|
"error": null
|
|
}
|
|
],
|
|
"calls": [
|
|
{
|
|
"type": "call",
|
|
"ts_utc": "2026-04-29T10:10:13Z",
|
|
"cell_id": "pavilion:weeyuga:qwen3.5:9b",
|
|
"model": "qwen3.5:9b",
|
|
"phase": "hello",
|
|
"question_id": "hello_check",
|
|
"run_idx": 0,
|
|
"duration_seconds": 10.852,
|
|
"prompt_tokens": 16,
|
|
"completion_tokens": 416,
|
|
"tokens_per_second": 38.33,
|
|
"finish_reason": "stop",
|
|
"weeyuga_meta": null,
|
|
"status_code": 200,
|
|
"response_chars": 157,
|
|
"response_preview": "Hello! I'm glad to help you. What would you like to do today? Whether it's answering questions, solving problems, or just chatting, I'm here to assist you! \ud83d\ude0a",
|
|
"required_markers": [],
|
|
"markers_hit": [],
|
|
"marker_hit_rate": null,
|
|
"format_rule": "",
|
|
"format_ok": null,
|
|
"usable_answer": true,
|
|
"error": null
|
|
},
|
|
{
|
|
"type": "call",
|
|
"ts_utc": "2026-04-29T10:10:40Z",
|
|
"cell_id": "pavilion:weeyuga:qwen3.5:9b",
|
|
"model": "qwen3.5:9b",
|
|
"phase": "5q",
|
|
"question_id": "disk_guard_bash",
|
|
"run_idx": 0,
|
|
"duration_seconds": 26.865,
|
|
"prompt_tokens": 71,
|
|
"completion_tokens": 1127,
|
|
"tokens_per_second": 41.95,
|
|
"finish_reason": "stop",
|
|
"weeyuga_meta": null,
|
|
"status_code": 200,
|
|
"response_chars": 247,
|
|
"response_preview": "#!/bin/bash\n\n# Check disk usage for /\nif [ -d \"/\" ]; then\n usage=$(df -P / 2>&1 | awk '{print $5}')\n if [ \"$usage\" -gt 85 ]; then\n echo \"WARNING: Disk usage for / is at ${usage}% (above 85% threshold)\"\n exit 1\n fi\nfi\n",
|
|
"required_markers": [
|
|
"#!/usr/bin/env bash",
|
|
"df -P /",
|
|
"85",
|
|
"exit 1"
|
|
],
|
|
"markers_hit": [
|
|
"df -P /",
|
|
"85",
|
|
"exit 1"
|
|
],
|
|
"marker_hit_rate": 0.75,
|
|
"format_rule": "bash_code",
|
|
"format_ok": false,
|
|
"usable_answer": true,
|
|
"error": null
|
|
},
|
|
{
|
|
"type": "call",
|
|
"ts_utc": "2026-04-29T10:10:58Z",
|
|
"cell_id": "pavilion:weeyuga:qwen3.5:9b",
|
|
"model": "qwen3.5:9b",
|
|
"phase": "5q",
|
|
"question_id": "ipv4_python_tests",
|
|
"run_idx": 1,
|
|
"duration_seconds": 17.892,
|
|
"prompt_tokens": 55,
|
|
"completion_tokens": 732,
|
|
"tokens_per_second": 40.91,
|
|
"finish_reason": "stop",
|
|
"weeyuga_meta": null,
|
|
"status_code": 200,
|
|
"response_chars": 574,
|
|
"response_preview": "```python\ndef is_valid_ipv4(value: str) -> bool:\n \"\"\"Check if a string is a valid IPv4 address.\"\"\"\n if not isinstance(value, str):\n return False\n parts = value.split('.')\n if len(parts) != 4:\n return False\n for ",
|
|
"required_markers": [
|
|
"def is_valid_ipv4",
|
|
"def test_",
|
|
"assert",
|
|
"split('.')"
|
|
],
|
|
"markers_hit": [
|
|
"def is_valid_ipv4",
|
|
"def test_",
|
|
"assert",
|
|
"split('.')"
|
|
],
|
|
"marker_hit_rate": 1.0,
|
|
"format_rule": "python_code",
|
|
"format_ok": false,
|
|
"usable_answer": true,
|
|
"error": null
|
|
},
|
|
{
|
|
"type": "call",
|
|
"ts_utc": "2026-04-29T10:11:25Z",
|
|
"cell_id": "pavilion:weeyuga:qwen3.5:9b",
|
|
"model": "qwen3.5:9b",
|
|
"phase": "5q",
|
|
"question_id": "nginx_safe_reload",
|
|
"run_idx": 2,
|
|
"duration_seconds": 26.612,
|
|
"prompt_tokens": 39,
|
|
"completion_tokens": 1073,
|
|
"tokens_per_second": 40.32,
|
|
"finish_reason": "stop",
|
|
"weeyuga_meta": null,
|
|
"status_code": 200,
|
|
"response_chars": 104,
|
|
"response_preview": "cp /etc/nginx/nginx.conf /etc/nginx/nginx.conf.bak\nnginx -t\nif [ $? -eq 0 ]; then\n nginx -s reload\nfi",
|
|
"required_markers": [
|
|
"cp /etc/nginx/nginx.conf",
|
|
"nginx -t",
|
|
"systemctl reload nginx",
|
|
"&&"
|
|
],
|
|
"markers_hit": [
|
|
"cp /etc/nginx/nginx.conf",
|
|
"nginx -t"
|
|
],
|
|
"marker_hit_rate": 0.5,
|
|
"format_rule": "shell_lines",
|
|
"format_ok": true,
|
|
"usable_answer": true,
|
|
"error": null
|
|
},
|
|
{
|
|
"type": "call",
|
|
"ts_utc": "2026-04-29T10:12:14Z",
|
|
"cell_id": "pavilion:weeyuga:qwen3.5:9b",
|
|
"model": "qwen3.5:9b",
|
|
"phase": "5q",
|
|
"question_id": "yaml_cli_plan",
|
|
"run_idx": 3,
|
|
"duration_seconds": 49.467,
|
|
"prompt_tokens": 43,
|
|
"completion_tokens": 2048,
|
|
"tokens_per_second": 41.4,
|
|
"finish_reason": "length",
|
|
"weeyuga_meta": null,
|
|
"status_code": 200,
|
|
"response_chars": 9329,
|
|
"response_preview": "```python\n#!/usr/bin/env python3\n\"\"\"\nYAML Schema Validator CLI\nScans git repos for changed YAML files and validates against a JSON schema.\n\"\"\"\n\nimport argparse\nimport json\nimport os\nimport sys\nfrom pathlib import Path\nfrom typing import Lis",
|
|
"required_markers": [
|
|
"1.",
|
|
"2.",
|
|
"3.",
|
|
"4.",
|
|
"JSON schema",
|
|
"git"
|
|
],
|
|
"markers_hit": [
|
|
"JSON schema",
|
|
"git"
|
|
],
|
|
"marker_hit_rate": 0.333,
|
|
"format_rule": "four_numbered_steps",
|
|
"format_ok": false,
|
|
"usable_answer": true,
|
|
"error": null
|
|
},
|
|
{
|
|
"type": "call",
|
|
"ts_utc": "2026-04-29T10:12:42Z",
|
|
"cell_id": "pavilion:weeyuga:qwen3.5:9b",
|
|
"model": "qwen3.5:9b",
|
|
"phase": "5q",
|
|
"question_id": "ssh_lockout_triage",
|
|
"run_idx": 4,
|
|
"duration_seconds": 27.33,
|
|
"prompt_tokens": 56,
|
|
"completion_tokens": 1104,
|
|
"tokens_per_second": 40.4,
|
|
"finish_reason": "stop",
|
|
"weeyuga_meta": null,
|
|
"status_code": 200,
|
|
"response_chars": 279,
|
|
"response_preview": "- Review `sshd_config` to ensure `PasswordAuthentication` is enabled.\n- Verify `authorized_keys` contains the correct public key.\n- Run `journalctl` to check for recent errors.\n- Review `rollback` to ensure the current state is safe.\n- Ensu",
|
|
"required_markers": [
|
|
"sshd_config",
|
|
"authorized_keys",
|
|
"journalctl",
|
|
"rollback",
|
|
"PasswordAuthentication"
|
|
],
|
|
"markers_hit": [
|
|
"sshd_config",
|
|
"authorized_keys",
|
|
"journalctl",
|
|
"rollback",
|
|
"PasswordAuthentication"
|
|
],
|
|
"marker_hit_rate": 1.0,
|
|
"format_rule": "five_bullets",
|
|
"format_ok": true,
|
|
"usable_answer": true,
|
|
"error": null
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"cell_id": "pavilion:weeyuga:qwen2.5-coder:14b",
|
|
"n_calls": 6,
|
|
"n_errors": 6,
|
|
"duration_ms_mean": null,
|
|
"duration_ms_p50": null,
|
|
"duration_ms_p95": null,
|
|
"tokens_per_sec_mean": null,
|
|
"tokens_per_sec_p50": null,
|
|
"tokens_per_sec_p95": null,
|
|
"tokens_per_sec_max": null,
|
|
"cold_warm_split": {
|
|
"cold": {
|
|
"n_calls": 2,
|
|
"duration_ms_mean": null,
|
|
"duration_ms_p50": null,
|
|
"tokens_per_sec_mean": null
|
|
},
|
|
"warm": {
|
|
"n_calls": 4,
|
|
"duration_ms_mean": null,
|
|
"duration_ms_p50": null,
|
|
"tokens_per_sec_mean": null
|
|
}
|
|
},
|
|
"per_prompt": {
|
|
"unknown": {
|
|
"n_calls": 6,
|
|
"n_errors": 6,
|
|
"duration_ms_mean": null,
|
|
"duration_ms_p50": null,
|
|
"tokens_per_sec_mean": null,
|
|
"tokens_per_sec_p50": null
|
|
}
|
|
},
|
|
"chars_split": null,
|
|
"timeline": [
|
|
{
|
|
"prompt_id": null,
|
|
"run_idx": 0,
|
|
"phase": "hello",
|
|
"duration_ms": null,
|
|
"tokens_per_sec": null,
|
|
"error": "<HTTPError 500: 'Internal Server Error'>"
|
|
},
|
|
{
|
|
"prompt_id": null,
|
|
"run_idx": 0,
|
|
"phase": "5q",
|
|
"duration_ms": null,
|
|
"tokens_per_sec": null,
|
|
"error": "<HTTPError 500: 'Internal Server Error'>"
|
|
},
|
|
{
|
|
"prompt_id": null,
|
|
"run_idx": 1,
|
|
"phase": "5q",
|
|
"duration_ms": null,
|
|
"tokens_per_sec": null,
|
|
"error": "<HTTPError 500: 'Internal Server Error'>"
|
|
},
|
|
{
|
|
"prompt_id": null,
|
|
"run_idx": 2,
|
|
"phase": "5q",
|
|
"duration_ms": null,
|
|
"tokens_per_sec": null,
|
|
"error": "<HTTPError 500: 'Internal Server Error'>"
|
|
},
|
|
{
|
|
"prompt_id": null,
|
|
"run_idx": 3,
|
|
"phase": "5q",
|
|
"duration_ms": null,
|
|
"tokens_per_sec": null,
|
|
"error": "<HTTPError 500: 'Internal Server Error'>"
|
|
},
|
|
{
|
|
"prompt_id": null,
|
|
"run_idx": 4,
|
|
"phase": "5q",
|
|
"duration_ms": null,
|
|
"tokens_per_sec": null,
|
|
"error": "<HTTPError 500: 'Internal Server Error'>"
|
|
}
|
|
],
|
|
"calls": [
|
|
{
|
|
"type": "call",
|
|
"ts_utc": "2026-04-29T10:12:49Z",
|
|
"cell_id": "pavilion:weeyuga:qwen2.5-coder:14b",
|
|
"model": "qwen2.5-coder:14b",
|
|
"phase": "hello",
|
|
"question_id": "hello_check",
|
|
"run_idx": 0,
|
|
"duration_seconds": 7.472,
|
|
"prompt_tokens": null,
|
|
"completion_tokens": null,
|
|
"tokens_per_second": null,
|
|
"finish_reason": null,
|
|
"weeyuga_meta": null,
|
|
"status_code": null,
|
|
"response_chars": 0,
|
|
"response_preview": "",
|
|
"required_markers": [],
|
|
"markers_hit": [],
|
|
"marker_hit_rate": null,
|
|
"format_rule": "",
|
|
"format_ok": null,
|
|
"usable_answer": false,
|
|
"error": "<HTTPError 500: 'Internal Server Error'>"
|
|
},
|
|
{
|
|
"type": "call",
|
|
"ts_utc": "2026-04-29T10:12:50Z",
|
|
"cell_id": "pavilion:weeyuga:qwen2.5-coder:14b",
|
|
"model": "qwen2.5-coder:14b",
|
|
"phase": "5q",
|
|
"question_id": "disk_guard_bash",
|
|
"run_idx": 0,
|
|
"duration_seconds": 0.882,
|
|
"prompt_tokens": null,
|
|
"completion_tokens": null,
|
|
"tokens_per_second": null,
|
|
"finish_reason": null,
|
|
"weeyuga_meta": null,
|
|
"status_code": null,
|
|
"response_chars": 0,
|
|
"response_preview": "",
|
|
"required_markers": [
|
|
"#!/usr/bin/env bash",
|
|
"df -P /",
|
|
"85",
|
|
"exit 1"
|
|
],
|
|
"markers_hit": [],
|
|
"marker_hit_rate": 0.0,
|
|
"format_rule": "bash_code",
|
|
"format_ok": false,
|
|
"usable_answer": false,
|
|
"error": "<HTTPError 500: 'Internal Server Error'>"
|
|
},
|
|
{
|
|
"type": "call",
|
|
"ts_utc": "2026-04-29T10:12:51Z",
|
|
"cell_id": "pavilion:weeyuga:qwen2.5-coder:14b",
|
|
"model": "qwen2.5-coder:14b",
|
|
"phase": "5q",
|
|
"question_id": "ipv4_python_tests",
|
|
"run_idx": 1,
|
|
"duration_seconds": 1.059,
|
|
"prompt_tokens": null,
|
|
"completion_tokens": null,
|
|
"tokens_per_second": null,
|
|
"finish_reason": null,
|
|
"weeyuga_meta": null,
|
|
"status_code": null,
|
|
"response_chars": 0,
|
|
"response_preview": "",
|
|
"required_markers": [
|
|
"def is_valid_ipv4",
|
|
"def test_",
|
|
"assert",
|
|
"split('.')"
|
|
],
|
|
"markers_hit": [],
|
|
"marker_hit_rate": 0.0,
|
|
"format_rule": "python_code",
|
|
"format_ok": false,
|
|
"usable_answer": false,
|
|
"error": "<HTTPError 500: 'Internal Server Error'>"
|
|
},
|
|
{
|
|
"type": "call",
|
|
"ts_utc": "2026-04-29T10:12:52Z",
|
|
"cell_id": "pavilion:weeyuga:qwen2.5-coder:14b",
|
|
"model": "qwen2.5-coder:14b",
|
|
"phase": "5q",
|
|
"question_id": "nginx_safe_reload",
|
|
"run_idx": 2,
|
|
"duration_seconds": 0.922,
|
|
"prompt_tokens": null,
|
|
"completion_tokens": null,
|
|
"tokens_per_second": null,
|
|
"finish_reason": null,
|
|
"weeyuga_meta": null,
|
|
"status_code": null,
|
|
"response_chars": 0,
|
|
"response_preview": "",
|
|
"required_markers": [
|
|
"cp /etc/nginx/nginx.conf",
|
|
"nginx -t",
|
|
"systemctl reload nginx",
|
|
"&&"
|
|
],
|
|
"markers_hit": [],
|
|
"marker_hit_rate": 0.0,
|
|
"format_rule": "shell_lines",
|
|
"format_ok": false,
|
|
"usable_answer": false,
|
|
"error": "<HTTPError 500: 'Internal Server Error'>"
|
|
},
|
|
{
|
|
"type": "call",
|
|
"ts_utc": "2026-04-29T10:12:53Z",
|
|
"cell_id": "pavilion:weeyuga:qwen2.5-coder:14b",
|
|
"model": "qwen2.5-coder:14b",
|
|
"phase": "5q",
|
|
"question_id": "yaml_cli_plan",
|
|
"run_idx": 3,
|
|
"duration_seconds": 0.991,
|
|
"prompt_tokens": null,
|
|
"completion_tokens": null,
|
|
"tokens_per_second": null,
|
|
"finish_reason": null,
|
|
"weeyuga_meta": null,
|
|
"status_code": null,
|
|
"response_chars": 0,
|
|
"response_preview": "",
|
|
"required_markers": [
|
|
"1.",
|
|
"2.",
|
|
"3.",
|
|
"4.",
|
|
"JSON schema",
|
|
"git"
|
|
],
|
|
"markers_hit": [],
|
|
"marker_hit_rate": 0.0,
|
|
"format_rule": "four_numbered_steps",
|
|
"format_ok": false,
|
|
"usable_answer": false,
|
|
"error": "<HTTPError 500: 'Internal Server Error'>"
|
|
},
|
|
{
|
|
"type": "call",
|
|
"ts_utc": "2026-04-29T10:12:54Z",
|
|
"cell_id": "pavilion:weeyuga:qwen2.5-coder:14b",
|
|
"model": "qwen2.5-coder:14b",
|
|
"phase": "5q",
|
|
"question_id": "ssh_lockout_triage",
|
|
"run_idx": 4,
|
|
"duration_seconds": 1.054,
|
|
"prompt_tokens": null,
|
|
"completion_tokens": null,
|
|
"tokens_per_second": null,
|
|
"finish_reason": null,
|
|
"weeyuga_meta": null,
|
|
"status_code": null,
|
|
"response_chars": 0,
|
|
"response_preview": "",
|
|
"required_markers": [
|
|
"sshd_config",
|
|
"authorized_keys",
|
|
"journalctl",
|
|
"rollback",
|
|
"PasswordAuthentication"
|
|
],
|
|
"markers_hit": [],
|
|
"marker_hit_rate": 0.0,
|
|
"format_rule": "five_bullets",
|
|
"format_ok": false,
|
|
"usable_answer": false,
|
|
"error": "<HTTPError 500: 'Internal Server Error'>"
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"cell_id": "pavilion:weeyuga:qwen2.5-coder:3b",
|
|
"n_calls": 6,
|
|
"n_errors": 0,
|
|
"duration_ms_mean": null,
|
|
"duration_ms_p50": null,
|
|
"duration_ms_p95": null,
|
|
"tokens_per_sec_mean": null,
|
|
"tokens_per_sec_p50": null,
|
|
"tokens_per_sec_p95": null,
|
|
"tokens_per_sec_max": null,
|
|
"cold_warm_split": {
|
|
"cold": {
|
|
"n_calls": 2,
|
|
"duration_ms_mean": null,
|
|
"duration_ms_p50": null,
|
|
"tokens_per_sec_mean": null
|
|
},
|
|
"warm": {
|
|
"n_calls": 4,
|
|
"duration_ms_mean": null,
|
|
"duration_ms_p50": null,
|
|
"tokens_per_sec_mean": null
|
|
}
|
|
},
|
|
"per_prompt": {
|
|
"unknown": {
|
|
"n_calls": 6,
|
|
"n_errors": 0,
|
|
"duration_ms_mean": null,
|
|
"duration_ms_p50": null,
|
|
"tokens_per_sec_mean": null,
|
|
"tokens_per_sec_p50": null
|
|
}
|
|
},
|
|
"chars_split": null,
|
|
"timeline": [
|
|
{
|
|
"prompt_id": null,
|
|
"run_idx": 0,
|
|
"phase": "hello",
|
|
"duration_ms": null,
|
|
"tokens_per_sec": null,
|
|
"error": null
|
|
},
|
|
{
|
|
"prompt_id": null,
|
|
"run_idx": 0,
|
|
"phase": "5q",
|
|
"duration_ms": null,
|
|
"tokens_per_sec": null,
|
|
"error": null
|
|
},
|
|
{
|
|
"prompt_id": null,
|
|
"run_idx": 1,
|
|
"phase": "5q",
|
|
"duration_ms": null,
|
|
"tokens_per_sec": null,
|
|
"error": null
|
|
},
|
|
{
|
|
"prompt_id": null,
|
|
"run_idx": 2,
|
|
"phase": "5q",
|
|
"duration_ms": null,
|
|
"tokens_per_sec": null,
|
|
"error": null
|
|
},
|
|
{
|
|
"prompt_id": null,
|
|
"run_idx": 3,
|
|
"phase": "5q",
|
|
"duration_ms": null,
|
|
"tokens_per_sec": null,
|
|
"error": null
|
|
},
|
|
{
|
|
"prompt_id": null,
|
|
"run_idx": 4,
|
|
"phase": "5q",
|
|
"duration_ms": null,
|
|
"tokens_per_sec": null,
|
|
"error": null
|
|
}
|
|
],
|
|
"calls": [
|
|
{
|
|
"type": "call",
|
|
"ts_utc": "2026-04-29T10:13:22Z",
|
|
"cell_id": "pavilion:weeyuga:qwen2.5-coder:3b",
|
|
"model": "qwen2.5-coder:3b",
|
|
"phase": "hello",
|
|
"question_id": "hello_check",
|
|
"run_idx": 0,
|
|
"duration_seconds": 28.318,
|
|
"prompt_tokens": 35,
|
|
"completion_tokens": 17,
|
|
"tokens_per_second": 0.6,
|
|
"finish_reason": "stop",
|
|
"weeyuga_meta": null,
|
|
"status_code": 200,
|
|
"response_chars": 62,
|
|
"response_preview": "Of course! I'm here to help. What do you need assistance with?",
|
|
"required_markers": [],
|
|
"markers_hit": [],
|
|
"marker_hit_rate": null,
|
|
"format_rule": "",
|
|
"format_ok": null,
|
|
"usable_answer": true,
|
|
"error": null
|
|
},
|
|
{
|
|
"type": "call",
|
|
"ts_utc": "2026-04-29T10:13:50Z",
|
|
"cell_id": "pavilion:weeyuga:qwen2.5-coder:3b",
|
|
"model": "qwen2.5-coder:3b",
|
|
"phase": "5q",
|
|
"question_id": "disk_guard_bash",
|
|
"run_idx": 0,
|
|
"duration_seconds": 27.993,
|
|
"prompt_tokens": 90,
|
|
"completion_tokens": 378,
|
|
"tokens_per_second": 13.5,
|
|
"finish_reason": "stop",
|
|
"weeyuga_meta": null,
|
|
"status_code": 200,
|
|
"response_chars": 1474,
|
|
"response_preview": "Certainly! Below is a Bash script that checks disk usage for `/`, prints a human-readable warning, and exits with status 1 when usage is above 85%.\n\n```bash\n#!/bin/bash\n\n# Check if df -P / command exists\nif ! command -v df &> /dev/null; the",
|
|
"required_markers": [
|
|
"#!/usr/bin/env bash",
|
|
"df -P /",
|
|
"85",
|
|
"exit 1"
|
|
],
|
|
"markers_hit": [
|
|
"df -P /",
|
|
"85",
|
|
"exit 1"
|
|
],
|
|
"marker_hit_rate": 0.75,
|
|
"format_rule": "bash_code",
|
|
"format_ok": false,
|
|
"usable_answer": true,
|
|
"error": null
|
|
},
|
|
{
|
|
"type": "call",
|
|
"ts_utc": "2026-04-29T10:14:15Z",
|
|
"cell_id": "pavilion:weeyuga:qwen2.5-coder:3b",
|
|
"model": "qwen2.5-coder:3b",
|
|
"phase": "5q",
|
|
"question_id": "ipv4_python_tests",
|
|
"run_idx": 1,
|
|
"duration_seconds": 24.785,
|
|
"prompt_tokens": 74,
|
|
"completion_tokens": 305,
|
|
"tokens_per_second": 12.31,
|
|
"finish_reason": "stop",
|
|
"weeyuga_meta": null,
|
|
"status_code": 200,
|
|
"response_chars": 1075,
|
|
"response_preview": "```python\nimport pytest\n\ndef is_valid_ipv4(value: str) -> bool:\n parts = value.split('.')\n if len(parts) != 4:\n return False\n \n for part in parts:\n try:\n num = int(part)\n if not (0 <= num <= 2",
|
|
"required_markers": [
|
|
"def is_valid_ipv4",
|
|
"def test_",
|
|
"assert",
|
|
"split('.')"
|
|
],
|
|
"markers_hit": [
|
|
"def is_valid_ipv4",
|
|
"def test_",
|
|
"assert",
|
|
"split('.')"
|
|
],
|
|
"marker_hit_rate": 1.0,
|
|
"format_rule": "python_code",
|
|
"format_ok": true,
|
|
"usable_answer": true,
|
|
"error": null
|
|
},
|
|
{
|
|
"type": "call",
|
|
"ts_utc": "2026-04-29T10:14:19Z",
|
|
"cell_id": "pavilion:weeyuga:qwen2.5-coder:3b",
|
|
"model": "qwen2.5-coder:3b",
|
|
"phase": "5q",
|
|
"question_id": "nginx_safe_reload",
|
|
"run_idx": 2,
|
|
"duration_seconds": 3.778,
|
|
"prompt_tokens": 58,
|
|
"completion_tokens": 43,
|
|
"tokens_per_second": 11.38,
|
|
"finish_reason": "stop",
|
|
"weeyuga_meta": null,
|
|
"status_code": 200,
|
|
"response_chars": 140,
|
|
"response_preview": "```bash\nsudo cp /etc/nginx/nginx.conf /etc/nginx/nginx.conf.bak\n\nsudo nginx -t\n\nif [ $? -eq 0 ]; then\n sudo systemctl reload nginx\nfi\n```",
|
|
"required_markers": [
|
|
"cp /etc/nginx/nginx.conf",
|
|
"nginx -t",
|
|
"systemctl reload nginx",
|
|
"&&"
|
|
],
|
|
"markers_hit": [
|
|
"cp /etc/nginx/nginx.conf",
|
|
"nginx -t",
|
|
"systemctl reload nginx"
|
|
],
|
|
"marker_hit_rate": 0.75,
|
|
"format_rule": "shell_lines",
|
|
"format_ok": true,
|
|
"usable_answer": true,
|
|
"error": null
|
|
},
|
|
{
|
|
"type": "call",
|
|
"ts_utc": "2026-04-29T10:14:56Z",
|
|
"cell_id": "pavilion:weeyuga:qwen2.5-coder:3b",
|
|
"model": "qwen2.5-coder:3b",
|
|
"phase": "5q",
|
|
"question_id": "yaml_cli_plan",
|
|
"run_idx": 3,
|
|
"duration_seconds": 36.655,
|
|
"prompt_tokens": 62,
|
|
"completion_tokens": 501,
|
|
"tokens_per_second": 13.67,
|
|
"finish_reason": "stop",
|
|
"weeyuga_meta": null,
|
|
"status_code": 200,
|
|
"response_chars": 2192,
|
|
"response_preview": "1. **Install Required Packages**: Ensure you have `git`, `pyyaml`, and `jsonschema` installed. You can install these using pip:\n ```bash\n pip install gitpython pyyaml jsonschema\n ```\n\n2. **Create the Python Script**: Create a new file",
|
|
"required_markers": [
|
|
"1.",
|
|
"2.",
|
|
"3.",
|
|
"4.",
|
|
"JSON schema",
|
|
"git"
|
|
],
|
|
"markers_hit": [
|
|
"1.",
|
|
"2.",
|
|
"3.",
|
|
"4.",
|
|
"JSON schema",
|
|
"git"
|
|
],
|
|
"marker_hit_rate": 1.0,
|
|
"format_rule": "four_numbered_steps",
|
|
"format_ok": true,
|
|
"usable_answer": true,
|
|
"error": null
|
|
},
|
|
{
|
|
"type": "call",
|
|
"ts_utc": "2026-04-29T10:15:08Z",
|
|
"cell_id": "pavilion:weeyuga:qwen2.5-coder:3b",
|
|
"model": "qwen2.5-coder:3b",
|
|
"phase": "5q",
|
|
"question_id": "ssh_lockout_triage",
|
|
"run_idx": 4,
|
|
"duration_seconds": 11.989,
|
|
"prompt_tokens": 75,
|
|
"completion_tokens": 154,
|
|
"tokens_per_second": 12.85,
|
|
"finish_reason": "stop",
|
|
"weeyuga_meta": null,
|
|
"status_code": 200,
|
|
"response_chars": 699,
|
|
"response_preview": "1. **Check `sshd_config`**: Ensure that `PasswordAuthentication` is set to `no`. This prevents password-based authentication.\n2. **Review `authorized_keys`**: Verify that the public keys in `~/.ssh/authorized_keys` are correct and match tho",
|
|
"required_markers": [
|
|
"sshd_config",
|
|
"authorized_keys",
|
|
"journalctl",
|
|
"rollback",
|
|
"PasswordAuthentication"
|
|
],
|
|
"markers_hit": [
|
|
"sshd_config",
|
|
"authorized_keys",
|
|
"journalctl",
|
|
"rollback",
|
|
"PasswordAuthentication"
|
|
],
|
|
"marker_hit_rate": 1.0,
|
|
"format_rule": "five_bullets",
|
|
"format_ok": false,
|
|
"usable_answer": true,
|
|
"error": null
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"cell_id": "pavilion:weeyuga:qwen3:14b",
|
|
"n_calls": 6,
|
|
"n_errors": 4,
|
|
"duration_ms_mean": null,
|
|
"duration_ms_p50": null,
|
|
"duration_ms_p95": null,
|
|
"tokens_per_sec_mean": null,
|
|
"tokens_per_sec_p50": null,
|
|
"tokens_per_sec_p95": null,
|
|
"tokens_per_sec_max": null,
|
|
"cold_warm_split": {
|
|
"cold": {
|
|
"n_calls": 2,
|
|
"duration_ms_mean": null,
|
|
"duration_ms_p50": null,
|
|
"tokens_per_sec_mean": null
|
|
},
|
|
"warm": {
|
|
"n_calls": 4,
|
|
"duration_ms_mean": null,
|
|
"duration_ms_p50": null,
|
|
"tokens_per_sec_mean": null
|
|
}
|
|
},
|
|
"per_prompt": {
|
|
"unknown": {
|
|
"n_calls": 6,
|
|
"n_errors": 4,
|
|
"duration_ms_mean": null,
|
|
"duration_ms_p50": null,
|
|
"tokens_per_sec_mean": null,
|
|
"tokens_per_sec_p50": null
|
|
}
|
|
},
|
|
"chars_split": null,
|
|
"timeline": [
|
|
{
|
|
"prompt_id": null,
|
|
"run_idx": 0,
|
|
"phase": "hello",
|
|
"duration_ms": null,
|
|
"tokens_per_sec": null,
|
|
"error": "TimeoutError('timed out')"
|
|
},
|
|
{
|
|
"prompt_id": null,
|
|
"run_idx": 0,
|
|
"phase": "5q",
|
|
"duration_ms": null,
|
|
"tokens_per_sec": null,
|
|
"error": "TimeoutError('timed out')"
|
|
},
|
|
{
|
|
"prompt_id": null,
|
|
"run_idx": 1,
|
|
"phase": "5q",
|
|
"duration_ms": null,
|
|
"tokens_per_sec": null,
|
|
"error": "TimeoutError('timed out')"
|
|
},
|
|
{
|
|
"prompt_id": null,
|
|
"run_idx": 2,
|
|
"phase": "5q",
|
|
"duration_ms": null,
|
|
"tokens_per_sec": null,
|
|
"error": "TimeoutError('timed out')"
|
|
},
|
|
{
|
|
"prompt_id": null,
|
|
"run_idx": 3,
|
|
"phase": "5q",
|
|
"duration_ms": null,
|
|
"tokens_per_sec": null,
|
|
"error": null
|
|
},
|
|
{
|
|
"prompt_id": null,
|
|
"run_idx": 4,
|
|
"phase": "5q",
|
|
"duration_ms": null,
|
|
"tokens_per_sec": null,
|
|
"error": null
|
|
}
|
|
],
|
|
"calls": [
|
|
{
|
|
"type": "call",
|
|
"ts_utc": "2026-04-29T10:21:08Z",
|
|
"cell_id": "pavilion:weeyuga:qwen3:14b",
|
|
"model": "qwen3:14b",
|
|
"phase": "hello",
|
|
"question_id": "hello_check",
|
|
"run_idx": 0,
|
|
"duration_seconds": 360.058,
|
|
"prompt_tokens": null,
|
|
"completion_tokens": null,
|
|
"tokens_per_second": null,
|
|
"finish_reason": null,
|
|
"weeyuga_meta": null,
|
|
"status_code": null,
|
|
"response_chars": 0,
|
|
"response_preview": "",
|
|
"required_markers": [],
|
|
"markers_hit": [],
|
|
"marker_hit_rate": null,
|
|
"format_rule": "",
|
|
"format_ok": null,
|
|
"usable_answer": false,
|
|
"error": "TimeoutError('timed out')"
|
|
},
|
|
{
|
|
"type": "call",
|
|
"ts_utc": "2026-04-29T10:27:08Z",
|
|
"cell_id": "pavilion:weeyuga:qwen3:14b",
|
|
"model": "qwen3:14b",
|
|
"phase": "5q",
|
|
"question_id": "disk_guard_bash",
|
|
"run_idx": 0,
|
|
"duration_seconds": 360.069,
|
|
"prompt_tokens": null,
|
|
"completion_tokens": null,
|
|
"tokens_per_second": null,
|
|
"finish_reason": null,
|
|
"weeyuga_meta": null,
|
|
"status_code": null,
|
|
"response_chars": 0,
|
|
"response_preview": "",
|
|
"required_markers": [
|
|
"#!/usr/bin/env bash",
|
|
"df -P /",
|
|
"85",
|
|
"exit 1"
|
|
],
|
|
"markers_hit": [],
|
|
"marker_hit_rate": 0.0,
|
|
"format_rule": "bash_code",
|
|
"format_ok": false,
|
|
"usable_answer": false,
|
|
"error": "TimeoutError('timed out')"
|
|
},
|
|
{
|
|
"type": "call",
|
|
"ts_utc": "2026-04-29T10:33:08Z",
|
|
"cell_id": "pavilion:weeyuga:qwen3:14b",
|
|
"model": "qwen3:14b",
|
|
"phase": "5q",
|
|
"question_id": "ipv4_python_tests",
|
|
"run_idx": 1,
|
|
"duration_seconds": 360.075,
|
|
"prompt_tokens": null,
|
|
"completion_tokens": null,
|
|
"tokens_per_second": null,
|
|
"finish_reason": null,
|
|
"weeyuga_meta": null,
|
|
"status_code": null,
|
|
"response_chars": 0,
|
|
"response_preview": "",
|
|
"required_markers": [
|
|
"def is_valid_ipv4",
|
|
"def test_",
|
|
"assert",
|
|
"split('.')"
|
|
],
|
|
"markers_hit": [],
|
|
"marker_hit_rate": 0.0,
|
|
"format_rule": "python_code",
|
|
"format_ok": false,
|
|
"usable_answer": false,
|
|
"error": "TimeoutError('timed out')"
|
|
},
|
|
{
|
|
"type": "call",
|
|
"ts_utc": "2026-04-29T10:39:08Z",
|
|
"cell_id": "pavilion:weeyuga:qwen3:14b",
|
|
"model": "qwen3:14b",
|
|
"phase": "5q",
|
|
"question_id": "nginx_safe_reload",
|
|
"run_idx": 2,
|
|
"duration_seconds": 360.065,
|
|
"prompt_tokens": null,
|
|
"completion_tokens": null,
|
|
"tokens_per_second": null,
|
|
"finish_reason": null,
|
|
"weeyuga_meta": null,
|
|
"status_code": null,
|
|
"response_chars": 0,
|
|
"response_preview": "",
|
|
"required_markers": [
|
|
"cp /etc/nginx/nginx.conf",
|
|
"nginx -t",
|
|
"systemctl reload nginx",
|
|
"&&"
|
|
],
|
|
"markers_hit": [],
|
|
"marker_hit_rate": 0.0,
|
|
"format_rule": "shell_lines",
|
|
"format_ok": false,
|
|
"usable_answer": false,
|
|
"error": "TimeoutError('timed out')"
|
|
},
|
|
{
|
|
"type": "call",
|
|
"ts_utc": "2026-04-29T10:44:37Z",
|
|
"cell_id": "pavilion:weeyuga:qwen3:14b",
|
|
"model": "qwen3:14b",
|
|
"phase": "5q",
|
|
"question_id": "yaml_cli_plan",
|
|
"run_idx": 3,
|
|
"duration_seconds": 328.959,
|
|
"prompt_tokens": 43,
|
|
"completion_tokens": 903,
|
|
"tokens_per_second": 2.75,
|
|
"finish_reason": "stop",
|
|
"weeyuga_meta": null,
|
|
"status_code": 200,
|
|
"response_chars": 751,
|
|
"response_preview": "1. **Use `git diff` to identify changed YAML files**: Execute `git diff --name-only` to list modified files, then filter for `.yaml` or `.yml` extensions using a regex or `grep` to isolate relevant files. \n\n2. **Parse YAML files into Pytho",
|
|
"required_markers": [
|
|
"1.",
|
|
"2.",
|
|
"3.",
|
|
"4.",
|
|
"JSON schema",
|
|
"git"
|
|
],
|
|
"markers_hit": [
|
|
"1.",
|
|
"2.",
|
|
"3.",
|
|
"4.",
|
|
"JSON schema",
|
|
"git"
|
|
],
|
|
"marker_hit_rate": 1.0,
|
|
"format_rule": "four_numbered_steps",
|
|
"format_ok": true,
|
|
"usable_answer": true,
|
|
"error": null
|
|
},
|
|
{
|
|
"type": "call",
|
|
"ts_utc": "2026-04-29T10:48:48Z",
|
|
"cell_id": "pavilion:weeyuga:qwen3:14b",
|
|
"model": "qwen3:14b",
|
|
"phase": "5q",
|
|
"question_id": "ssh_lockout_triage",
|
|
"run_idx": 4,
|
|
"duration_seconds": 251.383,
|
|
"prompt_tokens": 56,
|
|
"completion_tokens": 787,
|
|
"tokens_per_second": 3.13,
|
|
"finish_reason": "stop",
|
|
"weeyuga_meta": null,
|
|
"status_code": 200,
|
|
"response_chars": 794,
|
|
"response_preview": "- **Check `sshd_config` for `PasswordAuthentication` setting**: Ensure it's enabled if password login is required, or confirm public key authentication is properly configured. \n- **Verify `authorized_keys` file permissions and content**: C",
|
|
"required_markers": [
|
|
"sshd_config",
|
|
"authorized_keys",
|
|
"journalctl",
|
|
"rollback",
|
|
"PasswordAuthentication"
|
|
],
|
|
"markers_hit": [
|
|
"sshd_config",
|
|
"authorized_keys",
|
|
"journalctl",
|
|
"rollback",
|
|
"PasswordAuthentication"
|
|
],
|
|
"marker_hit_rate": 1.0,
|
|
"format_rule": "five_bullets",
|
|
"format_ok": true,
|
|
"usable_answer": true,
|
|
"error": null
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"cell_id": "pavilion:weeyuga:qwen3:8b",
|
|
"n_calls": 6,
|
|
"n_errors": 2,
|
|
"duration_ms_mean": null,
|
|
"duration_ms_p50": null,
|
|
"duration_ms_p95": null,
|
|
"tokens_per_sec_mean": null,
|
|
"tokens_per_sec_p50": null,
|
|
"tokens_per_sec_p95": null,
|
|
"tokens_per_sec_max": null,
|
|
"cold_warm_split": {
|
|
"cold": {
|
|
"n_calls": 2,
|
|
"duration_ms_mean": null,
|
|
"duration_ms_p50": null,
|
|
"tokens_per_sec_mean": null
|
|
},
|
|
"warm": {
|
|
"n_calls": 4,
|
|
"duration_ms_mean": null,
|
|
"duration_ms_p50": null,
|
|
"tokens_per_sec_mean": null
|
|
}
|
|
},
|
|
"per_prompt": {
|
|
"unknown": {
|
|
"n_calls": 6,
|
|
"n_errors": 2,
|
|
"duration_ms_mean": null,
|
|
"duration_ms_p50": null,
|
|
"tokens_per_sec_mean": null,
|
|
"tokens_per_sec_p50": null
|
|
}
|
|
},
|
|
"chars_split": null,
|
|
"timeline": [
|
|
{
|
|
"prompt_id": null,
|
|
"run_idx": 0,
|
|
"phase": "hello",
|
|
"duration_ms": null,
|
|
"tokens_per_sec": null,
|
|
"error": "TimeoutError('timed out')"
|
|
},
|
|
{
|
|
"prompt_id": null,
|
|
"run_idx": 0,
|
|
"phase": "5q",
|
|
"duration_ms": null,
|
|
"tokens_per_sec": null,
|
|
"error": null
|
|
},
|
|
{
|
|
"prompt_id": null,
|
|
"run_idx": 1,
|
|
"phase": "5q",
|
|
"duration_ms": null,
|
|
"tokens_per_sec": null,
|
|
"error": "TimeoutError('timed out')"
|
|
},
|
|
{
|
|
"prompt_id": null,
|
|
"run_idx": 2,
|
|
"phase": "5q",
|
|
"duration_ms": null,
|
|
"tokens_per_sec": null,
|
|
"error": null
|
|
},
|
|
{
|
|
"prompt_id": null,
|
|
"run_idx": 3,
|
|
"phase": "5q",
|
|
"duration_ms": null,
|
|
"tokens_per_sec": null,
|
|
"error": null
|
|
},
|
|
{
|
|
"prompt_id": null,
|
|
"run_idx": 4,
|
|
"phase": "5q",
|
|
"duration_ms": null,
|
|
"tokens_per_sec": null,
|
|
"error": null
|
|
}
|
|
],
|
|
"calls": [
|
|
{
|
|
"type": "call",
|
|
"ts_utc": "2026-04-29T11:20:47Z",
|
|
"cell_id": "pavilion:weeyuga:qwen3:8b",
|
|
"model": "qwen3:8b",
|
|
"phase": "hello",
|
|
"question_id": "hello_check",
|
|
"run_idx": 0,
|
|
"duration_seconds": 360.092,
|
|
"prompt_tokens": null,
|
|
"completion_tokens": null,
|
|
"tokens_per_second": null,
|
|
"finish_reason": null,
|
|
"weeyuga_meta": null,
|
|
"status_code": null,
|
|
"response_chars": 0,
|
|
"response_preview": "",
|
|
"required_markers": [],
|
|
"markers_hit": [],
|
|
"marker_hit_rate": null,
|
|
"format_rule": "",
|
|
"format_ok": null,
|
|
"usable_answer": false,
|
|
"error": "TimeoutError('timed out')"
|
|
},
|
|
{
|
|
"type": "call",
|
|
"ts_utc": "2026-04-29T11:25:53Z",
|
|
"cell_id": "pavilion:weeyuga:qwen3:8b",
|
|
"model": "qwen3:8b",
|
|
"phase": "5q",
|
|
"question_id": "disk_guard_bash",
|
|
"run_idx": 0,
|
|
"duration_seconds": 305.673,
|
|
"prompt_tokens": 71,
|
|
"completion_tokens": 1427,
|
|
"tokens_per_second": 4.67,
|
|
"finish_reason": "stop",
|
|
"weeyuga_meta": null,
|
|
"status_code": 200,
|
|
"response_chars": 214,
|
|
"response_preview": "#!/bin/bash\n\nusage_percent=$(df -P / | grep '/$' | awk '{print $5}' | cut -d'%' -f1)\n\nif (( usage_percent > 85 )); then\n echo \"Warning: / is at $usage_percent% usage. Please free up space.\"\n exit 1\nfi\n\nexit 0",
|
|
"required_markers": [
|
|
"#!/usr/bin/env bash",
|
|
"df -P /",
|
|
"85",
|
|
"exit 1"
|
|
],
|
|
"markers_hit": [
|
|
"df -P /",
|
|
"85",
|
|
"exit 1"
|
|
],
|
|
"marker_hit_rate": 0.75,
|
|
"format_rule": "bash_code",
|
|
"format_ok": false,
|
|
"usable_answer": true,
|
|
"error": null
|
|
},
|
|
{
|
|
"type": "call",
|
|
"ts_utc": "2026-04-29T11:31:53Z",
|
|
"cell_id": "pavilion:weeyuga:qwen3:8b",
|
|
"model": "qwen3:8b",
|
|
"phase": "5q",
|
|
"question_id": "ipv4_python_tests",
|
|
"run_idx": 1,
|
|
"duration_seconds": 360.074,
|
|
"prompt_tokens": null,
|
|
"completion_tokens": null,
|
|
"tokens_per_second": null,
|
|
"finish_reason": null,
|
|
"weeyuga_meta": null,
|
|
"status_code": null,
|
|
"response_chars": 0,
|
|
"response_preview": "",
|
|
"required_markers": [
|
|
"def is_valid_ipv4",
|
|
"def test_",
|
|
"assert",
|
|
"split('.')"
|
|
],
|
|
"markers_hit": [],
|
|
"marker_hit_rate": 0.0,
|
|
"format_rule": "python_code",
|
|
"format_ok": false,
|
|
"usable_answer": false,
|
|
"error": "TimeoutError('timed out')"
|
|
},
|
|
{
|
|
"type": "call",
|
|
"ts_utc": "2026-04-29T11:36:36Z",
|
|
"cell_id": "pavilion:weeyuga:qwen3:8b",
|
|
"model": "qwen3:8b",
|
|
"phase": "5q",
|
|
"question_id": "nginx_safe_reload",
|
|
"run_idx": 2,
|
|
"duration_seconds": 283.312,
|
|
"prompt_tokens": 39,
|
|
"completion_tokens": 933,
|
|
"tokens_per_second": 3.29,
|
|
"finish_reason": "stop",
|
|
"weeyuga_meta": null,
|
|
"status_code": 200,
|
|
"response_chars": 114,
|
|
"response_preview": "cp /etc/nginx/nginx.conf /etc/nginx/nginx.conf.$(date +\\%F-\\%T)\nnginx -t\nif [ $? -eq 0 ]; then nginx -s reload; fi",
|
|
"required_markers": [
|
|
"cp /etc/nginx/nginx.conf",
|
|
"nginx -t",
|
|
"systemctl reload nginx",
|
|
"&&"
|
|
],
|
|
"markers_hit": [
|
|
"cp /etc/nginx/nginx.conf",
|
|
"nginx -t"
|
|
],
|
|
"marker_hit_rate": 0.5,
|
|
"format_rule": "shell_lines",
|
|
"format_ok": true,
|
|
"usable_answer": true,
|
|
"error": null
|
|
},
|
|
{
|
|
"type": "call",
|
|
"ts_utc": "2026-04-29T11:39:10Z",
|
|
"cell_id": "pavilion:weeyuga:qwen3:8b",
|
|
"model": "qwen3:8b",
|
|
"phase": "5q",
|
|
"question_id": "yaml_cli_plan",
|
|
"run_idx": 3,
|
|
"duration_seconds": 154.227,
|
|
"prompt_tokens": 43,
|
|
"completion_tokens": 776,
|
|
"tokens_per_second": 5.03,
|
|
"finish_reason": "stop",
|
|
"weeyuga_meta": null,
|
|
"status_code": 200,
|
|
"response_chars": 363,
|
|
"response_preview": "1. Use `git diff --name-only` to list changed files, filter for `.yml`/`.yaml` extensions. \n2. Parse each YAML file into JSON and load the provided JSON schema using `jsonschema`. \n3. Validate each parsed YAML against the schema, collecti",
|
|
"required_markers": [
|
|
"1.",
|
|
"2.",
|
|
"3.",
|
|
"4.",
|
|
"JSON schema",
|
|
"git"
|
|
],
|
|
"markers_hit": [
|
|
"1.",
|
|
"2.",
|
|
"3.",
|
|
"4.",
|
|
"JSON schema",
|
|
"git"
|
|
],
|
|
"marker_hit_rate": 1.0,
|
|
"format_rule": "four_numbered_steps",
|
|
"format_ok": true,
|
|
"usable_answer": true,
|
|
"error": null
|
|
},
|
|
{
|
|
"type": "call",
|
|
"ts_utc": "2026-04-29T11:41:15Z",
|
|
"cell_id": "pavilion:weeyuga:qwen3:8b",
|
|
"model": "qwen3:8b",
|
|
"phase": "5q",
|
|
"question_id": "ssh_lockout_triage",
|
|
"run_idx": 4,
|
|
"duration_seconds": 124.742,
|
|
"prompt_tokens": 56,
|
|
"completion_tokens": 629,
|
|
"tokens_per_second": 5.04,
|
|
"finish_reason": "stop",
|
|
"weeyuga_meta": null,
|
|
"status_code": 200,
|
|
"response_chars": 921,
|
|
"response_preview": "- **Verify `sshd_config` settings**: Ensure `PasswordAuthentication` is set to `no` (if disabling passwords) and `PermitRootLogin` is configured securely. Check for syntax errors using `sshd_config_test`. \n- **Inspect `authorized_keys` fil",
|
|
"required_markers": [
|
|
"sshd_config",
|
|
"authorized_keys",
|
|
"journalctl",
|
|
"rollback",
|
|
"PasswordAuthentication"
|
|
],
|
|
"markers_hit": [
|
|
"sshd_config",
|
|
"authorized_keys",
|
|
"journalctl",
|
|
"rollback",
|
|
"PasswordAuthentication"
|
|
],
|
|
"marker_hit_rate": 1.0,
|
|
"format_rule": "five_bullets",
|
|
"format_ok": true,
|
|
"usable_answer": true,
|
|
"error": null
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"cell_id": "pavilion:weeyuga:qwen3:4b",
|
|
"n_calls": 6,
|
|
"n_errors": 0,
|
|
"duration_ms_mean": null,
|
|
"duration_ms_p50": null,
|
|
"duration_ms_p95": null,
|
|
"tokens_per_sec_mean": null,
|
|
"tokens_per_sec_p50": null,
|
|
"tokens_per_sec_p95": null,
|
|
"tokens_per_sec_max": null,
|
|
"cold_warm_split": {
|
|
"cold": {
|
|
"n_calls": 2,
|
|
"duration_ms_mean": null,
|
|
"duration_ms_p50": null,
|
|
"tokens_per_sec_mean": null
|
|
},
|
|
"warm": {
|
|
"n_calls": 4,
|
|
"duration_ms_mean": null,
|
|
"duration_ms_p50": null,
|
|
"tokens_per_sec_mean": null
|
|
}
|
|
},
|
|
"per_prompt": {
|
|
"unknown": {
|
|
"n_calls": 6,
|
|
"n_errors": 0,
|
|
"duration_ms_mean": null,
|
|
"duration_ms_p50": null,
|
|
"tokens_per_sec_mean": null,
|
|
"tokens_per_sec_p50": null
|
|
}
|
|
},
|
|
"chars_split": null,
|
|
"timeline": [
|
|
{
|
|
"prompt_id": null,
|
|
"run_idx": 0,
|
|
"phase": "hello",
|
|
"duration_ms": null,
|
|
"tokens_per_sec": null,
|
|
"error": null
|
|
},
|
|
{
|
|
"prompt_id": null,
|
|
"run_idx": 0,
|
|
"phase": "5q",
|
|
"duration_ms": null,
|
|
"tokens_per_sec": null,
|
|
"error": null
|
|
},
|
|
{
|
|
"prompt_id": null,
|
|
"run_idx": 1,
|
|
"phase": "5q",
|
|
"duration_ms": null,
|
|
"tokens_per_sec": null,
|
|
"error": null
|
|
},
|
|
{
|
|
"prompt_id": null,
|
|
"run_idx": 2,
|
|
"phase": "5q",
|
|
"duration_ms": null,
|
|
"tokens_per_sec": null,
|
|
"error": null
|
|
},
|
|
{
|
|
"prompt_id": null,
|
|
"run_idx": 3,
|
|
"phase": "5q",
|
|
"duration_ms": null,
|
|
"tokens_per_sec": null,
|
|
"error": null
|
|
},
|
|
{
|
|
"prompt_id": null,
|
|
"run_idx": 4,
|
|
"phase": "5q",
|
|
"duration_ms": null,
|
|
"tokens_per_sec": null,
|
|
"error": null
|
|
}
|
|
],
|
|
"calls": [
|
|
{
|
|
"type": "call",
|
|
"ts_utc": "2026-04-29T11:43:45Z",
|
|
"cell_id": "pavilion:weeyuga:qwen3:4b",
|
|
"model": "qwen3:4b",
|
|
"phase": "hello",
|
|
"question_id": "hello_check",
|
|
"run_idx": 0,
|
|
"duration_seconds": 150.036,
|
|
"prompt_tokens": 16,
|
|
"completion_tokens": 411,
|
|
"tokens_per_second": 2.74,
|
|
"finish_reason": "stop",
|
|
"weeyuga_meta": null,
|
|
"status_code": 200,
|
|
"response_chars": 248,
|
|
"response_preview": "Hi! \ud83d\udc4b Absolutely \u2014 I'm here to help. \ud83d\ude0a \n**What would you like assistance with?** \n(Whether it's a question, problem, creative idea, or just need a quick explanation \u2014 I'm all ears!)\n\nJust share what you need, and I'll do my best to suppor",
|
|
"required_markers": [],
|
|
"markers_hit": [],
|
|
"marker_hit_rate": null,
|
|
"format_rule": "",
|
|
"format_ok": null,
|
|
"usable_answer": true,
|
|
"error": null
|
|
},
|
|
{
|
|
"type": "call",
|
|
"ts_utc": "2026-04-29T11:47:43Z",
|
|
"cell_id": "pavilion:weeyuga:qwen3:4b",
|
|
"model": "qwen3:4b",
|
|
"phase": "5q",
|
|
"question_id": "disk_guard_bash",
|
|
"run_idx": 0,
|
|
"duration_seconds": 238.435,
|
|
"prompt_tokens": 71,
|
|
"completion_tokens": 2048,
|
|
"tokens_per_second": 8.59,
|
|
"finish_reason": "length",
|
|
"weeyuga_meta": null,
|
|
"status_code": 200,
|
|
"response_chars": 0,
|
|
"response_preview": "",
|
|
"required_markers": [
|
|
"#!/usr/bin/env bash",
|
|
"df -P /",
|
|
"85",
|
|
"exit 1"
|
|
],
|
|
"markers_hit": [],
|
|
"marker_hit_rate": 0.0,
|
|
"format_rule": "bash_code",
|
|
"format_ok": false,
|
|
"usable_answer": false,
|
|
"error": null
|
|
},
|
|
{
|
|
"type": "call",
|
|
"ts_utc": "2026-04-29T11:51:39Z",
|
|
"cell_id": "pavilion:weeyuga:qwen3:4b",
|
|
"model": "qwen3:4b",
|
|
"phase": "5q",
|
|
"question_id": "ipv4_python_tests",
|
|
"run_idx": 1,
|
|
"duration_seconds": 235.851,
|
|
"prompt_tokens": 55,
|
|
"completion_tokens": 2048,
|
|
"tokens_per_second": 8.68,
|
|
"finish_reason": "length",
|
|
"weeyuga_meta": null,
|
|
"status_code": 200,
|
|
"response_chars": 0,
|
|
"response_preview": "",
|
|
"required_markers": [
|
|
"def is_valid_ipv4",
|
|
"def test_",
|
|
"assert",
|
|
"split('.')"
|
|
],
|
|
"markers_hit": [],
|
|
"marker_hit_rate": 0.0,
|
|
"format_rule": "python_code",
|
|
"format_ok": false,
|
|
"usable_answer": false,
|
|
"error": null
|
|
},
|
|
{
|
|
"type": "call",
|
|
"ts_utc": "2026-04-29T11:55:33Z",
|
|
"cell_id": "pavilion:weeyuga:qwen3:4b",
|
|
"model": "qwen3:4b",
|
|
"phase": "5q",
|
|
"question_id": "nginx_safe_reload",
|
|
"run_idx": 2,
|
|
"duration_seconds": 233.393,
|
|
"prompt_tokens": 39,
|
|
"completion_tokens": 2048,
|
|
"tokens_per_second": 8.77,
|
|
"finish_reason": "length",
|
|
"weeyuga_meta": null,
|
|
"status_code": 200,
|
|
"response_chars": 0,
|
|
"response_preview": "",
|
|
"required_markers": [
|
|
"cp /etc/nginx/nginx.conf",
|
|
"nginx -t",
|
|
"systemctl reload nginx",
|
|
"&&"
|
|
],
|
|
"markers_hit": [],
|
|
"marker_hit_rate": 0.0,
|
|
"format_rule": "shell_lines",
|
|
"format_ok": false,
|
|
"usable_answer": false,
|
|
"error": null
|
|
},
|
|
{
|
|
"type": "call",
|
|
"ts_utc": "2026-04-29T11:59:33Z",
|
|
"cell_id": "pavilion:weeyuga:qwen3:4b",
|
|
"model": "qwen3:4b",
|
|
"phase": "5q",
|
|
"question_id": "yaml_cli_plan",
|
|
"run_idx": 3,
|
|
"duration_seconds": 240.036,
|
|
"prompt_tokens": 43,
|
|
"completion_tokens": 2048,
|
|
"tokens_per_second": 8.53,
|
|
"finish_reason": "length",
|
|
"weeyuga_meta": null,
|
|
"status_code": 200,
|
|
"response_chars": 0,
|
|
"response_preview": "",
|
|
"required_markers": [
|
|
"1.",
|
|
"2.",
|
|
"3.",
|
|
"4.",
|
|
"JSON schema",
|
|
"git"
|
|
],
|
|
"markers_hit": [],
|
|
"marker_hit_rate": 0.0,
|
|
"format_rule": "four_numbered_steps",
|
|
"format_ok": false,
|
|
"usable_answer": false,
|
|
"error": null
|
|
},
|
|
{
|
|
"type": "call",
|
|
"ts_utc": "2026-04-29T12:03:30Z",
|
|
"cell_id": "pavilion:weeyuga:qwen3:4b",
|
|
"model": "qwen3:4b",
|
|
"phase": "5q",
|
|
"question_id": "ssh_lockout_triage",
|
|
"run_idx": 4,
|
|
"duration_seconds": 237.376,
|
|
"prompt_tokens": 56,
|
|
"completion_tokens": 2048,
|
|
"tokens_per_second": 8.63,
|
|
"finish_reason": "length",
|
|
"weeyuga_meta": null,
|
|
"status_code": 200,
|
|
"response_chars": 0,
|
|
"response_preview": "",
|
|
"required_markers": [
|
|
"sshd_config",
|
|
"authorized_keys",
|
|
"journalctl",
|
|
"rollback",
|
|
"PasswordAuthentication"
|
|
],
|
|
"markers_hit": [],
|
|
"marker_hit_rate": 0.0,
|
|
"format_rule": "five_bullets",
|
|
"format_ok": false,
|
|
"usable_answer": false,
|
|
"error": null
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"cell_id": "pavilion:weeyuga:qwen2.5:3b",
|
|
"n_calls": 6,
|
|
"n_errors": 0,
|
|
"duration_ms_mean": null,
|
|
"duration_ms_p50": null,
|
|
"duration_ms_p95": null,
|
|
"tokens_per_sec_mean": null,
|
|
"tokens_per_sec_p50": null,
|
|
"tokens_per_sec_p95": null,
|
|
"tokens_per_sec_max": null,
|
|
"cold_warm_split": {
|
|
"cold": {
|
|
"n_calls": 2,
|
|
"duration_ms_mean": null,
|
|
"duration_ms_p50": null,
|
|
"tokens_per_sec_mean": null
|
|
},
|
|
"warm": {
|
|
"n_calls": 4,
|
|
"duration_ms_mean": null,
|
|
"duration_ms_p50": null,
|
|
"tokens_per_sec_mean": null
|
|
}
|
|
},
|
|
"per_prompt": {
|
|
"unknown": {
|
|
"n_calls": 6,
|
|
"n_errors": 0,
|
|
"duration_ms_mean": null,
|
|
"duration_ms_p50": null,
|
|
"tokens_per_sec_mean": null,
|
|
"tokens_per_sec_p50": null
|
|
}
|
|
},
|
|
"chars_split": null,
|
|
"timeline": [
|
|
{
|
|
"prompt_id": null,
|
|
"run_idx": 0,
|
|
"phase": "hello",
|
|
"duration_ms": null,
|
|
"tokens_per_sec": null,
|
|
"error": null
|
|
},
|
|
{
|
|
"prompt_id": null,
|
|
"run_idx": 0,
|
|
"phase": "5q",
|
|
"duration_ms": null,
|
|
"tokens_per_sec": null,
|
|
"error": null
|
|
},
|
|
{
|
|
"prompt_id": null,
|
|
"run_idx": 1,
|
|
"phase": "5q",
|
|
"duration_ms": null,
|
|
"tokens_per_sec": null,
|
|
"error": null
|
|
},
|
|
{
|
|
"prompt_id": null,
|
|
"run_idx": 2,
|
|
"phase": "5q",
|
|
"duration_ms": null,
|
|
"tokens_per_sec": null,
|
|
"error": null
|
|
},
|
|
{
|
|
"prompt_id": null,
|
|
"run_idx": 3,
|
|
"phase": "5q",
|
|
"duration_ms": null,
|
|
"tokens_per_sec": null,
|
|
"error": null
|
|
},
|
|
{
|
|
"prompt_id": null,
|
|
"run_idx": 4,
|
|
"phase": "5q",
|
|
"duration_ms": null,
|
|
"tokens_per_sec": null,
|
|
"error": null
|
|
}
|
|
],
|
|
"calls": [
|
|
{
|
|
"type": "call",
|
|
"ts_utc": "2026-04-29T12:03:59Z",
|
|
"cell_id": "pavilion:weeyuga:qwen2.5:3b",
|
|
"model": "qwen2.5:3b",
|
|
"phase": "hello",
|
|
"question_id": "hello_check",
|
|
"run_idx": 0,
|
|
"duration_seconds": 28.558,
|
|
"prompt_tokens": 35,
|
|
"completion_tokens": 19,
|
|
"tokens_per_second": 0.67,
|
|
"finish_reason": "stop",
|
|
"weeyuga_meta": null,
|
|
"status_code": 200,
|
|
"response_chars": 72,
|
|
"response_preview": "Of course! I'd be happy to help. What do you need assistance with today?",
|
|
"required_markers": [],
|
|
"markers_hit": [],
|
|
"marker_hit_rate": null,
|
|
"format_rule": "",
|
|
"format_ok": null,
|
|
"usable_answer": true,
|
|
"error": null
|
|
},
|
|
{
|
|
"type": "call",
|
|
"ts_utc": "2026-04-29T12:04:07Z",
|
|
"cell_id": "pavilion:weeyuga:qwen2.5:3b",
|
|
"model": "qwen2.5:3b",
|
|
"phase": "5q",
|
|
"question_id": "disk_guard_bash",
|
|
"run_idx": 0,
|
|
"duration_seconds": 7.986,
|
|
"prompt_tokens": 90,
|
|
"completion_tokens": 97,
|
|
"tokens_per_second": 12.15,
|
|
"finish_reason": "stop",
|
|
"weeyuga_meta": null,
|
|
"status_code": 200,
|
|
"response_chars": 284,
|
|
"response_preview": "```bash\n#!/bin/bash\n\n# Check disk usage for /\nusage=$(df -Ph / | awk 'NR==2 {print $5}')\n\n# Convert to percentage and check if it's above 85%\nif (( $(echo \"$usage > 85\" | bc -l) )); then\n echo \"Warning: Disk usage on / is over 85%. Curre",
|
|
"required_markers": [
|
|
"#!/usr/bin/env bash",
|
|
"df -P /",
|
|
"85",
|
|
"exit 1"
|
|
],
|
|
"markers_hit": [
|
|
"85",
|
|
"exit 1"
|
|
],
|
|
"marker_hit_rate": 0.5,
|
|
"format_rule": "bash_code",
|
|
"format_ok": false,
|
|
"usable_answer": true,
|
|
"error": null
|
|
},
|
|
{
|
|
"type": "call",
|
|
"ts_utc": "2026-04-29T12:04:36Z",
|
|
"cell_id": "pavilion:weeyuga:qwen2.5:3b",
|
|
"model": "qwen2.5:3b",
|
|
"phase": "5q",
|
|
"question_id": "ipv4_python_tests",
|
|
"run_idx": 1,
|
|
"duration_seconds": 28.968,
|
|
"prompt_tokens": 74,
|
|
"completion_tokens": 378,
|
|
"tokens_per_second": 13.05,
|
|
"finish_reason": "stop",
|
|
"weeyuga_meta": null,
|
|
"status_code": 200,
|
|
"response_chars": 1393,
|
|
"response_preview": "```python\ndef is_valid_ipv4(value: str) -> bool:\n \"\"\"\n Checks if the given value is a valid IPv4 address.\n \n :param value: A string representing the IP address to validate.\n :return: True if the value is a valid IPv4 address,",
|
|
"required_markers": [
|
|
"def is_valid_ipv4",
|
|
"def test_",
|
|
"assert",
|
|
"split('.')"
|
|
],
|
|
"markers_hit": [
|
|
"def is_valid_ipv4",
|
|
"def test_",
|
|
"assert",
|
|
"split('.')"
|
|
],
|
|
"marker_hit_rate": 1.0,
|
|
"format_rule": "python_code",
|
|
"format_ok": false,
|
|
"usable_answer": true,
|
|
"error": null
|
|
},
|
|
{
|
|
"type": "call",
|
|
"ts_utc": "2026-04-29T12:04:39Z",
|
|
"cell_id": "pavilion:weeyuga:qwen2.5:3b",
|
|
"model": "qwen2.5:3b",
|
|
"phase": "5q",
|
|
"question_id": "nginx_safe_reload",
|
|
"run_idx": 2,
|
|
"duration_seconds": 3.395,
|
|
"prompt_tokens": 58,
|
|
"completion_tokens": 37,
|
|
"tokens_per_second": 10.9,
|
|
"finish_reason": "stop",
|
|
"weeyuga_meta": null,
|
|
"status_code": 200,
|
|
"response_chars": 123,
|
|
"response_preview": "sudo cp /etc/nginx/nginx.conf /etc/nginx/nginx.conf.bak\nsudo nginx -t\nif [ $? -eq 0 ]; then\n sudo systemctl reload nginx\nfi",
|
|
"required_markers": [
|
|
"cp /etc/nginx/nginx.conf",
|
|
"nginx -t",
|
|
"systemctl reload nginx",
|
|
"&&"
|
|
],
|
|
"markers_hit": [
|
|
"cp /etc/nginx/nginx.conf",
|
|
"nginx -t",
|
|
"systemctl reload nginx"
|
|
],
|
|
"marker_hit_rate": 0.75,
|
|
"format_rule": "shell_lines",
|
|
"format_ok": true,
|
|
"usable_answer": true,
|
|
"error": null
|
|
},
|
|
{
|
|
"type": "call",
|
|
"ts_utc": "2026-04-29T12:05:26Z",
|
|
"cell_id": "pavilion:weeyuga:qwen2.5:3b",
|
|
"model": "qwen2.5:3b",
|
|
"phase": "5q",
|
|
"question_id": "yaml_cli_plan",
|
|
"run_idx": 3,
|
|
"duration_seconds": 46.998,
|
|
"prompt_tokens": 62,
|
|
"completion_tokens": 606,
|
|
"tokens_per_second": 12.89,
|
|
"finish_reason": "stop",
|
|
"weeyuga_meta": null,
|
|
"status_code": 200,
|
|
"response_chars": 2760,
|
|
"response_preview": "Certainly! Below are the four numbered steps to plan your Python CLI tool that scans a Git repository for changed YAML files, validates them against a JSON schema, and exits with a non-zero status if any validation fails.\n\n1. **Initialize t",
|
|
"required_markers": [
|
|
"1.",
|
|
"2.",
|
|
"3.",
|
|
"4.",
|
|
"JSON schema",
|
|
"git"
|
|
],
|
|
"markers_hit": [
|
|
"1.",
|
|
"2.",
|
|
"3.",
|
|
"4.",
|
|
"JSON schema",
|
|
"git"
|
|
],
|
|
"marker_hit_rate": 1.0,
|
|
"format_rule": "four_numbered_steps",
|
|
"format_ok": true,
|
|
"usable_answer": true,
|
|
"error": null
|
|
},
|
|
{
|
|
"type": "call",
|
|
"ts_utc": "2026-04-29T12:05:40Z",
|
|
"cell_id": "pavilion:weeyuga:qwen2.5:3b",
|
|
"model": "qwen2.5:3b",
|
|
"phase": "5q",
|
|
"question_id": "ssh_lockout_triage",
|
|
"run_idx": 4,
|
|
"duration_seconds": 14.325,
|
|
"prompt_tokens": 75,
|
|
"completion_tokens": 181,
|
|
"tokens_per_second": 12.64,
|
|
"finish_reason": "stop",
|
|
"weeyuga_meta": null,
|
|
"status_code": 200,
|
|
"response_chars": 871,
|
|
"response_preview": "- Ensure that `PasswordAuthentication` is set to `no` in `/etc/ssh/sshd_config`. This prevents SSH from accepting password authentication.\n- Verify that your public key is correctly added to the user's authorized keys file located at `~/.ss",
|
|
"required_markers": [
|
|
"sshd_config",
|
|
"authorized_keys",
|
|
"journalctl",
|
|
"rollback",
|
|
"PasswordAuthentication"
|
|
],
|
|
"markers_hit": [
|
|
"sshd_config",
|
|
"authorized_keys",
|
|
"journalctl",
|
|
"rollback",
|
|
"PasswordAuthentication"
|
|
],
|
|
"marker_hit_rate": 1.0,
|
|
"format_rule": "five_bullets",
|
|
"format_ok": true,
|
|
"usable_answer": true,
|
|
"error": null
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"cell_id": "pavilion:weeyuga:qwen2.5-coder:1.5b",
|
|
"n_calls": 6,
|
|
"n_errors": 0,
|
|
"duration_ms_mean": null,
|
|
"duration_ms_p50": null,
|
|
"duration_ms_p95": null,
|
|
"tokens_per_sec_mean": null,
|
|
"tokens_per_sec_p50": null,
|
|
"tokens_per_sec_p95": null,
|
|
"tokens_per_sec_max": null,
|
|
"cold_warm_split": {
|
|
"cold": {
|
|
"n_calls": 2,
|
|
"duration_ms_mean": null,
|
|
"duration_ms_p50": null,
|
|
"tokens_per_sec_mean": null
|
|
},
|
|
"warm": {
|
|
"n_calls": 4,
|
|
"duration_ms_mean": null,
|
|
"duration_ms_p50": null,
|
|
"tokens_per_sec_mean": null
|
|
}
|
|
},
|
|
"per_prompt": {
|
|
"unknown": {
|
|
"n_calls": 6,
|
|
"n_errors": 0,
|
|
"duration_ms_mean": null,
|
|
"duration_ms_p50": null,
|
|
"tokens_per_sec_mean": null,
|
|
"tokens_per_sec_p50": null
|
|
}
|
|
},
|
|
"chars_split": null,
|
|
"timeline": [
|
|
{
|
|
"prompt_id": null,
|
|
"run_idx": 0,
|
|
"phase": "hello",
|
|
"duration_ms": null,
|
|
"tokens_per_sec": null,
|
|
"error": null
|
|
},
|
|
{
|
|
"prompt_id": null,
|
|
"run_idx": 0,
|
|
"phase": "5q",
|
|
"duration_ms": null,
|
|
"tokens_per_sec": null,
|
|
"error": null
|
|
},
|
|
{
|
|
"prompt_id": null,
|
|
"run_idx": 1,
|
|
"phase": "5q",
|
|
"duration_ms": null,
|
|
"tokens_per_sec": null,
|
|
"error": null
|
|
},
|
|
{
|
|
"prompt_id": null,
|
|
"run_idx": 2,
|
|
"phase": "5q",
|
|
"duration_ms": null,
|
|
"tokens_per_sec": null,
|
|
"error": null
|
|
},
|
|
{
|
|
"prompt_id": null,
|
|
"run_idx": 3,
|
|
"phase": "5q",
|
|
"duration_ms": null,
|
|
"tokens_per_sec": null,
|
|
"error": null
|
|
},
|
|
{
|
|
"prompt_id": null,
|
|
"run_idx": 4,
|
|
"phase": "5q",
|
|
"duration_ms": null,
|
|
"tokens_per_sec": null,
|
|
"error": null
|
|
}
|
|
],
|
|
"calls": [
|
|
{
|
|
"type": "call",
|
|
"ts_utc": "2026-04-29T12:06:01Z",
|
|
"cell_id": "pavilion:weeyuga:qwen2.5-coder:1.5b",
|
|
"model": "qwen2.5-coder:1.5b",
|
|
"phase": "hello",
|
|
"question_id": "hello_check",
|
|
"run_idx": 0,
|
|
"duration_seconds": 20.837,
|
|
"prompt_tokens": 35,
|
|
"completion_tokens": 11,
|
|
"tokens_per_second": 0.53,
|
|
"finish_reason": "stop",
|
|
"weeyuga_meta": null,
|
|
"status_code": 200,
|
|
"response_chars": 38,
|
|
"response_preview": "Of course! How may I assist you today?",
|
|
"required_markers": [],
|
|
"markers_hit": [],
|
|
"marker_hit_rate": null,
|
|
"format_rule": "",
|
|
"format_ok": null,
|
|
"usable_answer": true,
|
|
"error": null
|
|
},
|
|
{
|
|
"type": "call",
|
|
"ts_utc": "2026-04-29T12:06:11Z",
|
|
"cell_id": "pavilion:weeyuga:qwen2.5-coder:1.5b",
|
|
"model": "qwen2.5-coder:1.5b",
|
|
"phase": "5q",
|
|
"question_id": "disk_guard_bash",
|
|
"run_idx": 0,
|
|
"duration_seconds": 9.55,
|
|
"prompt_tokens": 90,
|
|
"completion_tokens": 243,
|
|
"tokens_per_second": 25.45,
|
|
"finish_reason": "stop",
|
|
"weeyuga_meta": null,
|
|
"status_code": 200,
|
|
"response_chars": 861,
|
|
"response_preview": "```bash\n#!/bin/bash\n\n# Check disk usage for /\ndf -P / | awk '{print $5}' | grep -E '[0-9]+' > /tmp/disk_usage.txt\n\n# Read the percentage from the file\npercentage=$(cat /tmp/disk_usage.txt)\n\n# Exit with status 1 if usage is above 85 percent\n",
|
|
"required_markers": [
|
|
"#!/usr/bin/env bash",
|
|
"df -P /",
|
|
"85",
|
|
"exit 1"
|
|
],
|
|
"markers_hit": [
|
|
"df -P /",
|
|
"85",
|
|
"exit 1"
|
|
],
|
|
"marker_hit_rate": 0.75,
|
|
"format_rule": "bash_code",
|
|
"format_ok": false,
|
|
"usable_answer": true,
|
|
"error": null
|
|
},
|
|
{
|
|
"type": "call",
|
|
"ts_utc": "2026-04-29T12:06:23Z",
|
|
"cell_id": "pavilion:weeyuga:qwen2.5-coder:1.5b",
|
|
"model": "qwen2.5-coder:1.5b",
|
|
"phase": "5q",
|
|
"question_id": "ipv4_python_tests",
|
|
"run_idx": 1,
|
|
"duration_seconds": 12.454,
|
|
"prompt_tokens": 74,
|
|
"completion_tokens": 303,
|
|
"tokens_per_second": 24.33,
|
|
"finish_reason": "stop",
|
|
"weeyuga_meta": null,
|
|
"status_code": 200,
|
|
"response_chars": 1101,
|
|
"response_preview": "```python\nimport pytest\n\ndef is_valid_ipv4(value: str) -> bool:\n \"\"\"\n Check if the provided string is a valid IPv4 address.\n \n Args:\n value (str): The string to check.\n \n Returns:\n bool: True if the string is a valid",
|
|
"required_markers": [
|
|
"def is_valid_ipv4",
|
|
"def test_",
|
|
"assert",
|
|
"split('.')"
|
|
],
|
|
"markers_hit": [
|
|
"def is_valid_ipv4",
|
|
"def test_",
|
|
"assert",
|
|
"split('.')"
|
|
],
|
|
"marker_hit_rate": 1.0,
|
|
"format_rule": "python_code",
|
|
"format_ok": false,
|
|
"usable_answer": true,
|
|
"error": null
|
|
},
|
|
{
|
|
"type": "call",
|
|
"ts_utc": "2026-04-29T12:06:26Z",
|
|
"cell_id": "pavilion:weeyuga:qwen2.5-coder:1.5b",
|
|
"model": "qwen2.5-coder:1.5b",
|
|
"phase": "5q",
|
|
"question_id": "nginx_safe_reload",
|
|
"run_idx": 2,
|
|
"duration_seconds": 2.926,
|
|
"prompt_tokens": 58,
|
|
"completion_tokens": 64,
|
|
"tokens_per_second": 21.87,
|
|
"finish_reason": "stop",
|
|
"weeyuga_meta": null,
|
|
"status_code": 200,
|
|
"response_chars": 257,
|
|
"response_preview": "```bash\n# Backup the original nginx configuration file\ncp /etc/nginx/nginx.conf /etc/nginx/nginx.conf.bak\n\n# Validate the nginx configuration\nnginx -t\n\n# Reload nginx if the configuration is valid\nif [ $? -eq 0 ]; then\n sudo systemctl re",
|
|
"required_markers": [
|
|
"cp /etc/nginx/nginx.conf",
|
|
"nginx -t",
|
|
"systemctl reload nginx",
|
|
"&&"
|
|
],
|
|
"markers_hit": [
|
|
"cp /etc/nginx/nginx.conf",
|
|
"nginx -t",
|
|
"systemctl reload nginx"
|
|
],
|
|
"marker_hit_rate": 0.75,
|
|
"format_rule": "shell_lines",
|
|
"format_ok": true,
|
|
"usable_answer": true,
|
|
"error": null
|
|
},
|
|
{
|
|
"type": "call",
|
|
"ts_utc": "2026-04-29T12:06:44Z",
|
|
"cell_id": "pavilion:weeyuga:qwen2.5-coder:1.5b",
|
|
"model": "qwen2.5-coder:1.5b",
|
|
"phase": "5q",
|
|
"question_id": "yaml_cli_plan",
|
|
"run_idx": 3,
|
|
"duration_seconds": 18.124,
|
|
"prompt_tokens": 62,
|
|
"completion_tokens": 473,
|
|
"tokens_per_second": 26.1,
|
|
"finish_reason": "stop",
|
|
"weeyuga_meta": null,
|
|
"status_code": 200,
|
|
"response_chars": 2139,
|
|
"response_preview": "1. Install the required packages:\n```bash\npip install gitpython jsonschema\n```\n\n2. Create a new Python file named `validate_yaml.py`:\n\n3. Add the following code to the `validate_yaml.py` file:\n\n```python\nimport os\nimport json\nfrom git impor",
|
|
"required_markers": [
|
|
"1.",
|
|
"2.",
|
|
"3.",
|
|
"4.",
|
|
"JSON schema",
|
|
"git"
|
|
],
|
|
"markers_hit": [
|
|
"1.",
|
|
"2.",
|
|
"3.",
|
|
"4.",
|
|
"JSON schema",
|
|
"git"
|
|
],
|
|
"marker_hit_rate": 1.0,
|
|
"format_rule": "four_numbered_steps",
|
|
"format_ok": true,
|
|
"usable_answer": true,
|
|
"error": null
|
|
},
|
|
{
|
|
"type": "call",
|
|
"ts_utc": "2026-04-29T12:06:55Z",
|
|
"cell_id": "pavilion:weeyuga:qwen2.5-coder:1.5b",
|
|
"model": "qwen2.5-coder:1.5b",
|
|
"phase": "5q",
|
|
"question_id": "ssh_lockout_triage",
|
|
"run_idx": 4,
|
|
"duration_seconds": 10.595,
|
|
"prompt_tokens": 75,
|
|
"completion_tokens": 271,
|
|
"tokens_per_second": 25.58,
|
|
"finish_reason": "stop",
|
|
"weeyuga_meta": null,
|
|
"status_code": 200,
|
|
"response_chars": 1086,
|
|
"response_preview": "- **Check for Permissions**: Ensure that the `authorized_keys` file has the correct permissions (`600`) to restrict access.\n ```sh\n chmod 600 ~/.ssh/authorized_keys\n ```\n\n- **Review SSHD Configuration**: Verify that the `PasswordAuthenti",
|
|
"required_markers": [
|
|
"sshd_config",
|
|
"authorized_keys",
|
|
"journalctl",
|
|
"rollback",
|
|
"PasswordAuthentication"
|
|
],
|
|
"markers_hit": [
|
|
"sshd_config",
|
|
"authorized_keys",
|
|
"journalctl",
|
|
"rollback",
|
|
"PasswordAuthentication"
|
|
],
|
|
"marker_hit_rate": 1.0,
|
|
"format_rule": "five_bullets",
|
|
"format_ok": true,
|
|
"usable_answer": true,
|
|
"error": null
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"cell_id": "pavilion:weeyuga:qwen2.5-coder:0.5b",
|
|
"n_calls": 6,
|
|
"n_errors": 0,
|
|
"duration_ms_mean": null,
|
|
"duration_ms_p50": null,
|
|
"duration_ms_p95": null,
|
|
"tokens_per_sec_mean": null,
|
|
"tokens_per_sec_p50": null,
|
|
"tokens_per_sec_p95": null,
|
|
"tokens_per_sec_max": null,
|
|
"cold_warm_split": {
|
|
"cold": {
|
|
"n_calls": 2,
|
|
"duration_ms_mean": null,
|
|
"duration_ms_p50": null,
|
|
"tokens_per_sec_mean": null
|
|
},
|
|
"warm": {
|
|
"n_calls": 4,
|
|
"duration_ms_mean": null,
|
|
"duration_ms_p50": null,
|
|
"tokens_per_sec_mean": null
|
|
}
|
|
},
|
|
"per_prompt": {
|
|
"unknown": {
|
|
"n_calls": 6,
|
|
"n_errors": 0,
|
|
"duration_ms_mean": null,
|
|
"duration_ms_p50": null,
|
|
"tokens_per_sec_mean": null,
|
|
"tokens_per_sec_p50": null
|
|
}
|
|
},
|
|
"chars_split": null,
|
|
"timeline": [
|
|
{
|
|
"prompt_id": null,
|
|
"run_idx": 0,
|
|
"phase": "hello",
|
|
"duration_ms": null,
|
|
"tokens_per_sec": null,
|
|
"error": null
|
|
},
|
|
{
|
|
"prompt_id": null,
|
|
"run_idx": 0,
|
|
"phase": "5q",
|
|
"duration_ms": null,
|
|
"tokens_per_sec": null,
|
|
"error": null
|
|
},
|
|
{
|
|
"prompt_id": null,
|
|
"run_idx": 1,
|
|
"phase": "5q",
|
|
"duration_ms": null,
|
|
"tokens_per_sec": null,
|
|
"error": null
|
|
},
|
|
{
|
|
"prompt_id": null,
|
|
"run_idx": 2,
|
|
"phase": "5q",
|
|
"duration_ms": null,
|
|
"tokens_per_sec": null,
|
|
"error": null
|
|
},
|
|
{
|
|
"prompt_id": null,
|
|
"run_idx": 3,
|
|
"phase": "5q",
|
|
"duration_ms": null,
|
|
"tokens_per_sec": null,
|
|
"error": null
|
|
},
|
|
{
|
|
"prompt_id": null,
|
|
"run_idx": 4,
|
|
"phase": "5q",
|
|
"duration_ms": null,
|
|
"tokens_per_sec": null,
|
|
"error": null
|
|
}
|
|
],
|
|
"calls": [
|
|
{
|
|
"type": "call",
|
|
"ts_utc": "2026-04-29T12:07:09Z",
|
|
"cell_id": "pavilion:weeyuga:qwen2.5-coder:0.5b",
|
|
"model": "qwen2.5-coder:0.5b",
|
|
"phase": "hello",
|
|
"question_id": "hello_check",
|
|
"run_idx": 0,
|
|
"duration_seconds": 13.774,
|
|
"prompt_tokens": 35,
|
|
"completion_tokens": 11,
|
|
"tokens_per_second": 0.8,
|
|
"finish_reason": "stop",
|
|
"weeyuga_meta": null,
|
|
"status_code": 200,
|
|
"response_chars": 38,
|
|
"response_preview": "Of course! How can I assist you today?",
|
|
"required_markers": [],
|
|
"markers_hit": [],
|
|
"marker_hit_rate": null,
|
|
"format_rule": "",
|
|
"format_ok": null,
|
|
"usable_answer": true,
|
|
"error": null
|
|
},
|
|
{
|
|
"type": "call",
|
|
"ts_utc": "2026-04-29T12:07:17Z",
|
|
"cell_id": "pavilion:weeyuga:qwen2.5-coder:0.5b",
|
|
"model": "qwen2.5-coder:0.5b",
|
|
"phase": "5q",
|
|
"question_id": "disk_guard_bash",
|
|
"run_idx": 0,
|
|
"duration_seconds": 8.292,
|
|
"prompt_tokens": 90,
|
|
"completion_tokens": 418,
|
|
"tokens_per_second": 50.41,
|
|
"finish_reason": "stop",
|
|
"weeyuga_meta": null,
|
|
"status_code": 200,
|
|
"response_chars": 1654,
|
|
"response_preview": "Certainly! Below is a Bash script that checks disk usage for `/`, prints a human-readable warning if usage is above 85 percent, and exits with status 1 when usage is above 85 percent.\n\n```bash\n#!/bin/bash\n\n# Check if df -P / exists\nif ! com",
|
|
"required_markers": [
|
|
"#!/usr/bin/env bash",
|
|
"df -P /",
|
|
"85",
|
|
"exit 1"
|
|
],
|
|
"markers_hit": [
|
|
"df -P /",
|
|
"85",
|
|
"exit 1"
|
|
],
|
|
"marker_hit_rate": 0.75,
|
|
"format_rule": "bash_code",
|
|
"format_ok": false,
|
|
"usable_answer": true,
|
|
"error": null
|
|
},
|
|
{
|
|
"type": "call",
|
|
"ts_utc": "2026-04-29T12:07:27Z",
|
|
"cell_id": "pavilion:weeyuga:qwen2.5-coder:0.5b",
|
|
"model": "qwen2.5-coder:0.5b",
|
|
"phase": "5q",
|
|
"question_id": "ipv4_python_tests",
|
|
"run_idx": 1,
|
|
"duration_seconds": 10.239,
|
|
"prompt_tokens": 74,
|
|
"completion_tokens": 520,
|
|
"tokens_per_second": 50.79,
|
|
"finish_reason": "stop",
|
|
"weeyuga_meta": null,
|
|
"status_code": 200,
|
|
"response_chars": 1505,
|
|
"response_preview": "```python\nimport re\n\ndef is_valid_ipv4(value: str) -> bool:\n \"\"\"\n Check if the provided string is a valid IPv4 address.\n \n Args:\n value (str): The string to check.\n \n Returns:\n bool: True if the string is a valid IPv",
|
|
"required_markers": [
|
|
"def is_valid_ipv4",
|
|
"def test_",
|
|
"assert",
|
|
"split('.')"
|
|
],
|
|
"markers_hit": [
|
|
"def is_valid_ipv4",
|
|
"def test_",
|
|
"assert"
|
|
],
|
|
"marker_hit_rate": 0.75,
|
|
"format_rule": "python_code",
|
|
"format_ok": false,
|
|
"usable_answer": true,
|
|
"error": null
|
|
},
|
|
{
|
|
"type": "call",
|
|
"ts_utc": "2026-04-29T12:07:30Z",
|
|
"cell_id": "pavilion:weeyuga:qwen2.5-coder:0.5b",
|
|
"model": "qwen2.5-coder:0.5b",
|
|
"phase": "5q",
|
|
"question_id": "nginx_safe_reload",
|
|
"run_idx": 2,
|
|
"duration_seconds": 3.174,
|
|
"prompt_tokens": 58,
|
|
"completion_tokens": 149,
|
|
"tokens_per_second": 46.94,
|
|
"finish_reason": "stop",
|
|
"weeyuga_meta": null,
|
|
"status_code": 200,
|
|
"response_chars": 576,
|
|
"response_preview": "```bash\n# Backup /etc/nginx/nginx.conf\nsudo cp -p /etc/nginx/nginx.conf /etc/nginx/nginx.conf.bak\n\n# Validate nginx config\nsudo nginx -t\n\n# Reload nginx only if validation passes\nif [ $? -eq 0 ]; then\n sudo systemctl reload nginx\nfi\n```\n",
|
|
"required_markers": [
|
|
"cp /etc/nginx/nginx.conf",
|
|
"nginx -t",
|
|
"systemctl reload nginx",
|
|
"&&"
|
|
],
|
|
"markers_hit": [
|
|
"nginx -t",
|
|
"systemctl reload nginx"
|
|
],
|
|
"marker_hit_rate": 0.5,
|
|
"format_rule": "shell_lines",
|
|
"format_ok": true,
|
|
"usable_answer": true,
|
|
"error": null
|
|
},
|
|
{
|
|
"type": "call",
|
|
"ts_utc": "2026-04-29T12:07:42Z",
|
|
"cell_id": "pavilion:weeyuga:qwen2.5-coder:0.5b",
|
|
"model": "qwen2.5-coder:0.5b",
|
|
"phase": "5q",
|
|
"question_id": "yaml_cli_plan",
|
|
"run_idx": 3,
|
|
"duration_seconds": 12.01,
|
|
"prompt_tokens": 62,
|
|
"completion_tokens": 613,
|
|
"tokens_per_second": 51.04,
|
|
"finish_reason": "stop",
|
|
"weeyuga_meta": null,
|
|
"status_code": 200,
|
|
"response_chars": 2834,
|
|
"response_preview": "To create a Python CLI that scans a Git repository for changed YAML files, validates them against a JSON schema, and exits nonzero on failure, you can use the `git` command-line tool along with some Python libraries like `argparse`, `jsonsc",
|
|
"required_markers": [
|
|
"1.",
|
|
"2.",
|
|
"3.",
|
|
"4.",
|
|
"JSON schema",
|
|
"git"
|
|
],
|
|
"markers_hit": [
|
|
"1.",
|
|
"2.",
|
|
"3.",
|
|
"JSON schema",
|
|
"git"
|
|
],
|
|
"marker_hit_rate": 0.833,
|
|
"format_rule": "four_numbered_steps",
|
|
"format_ok": true,
|
|
"usable_answer": true,
|
|
"error": null
|
|
},
|
|
{
|
|
"type": "call",
|
|
"ts_utc": "2026-04-29T12:07:51Z",
|
|
"cell_id": "pavilion:weeyuga:qwen2.5-coder:0.5b",
|
|
"model": "qwen2.5-coder:0.5b",
|
|
"phase": "5q",
|
|
"question_id": "ssh_lockout_triage",
|
|
"run_idx": 4,
|
|
"duration_seconds": 8.951,
|
|
"prompt_tokens": 75,
|
|
"completion_tokens": 451,
|
|
"tokens_per_second": 50.39,
|
|
"finish_reason": "stop",
|
|
"weeyuga_meta": null,
|
|
"status_code": 200,
|
|
"response_chars": 1588,
|
|
"response_preview": "- **Hardening**: Ensure that your SSH server is secure by using strong passwords, enabling two-factor authentication, and updating software to the latest version.\n- **Safest First Checks**:\n - **Authorized Keys**: Verify that your public k",
|
|
"required_markers": [
|
|
"sshd_config",
|
|
"authorized_keys",
|
|
"journalctl",
|
|
"rollback",
|
|
"PasswordAuthentication"
|
|
],
|
|
"markers_hit": [
|
|
"sshd_config",
|
|
"authorized_keys",
|
|
"journalctl",
|
|
"rollback",
|
|
"PasswordAuthentication"
|
|
],
|
|
"marker_hit_rate": 1.0,
|
|
"format_rule": "five_bullets",
|
|
"format_ok": true,
|
|
"usable_answer": true,
|
|
"error": null
|
|
}
|
|
]
|
|
}
|
|
],
|
|
"meta_record": {
|
|
"type": "meta",
|
|
"benchmark_run_id": "ad057f5b-ed3f-4a95-a38e-361be310ffd6",
|
|
"harness_version": "3",
|
|
"harness_path": "scripts/benchmarks/run_pavilion_weeyuga.py",
|
|
"git_sha": "371ce70c8708ce0aedbe4f3bf096f8e8f8b11d43",
|
|
"git_dirty": true,
|
|
"started_at_utc": "2026-04-29T09:51:46Z",
|
|
"host": "Slobodans-MacBook-Air.local",
|
|
"load_avg_start": [
|
|
1.60205078125,
|
|
1.9716796875,
|
|
1.95751953125
|
|
],
|
|
"target_url": "http://10.8.0.3:11435",
|
|
"weeyuga_url": "http://10.8.0.3:11435",
|
|
"cell_id_prefix": "pavilion:weeyuga",
|
|
"execution_shape": "per-model-block",
|
|
"phase_plan": "hello+5q",
|
|
"models_planned": [
|
|
"qwen3.5:4b",
|
|
"qwen3.5:35b-a3b-uncensored-iq1m",
|
|
"qwen3.5:35b-a3b-iq2s",
|
|
"qwen3.5:9b-q6k",
|
|
"qwen3.5:9b-q4km",
|
|
"qwen3.5:2b",
|
|
"qwen3.5:0.8b",
|
|
"qwen3.5:9b",
|
|
"qwen2.5-coder:14b",
|
|
"qwen2.5-coder:3b",
|
|
"qwen3:14b",
|
|
"qwen3:8b",
|
|
"qwen3:4b",
|
|
"qwen2.5:3b",
|
|
"qwen2.5-coder:1.5b",
|
|
"qwen2.5-coder:0.5b"
|
|
],
|
|
"canonical_options": {
|
|
"temperature": 0.1,
|
|
"num_ctx": 4096,
|
|
"num_predict": 2048
|
|
},
|
|
"timeout_seconds": 360,
|
|
"suite_5q_path": "/Users/slobodan/Documents/MyServers/instances/vps-81-17-99-14/telemetry/small_model_eval_questions.json",
|
|
"suite_20q_path": "/Users/slobodan/Documents/MyServers/instances/vps-81-17-99-14/telemetry/python_task_suite_questions.json",
|
|
"env_inference_route": null,
|
|
"env_llamacpp_url": null
|
|
},
|
|
"packaged_at": "2026-05-06T12:28:24Z"
|
|
}
|