weeyuga-benchmarks-public/runs/ad057f5b-ed3f-4a95-a38e-361be310ffd6/metadata.json

{
  "id": "ad057f5b-ed3f-4a95-a38e-361be310ffd6",
  "title": "pavilion-weeyuga-v3 \u2014 qwen2.5/qwen2.5-coder/qwen3/qwen3.5 on pavilion",
  "date": "2026-04-29",
  "started_at": "2026-04-29T09:51:46Z",
  "git_sha": "371ce70c8708ce0aedbe4f3bf096f8e8f8b11d43",
  "hardware": "pavilion",
  "hardware_label": "Pavilion \u00b7 HP laptop \u00b7 GTX 1050 4 GB \u00b7 16 GB RAM \u00b7 i7-9750H",
  "engine": "weeyuga",
  "harness": "pavilion-weeyuga-v3",
  "model_family": "qwen2.5/qwen2.5-coder/qwen3/qwen3.5",
  "model_sizes": [
    "qwen3.5:4b",
    "qwen3.5:35b-a3b-uncensored",
    "qwen3.5:35b-a3b-iq2s",
    "qwen3.5:9b-q6k",
    "qwen3.5:9b",
    "qwen3.5:2b",
    "qwen3.5:0.8b",
    "qwen2.5-coder:14b",
    "qwen2.5-coder:3b",
    "qwen3:14b",
    "qwen3:8b",
    "qwen3:4b",
    "qwen2.5:3b",
    "qwen2.5-coder:1.5b",
    "qwen2.5-coder:0.5b"
  ],
  "task_kind": "chat",
  "tags": [
    "a3b",
    "iq2",
    "pavilion",
    "qwen2.5",
    "qwen3",
    "qwen3.5",
    "weeyuga"
  ],
  "headline": "96 calls across 16 cell(s); 12 errors",
  "janie_blurb_md": null,
  "janie_blurb_status": "pending",
  "caveat": null,
  "caveat_severity": null,
  "methodology_ref": "SITE_DATA_AUDIT_AND_MIGRATION_PLAN_2026-05-06",
  "methodology_deviations_md": null,
  "results_table": [
    {
      "label": "qwen3.5:4b",
      "cell_id": "pavilion:weeyuga:qwen3.5:4b",
      "n_calls": 6,
      "n_errors": 0,
      "duration_ms_mean": null,
      "duration_ms_p50": null,
      "duration_ms_p95": null,
      "tokens_per_sec_mean": null,
      "tokens_per_sec_p50": null,
      "tokens_per_sec_p95": null,
      "tokens_per_sec_max": null,
      "memory_mb": null,
      "watts_avg": null,
      "notes": null
    },
    {
      "label": "qwen3.5:35b-a3b-uncensored-iq1m",
      "cell_id": "pavilion:weeyuga:qwen3.5:35b-a3b-uncensored-iq1m",
      "n_calls": 6,
      "n_errors": 0,
      "duration_ms_mean": null,
      "duration_ms_p50": null,
      "duration_ms_p95": null,
      "tokens_per_sec_mean": null,
      "tokens_per_sec_p50": null,
      "tokens_per_sec_p95": null,
      "tokens_per_sec_max": null,
      "memory_mb": null,
      "watts_avg": null,
      "notes": null
    },
    {
      "label": "qwen3.5:35b-a3b-iq2s",
      "cell_id": "pavilion:weeyuga:qwen3.5:35b-a3b-iq2s",
      "n_calls": 6,
      "n_errors": 0,
      "duration_ms_mean": null,
      "duration_ms_p50": null,
      "duration_ms_p95": null,
      "tokens_per_sec_mean": null,
      "tokens_per_sec_p50": null,
      "tokens_per_sec_p95": null,
      "tokens_per_sec_max": null,
      "memory_mb": null,
      "watts_avg": null,
      "notes": null
    },
    {
      "label": "qwen3.5:9b-q6k",
      "cell_id": "pavilion:weeyuga:qwen3.5:9b-q6k",
      "n_calls": 6,
      "n_errors": 0,
      "duration_ms_mean": null,
      "duration_ms_p50": null,
      "duration_ms_p95": null,
      "tokens_per_sec_mean": null,
      "tokens_per_sec_p50": null,
      "tokens_per_sec_p95": null,
      "tokens_per_sec_max": null,
      "memory_mb": null,
      "watts_avg": null,
      "notes": null
    },
    {
      "label": "qwen3.5:9b-q4km",
      "cell_id": "pavilion:weeyuga:qwen3.5:9b-q4km",
      "n_calls": 6,
      "n_errors": 0,
      "duration_ms_mean": null,
      "duration_ms_p50": null,
      "duration_ms_p95": null,
      "tokens_per_sec_mean": null,
      "tokens_per_sec_p50": null,
      "tokens_per_sec_p95": null,
      "tokens_per_sec_max": null,
      "memory_mb": null,
      "watts_avg": null,
      "notes": null
    },
    {
      "label": "qwen3.5:2b",
      "cell_id": "pavilion:weeyuga:qwen3.5:2b",
      "n_calls": 6,
      "n_errors": 0,
      "duration_ms_mean": null,
      "duration_ms_p50": null,
      "duration_ms_p95": null,
      "tokens_per_sec_mean": null,
      "tokens_per_sec_p50": null,
      "tokens_per_sec_p95": null,
      "tokens_per_sec_max": null,
      "memory_mb": null,
      "watts_avg": null,
      "notes": null
    },
    {
      "label": "qwen3.5:0.8b",
      "cell_id": "pavilion:weeyuga:qwen3.5:0.8b",
      "n_calls": 6,
      "n_errors": 0,
      "duration_ms_mean": null,
      "duration_ms_p50": null,
      "duration_ms_p95": null,
      "tokens_per_sec_mean": null,
      "tokens_per_sec_p50": null,
      "tokens_per_sec_p95": null,
      "tokens_per_sec_max": null,
      "memory_mb": null,
      "watts_avg": null,
      "notes": null
    },
    {
      "label": "qwen3.5:9b",
      "cell_id": "pavilion:weeyuga:qwen3.5:9b",
      "n_calls": 6,
      "n_errors": 0,
      "duration_ms_mean": null,
      "duration_ms_p50": null,
      "duration_ms_p95": null,
      "tokens_per_sec_mean": null,
      "tokens_per_sec_p50": null,
      "tokens_per_sec_p95": null,
      "tokens_per_sec_max": null,
      "memory_mb": null,
      "watts_avg": null,
      "notes": null
    },
    {
      "label": "qwen2.5-coder:14b",
      "cell_id": "pavilion:weeyuga:qwen2.5-coder:14b",
      "n_calls": 6,
      "n_errors": 6,
      "duration_ms_mean": null,
      "duration_ms_p50": null,
      "duration_ms_p95": null,
      "tokens_per_sec_mean": null,
      "tokens_per_sec_p50": null,
      "tokens_per_sec_p95": null,
      "tokens_per_sec_max": null,
      "memory_mb": null,
      "watts_avg": null,
      "notes": null
    },
    {
      "label": "qwen2.5-coder:3b",
      "cell_id": "pavilion:weeyuga:qwen2.5-coder:3b",
      "n_calls": 6,
      "n_errors": 0,
      "duration_ms_mean": null,
      "duration_ms_p50": null,
      "duration_ms_p95": null,
      "tokens_per_sec_mean": null,
      "tokens_per_sec_p50": null,
      "tokens_per_sec_p95": null,
      "tokens_per_sec_max": null,
      "memory_mb": null,
      "watts_avg": null,
      "notes": null
    },
    {
      "label": "qwen3:14b",
      "cell_id": "pavilion:weeyuga:qwen3:14b",
      "n_calls": 6,
      "n_errors": 4,
      "duration_ms_mean": null,
      "duration_ms_p50": null,
      "duration_ms_p95": null,
      "tokens_per_sec_mean": null,
      "tokens_per_sec_p50": null,
      "tokens_per_sec_p95": null,
      "tokens_per_sec_max": null,
      "memory_mb": null,
      "watts_avg": null,
      "notes": null
    },
    {
      "label": "qwen3:8b",
      "cell_id": "pavilion:weeyuga:qwen3:8b",
      "n_calls": 6,
      "n_errors": 2,
      "duration_ms_mean": null,
      "duration_ms_p50": null,
      "duration_ms_p95": null,
      "tokens_per_sec_mean": null,
      "tokens_per_sec_p50": null,
      "tokens_per_sec_p95": null,
      "tokens_per_sec_max": null,
      "memory_mb": null,
      "watts_avg": null,
      "notes": null
    },
    {
      "label": "qwen3:4b",
      "cell_id": "pavilion:weeyuga:qwen3:4b",
      "n_calls": 6,
      "n_errors": 0,
      "duration_ms_mean": null,
      "duration_ms_p50": null,
      "duration_ms_p95": null,
      "tokens_per_sec_mean": null,
      "tokens_per_sec_p50": null,
      "tokens_per_sec_p95": null,
      "tokens_per_sec_max": null,
      "memory_mb": null,
      "watts_avg": null,
      "notes": null
    },
    {
      "label": "qwen2.5:3b",
      "cell_id": "pavilion:weeyuga:qwen2.5:3b",
      "n_calls": 6,
      "n_errors": 0,
      "duration_ms_mean": null,
      "duration_ms_p50": null,
      "duration_ms_p95": null,
      "tokens_per_sec_mean": null,
      "tokens_per_sec_p50": null,
      "tokens_per_sec_p95": null,
      "tokens_per_sec_max": null,
      "memory_mb": null,
      "watts_avg": null,
      "notes": null
    },
    {
      "label": "qwen2.5-coder:1.5b",
      "cell_id": "pavilion:weeyuga:qwen2.5-coder:1.5b",
      "n_calls": 6,
      "n_errors": 0,
      "duration_ms_mean": null,
      "duration_ms_p50": null,
      "duration_ms_p95": null,
      "tokens_per_sec_mean": null,
      "tokens_per_sec_p50": null,
      "tokens_per_sec_p95": null,
      "tokens_per_sec_max": null,
      "memory_mb": null,
      "watts_avg": null,
      "notes": null
    },
    {
      "label": "qwen2.5-coder:0.5b",
      "cell_id": "pavilion:weeyuga:qwen2.5-coder:0.5b",
      "n_calls": 6,
      "n_errors": 0,
      "duration_ms_mean": null,
      "duration_ms_p50": null,
      "duration_ms_p95": null,
      "tokens_per_sec_mean": null,
      "tokens_per_sec_p50": null,
      "tokens_per_sec_p95": null,
      "tokens_per_sec_max": null,
      "memory_mb": null,
      "watts_avg": null,
      "notes": null
    }
  ],
  "cells": [
    {
      "cell_id": "pavilion:weeyuga:qwen3.5:4b",
      "n_calls": 6,
      "n_errors": 0,
      "duration_ms_mean": null,
      "duration_ms_p50": null,
      "duration_ms_p95": null,
      "tokens_per_sec_mean": null,
      "tokens_per_sec_p50": null,
      "tokens_per_sec_p95": null,
      "tokens_per_sec_max": null,
      "cold_warm_split": {
        "cold": {
          "n_calls": 2,
          "duration_ms_mean": null,
          "duration_ms_p50": null,
          "tokens_per_sec_mean": null
        },
        "warm": {
          "n_calls": 4,
          "duration_ms_mean": null,
          "duration_ms_p50": null,
          "tokens_per_sec_mean": null
        }
      },
      "per_prompt": {
        "unknown": {
          "n_calls": 6,
          "n_errors": 0,
          "duration_ms_mean": null,
          "duration_ms_p50": null,
          "tokens_per_sec_mean": null,
          "tokens_per_sec_p50": null
        }
      },
      "chars_split": null,
      "timeline": [
        {
          "prompt_id": null,
          "run_idx": 0,
          "phase": "hello",
          "duration_ms": null,
          "tokens_per_sec": null,
          "error": null
        },
        {
          "prompt_id": null,
          "run_idx": 0,
          "phase": "5q",
          "duration_ms": null,
          "tokens_per_sec": null,
          "error": null
        },
        {
          "prompt_id": null,
          "run_idx": 1,
          "phase": "5q",
          "duration_ms": null,
          "tokens_per_sec": null,
          "error": null
        },
        {
          "prompt_id": null,
          "run_idx": 2,
          "phase": "5q",
          "duration_ms": null,
          "tokens_per_sec": null,
          "error": null
        },
        {
          "prompt_id": null,
          "run_idx": 3,
          "phase": "5q",
          "duration_ms": null,
          "tokens_per_sec": null,
          "error": null
        },
        {
          "prompt_id": null,
          "run_idx": 4,
          "phase": "5q",
          "duration_ms": null,
          "tokens_per_sec": null,
          "error": null
        }
      ]
    },
    {
      "cell_id": "pavilion:weeyuga:qwen3.5:35b-a3b-uncensored-iq1m",
      "n_calls": 6,
      "n_errors": 0,
      "duration_ms_mean": null,
      "duration_ms_p50": null,
      "duration_ms_p95": null,
      "tokens_per_sec_mean": null,
      "tokens_per_sec_p50": null,
      "tokens_per_sec_p95": null,
      "tokens_per_sec_max": null,
      "cold_warm_split": {
        "cold": {
          "n_calls": 2,
          "duration_ms_mean": null,
          "duration_ms_p50": null,
          "tokens_per_sec_mean": null
        },
        "warm": {
          "n_calls": 4,
          "duration_ms_mean": null,
          "duration_ms_p50": null,
          "tokens_per_sec_mean": null
        }
      },
      "per_prompt": {
        "unknown": {
          "n_calls": 6,
          "n_errors": 0,
          "duration_ms_mean": null,
          "duration_ms_p50": null,
          "tokens_per_sec_mean": null,
          "tokens_per_sec_p50": null
        }
      },
      "chars_split": null,
      "timeline": [
        {
          "prompt_id": null,
          "run_idx": 0,
          "phase": "hello",
          "duration_ms": null,
          "tokens_per_sec": null,
          "error": null
        },
        {
          "prompt_id": null,
          "run_idx": 0,
          "phase": "5q",
          "duration_ms": null,
          "tokens_per_sec": null,
          "error": null
        },
        {
          "prompt_id": null,
          "run_idx": 1,
          "phase": "5q",
          "duration_ms": null,
          "tokens_per_sec": null,
          "error": null
        },
        {
          "prompt_id": null,
          "run_idx": 2,
          "phase": "5q",
          "duration_ms": null,
          "tokens_per_sec": null,
          "error": null
        },
        {
          "prompt_id": null,
          "run_idx": 3,
          "phase": "5q",
          "duration_ms": null,
          "tokens_per_sec": null,
          "error": null
        },
        {
          "prompt_id": null,
          "run_idx": 4,
          "phase": "5q",
          "duration_ms": null,
          "tokens_per_sec": null,
          "error": null
        }
      ]
    },
    {
      "cell_id": "pavilion:weeyuga:qwen3.5:35b-a3b-iq2s",
      "n_calls": 6,
      "n_errors": 0,
      "duration_ms_mean": null,
      "duration_ms_p50": null,
      "duration_ms_p95": null,
      "tokens_per_sec_mean": null,
      "tokens_per_sec_p50": null,
      "tokens_per_sec_p95": null,
      "tokens_per_sec_max": null,
      "cold_warm_split": {
        "cold": {
          "n_calls": 2,
          "duration_ms_mean": null,
          "duration_ms_p50": null,
          "tokens_per_sec_mean": null
        },
        "warm": {
          "n_calls": 4,
          "duration_ms_mean": null,
          "duration_ms_p50": null,
          "tokens_per_sec_mean": null
        }
      },
      "per_prompt": {
        "unknown": {
          "n_calls": 6,
          "n_errors": 0,
          "duration_ms_mean": null,
          "duration_ms_p50": null,
          "tokens_per_sec_mean": null,
          "tokens_per_sec_p50": null
        }
      },
      "chars_split": null,
      "timeline": [
        {
          "prompt_id": null,
          "run_idx": 0,
          "phase": "hello",
          "duration_ms": null,
          "tokens_per_sec": null,
          "error": null
        },
        {
          "prompt_id": null,
          "run_idx": 0,
          "phase": "5q",
          "duration_ms": null,
          "tokens_per_sec": null,
          "error": null
        },
        {
          "prompt_id": null,
          "run_idx": 1,
          "phase": "5q",
          "duration_ms": null,
          "tokens_per_sec": null,
          "error": null
        },
        {
          "prompt_id": null,
          "run_idx": 2,
          "phase": "5q",
          "duration_ms": null,
          "tokens_per_sec": null,
          "error": null
        },
        {
          "prompt_id": null,
          "run_idx": 3,
          "phase": "5q",
          "duration_ms": null,
          "tokens_per_sec": null,
          "error": null
        },
        {
          "prompt_id": null,
          "run_idx": 4,
          "phase": "5q",
          "duration_ms": null,
          "tokens_per_sec": null,
          "error": null
        }
      ]
    },
    {
      "cell_id": "pavilion:weeyuga:qwen3.5:9b-q6k",
      "n_calls": 6,
      "n_errors": 0,
      "duration_ms_mean": null,
      "duration_ms_p50": null,
      "duration_ms_p95": null,
      "tokens_per_sec_mean": null,
      "tokens_per_sec_p50": null,
      "tokens_per_sec_p95": null,
      "tokens_per_sec_max": null,
      "cold_warm_split": {
        "cold": {
          "n_calls": 2,
          "duration_ms_mean": null,
          "duration_ms_p50": null,
          "tokens_per_sec_mean": null
        },
        "warm": {
          "n_calls": 4,
          "duration_ms_mean": null,
          "duration_ms_p50": null,
          "tokens_per_sec_mean": null
        }
      },
      "per_prompt": {
        "unknown": {
          "n_calls": 6,
          "n_errors": 0,
          "duration_ms_mean": null,
          "duration_ms_p50": null,
          "tokens_per_sec_mean": null,
          "tokens_per_sec_p50": null
        }
      },
      "chars_split": null,
      "timeline": [
        {
          "prompt_id": null,
          "run_idx": 0,
          "phase": "hello",
          "duration_ms": null,
          "tokens_per_sec": null,
          "error": null
        },
        {
          "prompt_id": null,
          "run_idx": 0,
          "phase": "5q",
          "duration_ms": null,
          "tokens_per_sec": null,
          "error": null
        },
        {
          "prompt_id": null,
          "run_idx": 1,
          "phase": "5q",
          "duration_ms": null,
          "tokens_per_sec": null,
          "error": null
        },
        {
          "prompt_id": null,
          "run_idx": 2,
          "phase": "5q",
          "duration_ms": null,
          "tokens_per_sec": null,
          "error": null
        },
        {
          "prompt_id": null,
          "run_idx": 3,
          "phase": "5q",
          "duration_ms": null,
          "tokens_per_sec": null,
          "error": null
        },
        {
          "prompt_id": null,
          "run_idx": 4,
          "phase": "5q",
          "duration_ms": null,
          "tokens_per_sec": null,
          "error": null
        }
      ]
    },
    {
      "cell_id": "pavilion:weeyuga:qwen3.5:9b-q4km",
      "n_calls": 6,
      "n_errors": 0,
      "duration_ms_mean": null,
      "duration_ms_p50": null,
      "duration_ms_p95": null,
      "tokens_per_sec_mean": null,
      "tokens_per_sec_p50": null,
      "tokens_per_sec_p95": null,
      "tokens_per_sec_max": null,
      "cold_warm_split": {
        "cold": {
          "n_calls": 2,
          "duration_ms_mean": null,
          "duration_ms_p50": null,
          "tokens_per_sec_mean": null
        },
        "warm": {
          "n_calls": 4,
          "duration_ms_mean": null,
          "duration_ms_p50": null,
          "tokens_per_sec_mean": null
        }
      },
      "per_prompt": {
        "unknown": {
          "n_calls": 6,
          "n_errors": 0,
          "duration_ms_mean": null,
          "duration_ms_p50": null,
          "tokens_per_sec_mean": null,
          "tokens_per_sec_p50": null
        }
      },
      "chars_split": null,
      "timeline": [
        {
          "prompt_id": null,
          "run_idx": 0,
          "phase": "hello",
          "duration_ms": null,
          "tokens_per_sec": null,
          "error": null
        },
        {
          "prompt_id": null,
          "run_idx": 0,
          "phase": "5q",
          "duration_ms": null,
          "tokens_per_sec": null,
          "error": null
        },
        {
          "prompt_id": null,
          "run_idx": 1,
          "phase": "5q",
          "duration_ms": null,
          "tokens_per_sec": null,
          "error": null
        },
        {
          "prompt_id": null,
          "run_idx": 2,
          "phase": "5q",
          "duration_ms": null,
          "tokens_per_sec": null,
          "error": null
        },
        {
          "prompt_id": null,
          "run_idx": 3,
          "phase": "5q",
          "duration_ms": null,
          "tokens_per_sec": null,
          "error": null
        },
        {
          "prompt_id": null,
          "run_idx": 4,
          "phase": "5q",
          "duration_ms": null,
          "tokens_per_sec": null,
          "error": null
        }
      ]
    },
    {
      "cell_id": "pavilion:weeyuga:qwen3.5:2b",
      "n_calls": 6,
      "n_errors": 0,
      "duration_ms_mean": null,
      "duration_ms_p50": null,
      "duration_ms_p95": null,
      "tokens_per_sec_mean": null,
      "tokens_per_sec_p50": null,
      "tokens_per_sec_p95": null,
      "tokens_per_sec_max": null,
      "cold_warm_split": {
        "cold": {
          "n_calls": 2,
          "duration_ms_mean": null,
          "duration_ms_p50": null,
          "tokens_per_sec_mean": null
        },
        "warm": {
          "n_calls": 4,
          "duration_ms_mean": null,
          "duration_ms_p50": null,
          "tokens_per_sec_mean": null
        }
      },
      "per_prompt": {
        "unknown": {
          "n_calls": 6,
          "n_errors": 0,
          "duration_ms_mean": null,
          "duration_ms_p50": null,
          "tokens_per_sec_mean": null,
          "tokens_per_sec_p50": null
        }
      },
      "chars_split": null,
      "timeline": [
        {
          "prompt_id": null,
          "run_idx": 0,
          "phase": "hello",
          "duration_ms": null,
          "tokens_per_sec": null,
          "error": null
        },
        {
          "prompt_id": null,
          "run_idx": 0,
          "phase": "5q",
          "duration_ms": null,
          "tokens_per_sec": null,
          "error": null
        },
        {
          "prompt_id": null,
          "run_idx": 1,
          "phase": "5q",
          "duration_ms": null,
          "tokens_per_sec": null,
          "error": null
        },
        {
          "prompt_id": null,
          "run_idx": 2,
          "phase": "5q",
          "duration_ms": null,
          "tokens_per_sec": null,
          "error": null
        },
        {
          "prompt_id": null,
          "run_idx": 3,
          "phase": "5q",
          "duration_ms": null,
          "tokens_per_sec": null,
          "error": null
        },
        {
          "prompt_id": null,
          "run_idx": 4,
          "phase": "5q",
          "duration_ms": null,
          "tokens_per_sec": null,
          "error": null
        }
      ]
    },
    {
      "cell_id": "pavilion:weeyuga:qwen3.5:0.8b",
      "n_calls": 6,
      "n_errors": 0,
      "duration_ms_mean": null,
      "duration_ms_p50": null,
      "duration_ms_p95": null,
      "tokens_per_sec_mean": null,
      "tokens_per_sec_p50": null,
      "tokens_per_sec_p95": null,
      "tokens_per_sec_max": null,
      "cold_warm_split": {
        "cold": {
          "n_calls": 2,
          "duration_ms_mean": null,
          "duration_ms_p50": null,
          "tokens_per_sec_mean": null
        },
        "warm": {
          "n_calls": 4,
          "duration_ms_mean": null,
          "duration_ms_p50": null,
          "tokens_per_sec_mean": null
        }
      },
      "per_prompt": {
        "unknown": {
          "n_calls": 6,
          "n_errors": 0,
          "duration_ms_mean": null,
          "duration_ms_p50": null,
          "tokens_per_sec_mean": null,
          "tokens_per_sec_p50": null
        }
      },
      "chars_split": null,
      "timeline": [
        {
          "prompt_id": null,
          "run_idx": 0,
          "phase": "hello",
          "duration_ms": null,
          "tokens_per_sec": null,
          "error": null
        },
        {
          "prompt_id": null,
          "run_idx": 0,
          "phase": "5q",
          "duration_ms": null,
          "tokens_per_sec": null,
          "error": null
        },
        {
          "prompt_id": null,
          "run_idx": 1,
          "phase": "5q",
          "duration_ms": null,
          "tokens_per_sec": null,
          "error": null
        },
        {
          "prompt_id": null,
          "run_idx": 2,
          "phase": "5q",
          "duration_ms": null,
          "tokens_per_sec": null,
          "error": null
        },
        {
          "prompt_id": null,
          "run_idx": 3,
          "phase": "5q",
          "duration_ms": null,
          "tokens_per_sec": null,
          "error": null
        },
        {
          "prompt_id": null,
          "run_idx": 4,
          "phase": "5q",
          "duration_ms": null,
          "tokens_per_sec": null,
          "error": null
        }
      ]
    },
    {
      "cell_id": "pavilion:weeyuga:qwen3.5:9b",
      "n_calls": 6,
      "n_errors": 0,
      "duration_ms_mean": null,
      "duration_ms_p50": null,
      "duration_ms_p95": null,
      "tokens_per_sec_mean": null,
      "tokens_per_sec_p50": null,
      "tokens_per_sec_p95": null,
      "tokens_per_sec_max": null,
      "cold_warm_split": {
        "cold": {
          "n_calls": 2,
          "duration_ms_mean": null,
          "duration_ms_p50": null,
          "tokens_per_sec_mean": null
        },
        "warm": {
          "n_calls": 4,
          "duration_ms_mean": null,
          "duration_ms_p50": null,
          "tokens_per_sec_mean": null
        }
      },
      "per_prompt": {
        "unknown": {
          "n_calls": 6,
          "n_errors": 0,
          "duration_ms_mean": null,
          "duration_ms_p50": null,
          "tokens_per_sec_mean": null,
          "tokens_per_sec_p50": null
        }
      },
      "chars_split": null,
      "timeline": [
        {
          "prompt_id": null,
          "run_idx": 0,
          "phase": "hello",
          "duration_ms": null,
          "tokens_per_sec": null,
          "error": null
        },
        {
          "prompt_id": null,
          "run_idx": 0,
          "phase": "5q",
          "duration_ms": null,
          "tokens_per_sec": null,
          "error": null
        },
        {
          "prompt_id": null,
          "run_idx": 1,
          "phase": "5q",
          "duration_ms": null,
          "tokens_per_sec": null,
          "error": null
        },
        {
          "prompt_id": null,
          "run_idx": 2,
          "phase": "5q",
          "duration_ms": null,
          "tokens_per_sec": null,
          "error": null
        },
        {
          "prompt_id": null,
          "run_idx": 3,
          "phase": "5q",
          "duration_ms": null,
          "tokens_per_sec": null,
          "error": null
        },
        {
          "prompt_id": null,
          "run_idx": 4,
          "phase": "5q",
          "duration_ms": null,
          "tokens_per_sec": null,
          "error": null
        }
      ]
    },
    {
      "cell_id": "pavilion:weeyuga:qwen2.5-coder:14b",
      "n_calls": 6,
      "n_errors": 6,
      "duration_ms_mean": null,
      "duration_ms_p50": null,
      "duration_ms_p95": null,
      "tokens_per_sec_mean": null,
      "tokens_per_sec_p50": null,
      "tokens_per_sec_p95": null,
      "tokens_per_sec_max": null,
      "cold_warm_split": {
        "cold": {
          "n_calls": 2,
          "duration_ms_mean": null,
          "duration_ms_p50": null,
          "tokens_per_sec_mean": null
        },
        "warm": {
          "n_calls": 4,
          "duration_ms_mean": null,
          "duration_ms_p50": null,
          "tokens_per_sec_mean": null
        }
      },
      "per_prompt": {
        "unknown": {
          "n_calls": 6,
          "n_errors": 6,
          "duration_ms_mean": null,
          "duration_ms_p50": null,
          "tokens_per_sec_mean": null,
          "tokens_per_sec_p50": null
        }
      },
      "chars_split": null,
      "timeline": [
        {
          "prompt_id": null,
          "run_idx": 0,
          "phase": "hello",
          "duration_ms": null,
          "tokens_per_sec": null,
          "error": "<HTTPError 500: 'Internal Server Error'>"
        },
        {
          "prompt_id": null,
          "run_idx": 0,
          "phase": "5q",
          "duration_ms": null,
          "tokens_per_sec": null,
          "error": "<HTTPError 500: 'Internal Server Error'>"
        },
        {
          "prompt_id": null,
          "run_idx": 1,
          "phase": "5q",
          "duration_ms": null,
          "tokens_per_sec": null,
          "error": "<HTTPError 500: 'Internal Server Error'>"
        },
        {
          "prompt_id": null,
          "run_idx": 2,
          "phase": "5q",
          "duration_ms": null,
          "tokens_per_sec": null,
          "error": "<HTTPError 500: 'Internal Server Error'>"
        },
        {
          "prompt_id": null,
          "run_idx": 3,
          "phase": "5q",
          "duration_ms": null,
          "tokens_per_sec": null,
          "error": "<HTTPError 500: 'Internal Server Error'>"
        },
        {
          "prompt_id": null,
          "run_idx": 4,
          "phase": "5q",
          "duration_ms": null,
          "tokens_per_sec": null,
          "error": "<HTTPError 500: 'Internal Server Error'>"
        }
      ]
    },
    {
      "cell_id": "pavilion:weeyuga:qwen2.5-coder:3b",
      "n_calls": 6,
      "n_errors": 0,
      "duration_ms_mean": null,
      "duration_ms_p50": null,
      "duration_ms_p95": null,
      "tokens_per_sec_mean": null,
      "tokens_per_sec_p50": null,
      "tokens_per_sec_p95": null,
      "tokens_per_sec_max": null,
      "cold_warm_split": {
        "cold": {
          "n_calls": 2,
          "duration_ms_mean": null,
          "duration_ms_p50": null,
          "tokens_per_sec_mean": null
        },
        "warm": {
          "n_calls": 4,
          "duration_ms_mean": null,
          "duration_ms_p50": null,
          "tokens_per_sec_mean": null
        }
      },
      "per_prompt": {
        "unknown": {
          "n_calls": 6,
          "n_errors": 0,
          "duration_ms_mean": null,
          "duration_ms_p50": null,
          "tokens_per_sec_mean": null,
          "tokens_per_sec_p50": null
        }
      },
      "chars_split": null,
      "timeline": [
        {
          "prompt_id": null,
          "run_idx": 0,
          "phase": "hello",
          "duration_ms": null,
          "tokens_per_sec": null,
          "error": null
        },
        {
          "prompt_id": null,
          "run_idx": 0,
          "phase": "5q",
          "duration_ms": null,
          "tokens_per_sec": null,
          "error": null
        },
        {
          "prompt_id": null,
          "run_idx": 1,
          "phase": "5q",
          "duration_ms": null,
          "tokens_per_sec": null,
          "error": null
        },
        {
          "prompt_id": null,
          "run_idx": 2,
          "phase": "5q",
          "duration_ms": null,
          "tokens_per_sec": null,
          "error": null
        },
        {
          "prompt_id": null,
          "run_idx": 3,
          "phase": "5q",
          "duration_ms": null,
          "tokens_per_sec": null,
          "error": null
        },
        {
          "prompt_id": null,
          "run_idx": 4,
          "phase": "5q",
          "duration_ms": null,
          "tokens_per_sec": null,
          "error": null
        }
      ]
    },
    {
      "cell_id": "pavilion:weeyuga:qwen3:14b",
      "n_calls": 6,
      "n_errors": 4,
      "duration_ms_mean": null,
      "duration_ms_p50": null,
      "duration_ms_p95": null,
      "tokens_per_sec_mean": null,
      "tokens_per_sec_p50": null,
      "tokens_per_sec_p95": null,
      "tokens_per_sec_max": null,
      "cold_warm_split": {
        "cold": {
          "n_calls": 2,
          "duration_ms_mean": null,
          "duration_ms_p50": null,
          "tokens_per_sec_mean": null
        },
        "warm": {
          "n_calls": 4,
          "duration_ms_mean": null,
          "duration_ms_p50": null,
          "tokens_per_sec_mean": null
        }
      },
      "per_prompt": {
        "unknown": {
          "n_calls": 6,
          "n_errors": 4,
          "duration_ms_mean": null,
          "duration_ms_p50": null,
          "tokens_per_sec_mean": null,
          "tokens_per_sec_p50": null
        }
      },
      "chars_split": null,
      "timeline": [
        {
          "prompt_id": null,
          "run_idx": 0,
          "phase": "hello",
          "duration_ms": null,
          "tokens_per_sec": null,
          "error": "TimeoutError('timed out')"
        },
        {
          "prompt_id": null,
          "run_idx": 0,
          "phase": "5q",
          "duration_ms": null,
          "tokens_per_sec": null,
          "error": "TimeoutError('timed out')"
        },
        {
          "prompt_id": null,
          "run_idx": 1,
          "phase": "5q",
          "duration_ms": null,
          "tokens_per_sec": null,
          "error": "TimeoutError('timed out')"
        },
        {
          "prompt_id": null,
          "run_idx": 2,
          "phase": "5q",
          "duration_ms": null,
          "tokens_per_sec": null,
          "error": "TimeoutError('timed out')"
        },
        {
          "prompt_id": null,
          "run_idx": 3,
          "phase": "5q",
          "duration_ms": null,
          "tokens_per_sec": null,
          "error": null
        },
        {
          "prompt_id": null,
          "run_idx": 4,
          "phase": "5q",
          "duration_ms": null,
          "tokens_per_sec": null,
          "error": null
        }
      ]
    },
    {
      "cell_id": "pavilion:weeyuga:qwen3:8b",
      "n_calls": 6,
      "n_errors": 2,
      "duration_ms_mean": null,
      "duration_ms_p50": null,
      "duration_ms_p95": null,
      "tokens_per_sec_mean": null,
      "tokens_per_sec_p50": null,
      "tokens_per_sec_p95": null,
      "tokens_per_sec_max": null,
      "cold_warm_split": {
        "cold": {
          "n_calls": 2,
          "duration_ms_mean": null,
          "duration_ms_p50": null,
          "tokens_per_sec_mean": null
        },
        "warm": {
          "n_calls": 4,
          "duration_ms_mean": null,
          "duration_ms_p50": null,
          "tokens_per_sec_mean": null
        }
      },
      "per_prompt": {
        "unknown": {
          "n_calls": 6,
          "n_errors": 2,
          "duration_ms_mean": null,
          "duration_ms_p50": null,
          "tokens_per_sec_mean": null,
          "tokens_per_sec_p50": null
        }
      },
      "chars_split": null,
      "timeline": [
        {
          "prompt_id": null,
          "run_idx": 0,
          "phase": "hello",
          "duration_ms": null,
          "tokens_per_sec": null,
          "error": "TimeoutError('timed out')"
        },
        {
          "prompt_id": null,
          "run_idx": 0,
          "phase": "5q",
          "duration_ms": null,
          "tokens_per_sec": null,
          "error": null
        },
        {
          "prompt_id": null,
          "run_idx": 1,
          "phase": "5q",
          "duration_ms": null,
          "tokens_per_sec": null,
          "error": "TimeoutError('timed out')"
        },
        {
          "prompt_id": null,
          "run_idx": 2,
          "phase": "5q",
          "duration_ms": null,
          "tokens_per_sec": null,
          "error": null
        },
        {
          "prompt_id": null,
          "run_idx": 3,
          "phase": "5q",
          "duration_ms": null,
          "tokens_per_sec": null,
          "error": null
        },
        {
          "prompt_id": null,
          "run_idx": 4,
          "phase": "5q",
          "duration_ms": null,
          "tokens_per_sec": null,
          "error": null
        }
      ]
    },
    {
      "cell_id": "pavilion:weeyuga:qwen3:4b",
      "n_calls": 6,
      "n_errors": 0,
      "duration_ms_mean": null,
      "duration_ms_p50": null,
      "duration_ms_p95": null,
      "tokens_per_sec_mean": null,
      "tokens_per_sec_p50": null,
      "tokens_per_sec_p95": null,
      "tokens_per_sec_max": null,
      "cold_warm_split": {
        "cold": {
          "n_calls": 2,
          "duration_ms_mean": null,
          "duration_ms_p50": null,
          "tokens_per_sec_mean": null
        },
        "warm": {
          "n_calls": 4,
          "duration_ms_mean": null,
          "duration_ms_p50": null,
          "tokens_per_sec_mean": null
        }
      },
      "per_prompt": {
        "unknown": {
          "n_calls": 6,
          "n_errors": 0,
          "duration_ms_mean": null,
          "duration_ms_p50": null,
          "tokens_per_sec_mean": null,
          "tokens_per_sec_p50": null
        }
      },
      "chars_split": null,
      "timeline": [
        {
          "prompt_id": null,
          "run_idx": 0,
          "phase": "hello",
          "duration_ms": null,
          "tokens_per_sec": null,
          "error": null
        },
        {
          "prompt_id": null,
          "run_idx": 0,
          "phase": "5q",
          "duration_ms": null,
          "tokens_per_sec": null,
          "error": null
        },
        {
          "prompt_id": null,
          "run_idx": 1,
          "phase": "5q",
          "duration_ms": null,
          "tokens_per_sec": null,
          "error": null
        },
        {
          "prompt_id": null,
          "run_idx": 2,
          "phase": "5q",
          "duration_ms": null,
          "tokens_per_sec": null,
          "error": null
        },
        {
          "prompt_id": null,
          "run_idx": 3,
          "phase": "5q",
          "duration_ms": null,
          "tokens_per_sec": null,
          "error": null
        },
        {
          "prompt_id": null,
          "run_idx": 4,
          "phase": "5q",
          "duration_ms": null,
          "tokens_per_sec": null,
          "error": null
        }
      ]
    },
    {
      "cell_id": "pavilion:weeyuga:qwen2.5:3b",
      "n_calls": 6,
      "n_errors": 0,
      "duration_ms_mean": null,
      "duration_ms_p50": null,
      "duration_ms_p95": null,
      "tokens_per_sec_mean": null,
      "tokens_per_sec_p50": null,
      "tokens_per_sec_p95": null,
      "tokens_per_sec_max": null,
      "cold_warm_split": {
        "cold": {
          "n_calls": 2,
          "duration_ms_mean": null,
          "duration_ms_p50": null,
          "tokens_per_sec_mean": null
        },
        "warm": {
          "n_calls": 4,
          "duration_ms_mean": null,
          "duration_ms_p50": null,
          "tokens_per_sec_mean": null
        }
      },
      "per_prompt": {
        "unknown": {
          "n_calls": 6,
          "n_errors": 0,
          "duration_ms_mean": null,
          "duration_ms_p50": null,
          "tokens_per_sec_mean": null,
          "tokens_per_sec_p50": null
        }
      },
      "chars_split": null,
      "timeline": [
        {
          "prompt_id": null,
          "run_idx": 0,
          "phase": "hello",
          "duration_ms": null,
          "tokens_per_sec": null,
          "error": null
        },
        {
          "prompt_id": null,
          "run_idx": 0,
          "phase": "5q",
          "duration_ms": null,
          "tokens_per_sec": null,
          "error": null
        },
        {
          "prompt_id": null,
          "run_idx": 1,
          "phase": "5q",
          "duration_ms": null,
          "tokens_per_sec": null,
          "error": null
        },
        {
          "prompt_id": null,
          "run_idx": 2,
          "phase": "5q",
          "duration_ms": null,
          "tokens_per_sec": null,
          "error": null
        },
        {
          "prompt_id": null,
          "run_idx": 3,
          "phase": "5q",
          "duration_ms": null,
          "tokens_per_sec": null,
          "error": null
        },
        {
          "prompt_id": null,
          "run_idx": 4,
          "phase": "5q",
          "duration_ms": null,
          "tokens_per_sec": null,
          "error": null
        }
      ]
    },
    {
      "cell_id": "pavilion:weeyuga:qwen2.5-coder:1.5b",
      "n_calls": 6,
      "n_errors": 0,
      "duration_ms_mean": null,
      "duration_ms_p50": null,
      "duration_ms_p95": null,
      "tokens_per_sec_mean": null,
      "tokens_per_sec_p50": null,
      "tokens_per_sec_p95": null,
      "tokens_per_sec_max": null,
      "cold_warm_split": {
        "cold": {
          "n_calls": 2,
          "duration_ms_mean": null,
          "duration_ms_p50": null,
          "tokens_per_sec_mean": null
        },
        "warm": {
          "n_calls": 4,
          "duration_ms_mean": null,
          "duration_ms_p50": null,
          "tokens_per_sec_mean": null
        }
      },
      "per_prompt": {
        "unknown": {
          "n_calls": 6,
          "n_errors": 0,
          "duration_ms_mean": null,
          "duration_ms_p50": null,
          "tokens_per_sec_mean": null,
          "tokens_per_sec_p50": null
        }
      },
      "chars_split": null,
      "timeline": [
        {
          "prompt_id": null,
          "run_idx": 0,
          "phase": "hello",
          "duration_ms": null,
          "tokens_per_sec": null,
          "error": null
        },
        {
          "prompt_id": null,
          "run_idx": 0,
          "phase": "5q",
          "duration_ms": null,
          "tokens_per_sec": null,
          "error": null
        },
        {
          "prompt_id": null,
          "run_idx": 1,
          "phase": "5q",
          "duration_ms": null,
          "tokens_per_sec": null,
          "error": null
        },
        {
          "prompt_id": null,
          "run_idx": 2,
          "phase": "5q",
          "duration_ms": null,
          "tokens_per_sec": null,
          "error": null
        },
        {
          "prompt_id": null,
          "run_idx": 3,
          "phase": "5q",
          "duration_ms": null,
          "tokens_per_sec": null,
          "error": null
        },
        {
          "prompt_id": null,
          "run_idx": 4,
          "phase": "5q",
          "duration_ms": null,
          "tokens_per_sec": null,
          "error": null
        }
      ]
    },
    {
      "cell_id": "pavilion:weeyuga:qwen2.5-coder:0.5b",
      "n_calls": 6,
      "n_errors": 0,
      "duration_ms_mean": null,
      "duration_ms_p50": null,
      "duration_ms_p95": null,
      "tokens_per_sec_mean": null,
      "tokens_per_sec_p50": null,
      "tokens_per_sec_p95": null,
      "tokens_per_sec_max": null,
      "cold_warm_split": {
        "cold": {
          "n_calls": 2,
          "duration_ms_mean": null,
          "duration_ms_p50": null,
          "tokens_per_sec_mean": null
        },
        "warm": {
          "n_calls": 4,
          "duration_ms_mean": null,
          "duration_ms_p50": null,
          "tokens_per_sec_mean": null
        }
      },
      "per_prompt": {
        "unknown": {
          "n_calls": 6,
          "n_errors": 0,
          "duration_ms_mean": null,
          "duration_ms_p50": null,
          "tokens_per_sec_mean": null,
          "tokens_per_sec_p50": null
        }
      },
      "chars_split": null,
      "timeline": [
        {
          "prompt_id": null,
          "run_idx": 0,
          "phase": "hello",
          "duration_ms": null,
          "tokens_per_sec": null,
          "error": null
        },
        {
          "prompt_id": null,
          "run_idx": 0,
          "phase": "5q",
          "duration_ms": null,
          "tokens_per_sec": null,
          "error": null
        },
        {
          "prompt_id": null,
          "run_idx": 1,
          "phase": "5q",
          "duration_ms": null,
          "tokens_per_sec": null,
          "error": null
        },
        {
          "prompt_id": null,
          "run_idx": 2,
          "phase": "5q",
          "duration_ms": null,
          "tokens_per_sec": null,
          "error": null
        },
        {
          "prompt_id": null,
          "run_idx": 3,
          "phase": "5q",
          "duration_ms": null,
          "tokens_per_sec": null,
          "error": null
        },
        {
          "prompt_id": null,
          "run_idx": 4,
          "phase": "5q",
          "duration_ms": null,
          "tokens_per_sec": null,
          "error": null
        }
      ]
    }
  ],
  "n_calls_total": 96,
  "n_errors_total": 12,
  "chart_spec": {
    "kind": "bar",
    "x": "cell",
    "y": "tokens_per_sec_mean",
    "series": [
      "qwen3.5:4b",
      "qwen3.5:35b-a3b-uncensored-iq1m",
      "qwen3.5:35b-a3b-iq2s",
      "qwen3.5:9b-q6k",
      "qwen3.5:9b-q4km",
      "qwen3.5:2b",
      "qwen3.5:0.8b",
      "qwen3.5:9b",
      "qwen2.5-coder:14b",
      "qwen2.5-coder:3b",
      "qwen3:14b",
      "qwen3:8b",
      "qwen3:4b",
      "qwen2.5:3b",
      "qwen2.5-coder:1.5b",
      "qwen2.5-coder:0.5b"
    ],
    "data_url": "/data/ad057f5b-ed3f-4a95-a38e-361be310ffd6/run.jsonl"
  },
  "site_grade": "archive-only",
  "site_grade_reason": "error rate 12/96",
  "raw_data_urls": {
    "jsonl": "/data/ad057f5b-ed3f-4a95-a38e-361be310ffd6/run.jsonl",
    "log": null,
    "md": null,
    "metadata": "/data/ad057f5b-ed3f-4a95-a38e-361be310ffd6/metadata.json",
    "gitea_dir": "https://git.weeyuga.com/slobodanmargetic988/weeyuga-benchmarks-public/src/branch/main/runs/ad057f5b-ed3f-4a95-a38e-361be310ffd6/"
  },
  "synthesis_doc": "SITE_DATA_AUDIT_AND_MIGRATION_PLAN_2026-05-06.md",
  "synthesis_docs_all": [
    "SITE_DATA_AUDIT_AND_MIGRATION_PLAN_2026-05-06.md",
    "PAVILION_LLAMACPP_VS_OLLAMA_v0_INCOMPLETE.md"
  ],
  "related_ids": [],
  "status": "complete",
  "visibility": "draft",
  "cells_full": [
    {
      "cell_id": "pavilion:weeyuga:qwen3.5:4b",
      "n_calls": 6,
      "n_errors": 0,
      "duration_ms_mean": null,
      "duration_ms_p50": null,
      "duration_ms_p95": null,
      "tokens_per_sec_mean": null,
      "tokens_per_sec_p50": null,
      "tokens_per_sec_p95": null,
      "tokens_per_sec_max": null,
      "cold_warm_split": {
        "cold": {
          "n_calls": 2,
          "duration_ms_mean": null,
          "duration_ms_p50": null,
          "tokens_per_sec_mean": null
        },
        "warm": {
          "n_calls": 4,
          "duration_ms_mean": null,
          "duration_ms_p50": null,
          "tokens_per_sec_mean": null
        }
      },
      "per_prompt": {
        "unknown": {
          "n_calls": 6,
          "n_errors": 0,
          "duration_ms_mean": null,
          "duration_ms_p50": null,
          "tokens_per_sec_mean": null,
          "tokens_per_sec_p50": null
        }
      },
      "chars_split": null,
      "timeline": [
        {
          "prompt_id": null,
          "run_idx": 0,
          "phase": "hello",
          "duration_ms": null,
          "tokens_per_sec": null,
          "error": null
        },
        {
          "prompt_id": null,
          "run_idx": 0,
          "phase": "5q",
          "duration_ms": null,
          "tokens_per_sec": null,
          "error": null
        },
        {
          "prompt_id": null,
          "run_idx": 1,
          "phase": "5q",
          "duration_ms": null,
          "tokens_per_sec": null,
          "error": null
        },
        {
          "prompt_id": null,
          "run_idx": 2,
          "phase": "5q",
          "duration_ms": null,
          "tokens_per_sec": null,
          "error": null
        },
        {
          "prompt_id": null,
          "run_idx": 3,
          "phase": "5q",
          "duration_ms": null,
          "tokens_per_sec": null,
          "error": null
        },
        {
          "prompt_id": null,
          "run_idx": 4,
          "phase": "5q",
          "duration_ms": null,
          "tokens_per_sec": null,
          "error": null
        }
      ],
      "calls": [
        {
          "type": "call",
          "ts_utc": "2026-04-29T09:51:53Z",
          "cell_id": "pavilion:weeyuga:qwen3.5:4b",
          "model": "qwen3.5:4b",
          "phase": "hello",
          "question_id": "hello_check",
          "run_idx": 0,
          "duration_seconds": 7.103,
          "prompt_tokens": 16,
          "completion_tokens": 251,
          "tokens_per_second": 35.34,
          "finish_reason": "stop",
          "weeyuga_meta": null,
          "status_code": 200,
          "response_chars": 62,
          "response_preview": "Hi there! I'm glad to help! What would you like to do today? \ud83d\ude0a",
          "required_markers": [],
          "markers_hit": [],
          "marker_hit_rate": null,
          "format_rule": "",
          "format_ok": null,
          "usable_answer": true,
          "error": null
        },
        {
          "type": "call",
          "ts_utc": "2026-04-29T09:52:21Z",
          "cell_id": "pavilion:weeyuga:qwen3.5:4b",
          "model": "qwen3.5:4b",
          "phase": "5q",
          "question_id": "disk_guard_bash",
          "run_idx": 0,
          "duration_seconds": 28.241,
          "prompt_tokens": 71,
          "completion_tokens": 1124,
          "tokens_per_second": 39.8,
          "finish_reason": "stop",
          "weeyuga_meta": null,
          "status_code": 200,
          "response_chars": 246,
          "response_preview": "```bash\n#!/bin/bash\n\n# Check disk usage for /\nif [ -d \"/\" ]; then\n    usage=$(df -P / | awk '{print $5}')\n    if [ \"$usage\" -gt 85 ]; then\n        echo \"WARNING: Disk usage for / is at ${usage}% (above 85% threshold)\"\n        exit 1\n    fi\n",
          "required_markers": [
            "#!/usr/bin/env bash",
            "df -P /",
            "85",
            "exit 1"
          ],
          "markers_hit": [
            "df -P /",
            "85",
            "exit 1"
          ],
          "marker_hit_rate": 0.75,
          "format_rule": "bash_code",
          "format_ok": false,
          "usable_answer": true,
          "error": null
        },
        {
          "type": "call",
          "ts_utc": "2026-04-29T09:53:11Z",
          "cell_id": "pavilion:weeyuga:qwen3.5:4b",
          "model": "qwen3.5:4b",
          "phase": "5q",
          "question_id": "ipv4_python_tests",
          "run_idx": 1,
          "duration_seconds": 49.606,
          "prompt_tokens": 55,
          "completion_tokens": 2048,
          "tokens_per_second": 41.29,
          "finish_reason": "length",
          "weeyuga_meta": null,
          "status_code": 200,
          "response_chars": 1630,
          "response_preview": "```python\ndef is_valid_ipv4(value: str) -> bool:\n    \"\"\"\n    Check if a string is a valid IPv4 address.\n    \"\"\"\n    if not isinstance(value, str):\n        return False\n    parts = value.split('.')\n    if len(parts) != 4:\n        return Fals",
          "required_markers": [
            "def is_valid_ipv4",
            "def test_",
            "assert",
            "split('.')"
          ],
          "markers_hit": [
            "def is_valid_ipv4",
            "split('.')"
          ],
          "marker_hit_rate": 0.5,
          "format_rule": "python_code",
          "format_ok": false,
          "usable_answer": true,
          "error": null
        },
        {
          "type": "call",
          "ts_utc": "2026-04-29T09:53:38Z",
          "cell_id": "pavilion:weeyuga:qwen3.5:4b",
          "model": "qwen3.5:4b",
          "phase": "5q",
          "question_id": "nginx_safe_reload",
          "run_idx": 2,
          "duration_seconds": 26.721,
          "prompt_tokens": 39,
          "completion_tokens": 1076,
          "tokens_per_second": 40.27,
          "finish_reason": "stop",
          "weeyuga_meta": null,
          "status_code": 200,
          "response_chars": 116,
          "response_preview": "cp /etc/nginx/nginx.conf /etc/nginx/nginx.conf.bak\nnginx -t\nif [ $? -eq 0 ]; then nginx -s reload; else nginx -t; fi",
          "required_markers": [
            "cp /etc/nginx/nginx.conf",
            "nginx -t",
            "systemctl reload nginx",
            "&&"
          ],
          "markers_hit": [
            "cp /etc/nginx/nginx.conf",
            "nginx -t"
          ],
          "marker_hit_rate": 0.5,
          "format_rule": "shell_lines",
          "format_ok": true,
          "usable_answer": true,
          "error": null
        },
        {
          "type": "call",
          "ts_utc": "2026-04-29T09:54:27Z",
          "cell_id": "pavilion:weeyuga:qwen3.5:4b",
          "model": "qwen3.5:4b",
          "phase": "5q",
          "question_id": "yaml_cli_plan",
          "run_idx": 3,
          "duration_seconds": 49.969,
          "prompt_tokens": 43,
          "completion_tokens": 2048,
          "tokens_per_second": 40.99,
          "finish_reason": "length",
          "weeyuga_meta": null,
          "status_code": 200,
          "response_chars": 7628,
          "response_preview": "# Python CLI for Git-Scanned YAML Schema Validation\n\n```python\n#!/usr/bin/env python3\n\"\"\"\nYAML Schema Validator CLI\nScans git repos for changed YAML files and validates against JSON schemas.\n\"\"\"\n\nimport sys\nimport os\nimport json\nimport yaml",
          "required_markers": [
            "1.",
            "2.",
            "3.",
            "4.",
            "JSON schema",
            "git"
          ],
          "markers_hit": [
            "JSON schema",
            "git"
          ],
          "marker_hit_rate": 0.333,
          "format_rule": "four_numbered_steps",
          "format_ok": false,
          "usable_answer": true,
          "error": null
        },
        {
          "type": "call",
          "ts_utc": "2026-04-29T09:54:56Z",
          "cell_id": "pavilion:weeyuga:qwen3.5:4b",
          "model": "qwen3.5:4b",
          "phase": "5q",
          "question_id": "ssh_lockout_triage",
          "run_idx": 4,
          "duration_seconds": 28.774,
          "prompt_tokens": 56,
          "completion_tokens": 1124,
          "tokens_per_second": 39.06,
          "finish_reason": "stop",
          "weeyuga_meta": null,
          "status_code": 200,
          "response_chars": 468,
          "response_preview": "- Review `sshd_config` for misconfigurations that might cause permission denied errors.\n- Review `authorized_keys` permissions to ensure only trusted keys are allowed to connect.\n- Review `PasswordAuthentication` settings to ensure password",
          "required_markers": [
            "sshd_config",
            "authorized_keys",
            "journalctl",
            "rollback",
            "PasswordAuthentication"
          ],
          "markers_hit": [
            "sshd_config",
            "authorized_keys",
            "journalctl",
            "rollback",
            "PasswordAuthentication"
          ],
          "marker_hit_rate": 1.0,
          "format_rule": "five_bullets",
          "format_ok": true,
          "usable_answer": true,
          "error": null
        }
      ]
    },
    {
      "cell_id": "pavilion:weeyuga:qwen3.5:35b-a3b-uncensored-iq1m",
      "n_calls": 6,
      "n_errors": 0,
      "duration_ms_mean": null,
      "duration_ms_p50": null,
      "duration_ms_p95": null,
      "tokens_per_sec_mean": null,
      "tokens_per_sec_p50": null,
      "tokens_per_sec_p95": null,
      "tokens_per_sec_max": null,
      "cold_warm_split": {
        "cold": {
          "n_calls": 2,
          "duration_ms_mean": null,
          "duration_ms_p50": null,
          "tokens_per_sec_mean": null
        },
        "warm": {
          "n_calls": 4,
          "duration_ms_mean": null,
          "duration_ms_p50": null,
          "tokens_per_sec_mean": null
        }
      },
      "per_prompt": {
        "unknown": {
          "n_calls": 6,
          "n_errors": 0,
          "duration_ms_mean": null,
          "duration_ms_p50": null,
          "tokens_per_sec_mean": null,
          "tokens_per_sec_p50": null
        }
      },
      "chars_split": null,
      "timeline": [
        {
          "prompt_id": null,
          "run_idx": 0,
          "phase": "hello",
          "duration_ms": null,
          "tokens_per_sec": null,
          "error": null
        },
        {
          "prompt_id": null,
          "run_idx": 0,
          "phase": "5q",
          "duration_ms": null,
          "tokens_per_sec": null,
          "error": null
        },
        {
          "prompt_id": null,
          "run_idx": 1,
          "phase": "5q",
          "duration_ms": null,
          "tokens_per_sec": null,
          "error": null
        },
        {
          "prompt_id": null,
          "run_idx": 2,
          "phase": "5q",
          "duration_ms": null,
          "tokens_per_sec": null,
          "error": null
        },
        {
          "prompt_id": null,
          "run_idx": 3,
          "phase": "5q",
          "duration_ms": null,
          "tokens_per_sec": null,
          "error": null
        },
        {
          "prompt_id": null,
          "run_idx": 4,
          "phase": "5q",
          "duration_ms": null,
          "tokens_per_sec": null,
          "error": null
        }
      ],
      "calls": [
        {
          "type": "call",
          "ts_utc": "2026-04-29T09:55:15Z",
          "cell_id": "pavilion:weeyuga:qwen3.5:35b-a3b-uncensored-iq1m",
          "model": "qwen3.5:35b-a3b-uncensored-iq1m",
          "phase": "hello",
          "question_id": "hello_check",
          "run_idx": 0,
          "duration_seconds": 18.944,
          "prompt_tokens": 16,
          "completion_tokens": 750,
          "tokens_per_second": 39.59,
          "finish_reason": "stop",
          "weeyuga_meta": null,
          "status_code": 200,
          "response_chars": 185,
          "response_preview": "Hello! I'm glad to help you. What would you like to do today? Whether you need assistance with a specific task, need help with something, or just want to chat, I'm here to assist you! \ud83d\ude0a",
          "required_markers": [],
          "markers_hit": [],
          "marker_hit_rate": null,
          "format_rule": "",
          "format_ok": null,
          "usable_answer": true,
          "error": null
        },
        {
          "type": "call",
          "ts_utc": "2026-04-29T09:55:42Z",
          "cell_id": "pavilion:weeyuga:qwen3.5:35b-a3b-uncensored-iq1m",
          "model": "qwen3.5:35b-a3b-uncensored-iq1m",
          "phase": "5q",
          "question_id": "disk_guard_bash",
          "run_idx": 0,
          "duration_seconds": 27.028,
          "prompt_tokens": 71,
          "completion_tokens": 1123,
          "tokens_per_second": 41.55,
          "finish_reason": "stop",
          "weeyuga_meta": null,
          "status_code": 200,
          "response_chars": 242,
          "response_preview": "#!/bin/bash\n\n# Check disk usage for /\nif [ -d \"/\" ]; then\n    usage=$(df -P / | awk '{print $5}')\n    if [ \"$usage\" -gt 85 ]; then\n        echo \"WARNING: Disk usage for / is at ${usage}% (above 85% threshold)\"\n        exit 1\n    fi\nfi\n\nexit",
          "required_markers": [
            "#!/usr/bin/env bash",
            "df -P /",
            "85",
            "exit 1"
          ],
          "markers_hit": [
            "df -P /",
            "85",
            "exit 1"
          ],
          "marker_hit_rate": 0.75,
          "format_rule": "bash_code",
          "format_ok": false,
          "usable_answer": true,
          "error": null
        },
        {
          "type": "call",
          "ts_utc": "2026-04-29T09:56:02Z",
          "cell_id": "pavilion:weeyuga:qwen3.5:35b-a3b-uncensored-iq1m",
          "model": "qwen3.5:35b-a3b-uncensored-iq1m",
          "phase": "5q",
          "question_id": "ipv4_python_tests",
          "run_idx": 1,
          "duration_seconds": 19.353,
          "prompt_tokens": 55,
          "completion_tokens": 776,
          "tokens_per_second": 40.1,
          "finish_reason": "stop",
          "weeyuga_meta": null,
          "status_code": 200,
          "response_chars": 665,
          "response_preview": "```python\ndef is_valid_ipv4(value: str) -> bool:\n    \"\"\"\n    Check if a string represents a valid IPv4 address.\n    \"\"\"\n    if not isinstance(value, str):\n        return False\n    parts = value.split('.')\n    if len(parts) != 4:\n        ret",
          "required_markers": [
            "def is_valid_ipv4",
            "def test_",
            "assert",
            "split('.')"
          ],
          "markers_hit": [
            "def is_valid_ipv4",
            "def test_",
            "assert",
            "split('.')"
          ],
          "marker_hit_rate": 1.0,
          "format_rule": "python_code",
          "format_ok": true,
          "usable_answer": true,
          "error": null
        },
        {
          "type": "call",
          "ts_utc": "2026-04-29T09:56:27Z",
          "cell_id": "pavilion:weeyuga:qwen3.5:35b-a3b-uncensored-iq1m",
          "model": "qwen3.5:35b-a3b-uncensored-iq1m",
          "phase": "5q",
          "question_id": "nginx_safe_reload",
          "run_idx": 2,
          "duration_seconds": 25.905,
          "prompt_tokens": 39,
          "completion_tokens": 1073,
          "tokens_per_second": 41.42,
          "finish_reason": "stop",
          "weeyuga_meta": null,
          "status_code": 200,
          "response_chars": 104,
          "response_preview": "cp /etc/nginx/nginx.conf /etc/nginx/nginx.conf.bak\nnginx -t\nif [ $? -eq 0 ]; then\n    nginx -s reload\nfi",
          "required_markers": [
            "cp /etc/nginx/nginx.conf",
            "nginx -t",
            "systemctl reload nginx",
            "&&"
          ],
          "markers_hit": [
            "cp /etc/nginx/nginx.conf",
            "nginx -t"
          ],
          "marker_hit_rate": 0.5,
          "format_rule": "shell_lines",
          "format_ok": true,
          "usable_answer": true,
          "error": null
        },
        {
          "type": "call",
          "ts_utc": "2026-04-29T09:57:17Z",
          "cell_id": "pavilion:weeyuga:qwen3.5:35b-a3b-uncensored-iq1m",
          "model": "qwen3.5:35b-a3b-uncensored-iq1m",
          "phase": "5q",
          "question_id": "yaml_cli_plan",
          "run_idx": 3,
          "duration_seconds": 49.427,
          "prompt_tokens": 43,
          "completion_tokens": 2048,
          "tokens_per_second": 41.43,
          "finish_reason": "length",
          "weeyuga_meta": null,
          "status_code": 200,
          "response_chars": 9007,
          "response_preview": "# Python CLI for Git-Scanned YAML Schema Validation\n\n```python\n#!/usr/bin/env python3\n\"\"\"\nCLI for scanning git repos for changed YAML files and validating against JSON schemas.\n\"\"\"\n\nimport argparse\nimport json\nimport os\nimport sys\nfrom path",
          "required_markers": [
            "1.",
            "2.",
            "3.",
            "4.",
            "JSON schema",
            "git"
          ],
          "markers_hit": [
            "JSON schema",
            "git"
          ],
          "marker_hit_rate": 0.333,
          "format_rule": "four_numbered_steps",
          "format_ok": false,
          "usable_answer": true,
          "error": null
        },
        {
          "type": "call",
          "ts_utc": "2026-04-29T09:57:44Z",
          "cell_id": "pavilion:weeyuga:qwen3.5:35b-a3b-uncensored-iq1m",
          "model": "qwen3.5:35b-a3b-uncensored-iq1m",
          "phase": "5q",
          "question_id": "ssh_lockout_triage",
          "run_idx": 4,
          "duration_seconds": 27.269,
          "prompt_tokens": 56,
          "completion_tokens": 1108,
          "tokens_per_second": 40.63,
          "finish_reason": "stop",
          "weeyuga_meta": null,
          "status_code": 200,
          "response_chars": 354,
          "response_preview": "- Review `sshd_config` to ensure security settings are properly configured.\n- Verify `authorized_keys` permissions to prevent unauthorized access.\n- Check `PasswordAuthentication` settings to ensure secure password handling.\n- Review `journ",
          "required_markers": [
            "sshd_config",
            "authorized_keys",
            "journalctl",
            "rollback",
            "PasswordAuthentication"
          ],
          "markers_hit": [
            "sshd_config",
            "authorized_keys",
            "journalctl",
            "rollback",
            "PasswordAuthentication"
          ],
          "marker_hit_rate": 1.0,
          "format_rule": "five_bullets",
          "format_ok": true,
          "usable_answer": true,
          "error": null
        }
      ]
    },
    {
      "cell_id": "pavilion:weeyuga:qwen3.5:35b-a3b-iq2s",
      "n_calls": 6,
      "n_errors": 0,
      "duration_ms_mean": null,
      "duration_ms_p50": null,
      "duration_ms_p95": null,
      "tokens_per_sec_mean": null,
      "tokens_per_sec_p50": null,
      "tokens_per_sec_p95": null,
      "tokens_per_sec_max": null,
      "cold_warm_split": {
        "cold": {
          "n_calls": 2,
          "duration_ms_mean": null,
          "duration_ms_p50": null,
          "tokens_per_sec_mean": null
        },
        "warm": {
          "n_calls": 4,
          "duration_ms_mean": null,
          "duration_ms_p50": null,
          "tokens_per_sec_mean": null
        }
      },
      "per_prompt": {
        "unknown": {
          "n_calls": 6,
          "n_errors": 0,
          "duration_ms_mean": null,
          "duration_ms_p50": null,
          "tokens_per_sec_mean": null,
          "tokens_per_sec_p50": null
        }
      },
      "chars_split": null,
      "timeline": [
        {
          "prompt_id": null,
          "run_idx": 0,
          "phase": "hello",
          "duration_ms": null,
          "tokens_per_sec": null,
          "error": null
        },
        {
          "prompt_id": null,
          "run_idx": 0,
          "phase": "5q",
          "duration_ms": null,
          "tokens_per_sec": null,
          "error": null
        },
        {
          "prompt_id": null,
          "run_idx": 1,
          "phase": "5q",
          "duration_ms": null,
          "tokens_per_sec": null,
          "error": null
        },
        {
          "prompt_id": null,
          "run_idx": 2,
          "phase": "5q",
          "duration_ms": null,
          "tokens_per_sec": null,
          "error": null
        },
        {
          "prompt_id": null,
          "run_idx": 3,
          "phase": "5q",
          "duration_ms": null,
          "tokens_per_sec": null,
          "error": null
        },
        {
          "prompt_id": null,
          "run_idx": 4,
          "phase": "5q",
          "duration_ms": null,
          "tokens_per_sec": null,
          "error": null
        }
      ],
      "calls": [
        {
          "type": "call",
          "ts_utc": "2026-04-29T09:57:48Z",
          "cell_id": "pavilion:weeyuga:qwen3.5:35b-a3b-iq2s",
          "model": "qwen3.5:35b-a3b-iq2s",
          "phase": "hello",
          "question_id": "hello_check",
          "run_idx": 0,
          "duration_seconds": 3.684,
          "prompt_tokens": 16,
          "completion_tokens": 134,
          "tokens_per_second": 36.37,
          "finish_reason": "stop",
          "weeyuga_meta": null,
          "status_code": 200,
          "response_chars": 147,
          "response_preview": "Hello! I'd love to help you. What would you like to work on? Whether it's writing, coding, problem-solving, or just chatting, I'm here to assist! \ud83d\ude0a",
          "required_markers": [],
          "markers_hit": [],
          "marker_hit_rate": null,
          "format_rule": "",
          "format_ok": null,
          "usable_answer": true,
          "error": null
        },
        {
          "type": "call",
          "ts_utc": "2026-04-29T09:58:15Z",
          "cell_id": "pavilion:weeyuga:qwen3.5:35b-a3b-iq2s",
          "model": "qwen3.5:35b-a3b-iq2s",
          "phase": "5q",
          "question_id": "disk_guard_bash",
          "run_idx": 0,
          "duration_seconds": 27.061,
          "prompt_tokens": 71,
          "completion_tokens": 1121,
          "tokens_per_second": 41.42,
          "finish_reason": "stop",
          "weeyuga_meta": null,
          "status_code": 200,
          "response_chars": 239,
          "response_preview": "#!/bin/bash\n\n# Check disk usage for /\nif [ -d \"/\" ]; then\n    disk_usage=$(df -P / | awk '{print $5}')\n    if [ \"$disk_usage\" -gt 85 ]; then\n        echo \"WARNING: Disk usage for / is at ${disk_usage}% (above 85%)\"\n        exit 1\n    fi\nfi",
          "required_markers": [
            "#!/usr/bin/env bash",
            "df -P /",
            "85",
            "exit 1"
          ],
          "markers_hit": [
            "df -P /",
            "85",
            "exit 1"
          ],
          "marker_hit_rate": 0.75,
          "format_rule": "bash_code",
          "format_ok": false,
          "usable_answer": true,
          "error": null
        },
        {
          "type": "call",
          "ts_utc": "2026-04-29T09:58:45Z",
          "cell_id": "pavilion:weeyuga:qwen3.5:35b-a3b-iq2s",
          "model": "qwen3.5:35b-a3b-iq2s",
          "phase": "5q",
          "question_id": "ipv4_python_tests",
          "run_idx": 1,
          "duration_seconds": 29.886,
          "prompt_tokens": 55,
          "completion_tokens": 1232,
          "tokens_per_second": 41.22,
          "finish_reason": "stop",
          "weeyuga_meta": null,
          "status_code": 200,
          "response_chars": 613,
          "response_preview": "```python\ndef is_valid_ipv4(value: str) -> bool:\n    \"\"\"Check if a string is a valid IPv4 address.\"\"\"\n    if not isinstance(value, str):\n        return False\n    parts = value.split('.')\n    if len(parts) != 4:\n        return False\n    for ",
          "required_markers": [
            "def is_valid_ipv4",
            "def test_",
            "assert",
            "split('.')"
          ],
          "markers_hit": [
            "def is_valid_ipv4",
            "def test_",
            "assert",
            "split('.')"
          ],
          "marker_hit_rate": 1.0,
          "format_rule": "python_code",
          "format_ok": true,
          "usable_answer": true,
          "error": null
        },
        {
          "type": "call",
          "ts_utc": "2026-04-29T09:59:11Z",
          "cell_id": "pavilion:weeyuga:qwen3.5:35b-a3b-iq2s",
          "model": "qwen3.5:35b-a3b-iq2s",
          "phase": "5q",
          "question_id": "nginx_safe_reload",
          "run_idx": 2,
          "duration_seconds": 25.688,
          "prompt_tokens": 39,
          "completion_tokens": 1060,
          "tokens_per_second": 41.26,
          "finish_reason": "stop",
          "weeyuga_meta": null,
          "status_code": 200,
          "response_chars": 75,
          "response_preview": "cp /etc/nginx/nginx.conf /etc/nginx/nginx.conf.bak\nnginx -t\nnginx -s reload",
          "required_markers": [
            "cp /etc/nginx/nginx.conf",
            "nginx -t",
            "systemctl reload nginx",
            "&&"
          ],
          "markers_hit": [
            "cp /etc/nginx/nginx.conf",
            "nginx -t"
          ],
          "marker_hit_rate": 0.5,
          "format_rule": "shell_lines",
          "format_ok": true,
          "usable_answer": true,
          "error": null
        },
        {
          "type": "call",
          "ts_utc": "2026-04-29T10:00:01Z",
          "cell_id": "pavilion:weeyuga:qwen3.5:35b-a3b-iq2s",
          "model": "qwen3.5:35b-a3b-iq2s",
          "phase": "5q",
          "question_id": "yaml_cli_plan",
          "run_idx": 3,
          "duration_seconds": 50.264,
          "prompt_tokens": 43,
          "completion_tokens": 2048,
          "tokens_per_second": 40.74,
          "finish_reason": "length",
          "weeyuga_meta": null,
          "status_code": 200,
          "response_chars": 7205,
          "response_preview": "```python\n#!/usr/bin/env python3\n\"\"\"\nYAML Schema Validator CLI\nScans git repos for changed YAML files and validates against JSON schemas.\n\"\"\"\n\nimport argparse\nimport json\nimport os\nimport sys\nfrom pathlib import Path\nfrom typing import List",
          "required_markers": [
            "1.",
            "2.",
            "3.",
            "4.",
            "JSON schema",
            "git"
          ],
          "markers_hit": [
            "JSON schema",
            "git"
          ],
          "marker_hit_rate": 0.333,
          "format_rule": "four_numbered_steps",
          "format_ok": false,
          "usable_answer": true,
          "error": null
        },
        {
          "type": "call",
          "ts_utc": "2026-04-29T10:00:28Z",
          "cell_id": "pavilion:weeyuga:qwen3.5:35b-a3b-iq2s",
          "model": "qwen3.5:35b-a3b-iq2s",
          "phase": "5q",
          "question_id": "ssh_lockout_triage",
          "run_idx": 4,
          "duration_seconds": 27.352,
          "prompt_tokens": 56,
          "completion_tokens": 1116,
          "tokens_per_second": 40.8,
          "finish_reason": "stop",
          "weeyuga_meta": null,
          "status_code": 200,
          "response_chars": 409,
          "response_preview": "- Review `sshd_config` for hardening settings before making changes.\n- Review `authorized_keys` permissions to ensure only trusted keys are allowed.\n- Review `PasswordAuthentication` settings to prevent unauthorized password usage.\n- Review",
          "required_markers": [
            "sshd_config",
            "authorized_keys",
            "journalctl",
            "rollback",
            "PasswordAuthentication"
          ],
          "markers_hit": [
            "sshd_config",
            "authorized_keys",
            "journalctl",
            "rollback",
            "PasswordAuthentication"
          ],
          "marker_hit_rate": 1.0,
          "format_rule": "five_bullets",
          "format_ok": true,
          "usable_answer": true,
          "error": null
        }
      ]
    },
    {
      "cell_id": "pavilion:weeyuga:qwen3.5:9b-q6k",
      "n_calls": 6,
      "n_errors": 0,
      "duration_ms_mean": null,
      "duration_ms_p50": null,
      "duration_ms_p95": null,
      "tokens_per_sec_mean": null,
      "tokens_per_sec_p50": null,
      "tokens_per_sec_p95": null,
      "tokens_per_sec_max": null,
      "cold_warm_split": {
        "cold": {
          "n_calls": 2,
          "duration_ms_mean": null,
          "duration_ms_p50": null,
          "tokens_per_sec_mean": null
        },
        "warm": {
          "n_calls": 4,
          "duration_ms_mean": null,
          "duration_ms_p50": null,
          "tokens_per_sec_mean": null
        }
      },
      "per_prompt": {
        "unknown": {
          "n_calls": 6,
          "n_errors": 0,
          "duration_ms_mean": null,
          "duration_ms_p50": null,
          "tokens_per_sec_mean": null,
          "tokens_per_sec_p50": null
        }
      },
      "chars_split": null,
      "timeline": [
        {
          "prompt_id": null,
          "run_idx": 0,
          "phase": "hello",
          "duration_ms": null,
          "tokens_per_sec": null,
          "error": null
        },
        {
          "prompt_id": null,
          "run_idx": 0,
          "phase": "5q",
          "duration_ms": null,
          "tokens_per_sec": null,
          "error": null
        },
        {
          "prompt_id": null,
          "run_idx": 1,
          "phase": "5q",
          "duration_ms": null,
          "tokens_per_sec": null,
          "error": null
        },
        {
          "prompt_id": null,
          "run_idx": 2,
          "phase": "5q",
          "duration_ms": null,
          "tokens_per_sec": null,
          "error": null
        },
        {
          "prompt_id": null,
          "run_idx": 3,
          "phase": "5q",
          "duration_ms": null,
          "tokens_per_sec": null,
          "error": null
        },
        {
          "prompt_id": null,
          "run_idx": 4,
          "phase": "5q",
          "duration_ms": null,
          "tokens_per_sec": null,
          "error": null
        }
      ],
      "calls": [
        {
          "type": "call",
          "ts_utc": "2026-04-29T10:00:33Z",
          "cell_id": "pavilion:weeyuga:qwen3.5:9b-q6k",
          "model": "qwen3.5:9b-q6k",
          "phase": "hello",
          "question_id": "hello_check",
          "run_idx": 0,
          "duration_seconds": 4.401,
          "prompt_tokens": 16,
          "completion_tokens": 163,
          "tokens_per_second": 37.04,
          "finish_reason": "stop",
          "weeyuga_meta": null,
          "status_code": 200,
          "response_chars": 202,
          "response_preview": "Hello! I'm glad to help! \ud83d\ude0a What would you like to do today? Whether it's answering questions, solving problems, writing content, or just chatting, I'm here to assist you. Just let me know what you need!",
          "required_markers": [],
          "markers_hit": [],
          "marker_hit_rate": null,
          "format_rule": "",
          "format_ok": null,
          "usable_answer": true,
          "error": null
        },
        {
          "type": "call",
          "ts_utc": "2026-04-29T10:01:00Z",
          "cell_id": "pavilion:weeyuga:qwen3.5:9b-q6k",
          "model": "qwen3.5:9b-q6k",
          "phase": "5q",
          "question_id": "disk_guard_bash",
          "run_idx": 0,
          "duration_seconds": 27.133,
          "prompt_tokens": 71,
          "completion_tokens": 1124,
          "tokens_per_second": 41.43,
          "finish_reason": "stop",
          "weeyuga_meta": null,
          "status_code": 200,
          "response_chars": 246,
          "response_preview": "```bash\n#!/bin/bash\n\n# Check disk usage for /\nif [ -d \"/\" ]; then\n    usage=$(df -P / | awk '{print $5}')\n    if [ \"$usage\" -gt 85 ]; then\n        echo \"WARNING: Disk usage for / is at ${usage}% (above 85% threshold)\"\n        exit 1\n    fi\n",
          "required_markers": [
            "#!/usr/bin/env bash",
            "df -P /",
            "85",
            "exit 1"
          ],
          "markers_hit": [
            "df -P /",
            "85",
            "exit 1"
          ],
          "marker_hit_rate": 0.75,
          "format_rule": "bash_code",
          "format_ok": false,
          "usable_answer": true,
          "error": null
        },
        {
          "type": "call",
          "ts_utc": "2026-04-29T10:01:20Z",
          "cell_id": "pavilion:weeyuga:qwen3.5:9b-q6k",
          "model": "qwen3.5:9b-q6k",
          "phase": "5q",
          "question_id": "ipv4_python_tests",
          "run_idx": 1,
          "duration_seconds": 20.684,
          "prompt_tokens": 55,
          "completion_tokens": 838,
          "tokens_per_second": 40.51,
          "finish_reason": "stop",
          "weeyuga_meta": null,
          "status_code": 200,
          "response_chars": 1012,
          "response_preview": "```python\ndef is_valid_ipv4(value: str) -> bool:\n    \"\"\"\n    Check if a string is a valid IPv4 address.\n    \n    Args:\n        value (str): The string to validate as an IPv4 address.\n        \n    Returns:\n        bool: True if the string is",
          "required_markers": [
            "def is_valid_ipv4",
            "def test_",
            "assert",
            "split('.')"
          ],
          "markers_hit": [
            "def is_valid_ipv4",
            "def test_",
            "assert",
            "split('.')"
          ],
          "marker_hit_rate": 1.0,
          "format_rule": "python_code",
          "format_ok": true,
          "usable_answer": true,
          "error": null
        },
        {
          "type": "call",
          "ts_utc": "2026-04-29T10:01:27Z",
          "cell_id": "pavilion:weeyuga:qwen3.5:9b-q6k",
          "model": "qwen3.5:9b-q6k",
          "phase": "5q",
          "question_id": "nginx_safe_reload",
          "run_idx": 2,
          "duration_seconds": 6.142,
          "prompt_tokens": 39,
          "completion_tokens": 241,
          "tokens_per_second": 39.24,
          "finish_reason": "stop",
          "weeyuga_meta": null,
          "status_code": 200,
          "response_chars": 75,
          "response_preview": "cp /etc/nginx/nginx.conf /etc/nginx/nginx.conf.bak\nnginx -t\nnginx -s reload",
          "required_markers": [
            "cp /etc/nginx/nginx.conf",
            "nginx -t",
            "systemctl reload nginx",
            "&&"
          ],
          "markers_hit": [
            "cp /etc/nginx/nginx.conf",
            "nginx -t"
          ],
          "marker_hit_rate": 0.5,
          "format_rule": "shell_lines",
          "format_ok": true,
          "usable_answer": true,
          "error": null
        },
        {
          "type": "call",
          "ts_utc": "2026-04-29T10:02:16Z",
          "cell_id": "pavilion:weeyuga:qwen3.5:9b-q6k",
          "model": "qwen3.5:9b-q6k",
          "phase": "5q",
          "question_id": "yaml_cli_plan",
          "run_idx": 3,
          "duration_seconds": 49.257,
          "prompt_tokens": 43,
          "completion_tokens": 2048,
          "tokens_per_second": 41.58,
          "finish_reason": "length",
          "weeyuga_meta": null,
          "status_code": 200,
          "response_chars": 7775,
          "response_preview": "# Python CLI for Git-Scanned YAML Schema Validation\n\n## Step 1: Project Setup and Imports\n\n```python\nimport os\nimport sys\nimport json\nimport yaml\nfrom pathlib import Path\nfrom typing import List, Dict, Any, Optional\nimport argparse\n\n# Requi",
          "required_markers": [
            "1.",
            "2.",
            "3.",
            "4.",
            "JSON schema",
            "git"
          ],
          "markers_hit": [
            "git"
          ],
          "marker_hit_rate": 0.167,
          "format_rule": "four_numbered_steps",
          "format_ok": false,
          "usable_answer": true,
          "error": null
        },
        {
          "type": "call",
          "ts_utc": "2026-04-29T10:02:45Z",
          "cell_id": "pavilion:weeyuga:qwen3.5:9b-q6k",
          "model": "qwen3.5:9b-q6k",
          "phase": "5q",
          "question_id": "ssh_lockout_triage",
          "run_idx": 4,
          "duration_seconds": 29.306,
          "prompt_tokens": 56,
          "completion_tokens": 1165,
          "tokens_per_second": 39.75,
          "finish_reason": "stop",
          "weeyuga_meta": null,
          "status_code": 200,
          "response_chars": 574,
          "response_preview": "-   Review your `sshd_config` file to ensure the `PasswordAuthentication` setting is enabled and `PermitRootLogin` is set to `yes` for security.\n-   Verify that your `authorized_keys` file contains only valid public keys and is not empty.\n-",
          "required_markers": [
            "sshd_config",
            "authorized_keys",
            "journalctl",
            "rollback",
            "PasswordAuthentication"
          ],
          "markers_hit": [
            "sshd_config",
            "authorized_keys",
            "journalctl",
            "rollback",
            "PasswordAuthentication"
          ],
          "marker_hit_rate": 1.0,
          "format_rule": "five_bullets",
          "format_ok": true,
          "usable_answer": true,
          "error": null
        }
      ]
    },
    {
      "cell_id": "pavilion:weeyuga:qwen3.5:9b-q4km",
      "n_calls": 6,
      "n_errors": 0,
      "duration_ms_mean": null,
      "duration_ms_p50": null,
      "duration_ms_p95": null,
      "tokens_per_sec_mean": null,
      "tokens_per_sec_p50": null,
      "tokens_per_sec_p95": null,
      "tokens_per_sec_max": null,
      "cold_warm_split": {
        "cold": {
          "n_calls": 2,
          "duration_ms_mean": null,
          "duration_ms_p50": null,
          "tokens_per_sec_mean": null
        },
        "warm": {
          "n_calls": 4,
          "duration_ms_mean": null,
          "duration_ms_p50": null,
          "tokens_per_sec_mean": null
        }
      },
      "per_prompt": {
        "unknown": {
          "n_calls": 6,
          "n_errors": 0,
          "duration_ms_mean": null,
          "duration_ms_p50": null,
          "tokens_per_sec_mean": null,
          "tokens_per_sec_p50": null
        }
      },
      "chars_split": null,
      "timeline": [
        {
          "prompt_id": null,
          "run_idx": 0,
          "phase": "hello",
          "duration_ms": null,
          "tokens_per_sec": null,
          "error": null
        },
        {
          "prompt_id": null,
          "run_idx": 0,
          "phase": "5q",
          "duration_ms": null,
          "tokens_per_sec": null,
          "error": null
        },
        {
          "prompt_id": null,
          "run_idx": 1,
          "phase": "5q",
          "duration_ms": null,
          "tokens_per_sec": null,
          "error": null
        },
        {
          "prompt_id": null,
          "run_idx": 2,
          "phase": "5q",
          "duration_ms": null,
          "tokens_per_sec": null,
          "error": null
        },
        {
          "prompt_id": null,
          "run_idx": 3,
          "phase": "5q",
          "duration_ms": null,
          "tokens_per_sec": null,
          "error": null
        },
        {
          "prompt_id": null,
          "run_idx": 4,
          "phase": "5q",
          "duration_ms": null,
          "tokens_per_sec": null,
          "error": null
        }
      ],
      "calls": [
        {
          "type": "call",
          "ts_utc": "2026-04-29T10:02:51Z",
          "cell_id": "pavilion:weeyuga:qwen3.5:9b-q4km",
          "model": "qwen3.5:9b-q4km",
          "phase": "hello",
          "question_id": "hello_check",
          "run_idx": 0,
          "duration_seconds": 5.912,
          "prompt_tokens": 16,
          "completion_tokens": 220,
          "tokens_per_second": 37.21,
          "finish_reason": "stop",
          "weeyuga_meta": null,
          "status_code": 200,
          "response_chars": 186,
          "response_preview": "Hello! I'm glad to help! \ud83d\ude0a  \nWhat would you like to do today? For example:  \n- Ask a question?  \n- Solve a problem?  \n- Get information?  \n- Or just chat?  \n\nLet me know what you need! \ud83d\ude4c",
          "required_markers": [],
          "markers_hit": [],
          "marker_hit_rate": null,
          "format_rule": "",
          "format_ok": null,
          "usable_answer": true,
          "error": null
        },
        {
          "type": "call",
          "ts_utc": "2026-04-29T10:03:07Z",
          "cell_id": "pavilion:weeyuga:qwen3.5:9b-q4km",
          "model": "qwen3.5:9b-q4km",
          "phase": "5q",
          "question_id": "disk_guard_bash",
          "run_idx": 0,
          "duration_seconds": 15.618,
          "prompt_tokens": 71,
          "completion_tokens": 636,
          "tokens_per_second": 40.72,
          "finish_reason": "stop",
          "weeyuga_meta": null,
          "status_code": 200,
          "response_chars": 263,
          "response_preview": "```bash\n#!/bin/bash\n\n# Check disk usage for /\nif [ -d \"/\" ]; then\n    usage=$(df -P / 2>&1 | tail -n 1 | awk '{print $5}')\n    if [ \"$usage\" -gt 85 ]; then\n        echo \"WARNING: Disk usage for / is at ${usage}% (above 85% threshold)\"\n     ",
          "required_markers": [
            "#!/usr/bin/env bash",
            "df -P /",
            "85",
            "exit 1"
          ],
          "markers_hit": [
            "df -P /",
            "85",
            "exit 1"
          ],
          "marker_hit_rate": 0.75,
          "format_rule": "bash_code",
          "format_ok": false,
          "usable_answer": true,
          "error": null
        },
        {
          "type": "call",
          "ts_utc": "2026-04-29T10:03:31Z",
          "cell_id": "pavilion:weeyuga:qwen3.5:9b-q4km",
          "model": "qwen3.5:9b-q4km",
          "phase": "5q",
          "question_id": "ipv4_python_tests",
          "run_idx": 1,
          "duration_seconds": 24.316,
          "prompt_tokens": 55,
          "completion_tokens": 947,
          "tokens_per_second": 38.95,
          "finish_reason": "stop",
          "weeyuga_meta": null,
          "status_code": 200,
          "response_chars": 493,
          "response_preview": "```python\ndef is_valid_ipv4(value: str) -> bool:\n    if not isinstance(value, str):\n        return False\n    if len(value) != 4:\n        return False\n    for i in range(4):\n        if not value[i].isdigit():\n            return False\n    ret",
          "required_markers": [
            "def is_valid_ipv4",
            "def test_",
            "assert",
            "split('.')"
          ],
          "markers_hit": [
            "def is_valid_ipv4",
            "def test_",
            "assert"
          ],
          "marker_hit_rate": 0.75,
          "format_rule": "python_code",
          "format_ok": true,
          "usable_answer": true,
          "error": null
        },
        {
          "type": "call",
          "ts_utc": "2026-04-29T10:03:56Z",
          "cell_id": "pavilion:weeyuga:qwen3.5:9b-q4km",
          "model": "qwen3.5:9b-q4km",
          "phase": "5q",
          "question_id": "nginx_safe_reload",
          "run_idx": 2,
          "duration_seconds": 25.495,
          "prompt_tokens": 39,
          "completion_tokens": 1060,
          "tokens_per_second": 41.58,
          "finish_reason": "stop",
          "weeyuga_meta": null,
          "status_code": 200,
          "response_chars": 75,
          "response_preview": "cp /etc/nginx/nginx.conf /etc/nginx/nginx.conf.bak\nnginx -t\nnginx -s reload",
          "required_markers": [
            "cp /etc/nginx/nginx.conf",
            "nginx -t",
            "systemctl reload nginx",
            "&&"
          ],
          "markers_hit": [
            "cp /etc/nginx/nginx.conf",
            "nginx -t"
          ],
          "marker_hit_rate": 0.5,
          "format_rule": "shell_lines",
          "format_ok": true,
          "usable_answer": true,
          "error": null
        },
        {
          "type": "call",
          "ts_utc": "2026-04-29T10:04:46Z",
          "cell_id": "pavilion:weeyuga:qwen3.5:9b-q4km",
          "model": "qwen3.5:9b-q4km",
          "phase": "5q",
          "question_id": "yaml_cli_plan",
          "run_idx": 3,
          "duration_seconds": 49.664,
          "prompt_tokens": 43,
          "completion_tokens": 2048,
          "tokens_per_second": 41.24,
          "finish_reason": "length",
          "weeyuga_meta": null,
          "status_code": 200,
          "response_chars": 9006,
          "response_preview": "```python\n#!/usr/bin/env python3\n\"\"\"\nCLI: git-yaml-validator\nScans a git repo for changed YAML files, validates them against a JSON schema, and exits nonzero on failure.\n\"\"\"\n\nimport argparse\nimport json\nimport os\nimport sys\nfrom pathlib imp",
          "required_markers": [
            "1.",
            "2.",
            "3.",
            "4.",
            "JSON schema",
            "git"
          ],
          "markers_hit": [
            "JSON schema",
            "git"
          ],
          "marker_hit_rate": 0.333,
          "format_rule": "four_numbered_steps",
          "format_ok": false,
          "usable_answer": true,
          "error": null
        },
        {
          "type": "call",
          "ts_utc": "2026-04-29T10:05:14Z",
          "cell_id": "pavilion:weeyuga:qwen3.5:9b-q4km",
          "model": "qwen3.5:9b-q4km",
          "phase": "5q",
          "question_id": "ssh_lockout_triage",
          "run_idx": 4,
          "duration_seconds": 27.576,
          "prompt_tokens": 56,
          "completion_tokens": 1128,
          "tokens_per_second": 40.91,
          "finish_reason": "stop",
          "weeyuga_meta": null,
          "status_code": 200,
          "response_chars": 401,
          "response_preview": "- Review `sshd_config` to ensure `PermitRootLogin no` and `PasswordAuthentication no` are set correctly.\n- Review `authorized_keys` to ensure no sensitive keys are stored or misconfigured.\n- Run `journalctl` to check for recent security iss",
          "required_markers": [
            "sshd_config",
            "authorized_keys",
            "journalctl",
            "rollback",
            "PasswordAuthentication"
          ],
          "markers_hit": [
            "sshd_config",
            "authorized_keys",
            "journalctl",
            "rollback",
            "PasswordAuthentication"
          ],
          "marker_hit_rate": 1.0,
          "format_rule": "five_bullets",
          "format_ok": true,
          "usable_answer": true,
          "error": null
        }
      ]
    },
    {
      "cell_id": "pavilion:weeyuga:qwen3.5:2b",
      "n_calls": 6,
      "n_errors": 0,
      "duration_ms_mean": null,
      "duration_ms_p50": null,
      "duration_ms_p95": null,
      "tokens_per_sec_mean": null,
      "tokens_per_sec_p50": null,
      "tokens_per_sec_p95": null,
      "tokens_per_sec_max": null,
      "cold_warm_split": {
        "cold": {
          "n_calls": 2,
          "duration_ms_mean": null,
          "duration_ms_p50": null,
          "tokens_per_sec_mean": null
        },
        "warm": {
          "n_calls": 4,
          "duration_ms_mean": null,
          "duration_ms_p50": null,
          "tokens_per_sec_mean": null
        }
      },
      "per_prompt": {
        "unknown": {
          "n_calls": 6,
          "n_errors": 0,
          "duration_ms_mean": null,
          "duration_ms_p50": null,
          "tokens_per_sec_mean": null,
          "tokens_per_sec_p50": null
        }
      },
      "chars_split": null,
      "timeline": [
        {
          "prompt_id": null,
          "run_idx": 0,
          "phase": "hello",
          "duration_ms": null,
          "tokens_per_sec": null,
          "error": null
        },
        {
          "prompt_id": null,
          "run_idx": 0,
          "phase": "5q",
          "duration_ms": null,
          "tokens_per_sec": null,
          "error": null
        },
        {
          "prompt_id": null,
          "run_idx": 1,
          "phase": "5q",
          "duration_ms": null,
          "tokens_per_sec": null,
          "error": null
        },
        {
          "prompt_id": null,
          "run_idx": 2,
          "phase": "5q",
          "duration_ms": null,
          "tokens_per_sec": null,
          "error": null
        },
        {
          "prompt_id": null,
          "run_idx": 3,
          "phase": "5q",
          "duration_ms": null,
          "tokens_per_sec": null,
          "error": null
        },
        {
          "prompt_id": null,
          "run_idx": 4,
          "phase": "5q",
          "duration_ms": null,
          "tokens_per_sec": null,
          "error": null
        }
      ],
      "calls": [
        {
          "type": "call",
          "ts_utc": "2026-04-29T10:05:18Z",
          "cell_id": "pavilion:weeyuga:qwen3.5:2b",
          "model": "qwen3.5:2b",
          "phase": "hello",
          "question_id": "hello_check",
          "run_idx": 0,
          "duration_seconds": 4.366,
          "prompt_tokens": 16,
          "completion_tokens": 157,
          "tokens_per_second": 35.96,
          "finish_reason": "stop",
          "weeyuga_meta": null,
          "status_code": 200,
          "response_chars": 91,
          "response_preview": "Hello! I'd be happy to help you with anything you need. What can I assist you with today? \ud83d\ude0a",
          "required_markers": [],
          "markers_hit": [],
          "marker_hit_rate": null,
          "format_rule": "",
          "format_ok": null,
          "usable_answer": true,
          "error": null
        },
        {
          "type": "call",
          "ts_utc": "2026-04-29T10:05:36Z",
          "cell_id": "pavilion:weeyuga:qwen3.5:2b",
          "model": "qwen3.5:2b",
          "phase": "5q",
          "question_id": "disk_guard_bash",
          "run_idx": 0,
          "duration_seconds": 18.333,
          "prompt_tokens": 71,
          "completion_tokens": 751,
          "tokens_per_second": 40.96,
          "finish_reason": "stop",
          "weeyuga_meta": null,
          "status_code": 200,
          "response_chars": 246,
          "response_preview": "```bash\n#!/bin/bash\n\n# Check disk usage for /\nif [ -d \"/\" ]; then\n    usage=$(df -P / | awk '{print $5}')\n    if [ \"$usage\" -gt 85 ]; then\n        echo \"WARNING: Disk usage for / is at ${usage}% (above 85% threshold)\"\n        exit 1\n    fi\n",
          "required_markers": [
            "#!/usr/bin/env bash",
            "df -P /",
            "85",
            "exit 1"
          ],
          "markers_hit": [
            "df -P /",
            "85",
            "exit 1"
          ],
          "marker_hit_rate": 0.75,
          "format_rule": "bash_code",
          "format_ok": false,
          "usable_answer": true,
          "error": null
        },
        {
          "type": "call",
          "ts_utc": "2026-04-29T10:05:56Z",
          "cell_id": "pavilion:weeyuga:qwen3.5:2b",
          "model": "qwen3.5:2b",
          "phase": "5q",
          "question_id": "ipv4_python_tests",
          "run_idx": 1,
          "duration_seconds": 19.861,
          "prompt_tokens": 55,
          "completion_tokens": 820,
          "tokens_per_second": 41.29,
          "finish_reason": "stop",
          "weeyuga_meta": null,
          "status_code": 200,
          "response_chars": 771,
          "response_preview": "```python\ndef is_valid_ipv4(value: str) -> bool:\n    \"\"\"Check if a string is a valid IPv4 address.\"\"\"\n    if not isinstance(value, str):\n        return False\n    parts = value.split('.')\n    if len(parts) != 4:\n        return False\n    for ",
          "required_markers": [
            "def is_valid_ipv4",
            "def test_",
            "assert",
            "split('.')"
          ],
          "markers_hit": [
            "def is_valid_ipv4",
            "def test_",
            "assert",
            "split('.')"
          ],
          "marker_hit_rate": 1.0,
          "format_rule": "python_code",
          "format_ok": true,
          "usable_answer": true,
          "error": null
        },
        {
          "type": "call",
          "ts_utc": "2026-04-29T10:06:22Z",
          "cell_id": "pavilion:weeyuga:qwen3.5:2b",
          "model": "qwen3.5:2b",
          "phase": "5q",
          "question_id": "nginx_safe_reload",
          "run_idx": 2,
          "duration_seconds": 25.77,
          "prompt_tokens": 39,
          "completion_tokens": 1060,
          "tokens_per_second": 41.13,
          "finish_reason": "stop",
          "weeyuga_meta": null,
          "status_code": 200,
          "response_chars": 75,
          "response_preview": "cp /etc/nginx/nginx.conf /etc/nginx/nginx.conf.bak\nnginx -t\nnginx -s reload",
          "required_markers": [
            "cp /etc/nginx/nginx.conf",
            "nginx -t",
            "systemctl reload nginx",
            "&&"
          ],
          "markers_hit": [
            "cp /etc/nginx/nginx.conf",
            "nginx -t"
          ],
          "marker_hit_rate": 0.5,
          "format_rule": "shell_lines",
          "format_ok": true,
          "usable_answer": true,
          "error": null
        },
        {
          "type": "call",
          "ts_utc": "2026-04-29T10:07:12Z",
          "cell_id": "pavilion:weeyuga:qwen3.5:2b",
          "model": "qwen3.5:2b",
          "phase": "5q",
          "question_id": "yaml_cli_plan",
          "run_idx": 3,
          "duration_seconds": 50.003,
          "prompt_tokens": 43,
          "completion_tokens": 2048,
          "tokens_per_second": 40.96,
          "finish_reason": "length",
          "weeyuga_meta": null,
          "status_code": 200,
          "response_chars": 11731,
          "response_preview": "1. Initialize a Python project structure with a `main.py` script and a `schema.json` file for validation.\n2. Create a `git` command-line interface that uses `git diff` to identify changed files in the repository.\n3. Implement a YAML parser ",
          "required_markers": [
            "1.",
            "2.",
            "3.",
            "4.",
            "JSON schema",
            "git"
          ],
          "markers_hit": [
            "1.",
            "2.",
            "3.",
            "4.",
            "JSON schema",
            "git"
          ],
          "marker_hit_rate": 1.0,
          "format_rule": "four_numbered_steps",
          "format_ok": true,
          "usable_answer": true,
          "error": null
        },
        {
          "type": "call",
          "ts_utc": "2026-04-29T10:07:39Z",
          "cell_id": "pavilion:weeyuga:qwen3.5:2b",
          "model": "qwen3.5:2b",
          "phase": "5q",
          "question_id": "ssh_lockout_triage",
          "run_idx": 4,
          "duration_seconds": 27.148,
          "prompt_tokens": 56,
          "completion_tokens": 1113,
          "tokens_per_second": 41.0,
          "finish_reason": "stop",
          "weeyuga_meta": null,
          "status_code": 200,
          "response_chars": 378,
          "response_preview": "- Review `sshd_config` for hardening settings before making changes.\n- Review `authorized_keys` permissions to ensure only trusted keys are allowed.\n- Review `PasswordAuthentication` settings to prevent unauthorized password usage.\n- Review",
          "required_markers": [
            "sshd_config",
            "authorized_keys",
            "journalctl",
            "rollback",
            "PasswordAuthentication"
          ],
          "markers_hit": [
            "sshd_config",
            "authorized_keys",
            "journalctl",
            "rollback",
            "PasswordAuthentication"
          ],
          "marker_hit_rate": 1.0,
          "format_rule": "five_bullets",
          "format_ok": true,
          "usable_answer": true,
          "error": null
        }
      ]
    },
    {
      "cell_id": "pavilion:weeyuga:qwen3.5:0.8b",
      "n_calls": 6,
      "n_errors": 0,
      "duration_ms_mean": null,
      "duration_ms_p50": null,
      "duration_ms_p95": null,
      "tokens_per_sec_mean": null,
      "tokens_per_sec_p50": null,
      "tokens_per_sec_p95": null,
      "tokens_per_sec_max": null,
      "cold_warm_split": {
        "cold": {
          "n_calls": 2,
          "duration_ms_mean": null,
          "duration_ms_p50": null,
          "tokens_per_sec_mean": null
        },
        "warm": {
          "n_calls": 4,
          "duration_ms_mean": null,
          "duration_ms_p50": null,
          "tokens_per_sec_mean": null
        }
      },
      "per_prompt": {
        "unknown": {
          "n_calls": 6,
          "n_errors": 0,
          "duration_ms_mean": null,
          "duration_ms_p50": null,
          "tokens_per_sec_mean": null,
          "tokens_per_sec_p50": null
        }
      },
      "chars_split": null,
      "timeline": [
        {
          "prompt_id": null,
          "run_idx": 0,
          "phase": "hello",
          "duration_ms": null,
          "tokens_per_sec": null,
          "error": null
        },
        {
          "prompt_id": null,
          "run_idx": 0,
          "phase": "5q",
          "duration_ms": null,
          "tokens_per_sec": null,
          "error": null
        },
        {
          "prompt_id": null,
          "run_idx": 1,
          "phase": "5q",
          "duration_ms": null,
          "tokens_per_sec": null,
          "error": null
        },
        {
          "prompt_id": null,
          "run_idx": 2,
          "phase": "5q",
          "duration_ms": null,
          "tokens_per_sec": null,
          "error": null
        },
        {
          "prompt_id": null,
          "run_idx": 3,
          "phase": "5q",
          "duration_ms": null,
          "tokens_per_sec": null,
          "error": null
        },
        {
          "prompt_id": null,
          "run_idx": 4,
          "phase": "5q",
          "duration_ms": null,
          "tokens_per_sec": null,
          "error": null
        }
      ],
      "calls": [
        {
          "type": "call",
          "ts_utc": "2026-04-29T10:07:43Z",
          "cell_id": "pavilion:weeyuga:qwen3.5:0.8b",
          "model": "qwen3.5:0.8b",
          "phase": "hello",
          "question_id": "hello_check",
          "run_idx": 0,
          "duration_seconds": 3.869,
          "prompt_tokens": 16,
          "completion_tokens": 141,
          "tokens_per_second": 36.44,
          "finish_reason": "stop",
          "weeyuga_meta": null,
          "status_code": 200,
          "response_chars": 57,
          "response_preview": "Hello! I'm glad to help you. What would you like to do? \ud83d\ude0a",
          "required_markers": [],
          "markers_hit": [],
          "marker_hit_rate": null,
          "format_rule": "",
          "format_ok": null,
          "usable_answer": true,
          "error": null
        },
        {
          "type": "call",
          "ts_utc": "2026-04-29T10:08:00Z",
          "cell_id": "pavilion:weeyuga:qwen3.5:0.8b",
          "model": "qwen3.5:0.8b",
          "phase": "5q",
          "question_id": "disk_guard_bash",
          "run_idx": 0,
          "duration_seconds": 16.794,
          "prompt_tokens": 71,
          "completion_tokens": 688,
          "tokens_per_second": 40.97,
          "finish_reason": "stop",
          "weeyuga_meta": null,
          "status_code": 200,
          "response_chars": 217,
          "response_preview": "#!/bin/bash\n\n# Check disk usage for /\nif [ -d \"/\" ]; then\n    usage=$(df -P / 2>&1 | awk '{print $5}')\n    if [ \"$usage\" -gt 85 ]; then\n        echo \"WARNING: Disk usage for / is at ${usage}%\"\n        exit 1\n    fi\nfi",
          "required_markers": [
            "#!/usr/bin/env bash",
            "df -P /",
            "85",
            "exit 1"
          ],
          "markers_hit": [
            "df -P /",
            "85",
            "exit 1"
          ],
          "marker_hit_rate": 0.75,
          "format_rule": "bash_code",
          "format_ok": false,
          "usable_answer": true,
          "error": null
        },
        {
          "type": "call",
          "ts_utc": "2026-04-29T10:08:18Z",
          "cell_id": "pavilion:weeyuga:qwen3.5:0.8b",
          "model": "qwen3.5:0.8b",
          "phase": "5q",
          "question_id": "ipv4_python_tests",
          "run_idx": 1,
          "duration_seconds": 17.714,
          "prompt_tokens": 55,
          "completion_tokens": 722,
          "tokens_per_second": 40.76,
          "finish_reason": "stop",
          "weeyuga_meta": null,
          "status_code": 200,
          "response_chars": 644,
          "response_preview": "```python\ndef is_valid_ipv4(value: str) -> bool:\n    \"\"\"Check if a string is a valid IPv4 address.\"\"\"\n    if not isinstance(value, str):\n        return False\n    parts = value.split('.')\n    if len(parts) != 4:\n        return False\n    for ",
          "required_markers": [
            "def is_valid_ipv4",
            "def test_",
            "assert",
            "split('.')"
          ],
          "markers_hit": [
            "def is_valid_ipv4",
            "def test_",
            "assert",
            "split('.')"
          ],
          "marker_hit_rate": 1.0,
          "format_rule": "python_code",
          "format_ok": true,
          "usable_answer": true,
          "error": null
        },
        {
          "type": "call",
          "ts_utc": "2026-04-29T10:08:44Z",
          "cell_id": "pavilion:weeyuga:qwen3.5:0.8b",
          "model": "qwen3.5:0.8b",
          "phase": "5q",
          "question_id": "nginx_safe_reload",
          "run_idx": 2,
          "duration_seconds": 26.52,
          "prompt_tokens": 39,
          "completion_tokens": 1087,
          "tokens_per_second": 40.99,
          "finish_reason": "stop",
          "weeyuga_meta": null,
          "status_code": 200,
          "response_chars": 177,
          "response_preview": "cp /etc/nginx/nginx.conf /etc/nginx/nginx.conf.backup\nnginx -t\nif nginx -t; then\n    nginx -s reload\nelse\n    echo \"Validation failed, reloading nginx...\"\n    nginx -s reload\nfi",
          "required_markers": [
            "cp /etc/nginx/nginx.conf",
            "nginx -t",
            "systemctl reload nginx",
            "&&"
          ],
          "markers_hit": [
            "cp /etc/nginx/nginx.conf",
            "nginx -t"
          ],
          "marker_hit_rate": 0.5,
          "format_rule": "shell_lines",
          "format_ok": true,
          "usable_answer": true,
          "error": null
        },
        {
          "type": "call",
          "ts_utc": "2026-04-29T10:09:35Z",
          "cell_id": "pavilion:weeyuga:qwen3.5:0.8b",
          "model": "qwen3.5:0.8b",
          "phase": "5q",
          "question_id": "yaml_cli_plan",
          "run_idx": 3,
          "duration_seconds": 50.97,
          "prompt_tokens": 43,
          "completion_tokens": 2048,
          "tokens_per_second": 40.18,
          "finish_reason": "length",
          "weeyuga_meta": null,
          "status_code": 200,
          "response_chars": 7262,
          "response_preview": "# Python CLI for Git-Scanned YAML Schema Validation\n\n```python\n#!/usr/bin/env python3\n\"\"\"\nYAML Schema Validator CLI\nScans git repos for changed YAML files and validates against JSON schemas.\n\"\"\"\n\nimport argparse\nimport json\nimport os\nimport",
          "required_markers": [
            "1.",
            "2.",
            "3.",
            "4.",
            "JSON schema",
            "git"
          ],
          "markers_hit": [
            "1.",
            "2.",
            "3.",
            "4.",
            "JSON schema",
            "git"
          ],
          "marker_hit_rate": 1.0,
          "format_rule": "four_numbered_steps",
          "format_ok": true,
          "usable_answer": true,
          "error": null
        },
        {
          "type": "call",
          "ts_utc": "2026-04-29T10:10:03Z",
          "cell_id": "pavilion:weeyuga:qwen3.5:0.8b",
          "model": "qwen3.5:0.8b",
          "phase": "5q",
          "question_id": "ssh_lockout_triage",
          "run_idx": 4,
          "duration_seconds": 27.569,
          "prompt_tokens": 56,
          "completion_tokens": 1125,
          "tokens_per_second": 40.81,
          "finish_reason": "stop",
          "weeyuga_meta": null,
          "status_code": 200,
          "response_chars": 428,
          "response_preview": "- Review `sshd_config` for security settings like `PermitRootLogin no` and `PasswordAuthentication no`.\n- Review `authorized_keys` permissions and ownership to ensure only trusted users can access.\n- Review `PasswordAuthentication` settings",
          "required_markers": [
            "sshd_config",
            "authorized_keys",
            "journalctl",
            "rollback",
            "PasswordAuthentication"
          ],
          "markers_hit": [
            "sshd_config",
            "authorized_keys",
            "journalctl",
            "rollback",
            "PasswordAuthentication"
          ],
          "marker_hit_rate": 1.0,
          "format_rule": "five_bullets",
          "format_ok": true,
          "usable_answer": true,
          "error": null
        }
      ]
    },
    {
      "cell_id": "pavilion:weeyuga:qwen3.5:9b",
      "n_calls": 6,
      "n_errors": 0,
      "duration_ms_mean": null,
      "duration_ms_p50": null,
      "duration_ms_p95": null,
      "tokens_per_sec_mean": null,
      "tokens_per_sec_p50": null,
      "tokens_per_sec_p95": null,
      "tokens_per_sec_max": null,
      "cold_warm_split": {
        "cold": {
          "n_calls": 2,
          "duration_ms_mean": null,
          "duration_ms_p50": null,
          "tokens_per_sec_mean": null
        },
        "warm": {
          "n_calls": 4,
          "duration_ms_mean": null,
          "duration_ms_p50": null,
          "tokens_per_sec_mean": null
        }
      },
      "per_prompt": {
        "unknown": {
          "n_calls": 6,
          "n_errors": 0,
          "duration_ms_mean": null,
          "duration_ms_p50": null,
          "tokens_per_sec_mean": null,
          "tokens_per_sec_p50": null
        }
      },
      "chars_split": null,
      "timeline": [
        {
          "prompt_id": null,
          "run_idx": 0,
          "phase": "hello",
          "duration_ms": null,
          "tokens_per_sec": null,
          "error": null
        },
        {
          "prompt_id": null,
          "run_idx": 0,
          "phase": "5q",
          "duration_ms": null,
          "tokens_per_sec": null,
          "error": null
        },
        {
          "prompt_id": null,
          "run_idx": 1,
          "phase": "5q",
          "duration_ms": null,
          "tokens_per_sec": null,
          "error": null
        },
        {
          "prompt_id": null,
          "run_idx": 2,
          "phase": "5q",
          "duration_ms": null,
          "tokens_per_sec": null,
          "error": null
        },
        {
          "prompt_id": null,
          "run_idx": 3,
          "phase": "5q",
          "duration_ms": null,
          "tokens_per_sec": null,
          "error": null
        },
        {
          "prompt_id": null,
          "run_idx": 4,
          "phase": "5q",
          "duration_ms": null,
          "tokens_per_sec": null,
          "error": null
        }
      ],
      "calls": [
        {
          "type": "call",
          "ts_utc": "2026-04-29T10:10:13Z",
          "cell_id": "pavilion:weeyuga:qwen3.5:9b",
          "model": "qwen3.5:9b",
          "phase": "hello",
          "question_id": "hello_check",
          "run_idx": 0,
          "duration_seconds": 10.852,
          "prompt_tokens": 16,
          "completion_tokens": 416,
          "tokens_per_second": 38.33,
          "finish_reason": "stop",
          "weeyuga_meta": null,
          "status_code": 200,
          "response_chars": 157,
          "response_preview": "Hello! I'm glad to help you. What would you like to do today? Whether it's answering questions, solving problems, or just chatting, I'm here to assist you! \ud83d\ude0a",
          "required_markers": [],
          "markers_hit": [],
          "marker_hit_rate": null,
          "format_rule": "",
          "format_ok": null,
          "usable_answer": true,
          "error": null
        },
        {
          "type": "call",
          "ts_utc": "2026-04-29T10:10:40Z",
          "cell_id": "pavilion:weeyuga:qwen3.5:9b",
          "model": "qwen3.5:9b",
          "phase": "5q",
          "question_id": "disk_guard_bash",
          "run_idx": 0,
          "duration_seconds": 26.865,
          "prompt_tokens": 71,
          "completion_tokens": 1127,
          "tokens_per_second": 41.95,
          "finish_reason": "stop",
          "weeyuga_meta": null,
          "status_code": 200,
          "response_chars": 247,
          "response_preview": "#!/bin/bash\n\n# Check disk usage for /\nif [ -d \"/\" ]; then\n    usage=$(df -P / 2>&1 | awk '{print $5}')\n    if [ \"$usage\" -gt 85 ]; then\n        echo \"WARNING: Disk usage for / is at ${usage}% (above 85% threshold)\"\n        exit 1\n    fi\nfi\n",
          "required_markers": [
            "#!/usr/bin/env bash",
            "df -P /",
            "85",
            "exit 1"
          ],
          "markers_hit": [
            "df -P /",
            "85",
            "exit 1"
          ],
          "marker_hit_rate": 0.75,
          "format_rule": "bash_code",
          "format_ok": false,
          "usable_answer": true,
          "error": null
        },
        {
          "type": "call",
          "ts_utc": "2026-04-29T10:10:58Z",
          "cell_id": "pavilion:weeyuga:qwen3.5:9b",
          "model": "qwen3.5:9b",
          "phase": "5q",
          "question_id": "ipv4_python_tests",
          "run_idx": 1,
          "duration_seconds": 17.892,
          "prompt_tokens": 55,
          "completion_tokens": 732,
          "tokens_per_second": 40.91,
          "finish_reason": "stop",
          "weeyuga_meta": null,
          "status_code": 200,
          "response_chars": 574,
          "response_preview": "```python\ndef is_valid_ipv4(value: str) -> bool:\n    \"\"\"Check if a string is a valid IPv4 address.\"\"\"\n    if not isinstance(value, str):\n        return False\n    parts = value.split('.')\n    if len(parts) != 4:\n        return False\n    for ",
          "required_markers": [
            "def is_valid_ipv4",
            "def test_",
            "assert",
            "split('.')"
          ],
          "markers_hit": [
            "def is_valid_ipv4",
            "def test_",
            "assert",
            "split('.')"
          ],
          "marker_hit_rate": 1.0,
          "format_rule": "python_code",
          "format_ok": false,
          "usable_answer": true,
          "error": null
        },
        {
          "type": "call",
          "ts_utc": "2026-04-29T10:11:25Z",
          "cell_id": "pavilion:weeyuga:qwen3.5:9b",
          "model": "qwen3.5:9b",
          "phase": "5q",
          "question_id": "nginx_safe_reload",
          "run_idx": 2,
          "duration_seconds": 26.612,
          "prompt_tokens": 39,
          "completion_tokens": 1073,
          "tokens_per_second": 40.32,
          "finish_reason": "stop",
          "weeyuga_meta": null,
          "status_code": 200,
          "response_chars": 104,
          "response_preview": "cp /etc/nginx/nginx.conf /etc/nginx/nginx.conf.bak\nnginx -t\nif [ $? -eq 0 ]; then\n    nginx -s reload\nfi",
          "required_markers": [
            "cp /etc/nginx/nginx.conf",
            "nginx -t",
            "systemctl reload nginx",
            "&&"
          ],
          "markers_hit": [
            "cp /etc/nginx/nginx.conf",
            "nginx -t"
          ],
          "marker_hit_rate": 0.5,
          "format_rule": "shell_lines",
          "format_ok": true,
          "usable_answer": true,
          "error": null
        },
        {
          "type": "call",
          "ts_utc": "2026-04-29T10:12:14Z",
          "cell_id": "pavilion:weeyuga:qwen3.5:9b",
          "model": "qwen3.5:9b",
          "phase": "5q",
          "question_id": "yaml_cli_plan",
          "run_idx": 3,
          "duration_seconds": 49.467,
          "prompt_tokens": 43,
          "completion_tokens": 2048,
          "tokens_per_second": 41.4,
          "finish_reason": "length",
          "weeyuga_meta": null,
          "status_code": 200,
          "response_chars": 9329,
          "response_preview": "```python\n#!/usr/bin/env python3\n\"\"\"\nYAML Schema Validator CLI\nScans git repos for changed YAML files and validates against a JSON schema.\n\"\"\"\n\nimport argparse\nimport json\nimport os\nimport sys\nfrom pathlib import Path\nfrom typing import Lis",
          "required_markers": [
            "1.",
            "2.",
            "3.",
            "4.",
            "JSON schema",
            "git"
          ],
          "markers_hit": [
            "JSON schema",
            "git"
          ],
          "marker_hit_rate": 0.333,
          "format_rule": "four_numbered_steps",
          "format_ok": false,
          "usable_answer": true,
          "error": null
        },
        {
          "type": "call",
          "ts_utc": "2026-04-29T10:12:42Z",
          "cell_id": "pavilion:weeyuga:qwen3.5:9b",
          "model": "qwen3.5:9b",
          "phase": "5q",
          "question_id": "ssh_lockout_triage",
          "run_idx": 4,
          "duration_seconds": 27.33,
          "prompt_tokens": 56,
          "completion_tokens": 1104,
          "tokens_per_second": 40.4,
          "finish_reason": "stop",
          "weeyuga_meta": null,
          "status_code": 200,
          "response_chars": 279,
          "response_preview": "- Review `sshd_config` to ensure `PasswordAuthentication` is enabled.\n- Verify `authorized_keys` contains the correct public key.\n- Run `journalctl` to check for recent errors.\n- Review `rollback` to ensure the current state is safe.\n- Ensu",
          "required_markers": [
            "sshd_config",
            "authorized_keys",
            "journalctl",
            "rollback",
            "PasswordAuthentication"
          ],
          "markers_hit": [
            "sshd_config",
            "authorized_keys",
            "journalctl",
            "rollback",
            "PasswordAuthentication"
          ],
          "marker_hit_rate": 1.0,
          "format_rule": "five_bullets",
          "format_ok": true,
          "usable_answer": true,
          "error": null
        }
      ]
    },
    {
      "cell_id": "pavilion:weeyuga:qwen2.5-coder:14b",
      "n_calls": 6,
      "n_errors": 6,
      "duration_ms_mean": null,
      "duration_ms_p50": null,
      "duration_ms_p95": null,
      "tokens_per_sec_mean": null,
      "tokens_per_sec_p50": null,
      "tokens_per_sec_p95": null,
      "tokens_per_sec_max": null,
      "cold_warm_split": {
        "cold": {
          "n_calls": 2,
          "duration_ms_mean": null,
          "duration_ms_p50": null,
          "tokens_per_sec_mean": null
        },
        "warm": {
          "n_calls": 4,
          "duration_ms_mean": null,
          "duration_ms_p50": null,
          "tokens_per_sec_mean": null
        }
      },
      "per_prompt": {
        "unknown": {
          "n_calls": 6,
          "n_errors": 6,
          "duration_ms_mean": null,
          "duration_ms_p50": null,
          "tokens_per_sec_mean": null,
          "tokens_per_sec_p50": null
        }
      },
      "chars_split": null,
      "timeline": [
        {
          "prompt_id": null,
          "run_idx": 0,
          "phase": "hello",
          "duration_ms": null,
          "tokens_per_sec": null,
          "error": "<HTTPError 500: 'Internal Server Error'>"
        },
        {
          "prompt_id": null,
          "run_idx": 0,
          "phase": "5q",
          "duration_ms": null,
          "tokens_per_sec": null,
          "error": "<HTTPError 500: 'Internal Server Error'>"
        },
        {
          "prompt_id": null,
          "run_idx": 1,
          "phase": "5q",
          "duration_ms": null,
          "tokens_per_sec": null,
          "error": "<HTTPError 500: 'Internal Server Error'>"
        },
        {
          "prompt_id": null,
          "run_idx": 2,
          "phase": "5q",
          "duration_ms": null,
          "tokens_per_sec": null,
          "error": "<HTTPError 500: 'Internal Server Error'>"
        },
        {
          "prompt_id": null,
          "run_idx": 3,
          "phase": "5q",
          "duration_ms": null,
          "tokens_per_sec": null,
          "error": "<HTTPError 500: 'Internal Server Error'>"
        },
        {
          "prompt_id": null,
          "run_idx": 4,
          "phase": "5q",
          "duration_ms": null,
          "tokens_per_sec": null,
          "error": "<HTTPError 500: 'Internal Server Error'>"
        }
      ],
      "calls": [
        {
          "type": "call",
          "ts_utc": "2026-04-29T10:12:49Z",
          "cell_id": "pavilion:weeyuga:qwen2.5-coder:14b",
          "model": "qwen2.5-coder:14b",
          "phase": "hello",
          "question_id": "hello_check",
          "run_idx": 0,
          "duration_seconds": 7.472,
          "prompt_tokens": null,
          "completion_tokens": null,
          "tokens_per_second": null,
          "finish_reason": null,
          "weeyuga_meta": null,
          "status_code": null,
          "response_chars": 0,
          "response_preview": "",
          "required_markers": [],
          "markers_hit": [],
          "marker_hit_rate": null,
          "format_rule": "",
          "format_ok": null,
          "usable_answer": false,
          "error": "<HTTPError 500: 'Internal Server Error'>"
        },
        {
          "type": "call",
          "ts_utc": "2026-04-29T10:12:50Z",
          "cell_id": "pavilion:weeyuga:qwen2.5-coder:14b",
          "model": "qwen2.5-coder:14b",
          "phase": "5q",
          "question_id": "disk_guard_bash",
          "run_idx": 0,
          "duration_seconds": 0.882,
          "prompt_tokens": null,
          "completion_tokens": null,
          "tokens_per_second": null,
          "finish_reason": null,
          "weeyuga_meta": null,
          "status_code": null,
          "response_chars": 0,
          "response_preview": "",
          "required_markers": [
            "#!/usr/bin/env bash",
            "df -P /",
            "85",
            "exit 1"
          ],
          "markers_hit": [],
          "marker_hit_rate": 0.0,
          "format_rule": "bash_code",
          "format_ok": false,
          "usable_answer": false,
          "error": "<HTTPError 500: 'Internal Server Error'>"
        },
        {
          "type": "call",
          "ts_utc": "2026-04-29T10:12:51Z",
          "cell_id": "pavilion:weeyuga:qwen2.5-coder:14b",
          "model": "qwen2.5-coder:14b",
          "phase": "5q",
          "question_id": "ipv4_python_tests",
          "run_idx": 1,
          "duration_seconds": 1.059,
          "prompt_tokens": null,
          "completion_tokens": null,
          "tokens_per_second": null,
          "finish_reason": null,
          "weeyuga_meta": null,
          "status_code": null,
          "response_chars": 0,
          "response_preview": "",
          "required_markers": [
            "def is_valid_ipv4",
            "def test_",
            "assert",
            "split('.')"
          ],
          "markers_hit": [],
          "marker_hit_rate": 0.0,
          "format_rule": "python_code",
          "format_ok": false,
          "usable_answer": false,
          "error": "<HTTPError 500: 'Internal Server Error'>"
        },
        {
          "type": "call",
          "ts_utc": "2026-04-29T10:12:52Z",
          "cell_id": "pavilion:weeyuga:qwen2.5-coder:14b",
          "model": "qwen2.5-coder:14b",
          "phase": "5q",
          "question_id": "nginx_safe_reload",
          "run_idx": 2,
          "duration_seconds": 0.922,
          "prompt_tokens": null,
          "completion_tokens": null,
          "tokens_per_second": null,
          "finish_reason": null,
          "weeyuga_meta": null,
          "status_code": null,
          "response_chars": 0,
          "response_preview": "",
          "required_markers": [
            "cp /etc/nginx/nginx.conf",
            "nginx -t",
            "systemctl reload nginx",
            "&&"
          ],
          "markers_hit": [],
          "marker_hit_rate": 0.0,
          "format_rule": "shell_lines",
          "format_ok": false,
          "usable_answer": false,
          "error": "<HTTPError 500: 'Internal Server Error'>"
        },
        {
          "type": "call",
          "ts_utc": "2026-04-29T10:12:53Z",
          "cell_id": "pavilion:weeyuga:qwen2.5-coder:14b",
          "model": "qwen2.5-coder:14b",
          "phase": "5q",
          "question_id": "yaml_cli_plan",
          "run_idx": 3,
          "duration_seconds": 0.991,
          "prompt_tokens": null,
          "completion_tokens": null,
          "tokens_per_second": null,
          "finish_reason": null,
          "weeyuga_meta": null,
          "status_code": null,
          "response_chars": 0,
          "response_preview": "",
          "required_markers": [
            "1.",
            "2.",
            "3.",
            "4.",
            "JSON schema",
            "git"
          ],
          "markers_hit": [],
          "marker_hit_rate": 0.0,
          "format_rule": "four_numbered_steps",
          "format_ok": false,
          "usable_answer": false,
          "error": "<HTTPError 500: 'Internal Server Error'>"
        },
        {
          "type": "call",
          "ts_utc": "2026-04-29T10:12:54Z",
          "cell_id": "pavilion:weeyuga:qwen2.5-coder:14b",
          "model": "qwen2.5-coder:14b",
          "phase": "5q",
          "question_id": "ssh_lockout_triage",
          "run_idx": 4,
          "duration_seconds": 1.054,
          "prompt_tokens": null,
          "completion_tokens": null,
          "tokens_per_second": null,
          "finish_reason": null,
          "weeyuga_meta": null,
          "status_code": null,
          "response_chars": 0,
          "response_preview": "",
          "required_markers": [
            "sshd_config",
            "authorized_keys",
            "journalctl",
            "rollback",
            "PasswordAuthentication"
          ],
          "markers_hit": [],
          "marker_hit_rate": 0.0,
          "format_rule": "five_bullets",
          "format_ok": false,
          "usable_answer": false,
          "error": "<HTTPError 500: 'Internal Server Error'>"
        }
      ]
    },
    {
      "cell_id": "pavilion:weeyuga:qwen2.5-coder:3b",
      "n_calls": 6,
      "n_errors": 0,
      "duration_ms_mean": null,
      "duration_ms_p50": null,
      "duration_ms_p95": null,
      "tokens_per_sec_mean": null,
      "tokens_per_sec_p50": null,
      "tokens_per_sec_p95": null,
      "tokens_per_sec_max": null,
      "cold_warm_split": {
        "cold": {
          "n_calls": 2,
          "duration_ms_mean": null,
          "duration_ms_p50": null,
          "tokens_per_sec_mean": null
        },
        "warm": {
          "n_calls": 4,
          "duration_ms_mean": null,
          "duration_ms_p50": null,
          "tokens_per_sec_mean": null
        }
      },
      "per_prompt": {
        "unknown": {
          "n_calls": 6,
          "n_errors": 0,
          "duration_ms_mean": null,
          "duration_ms_p50": null,
          "tokens_per_sec_mean": null,
          "tokens_per_sec_p50": null
        }
      },
      "chars_split": null,
      "timeline": [
        {
          "prompt_id": null,
          "run_idx": 0,
          "phase": "hello",
          "duration_ms": null,
          "tokens_per_sec": null,
          "error": null
        },
        {
          "prompt_id": null,
          "run_idx": 0,
          "phase": "5q",
          "duration_ms": null,
          "tokens_per_sec": null,
          "error": null
        },
        {
          "prompt_id": null,
          "run_idx": 1,
          "phase": "5q",
          "duration_ms": null,
          "tokens_per_sec": null,
          "error": null
        },
        {
          "prompt_id": null,
          "run_idx": 2,
          "phase": "5q",
          "duration_ms": null,
          "tokens_per_sec": null,
          "error": null
        },
        {
          "prompt_id": null,
          "run_idx": 3,
          "phase": "5q",
          "duration_ms": null,
          "tokens_per_sec": null,
          "error": null
        },
        {
          "prompt_id": null,
          "run_idx": 4,
          "phase": "5q",
          "duration_ms": null,
          "tokens_per_sec": null,
          "error": null
        }
      ],
      "calls": [
        {
          "type": "call",
          "ts_utc": "2026-04-29T10:13:22Z",
          "cell_id": "pavilion:weeyuga:qwen2.5-coder:3b",
          "model": "qwen2.5-coder:3b",
          "phase": "hello",
          "question_id": "hello_check",
          "run_idx": 0,
          "duration_seconds": 28.318,
          "prompt_tokens": 35,
          "completion_tokens": 17,
          "tokens_per_second": 0.6,
          "finish_reason": "stop",
          "weeyuga_meta": null,
          "status_code": 200,
          "response_chars": 62,
          "response_preview": "Of course! I'm here to help. What do you need assistance with?",
          "required_markers": [],
          "markers_hit": [],
          "marker_hit_rate": null,
          "format_rule": "",
          "format_ok": null,
          "usable_answer": true,
          "error": null
        },
        {
          "type": "call",
          "ts_utc": "2026-04-29T10:13:50Z",
          "cell_id": "pavilion:weeyuga:qwen2.5-coder:3b",
          "model": "qwen2.5-coder:3b",
          "phase": "5q",
          "question_id": "disk_guard_bash",
          "run_idx": 0,
          "duration_seconds": 27.993,
          "prompt_tokens": 90,
          "completion_tokens": 378,
          "tokens_per_second": 13.5,
          "finish_reason": "stop",
          "weeyuga_meta": null,
          "status_code": 200,
          "response_chars": 1474,
          "response_preview": "Certainly! Below is a Bash script that checks disk usage for `/`, prints a human-readable warning, and exits with status 1 when usage is above 85%.\n\n```bash\n#!/bin/bash\n\n# Check if df -P / command exists\nif ! command -v df &> /dev/null; the",
          "required_markers": [
            "#!/usr/bin/env bash",
            "df -P /",
            "85",
            "exit 1"
          ],
          "markers_hit": [
            "df -P /",
            "85",
            "exit 1"
          ],
          "marker_hit_rate": 0.75,
          "format_rule": "bash_code",
          "format_ok": false,
          "usable_answer": true,
          "error": null
        },
        {
          "type": "call",
          "ts_utc": "2026-04-29T10:14:15Z",
          "cell_id": "pavilion:weeyuga:qwen2.5-coder:3b",
          "model": "qwen2.5-coder:3b",
          "phase": "5q",
          "question_id": "ipv4_python_tests",
          "run_idx": 1,
          "duration_seconds": 24.785,
          "prompt_tokens": 74,
          "completion_tokens": 305,
          "tokens_per_second": 12.31,
          "finish_reason": "stop",
          "weeyuga_meta": null,
          "status_code": 200,
          "response_chars": 1075,
          "response_preview": "```python\nimport pytest\n\ndef is_valid_ipv4(value: str) -> bool:\n    parts = value.split('.')\n    if len(parts) != 4:\n        return False\n    \n    for part in parts:\n        try:\n            num = int(part)\n            if not (0 <= num <= 2",
          "required_markers": [
            "def is_valid_ipv4",
            "def test_",
            "assert",
            "split('.')"
          ],
          "markers_hit": [
            "def is_valid_ipv4",
            "def test_",
            "assert",
            "split('.')"
          ],
          "marker_hit_rate": 1.0,
          "format_rule": "python_code",
          "format_ok": true,
          "usable_answer": true,
          "error": null
        },
        {
          "type": "call",
          "ts_utc": "2026-04-29T10:14:19Z",
          "cell_id": "pavilion:weeyuga:qwen2.5-coder:3b",
          "model": "qwen2.5-coder:3b",
          "phase": "5q",
          "question_id": "nginx_safe_reload",
          "run_idx": 2,
          "duration_seconds": 3.778,
          "prompt_tokens": 58,
          "completion_tokens": 43,
          "tokens_per_second": 11.38,
          "finish_reason": "stop",
          "weeyuga_meta": null,
          "status_code": 200,
          "response_chars": 140,
          "response_preview": "```bash\nsudo cp /etc/nginx/nginx.conf /etc/nginx/nginx.conf.bak\n\nsudo nginx -t\n\nif [ $? -eq 0 ]; then\n    sudo systemctl reload nginx\nfi\n```",
          "required_markers": [
            "cp /etc/nginx/nginx.conf",
            "nginx -t",
            "systemctl reload nginx",
            "&&"
          ],
          "markers_hit": [
            "cp /etc/nginx/nginx.conf",
            "nginx -t",
            "systemctl reload nginx"
          ],
          "marker_hit_rate": 0.75,
          "format_rule": "shell_lines",
          "format_ok": true,
          "usable_answer": true,
          "error": null
        },
        {
          "type": "call",
          "ts_utc": "2026-04-29T10:14:56Z",
          "cell_id": "pavilion:weeyuga:qwen2.5-coder:3b",
          "model": "qwen2.5-coder:3b",
          "phase": "5q",
          "question_id": "yaml_cli_plan",
          "run_idx": 3,
          "duration_seconds": 36.655,
          "prompt_tokens": 62,
          "completion_tokens": 501,
          "tokens_per_second": 13.67,
          "finish_reason": "stop",
          "weeyuga_meta": null,
          "status_code": 200,
          "response_chars": 2192,
          "response_preview": "1. **Install Required Packages**: Ensure you have `git`, `pyyaml`, and `jsonschema` installed. You can install these using pip:\n   ```bash\n   pip install gitpython pyyaml jsonschema\n   ```\n\n2. **Create the Python Script**: Create a new file",
          "required_markers": [
            "1.",
            "2.",
            "3.",
            "4.",
            "JSON schema",
            "git"
          ],
          "markers_hit": [
            "1.",
            "2.",
            "3.",
            "4.",
            "JSON schema",
            "git"
          ],
          "marker_hit_rate": 1.0,
          "format_rule": "four_numbered_steps",
          "format_ok": true,
          "usable_answer": true,
          "error": null
        },
        {
          "type": "call",
          "ts_utc": "2026-04-29T10:15:08Z",
          "cell_id": "pavilion:weeyuga:qwen2.5-coder:3b",
          "model": "qwen2.5-coder:3b",
          "phase": "5q",
          "question_id": "ssh_lockout_triage",
          "run_idx": 4,
          "duration_seconds": 11.989,
          "prompt_tokens": 75,
          "completion_tokens": 154,
          "tokens_per_second": 12.85,
          "finish_reason": "stop",
          "weeyuga_meta": null,
          "status_code": 200,
          "response_chars": 699,
          "response_preview": "1. **Check `sshd_config`**: Ensure that `PasswordAuthentication` is set to `no`. This prevents password-based authentication.\n2. **Review `authorized_keys`**: Verify that the public keys in `~/.ssh/authorized_keys` are correct and match tho",
          "required_markers": [
            "sshd_config",
            "authorized_keys",
            "journalctl",
            "rollback",
            "PasswordAuthentication"
          ],
          "markers_hit": [
            "sshd_config",
            "authorized_keys",
            "journalctl",
            "rollback",
            "PasswordAuthentication"
          ],
          "marker_hit_rate": 1.0,
          "format_rule": "five_bullets",
          "format_ok": false,
          "usable_answer": true,
          "error": null
        }
      ]
    },
    {
      "cell_id": "pavilion:weeyuga:qwen3:14b",
      "n_calls": 6,
      "n_errors": 4,
      "duration_ms_mean": null,
      "duration_ms_p50": null,
      "duration_ms_p95": null,
      "tokens_per_sec_mean": null,
      "tokens_per_sec_p50": null,
      "tokens_per_sec_p95": null,
      "tokens_per_sec_max": null,
      "cold_warm_split": {
        "cold": {
          "n_calls": 2,
          "duration_ms_mean": null,
          "duration_ms_p50": null,
          "tokens_per_sec_mean": null
        },
        "warm": {
          "n_calls": 4,
          "duration_ms_mean": null,
          "duration_ms_p50": null,
          "tokens_per_sec_mean": null
        }
      },
      "per_prompt": {
        "unknown": {
          "n_calls": 6,
          "n_errors": 4,
          "duration_ms_mean": null,
          "duration_ms_p50": null,
          "tokens_per_sec_mean": null,
          "tokens_per_sec_p50": null
        }
      },
      "chars_split": null,
      "timeline": [
        {
          "prompt_id": null,
          "run_idx": 0,
          "phase": "hello",
          "duration_ms": null,
          "tokens_per_sec": null,
          "error": "TimeoutError('timed out')"
        },
        {
          "prompt_id": null,
          "run_idx": 0,
          "phase": "5q",
          "duration_ms": null,
          "tokens_per_sec": null,
          "error": "TimeoutError('timed out')"
        },
        {
          "prompt_id": null,
          "run_idx": 1,
          "phase": "5q",
          "duration_ms": null,
          "tokens_per_sec": null,
          "error": "TimeoutError('timed out')"
        },
        {
          "prompt_id": null,
          "run_idx": 2,
          "phase": "5q",
          "duration_ms": null,
          "tokens_per_sec": null,
          "error": "TimeoutError('timed out')"
        },
        {
          "prompt_id": null,
          "run_idx": 3,
          "phase": "5q",
          "duration_ms": null,
          "tokens_per_sec": null,
          "error": null
        },
        {
          "prompt_id": null,
          "run_idx": 4,
          "phase": "5q",
          "duration_ms": null,
          "tokens_per_sec": null,
          "error": null
        }
      ],
      "calls": [
        {
          "type": "call",
          "ts_utc": "2026-04-29T10:21:08Z",
          "cell_id": "pavilion:weeyuga:qwen3:14b",
          "model": "qwen3:14b",
          "phase": "hello",
          "question_id": "hello_check",
          "run_idx": 0,
          "duration_seconds": 360.058,
          "prompt_tokens": null,
          "completion_tokens": null,
          "tokens_per_second": null,
          "finish_reason": null,
          "weeyuga_meta": null,
          "status_code": null,
          "response_chars": 0,
          "response_preview": "",
          "required_markers": [],
          "markers_hit": [],
          "marker_hit_rate": null,
          "format_rule": "",
          "format_ok": null,
          "usable_answer": false,
          "error": "TimeoutError('timed out')"
        },
        {
          "type": "call",
          "ts_utc": "2026-04-29T10:27:08Z",
          "cell_id": "pavilion:weeyuga:qwen3:14b",
          "model": "qwen3:14b",
          "phase": "5q",
          "question_id": "disk_guard_bash",
          "run_idx": 0,
          "duration_seconds": 360.069,
          "prompt_tokens": null,
          "completion_tokens": null,
          "tokens_per_second": null,
          "finish_reason": null,
          "weeyuga_meta": null,
          "status_code": null,
          "response_chars": 0,
          "response_preview": "",
          "required_markers": [
            "#!/usr/bin/env bash",
            "df -P /",
            "85",
            "exit 1"
          ],
          "markers_hit": [],
          "marker_hit_rate": 0.0,
          "format_rule": "bash_code",
          "format_ok": false,
          "usable_answer": false,
          "error": "TimeoutError('timed out')"
        },
        {
          "type": "call",
          "ts_utc": "2026-04-29T10:33:08Z",
          "cell_id": "pavilion:weeyuga:qwen3:14b",
          "model": "qwen3:14b",
          "phase": "5q",
          "question_id": "ipv4_python_tests",
          "run_idx": 1,
          "duration_seconds": 360.075,
          "prompt_tokens": null,
          "completion_tokens": null,
          "tokens_per_second": null,
          "finish_reason": null,
          "weeyuga_meta": null,
          "status_code": null,
          "response_chars": 0,
          "response_preview": "",
          "required_markers": [
            "def is_valid_ipv4",
            "def test_",
            "assert",
            "split('.')"
          ],
          "markers_hit": [],
          "marker_hit_rate": 0.0,
          "format_rule": "python_code",
          "format_ok": false,
          "usable_answer": false,
          "error": "TimeoutError('timed out')"
        },
        {
          "type": "call",
          "ts_utc": "2026-04-29T10:39:08Z",
          "cell_id": "pavilion:weeyuga:qwen3:14b",
          "model": "qwen3:14b",
          "phase": "5q",
          "question_id": "nginx_safe_reload",
          "run_idx": 2,
          "duration_seconds": 360.065,
          "prompt_tokens": null,
          "completion_tokens": null,
          "tokens_per_second": null,
          "finish_reason": null,
          "weeyuga_meta": null,
          "status_code": null,
          "response_chars": 0,
          "response_preview": "",
          "required_markers": [
            "cp /etc/nginx/nginx.conf",
            "nginx -t",
            "systemctl reload nginx",
            "&&"
          ],
          "markers_hit": [],
          "marker_hit_rate": 0.0,
          "format_rule": "shell_lines",
          "format_ok": false,
          "usable_answer": false,
          "error": "TimeoutError('timed out')"
        },
        {
          "type": "call",
          "ts_utc": "2026-04-29T10:44:37Z",
          "cell_id": "pavilion:weeyuga:qwen3:14b",
          "model": "qwen3:14b",
          "phase": "5q",
          "question_id": "yaml_cli_plan",
          "run_idx": 3,
          "duration_seconds": 328.959,
          "prompt_tokens": 43,
          "completion_tokens": 903,
          "tokens_per_second": 2.75,
          "finish_reason": "stop",
          "weeyuga_meta": null,
          "status_code": 200,
          "response_chars": 751,
          "response_preview": "1. **Use `git diff` to identify changed YAML files**: Execute `git diff --name-only` to list modified files, then filter for `.yaml` or `.yml` extensions using a regex or `grep` to isolate relevant files.  \n\n2. **Parse YAML files into Pytho",
          "required_markers": [
            "1.",
            "2.",
            "3.",
            "4.",
            "JSON schema",
            "git"
          ],
          "markers_hit": [
            "1.",
            "2.",
            "3.",
            "4.",
            "JSON schema",
            "git"
          ],
          "marker_hit_rate": 1.0,
          "format_rule": "four_numbered_steps",
          "format_ok": true,
          "usable_answer": true,
          "error": null
        },
        {
          "type": "call",
          "ts_utc": "2026-04-29T10:48:48Z",
          "cell_id": "pavilion:weeyuga:qwen3:14b",
          "model": "qwen3:14b",
          "phase": "5q",
          "question_id": "ssh_lockout_triage",
          "run_idx": 4,
          "duration_seconds": 251.383,
          "prompt_tokens": 56,
          "completion_tokens": 787,
          "tokens_per_second": 3.13,
          "finish_reason": "stop",
          "weeyuga_meta": null,
          "status_code": 200,
          "response_chars": 794,
          "response_preview": "- **Check `sshd_config` for `PasswordAuthentication` setting**: Ensure it's enabled if password login is required, or confirm public key authentication is properly configured.  \n- **Verify `authorized_keys` file permissions and content**: C",
          "required_markers": [
            "sshd_config",
            "authorized_keys",
            "journalctl",
            "rollback",
            "PasswordAuthentication"
          ],
          "markers_hit": [
            "sshd_config",
            "authorized_keys",
            "journalctl",
            "rollback",
            "PasswordAuthentication"
          ],
          "marker_hit_rate": 1.0,
          "format_rule": "five_bullets",
          "format_ok": true,
          "usable_answer": true,
          "error": null
        }
      ]
    },
    {
      "cell_id": "pavilion:weeyuga:qwen3:8b",
      "n_calls": 6,
      "n_errors": 2,
      "duration_ms_mean": null,
      "duration_ms_p50": null,
      "duration_ms_p95": null,
      "tokens_per_sec_mean": null,
      "tokens_per_sec_p50": null,
      "tokens_per_sec_p95": null,
      "tokens_per_sec_max": null,
      "cold_warm_split": {
        "cold": {
          "n_calls": 2,
          "duration_ms_mean": null,
          "duration_ms_p50": null,
          "tokens_per_sec_mean": null
        },
        "warm": {
          "n_calls": 4,
          "duration_ms_mean": null,
          "duration_ms_p50": null,
          "tokens_per_sec_mean": null
        }
      },
      "per_prompt": {
        "unknown": {
          "n_calls": 6,
          "n_errors": 2,
          "duration_ms_mean": null,
          "duration_ms_p50": null,
          "tokens_per_sec_mean": null,
          "tokens_per_sec_p50": null
        }
      },
      "chars_split": null,
      "timeline": [
        {
          "prompt_id": null,
          "run_idx": 0,
          "phase": "hello",
          "duration_ms": null,
          "tokens_per_sec": null,
          "error": "TimeoutError('timed out')"
        },
        {
          "prompt_id": null,
          "run_idx": 0,
          "phase": "5q",
          "duration_ms": null,
          "tokens_per_sec": null,
          "error": null
        },
        {
          "prompt_id": null,
          "run_idx": 1,
          "phase": "5q",
          "duration_ms": null,
          "tokens_per_sec": null,
          "error": "TimeoutError('timed out')"
        },
        {
          "prompt_id": null,
          "run_idx": 2,
          "phase": "5q",
          "duration_ms": null,
          "tokens_per_sec": null,
          "error": null
        },
        {
          "prompt_id": null,
          "run_idx": 3,
          "phase": "5q",
          "duration_ms": null,
          "tokens_per_sec": null,
          "error": null
        },
        {
          "prompt_id": null,
          "run_idx": 4,
          "phase": "5q",
          "duration_ms": null,
          "tokens_per_sec": null,
          "error": null
        }
      ],
      "calls": [
        {
          "type": "call",
          "ts_utc": "2026-04-29T11:20:47Z",
          "cell_id": "pavilion:weeyuga:qwen3:8b",
          "model": "qwen3:8b",
          "phase": "hello",
          "question_id": "hello_check",
          "run_idx": 0,
          "duration_seconds": 360.092,
          "prompt_tokens": null,
          "completion_tokens": null,
          "tokens_per_second": null,
          "finish_reason": null,
          "weeyuga_meta": null,
          "status_code": null,
          "response_chars": 0,
          "response_preview": "",
          "required_markers": [],
          "markers_hit": [],
          "marker_hit_rate": null,
          "format_rule": "",
          "format_ok": null,
          "usable_answer": false,
          "error": "TimeoutError('timed out')"
        },
        {
          "type": "call",
          "ts_utc": "2026-04-29T11:25:53Z",
          "cell_id": "pavilion:weeyuga:qwen3:8b",
          "model": "qwen3:8b",
          "phase": "5q",
          "question_id": "disk_guard_bash",
          "run_idx": 0,
          "duration_seconds": 305.673,
          "prompt_tokens": 71,
          "completion_tokens": 1427,
          "tokens_per_second": 4.67,
          "finish_reason": "stop",
          "weeyuga_meta": null,
          "status_code": 200,
          "response_chars": 214,
          "response_preview": "#!/bin/bash\n\nusage_percent=$(df -P / | grep '/$' | awk '{print $5}' | cut -d'%' -f1)\n\nif (( usage_percent > 85 )); then\n    echo \"Warning: / is at $usage_percent% usage. Please free up space.\"\n    exit 1\nfi\n\nexit 0",
          "required_markers": [
            "#!/usr/bin/env bash",
            "df -P /",
            "85",
            "exit 1"
          ],
          "markers_hit": [
            "df -P /",
            "85",
            "exit 1"
          ],
          "marker_hit_rate": 0.75,
          "format_rule": "bash_code",
          "format_ok": false,
          "usable_answer": true,
          "error": null
        },
        {
          "type": "call",
          "ts_utc": "2026-04-29T11:31:53Z",
          "cell_id": "pavilion:weeyuga:qwen3:8b",
          "model": "qwen3:8b",
          "phase": "5q",
          "question_id": "ipv4_python_tests",
          "run_idx": 1,
          "duration_seconds": 360.074,
          "prompt_tokens": null,
          "completion_tokens": null,
          "tokens_per_second": null,
          "finish_reason": null,
          "weeyuga_meta": null,
          "status_code": null,
          "response_chars": 0,
          "response_preview": "",
          "required_markers": [
            "def is_valid_ipv4",
            "def test_",
            "assert",
            "split('.')"
          ],
          "markers_hit": [],
          "marker_hit_rate": 0.0,
          "format_rule": "python_code",
          "format_ok": false,
          "usable_answer": false,
          "error": "TimeoutError('timed out')"
        },
        {
          "type": "call",
          "ts_utc": "2026-04-29T11:36:36Z",
          "cell_id": "pavilion:weeyuga:qwen3:8b",
          "model": "qwen3:8b",
          "phase": "5q",
          "question_id": "nginx_safe_reload",
          "run_idx": 2,
          "duration_seconds": 283.312,
          "prompt_tokens": 39,
          "completion_tokens": 933,
          "tokens_per_second": 3.29,
          "finish_reason": "stop",
          "weeyuga_meta": null,
          "status_code": 200,
          "response_chars": 114,
          "response_preview": "cp /etc/nginx/nginx.conf /etc/nginx/nginx.conf.$(date +\\%F-\\%T)\nnginx -t\nif [ $? -eq 0 ]; then nginx -s reload; fi",
          "required_markers": [
            "cp /etc/nginx/nginx.conf",
            "nginx -t",
            "systemctl reload nginx",
            "&&"
          ],
          "markers_hit": [
            "cp /etc/nginx/nginx.conf",
            "nginx -t"
          ],
          "marker_hit_rate": 0.5,
          "format_rule": "shell_lines",
          "format_ok": true,
          "usable_answer": true,
          "error": null
        },
        {
          "type": "call",
          "ts_utc": "2026-04-29T11:39:10Z",
          "cell_id": "pavilion:weeyuga:qwen3:8b",
          "model": "qwen3:8b",
          "phase": "5q",
          "question_id": "yaml_cli_plan",
          "run_idx": 3,
          "duration_seconds": 154.227,
          "prompt_tokens": 43,
          "completion_tokens": 776,
          "tokens_per_second": 5.03,
          "finish_reason": "stop",
          "weeyuga_meta": null,
          "status_code": 200,
          "response_chars": 363,
          "response_preview": "1. Use `git diff --name-only` to list changed files, filter for `.yml`/`.yaml` extensions.  \n2. Parse each YAML file into JSON and load the provided JSON schema using `jsonschema`.  \n3. Validate each parsed YAML against the schema, collecti",
          "required_markers": [
            "1.",
            "2.",
            "3.",
            "4.",
            "JSON schema",
            "git"
          ],
          "markers_hit": [
            "1.",
            "2.",
            "3.",
            "4.",
            "JSON schema",
            "git"
          ],
          "marker_hit_rate": 1.0,
          "format_rule": "four_numbered_steps",
          "format_ok": true,
          "usable_answer": true,
          "error": null
        },
        {
          "type": "call",
          "ts_utc": "2026-04-29T11:41:15Z",
          "cell_id": "pavilion:weeyuga:qwen3:8b",
          "model": "qwen3:8b",
          "phase": "5q",
          "question_id": "ssh_lockout_triage",
          "run_idx": 4,
          "duration_seconds": 124.742,
          "prompt_tokens": 56,
          "completion_tokens": 629,
          "tokens_per_second": 5.04,
          "finish_reason": "stop",
          "weeyuga_meta": null,
          "status_code": 200,
          "response_chars": 921,
          "response_preview": "- **Verify `sshd_config` settings**: Ensure `PasswordAuthentication` is set to `no` (if disabling passwords) and `PermitRootLogin` is configured securely. Check for syntax errors using `sshd_config_test`.  \n- **Inspect `authorized_keys` fil",
          "required_markers": [
            "sshd_config",
            "authorized_keys",
            "journalctl",
            "rollback",
            "PasswordAuthentication"
          ],
          "markers_hit": [
            "sshd_config",
            "authorized_keys",
            "journalctl",
            "rollback",
            "PasswordAuthentication"
          ],
          "marker_hit_rate": 1.0,
          "format_rule": "five_bullets",
          "format_ok": true,
          "usable_answer": true,
          "error": null
        }
      ]
    },
    {
      "cell_id": "pavilion:weeyuga:qwen3:4b",
      "n_calls": 6,
      "n_errors": 0,
      "duration_ms_mean": null,
      "duration_ms_p50": null,
      "duration_ms_p95": null,
      "tokens_per_sec_mean": null,
      "tokens_per_sec_p50": null,
      "tokens_per_sec_p95": null,
      "tokens_per_sec_max": null,
      "cold_warm_split": {
        "cold": {
          "n_calls": 2,
          "duration_ms_mean": null,
          "duration_ms_p50": null,
          "tokens_per_sec_mean": null
        },
        "warm": {
          "n_calls": 4,
          "duration_ms_mean": null,
          "duration_ms_p50": null,
          "tokens_per_sec_mean": null
        }
      },
      "per_prompt": {
        "unknown": {
          "n_calls": 6,
          "n_errors": 0,
          "duration_ms_mean": null,
          "duration_ms_p50": null,
          "tokens_per_sec_mean": null,
          "tokens_per_sec_p50": null
        }
      },
      "chars_split": null,
      "timeline": [
        {
          "prompt_id": null,
          "run_idx": 0,
          "phase": "hello",
          "duration_ms": null,
          "tokens_per_sec": null,
          "error": null
        },
        {
          "prompt_id": null,
          "run_idx": 0,
          "phase": "5q",
          "duration_ms": null,
          "tokens_per_sec": null,
          "error": null
        },
        {
          "prompt_id": null,
          "run_idx": 1,
          "phase": "5q",
          "duration_ms": null,
          "tokens_per_sec": null,
          "error": null
        },
        {
          "prompt_id": null,
          "run_idx": 2,
          "phase": "5q",
          "duration_ms": null,
          "tokens_per_sec": null,
          "error": null
        },
        {
          "prompt_id": null,
          "run_idx": 3,
          "phase": "5q",
          "duration_ms": null,
          "tokens_per_sec": null,
          "error": null
        },
        {
          "prompt_id": null,
          "run_idx": 4,
          "phase": "5q",
          "duration_ms": null,
          "tokens_per_sec": null,
          "error": null
        }
      ],
      "calls": [
        {
          "type": "call",
          "ts_utc": "2026-04-29T11:43:45Z",
          "cell_id": "pavilion:weeyuga:qwen3:4b",
          "model": "qwen3:4b",
          "phase": "hello",
          "question_id": "hello_check",
          "run_idx": 0,
          "duration_seconds": 150.036,
          "prompt_tokens": 16,
          "completion_tokens": 411,
          "tokens_per_second": 2.74,
          "finish_reason": "stop",
          "weeyuga_meta": null,
          "status_code": 200,
          "response_chars": 248,
          "response_preview": "Hi! \ud83d\udc4b Absolutely \u2014 I'm here to help. \ud83d\ude0a  \n**What would you like assistance with?**  \n(Whether it's a question, problem, creative idea, or just need a quick explanation \u2014 I'm all ears!)\n\nJust share what you need, and I'll do my best to suppor",
          "required_markers": [],
          "markers_hit": [],
          "marker_hit_rate": null,
          "format_rule": "",
          "format_ok": null,
          "usable_answer": true,
          "error": null
        },
        {
          "type": "call",
          "ts_utc": "2026-04-29T11:47:43Z",
          "cell_id": "pavilion:weeyuga:qwen3:4b",
          "model": "qwen3:4b",
          "phase": "5q",
          "question_id": "disk_guard_bash",
          "run_idx": 0,
          "duration_seconds": 238.435,
          "prompt_tokens": 71,
          "completion_tokens": 2048,
          "tokens_per_second": 8.59,
          "finish_reason": "length",
          "weeyuga_meta": null,
          "status_code": 200,
          "response_chars": 0,
          "response_preview": "",
          "required_markers": [
            "#!/usr/bin/env bash",
            "df -P /",
            "85",
            "exit 1"
          ],
          "markers_hit": [],
          "marker_hit_rate": 0.0,
          "format_rule": "bash_code",
          "format_ok": false,
          "usable_answer": false,
          "error": null
        },
        {
          "type": "call",
          "ts_utc": "2026-04-29T11:51:39Z",
          "cell_id": "pavilion:weeyuga:qwen3:4b",
          "model": "qwen3:4b",
          "phase": "5q",
          "question_id": "ipv4_python_tests",
          "run_idx": 1,
          "duration_seconds": 235.851,
          "prompt_tokens": 55,
          "completion_tokens": 2048,
          "tokens_per_second": 8.68,
          "finish_reason": "length",
          "weeyuga_meta": null,
          "status_code": 200,
          "response_chars": 0,
          "response_preview": "",
          "required_markers": [
            "def is_valid_ipv4",
            "def test_",
            "assert",
            "split('.')"
          ],
          "markers_hit": [],
          "marker_hit_rate": 0.0,
          "format_rule": "python_code",
          "format_ok": false,
          "usable_answer": false,
          "error": null
        },
        {
          "type": "call",
          "ts_utc": "2026-04-29T11:55:33Z",
          "cell_id": "pavilion:weeyuga:qwen3:4b",
          "model": "qwen3:4b",
          "phase": "5q",
          "question_id": "nginx_safe_reload",
          "run_idx": 2,
          "duration_seconds": 233.393,
          "prompt_tokens": 39,
          "completion_tokens": 2048,
          "tokens_per_second": 8.77,
          "finish_reason": "length",
          "weeyuga_meta": null,
          "status_code": 200,
          "response_chars": 0,
          "response_preview": "",
          "required_markers": [
            "cp /etc/nginx/nginx.conf",
            "nginx -t",
            "systemctl reload nginx",
            "&&"
          ],
          "markers_hit": [],
          "marker_hit_rate": 0.0,
          "format_rule": "shell_lines",
          "format_ok": false,
          "usable_answer": false,
          "error": null
        },
        {
          "type": "call",
          "ts_utc": "2026-04-29T11:59:33Z",
          "cell_id": "pavilion:weeyuga:qwen3:4b",
          "model": "qwen3:4b",
          "phase": "5q",
          "question_id": "yaml_cli_plan",
          "run_idx": 3,
          "duration_seconds": 240.036,
          "prompt_tokens": 43,
          "completion_tokens": 2048,
          "tokens_per_second": 8.53,
          "finish_reason": "length",
          "weeyuga_meta": null,
          "status_code": 200,
          "response_chars": 0,
          "response_preview": "",
          "required_markers": [
            "1.",
            "2.",
            "3.",
            "4.",
            "JSON schema",
            "git"
          ],
          "markers_hit": [],
          "marker_hit_rate": 0.0,
          "format_rule": "four_numbered_steps",
          "format_ok": false,
          "usable_answer": false,
          "error": null
        },
        {
          "type": "call",
          "ts_utc": "2026-04-29T12:03:30Z",
          "cell_id": "pavilion:weeyuga:qwen3:4b",
          "model": "qwen3:4b",
          "phase": "5q",
          "question_id": "ssh_lockout_triage",
          "run_idx": 4,
          "duration_seconds": 237.376,
          "prompt_tokens": 56,
          "completion_tokens": 2048,
          "tokens_per_second": 8.63,
          "finish_reason": "length",
          "weeyuga_meta": null,
          "status_code": 200,
          "response_chars": 0,
          "response_preview": "",
          "required_markers": [
            "sshd_config",
            "authorized_keys",
            "journalctl",
            "rollback",
            "PasswordAuthentication"
          ],
          "markers_hit": [],
          "marker_hit_rate": 0.0,
          "format_rule": "five_bullets",
          "format_ok": false,
          "usable_answer": false,
          "error": null
        }
      ]
    },
    {
      "cell_id": "pavilion:weeyuga:qwen2.5:3b",
      "n_calls": 6,
      "n_errors": 0,
      "duration_ms_mean": null,
      "duration_ms_p50": null,
      "duration_ms_p95": null,
      "tokens_per_sec_mean": null,
      "tokens_per_sec_p50": null,
      "tokens_per_sec_p95": null,
      "tokens_per_sec_max": null,
      "cold_warm_split": {
        "cold": {
          "n_calls": 2,
          "duration_ms_mean": null,
          "duration_ms_p50": null,
          "tokens_per_sec_mean": null
        },
        "warm": {
          "n_calls": 4,
          "duration_ms_mean": null,
          "duration_ms_p50": null,
          "tokens_per_sec_mean": null
        }
      },
      "per_prompt": {
        "unknown": {
          "n_calls": 6,
          "n_errors": 0,
          "duration_ms_mean": null,
          "duration_ms_p50": null,
          "tokens_per_sec_mean": null,
          "tokens_per_sec_p50": null
        }
      },
      "chars_split": null,
      "timeline": [
        {
          "prompt_id": null,
          "run_idx": 0,
          "phase": "hello",
          "duration_ms": null,
          "tokens_per_sec": null,
          "error": null
        },
        {
          "prompt_id": null,
          "run_idx": 0,
          "phase": "5q",
          "duration_ms": null,
          "tokens_per_sec": null,
          "error": null
        },
        {
          "prompt_id": null,
          "run_idx": 1,
          "phase": "5q",
          "duration_ms": null,
          "tokens_per_sec": null,
          "error": null
        },
        {
          "prompt_id": null,
          "run_idx": 2,
          "phase": "5q",
          "duration_ms": null,
          "tokens_per_sec": null,
          "error": null
        },
        {
          "prompt_id": null,
          "run_idx": 3,
          "phase": "5q",
          "duration_ms": null,
          "tokens_per_sec": null,
          "error": null
        },
        {
          "prompt_id": null,
          "run_idx": 4,
          "phase": "5q",
          "duration_ms": null,
          "tokens_per_sec": null,
          "error": null
        }
      ],
      "calls": [
        {
          "type": "call",
          "ts_utc": "2026-04-29T12:03:59Z",
          "cell_id": "pavilion:weeyuga:qwen2.5:3b",
          "model": "qwen2.5:3b",
          "phase": "hello",
          "question_id": "hello_check",
          "run_idx": 0,
          "duration_seconds": 28.558,
          "prompt_tokens": 35,
          "completion_tokens": 19,
          "tokens_per_second": 0.67,
          "finish_reason": "stop",
          "weeyuga_meta": null,
          "status_code": 200,
          "response_chars": 72,
          "response_preview": "Of course! I'd be happy to help. What do you need assistance with today?",
          "required_markers": [],
          "markers_hit": [],
          "marker_hit_rate": null,
          "format_rule": "",
          "format_ok": null,
          "usable_answer": true,
          "error": null
        },
        {
          "type": "call",
          "ts_utc": "2026-04-29T12:04:07Z",
          "cell_id": "pavilion:weeyuga:qwen2.5:3b",
          "model": "qwen2.5:3b",
          "phase": "5q",
          "question_id": "disk_guard_bash",
          "run_idx": 0,
          "duration_seconds": 7.986,
          "prompt_tokens": 90,
          "completion_tokens": 97,
          "tokens_per_second": 12.15,
          "finish_reason": "stop",
          "weeyuga_meta": null,
          "status_code": 200,
          "response_chars": 284,
          "response_preview": "```bash\n#!/bin/bash\n\n# Check disk usage for /\nusage=$(df -Ph / | awk 'NR==2 {print $5}')\n\n# Convert to percentage and check if it's above 85%\nif (( $(echo \"$usage > 85\" | bc -l) )); then\n    echo \"Warning: Disk usage on / is over 85%. Curre",
          "required_markers": [
            "#!/usr/bin/env bash",
            "df -P /",
            "85",
            "exit 1"
          ],
          "markers_hit": [
            "85",
            "exit 1"
          ],
          "marker_hit_rate": 0.5,
          "format_rule": "bash_code",
          "format_ok": false,
          "usable_answer": true,
          "error": null
        },
        {
          "type": "call",
          "ts_utc": "2026-04-29T12:04:36Z",
          "cell_id": "pavilion:weeyuga:qwen2.5:3b",
          "model": "qwen2.5:3b",
          "phase": "5q",
          "question_id": "ipv4_python_tests",
          "run_idx": 1,
          "duration_seconds": 28.968,
          "prompt_tokens": 74,
          "completion_tokens": 378,
          "tokens_per_second": 13.05,
          "finish_reason": "stop",
          "weeyuga_meta": null,
          "status_code": 200,
          "response_chars": 1393,
          "response_preview": "```python\ndef is_valid_ipv4(value: str) -> bool:\n    \"\"\"\n    Checks if the given value is a valid IPv4 address.\n    \n    :param value: A string representing the IP address to validate.\n    :return: True if the value is a valid IPv4 address,",
          "required_markers": [
            "def is_valid_ipv4",
            "def test_",
            "assert",
            "split('.')"
          ],
          "markers_hit": [
            "def is_valid_ipv4",
            "def test_",
            "assert",
            "split('.')"
          ],
          "marker_hit_rate": 1.0,
          "format_rule": "python_code",
          "format_ok": false,
          "usable_answer": true,
          "error": null
        },
        {
          "type": "call",
          "ts_utc": "2026-04-29T12:04:39Z",
          "cell_id": "pavilion:weeyuga:qwen2.5:3b",
          "model": "qwen2.5:3b",
          "phase": "5q",
          "question_id": "nginx_safe_reload",
          "run_idx": 2,
          "duration_seconds": 3.395,
          "prompt_tokens": 58,
          "completion_tokens": 37,
          "tokens_per_second": 10.9,
          "finish_reason": "stop",
          "weeyuga_meta": null,
          "status_code": 200,
          "response_chars": 123,
          "response_preview": "sudo cp /etc/nginx/nginx.conf /etc/nginx/nginx.conf.bak\nsudo nginx -t\nif [ $? -eq 0 ]; then\n sudo systemctl reload nginx\nfi",
          "required_markers": [
            "cp /etc/nginx/nginx.conf",
            "nginx -t",
            "systemctl reload nginx",
            "&&"
          ],
          "markers_hit": [
            "cp /etc/nginx/nginx.conf",
            "nginx -t",
            "systemctl reload nginx"
          ],
          "marker_hit_rate": 0.75,
          "format_rule": "shell_lines",
          "format_ok": true,
          "usable_answer": true,
          "error": null
        },
        {
          "type": "call",
          "ts_utc": "2026-04-29T12:05:26Z",
          "cell_id": "pavilion:weeyuga:qwen2.5:3b",
          "model": "qwen2.5:3b",
          "phase": "5q",
          "question_id": "yaml_cli_plan",
          "run_idx": 3,
          "duration_seconds": 46.998,
          "prompt_tokens": 62,
          "completion_tokens": 606,
          "tokens_per_second": 12.89,
          "finish_reason": "stop",
          "weeyuga_meta": null,
          "status_code": 200,
          "response_chars": 2760,
          "response_preview": "Certainly! Below are the four numbered steps to plan your Python CLI tool that scans a Git repository for changed YAML files, validates them against a JSON schema, and exits with a non-zero status if any validation fails.\n\n1. **Initialize t",
          "required_markers": [
            "1.",
            "2.",
            "3.",
            "4.",
            "JSON schema",
            "git"
          ],
          "markers_hit": [
            "1.",
            "2.",
            "3.",
            "4.",
            "JSON schema",
            "git"
          ],
          "marker_hit_rate": 1.0,
          "format_rule": "four_numbered_steps",
          "format_ok": true,
          "usable_answer": true,
          "error": null
        },
        {
          "type": "call",
          "ts_utc": "2026-04-29T12:05:40Z",
          "cell_id": "pavilion:weeyuga:qwen2.5:3b",
          "model": "qwen2.5:3b",
          "phase": "5q",
          "question_id": "ssh_lockout_triage",
          "run_idx": 4,
          "duration_seconds": 14.325,
          "prompt_tokens": 75,
          "completion_tokens": 181,
          "tokens_per_second": 12.64,
          "finish_reason": "stop",
          "weeyuga_meta": null,
          "status_code": 200,
          "response_chars": 871,
          "response_preview": "- Ensure that `PasswordAuthentication` is set to `no` in `/etc/ssh/sshd_config`. This prevents SSH from accepting password authentication.\n- Verify that your public key is correctly added to the user's authorized keys file located at `~/.ss",
          "required_markers": [
            "sshd_config",
            "authorized_keys",
            "journalctl",
            "rollback",
            "PasswordAuthentication"
          ],
          "markers_hit": [
            "sshd_config",
            "authorized_keys",
            "journalctl",
            "rollback",
            "PasswordAuthentication"
          ],
          "marker_hit_rate": 1.0,
          "format_rule": "five_bullets",
          "format_ok": true,
          "usable_answer": true,
          "error": null
        }
      ]
    },
    {
      "cell_id": "pavilion:weeyuga:qwen2.5-coder:1.5b",
      "n_calls": 6,
      "n_errors": 0,
      "duration_ms_mean": null,
      "duration_ms_p50": null,
      "duration_ms_p95": null,
      "tokens_per_sec_mean": null,
      "tokens_per_sec_p50": null,
      "tokens_per_sec_p95": null,
      "tokens_per_sec_max": null,
      "cold_warm_split": {
        "cold": {
          "n_calls": 2,
          "duration_ms_mean": null,
          "duration_ms_p50": null,
          "tokens_per_sec_mean": null
        },
        "warm": {
          "n_calls": 4,
          "duration_ms_mean": null,
          "duration_ms_p50": null,
          "tokens_per_sec_mean": null
        }
      },
      "per_prompt": {
        "unknown": {
          "n_calls": 6,
          "n_errors": 0,
          "duration_ms_mean": null,
          "duration_ms_p50": null,
          "tokens_per_sec_mean": null,
          "tokens_per_sec_p50": null
        }
      },
      "chars_split": null,
      "timeline": [
        {
          "prompt_id": null,
          "run_idx": 0,
          "phase": "hello",
          "duration_ms": null,
          "tokens_per_sec": null,
          "error": null
        },
        {
          "prompt_id": null,
          "run_idx": 0,
          "phase": "5q",
          "duration_ms": null,
          "tokens_per_sec": null,
          "error": null
        },
        {
          "prompt_id": null,
          "run_idx": 1,
          "phase": "5q",
          "duration_ms": null,
          "tokens_per_sec": null,
          "error": null
        },
        {
          "prompt_id": null,
          "run_idx": 2,
          "phase": "5q",
          "duration_ms": null,
          "tokens_per_sec": null,
          "error": null
        },
        {
          "prompt_id": null,
          "run_idx": 3,
          "phase": "5q",
          "duration_ms": null,
          "tokens_per_sec": null,
          "error": null
        },
        {
          "prompt_id": null,
          "run_idx": 4,
          "phase": "5q",
          "duration_ms": null,
          "tokens_per_sec": null,
          "error": null
        }
      ],
      "calls": [
        {
          "type": "call",
          "ts_utc": "2026-04-29T12:06:01Z",
          "cell_id": "pavilion:weeyuga:qwen2.5-coder:1.5b",
          "model": "qwen2.5-coder:1.5b",
          "phase": "hello",
          "question_id": "hello_check",
          "run_idx": 0,
          "duration_seconds": 20.837,
          "prompt_tokens": 35,
          "completion_tokens": 11,
          "tokens_per_second": 0.53,
          "finish_reason": "stop",
          "weeyuga_meta": null,
          "status_code": 200,
          "response_chars": 38,
          "response_preview": "Of course! How may I assist you today?",
          "required_markers": [],
          "markers_hit": [],
          "marker_hit_rate": null,
          "format_rule": "",
          "format_ok": null,
          "usable_answer": true,
          "error": null
        },
        {
          "type": "call",
          "ts_utc": "2026-04-29T12:06:11Z",
          "cell_id": "pavilion:weeyuga:qwen2.5-coder:1.5b",
          "model": "qwen2.5-coder:1.5b",
          "phase": "5q",
          "question_id": "disk_guard_bash",
          "run_idx": 0,
          "duration_seconds": 9.55,
          "prompt_tokens": 90,
          "completion_tokens": 243,
          "tokens_per_second": 25.45,
          "finish_reason": "stop",
          "weeyuga_meta": null,
          "status_code": 200,
          "response_chars": 861,
          "response_preview": "```bash\n#!/bin/bash\n\n# Check disk usage for /\ndf -P / | awk '{print $5}' | grep -E '[0-9]+' > /tmp/disk_usage.txt\n\n# Read the percentage from the file\npercentage=$(cat /tmp/disk_usage.txt)\n\n# Exit with status 1 if usage is above 85 percent\n",
          "required_markers": [
            "#!/usr/bin/env bash",
            "df -P /",
            "85",
            "exit 1"
          ],
          "markers_hit": [
            "df -P /",
            "85",
            "exit 1"
          ],
          "marker_hit_rate": 0.75,
          "format_rule": "bash_code",
          "format_ok": false,
          "usable_answer": true,
          "error": null
        },
        {
          "type": "call",
          "ts_utc": "2026-04-29T12:06:23Z",
          "cell_id": "pavilion:weeyuga:qwen2.5-coder:1.5b",
          "model": "qwen2.5-coder:1.5b",
          "phase": "5q",
          "question_id": "ipv4_python_tests",
          "run_idx": 1,
          "duration_seconds": 12.454,
          "prompt_tokens": 74,
          "completion_tokens": 303,
          "tokens_per_second": 24.33,
          "finish_reason": "stop",
          "weeyuga_meta": null,
          "status_code": 200,
          "response_chars": 1101,
          "response_preview": "```python\nimport pytest\n\ndef is_valid_ipv4(value: str) -> bool:\n    \"\"\"\n    Check if the provided string is a valid IPv4 address.\n    \n    Args:\n    value (str): The string to check.\n    \n    Returns:\n    bool: True if the string is a valid",
          "required_markers": [
            "def is_valid_ipv4",
            "def test_",
            "assert",
            "split('.')"
          ],
          "markers_hit": [
            "def is_valid_ipv4",
            "def test_",
            "assert",
            "split('.')"
          ],
          "marker_hit_rate": 1.0,
          "format_rule": "python_code",
          "format_ok": false,
          "usable_answer": true,
          "error": null
        },
        {
          "type": "call",
          "ts_utc": "2026-04-29T12:06:26Z",
          "cell_id": "pavilion:weeyuga:qwen2.5-coder:1.5b",
          "model": "qwen2.5-coder:1.5b",
          "phase": "5q",
          "question_id": "nginx_safe_reload",
          "run_idx": 2,
          "duration_seconds": 2.926,
          "prompt_tokens": 58,
          "completion_tokens": 64,
          "tokens_per_second": 21.87,
          "finish_reason": "stop",
          "weeyuga_meta": null,
          "status_code": 200,
          "response_chars": 257,
          "response_preview": "```bash\n# Backup the original nginx configuration file\ncp /etc/nginx/nginx.conf /etc/nginx/nginx.conf.bak\n\n# Validate the nginx configuration\nnginx -t\n\n# Reload nginx if the configuration is valid\nif [ $? -eq 0 ]; then\n    sudo systemctl re",
          "required_markers": [
            "cp /etc/nginx/nginx.conf",
            "nginx -t",
            "systemctl reload nginx",
            "&&"
          ],
          "markers_hit": [
            "cp /etc/nginx/nginx.conf",
            "nginx -t",
            "systemctl reload nginx"
          ],
          "marker_hit_rate": 0.75,
          "format_rule": "shell_lines",
          "format_ok": true,
          "usable_answer": true,
          "error": null
        },
        {
          "type": "call",
          "ts_utc": "2026-04-29T12:06:44Z",
          "cell_id": "pavilion:weeyuga:qwen2.5-coder:1.5b",
          "model": "qwen2.5-coder:1.5b",
          "phase": "5q",
          "question_id": "yaml_cli_plan",
          "run_idx": 3,
          "duration_seconds": 18.124,
          "prompt_tokens": 62,
          "completion_tokens": 473,
          "tokens_per_second": 26.1,
          "finish_reason": "stop",
          "weeyuga_meta": null,
          "status_code": 200,
          "response_chars": 2139,
          "response_preview": "1. Install the required packages:\n```bash\npip install gitpython jsonschema\n```\n\n2. Create a new Python file named `validate_yaml.py`:\n\n3. Add the following code to the `validate_yaml.py` file:\n\n```python\nimport os\nimport json\nfrom git impor",
          "required_markers": [
            "1.",
            "2.",
            "3.",
            "4.",
            "JSON schema",
            "git"
          ],
          "markers_hit": [
            "1.",
            "2.",
            "3.",
            "4.",
            "JSON schema",
            "git"
          ],
          "marker_hit_rate": 1.0,
          "format_rule": "four_numbered_steps",
          "format_ok": true,
          "usable_answer": true,
          "error": null
        },
        {
          "type": "call",
          "ts_utc": "2026-04-29T12:06:55Z",
          "cell_id": "pavilion:weeyuga:qwen2.5-coder:1.5b",
          "model": "qwen2.5-coder:1.5b",
          "phase": "5q",
          "question_id": "ssh_lockout_triage",
          "run_idx": 4,
          "duration_seconds": 10.595,
          "prompt_tokens": 75,
          "completion_tokens": 271,
          "tokens_per_second": 25.58,
          "finish_reason": "stop",
          "weeyuga_meta": null,
          "status_code": 200,
          "response_chars": 1086,
          "response_preview": "- **Check for Permissions**: Ensure that the `authorized_keys` file has the correct permissions (`600`) to restrict access.\n  ```sh\n  chmod 600 ~/.ssh/authorized_keys\n  ```\n\n- **Review SSHD Configuration**: Verify that the `PasswordAuthenti",
          "required_markers": [
            "sshd_config",
            "authorized_keys",
            "journalctl",
            "rollback",
            "PasswordAuthentication"
          ],
          "markers_hit": [
            "sshd_config",
            "authorized_keys",
            "journalctl",
            "rollback",
            "PasswordAuthentication"
          ],
          "marker_hit_rate": 1.0,
          "format_rule": "five_bullets",
          "format_ok": true,
          "usable_answer": true,
          "error": null
        }
      ]
    },
    {
      "cell_id": "pavilion:weeyuga:qwen2.5-coder:0.5b",
      "n_calls": 6,
      "n_errors": 0,
      "duration_ms_mean": null,
      "duration_ms_p50": null,
      "duration_ms_p95": null,
      "tokens_per_sec_mean": null,
      "tokens_per_sec_p50": null,
      "tokens_per_sec_p95": null,
      "tokens_per_sec_max": null,
      "cold_warm_split": {
        "cold": {
          "n_calls": 2,
          "duration_ms_mean": null,
          "duration_ms_p50": null,
          "tokens_per_sec_mean": null
        },
        "warm": {
          "n_calls": 4,
          "duration_ms_mean": null,
          "duration_ms_p50": null,
          "tokens_per_sec_mean": null
        }
      },
      "per_prompt": {
        "unknown": {
          "n_calls": 6,
          "n_errors": 0,
          "duration_ms_mean": null,
          "duration_ms_p50": null,
          "tokens_per_sec_mean": null,
          "tokens_per_sec_p50": null
        }
      },
      "chars_split": null,
      "timeline": [
        {
          "prompt_id": null,
          "run_idx": 0,
          "phase": "hello",
          "duration_ms": null,
          "tokens_per_sec": null,
          "error": null
        },
        {
          "prompt_id": null,
          "run_idx": 0,
          "phase": "5q",
          "duration_ms": null,
          "tokens_per_sec": null,
          "error": null
        },
        {
          "prompt_id": null,
          "run_idx": 1,
          "phase": "5q",
          "duration_ms": null,
          "tokens_per_sec": null,
          "error": null
        },
        {
          "prompt_id": null,
          "run_idx": 2,
          "phase": "5q",
          "duration_ms": null,
          "tokens_per_sec": null,
          "error": null
        },
        {
          "prompt_id": null,
          "run_idx": 3,
          "phase": "5q",
          "duration_ms": null,
          "tokens_per_sec": null,
          "error": null
        },
        {
          "prompt_id": null,
          "run_idx": 4,
          "phase": "5q",
          "duration_ms": null,
          "tokens_per_sec": null,
          "error": null
        }
      ],
      "calls": [
        {
          "type": "call",
          "ts_utc": "2026-04-29T12:07:09Z",
          "cell_id": "pavilion:weeyuga:qwen2.5-coder:0.5b",
          "model": "qwen2.5-coder:0.5b",
          "phase": "hello",
          "question_id": "hello_check",
          "run_idx": 0,
          "duration_seconds": 13.774,
          "prompt_tokens": 35,
          "completion_tokens": 11,
          "tokens_per_second": 0.8,
          "finish_reason": "stop",
          "weeyuga_meta": null,
          "status_code": 200,
          "response_chars": 38,
          "response_preview": "Of course! How can I assist you today?",
          "required_markers": [],
          "markers_hit": [],
          "marker_hit_rate": null,
          "format_rule": "",
          "format_ok": null,
          "usable_answer": true,
          "error": null
        },
        {
          "type": "call",
          "ts_utc": "2026-04-29T12:07:17Z",
          "cell_id": "pavilion:weeyuga:qwen2.5-coder:0.5b",
          "model": "qwen2.5-coder:0.5b",
          "phase": "5q",
          "question_id": "disk_guard_bash",
          "run_idx": 0,
          "duration_seconds": 8.292,
          "prompt_tokens": 90,
          "completion_tokens": 418,
          "tokens_per_second": 50.41,
          "finish_reason": "stop",
          "weeyuga_meta": null,
          "status_code": 200,
          "response_chars": 1654,
          "response_preview": "Certainly! Below is a Bash script that checks disk usage for `/`, prints a human-readable warning if usage is above 85 percent, and exits with status 1 when usage is above 85 percent.\n\n```bash\n#!/bin/bash\n\n# Check if df -P / exists\nif ! com",
          "required_markers": [
            "#!/usr/bin/env bash",
            "df -P /",
            "85",
            "exit 1"
          ],
          "markers_hit": [
            "df -P /",
            "85",
            "exit 1"
          ],
          "marker_hit_rate": 0.75,
          "format_rule": "bash_code",
          "format_ok": false,
          "usable_answer": true,
          "error": null
        },
        {
          "type": "call",
          "ts_utc": "2026-04-29T12:07:27Z",
          "cell_id": "pavilion:weeyuga:qwen2.5-coder:0.5b",
          "model": "qwen2.5-coder:0.5b",
          "phase": "5q",
          "question_id": "ipv4_python_tests",
          "run_idx": 1,
          "duration_seconds": 10.239,
          "prompt_tokens": 74,
          "completion_tokens": 520,
          "tokens_per_second": 50.79,
          "finish_reason": "stop",
          "weeyuga_meta": null,
          "status_code": 200,
          "response_chars": 1505,
          "response_preview": "```python\nimport re\n\ndef is_valid_ipv4(value: str) -> bool:\n    \"\"\"\n    Check if the provided string is a valid IPv4 address.\n    \n    Args:\n    value (str): The string to check.\n    \n    Returns:\n    bool: True if the string is a valid IPv",
          "required_markers": [
            "def is_valid_ipv4",
            "def test_",
            "assert",
            "split('.')"
          ],
          "markers_hit": [
            "def is_valid_ipv4",
            "def test_",
            "assert"
          ],
          "marker_hit_rate": 0.75,
          "format_rule": "python_code",
          "format_ok": false,
          "usable_answer": true,
          "error": null
        },
        {
          "type": "call",
          "ts_utc": "2026-04-29T12:07:30Z",
          "cell_id": "pavilion:weeyuga:qwen2.5-coder:0.5b",
          "model": "qwen2.5-coder:0.5b",
          "phase": "5q",
          "question_id": "nginx_safe_reload",
          "run_idx": 2,
          "duration_seconds": 3.174,
          "prompt_tokens": 58,
          "completion_tokens": 149,
          "tokens_per_second": 46.94,
          "finish_reason": "stop",
          "weeyuga_meta": null,
          "status_code": 200,
          "response_chars": 576,
          "response_preview": "```bash\n# Backup /etc/nginx/nginx.conf\nsudo cp -p /etc/nginx/nginx.conf /etc/nginx/nginx.conf.bak\n\n# Validate nginx config\nsudo nginx -t\n\n# Reload nginx only if validation passes\nif [ $? -eq 0 ]; then\n    sudo systemctl reload nginx\nfi\n```\n",
          "required_markers": [
            "cp /etc/nginx/nginx.conf",
            "nginx -t",
            "systemctl reload nginx",
            "&&"
          ],
          "markers_hit": [
            "nginx -t",
            "systemctl reload nginx"
          ],
          "marker_hit_rate": 0.5,
          "format_rule": "shell_lines",
          "format_ok": true,
          "usable_answer": true,
          "error": null
        },
        {
          "type": "call",
          "ts_utc": "2026-04-29T12:07:42Z",
          "cell_id": "pavilion:weeyuga:qwen2.5-coder:0.5b",
          "model": "qwen2.5-coder:0.5b",
          "phase": "5q",
          "question_id": "yaml_cli_plan",
          "run_idx": 3,
          "duration_seconds": 12.01,
          "prompt_tokens": 62,
          "completion_tokens": 613,
          "tokens_per_second": 51.04,
          "finish_reason": "stop",
          "weeyuga_meta": null,
          "status_code": 200,
          "response_chars": 2834,
          "response_preview": "To create a Python CLI that scans a Git repository for changed YAML files, validates them against a JSON schema, and exits nonzero on failure, you can use the `git` command-line tool along with some Python libraries like `argparse`, `jsonsc",
          "required_markers": [
            "1.",
            "2.",
            "3.",
            "4.",
            "JSON schema",
            "git"
          ],
          "markers_hit": [
            "1.",
            "2.",
            "3.",
            "JSON schema",
            "git"
          ],
          "marker_hit_rate": 0.833,
          "format_rule": "four_numbered_steps",
          "format_ok": true,
          "usable_answer": true,
          "error": null
        },
        {
          "type": "call",
          "ts_utc": "2026-04-29T12:07:51Z",
          "cell_id": "pavilion:weeyuga:qwen2.5-coder:0.5b",
          "model": "qwen2.5-coder:0.5b",
          "phase": "5q",
          "question_id": "ssh_lockout_triage",
          "run_idx": 4,
          "duration_seconds": 8.951,
          "prompt_tokens": 75,
          "completion_tokens": 451,
          "tokens_per_second": 50.39,
          "finish_reason": "stop",
          "weeyuga_meta": null,
          "status_code": 200,
          "response_chars": 1588,
          "response_preview": "- **Hardening**: Ensure that your SSH server is secure by using strong passwords, enabling two-factor authentication, and updating software to the latest version.\n- **Safest First Checks**:\n  - **Authorized Keys**: Verify that your public k",
          "required_markers": [
            "sshd_config",
            "authorized_keys",
            "journalctl",
            "rollback",
            "PasswordAuthentication"
          ],
          "markers_hit": [
            "sshd_config",
            "authorized_keys",
            "journalctl",
            "rollback",
            "PasswordAuthentication"
          ],
          "marker_hit_rate": 1.0,
          "format_rule": "five_bullets",
          "format_ok": true,
          "usable_answer": true,
          "error": null
        }
      ]
    }
  ],
  "meta_record": {
    "type": "meta",
    "benchmark_run_id": "ad057f5b-ed3f-4a95-a38e-361be310ffd6",
    "harness_version": "3",
    "harness_path": "scripts/benchmarks/run_pavilion_weeyuga.py",
    "git_sha": "371ce70c8708ce0aedbe4f3bf096f8e8f8b11d43",
    "git_dirty": true,
    "started_at_utc": "2026-04-29T09:51:46Z",
    "host": "Slobodans-MacBook-Air.local",
    "load_avg_start": [
      1.60205078125,
      1.9716796875,
      1.95751953125
    ],
    "target_url": "http://10.8.0.3:11435",
    "weeyuga_url": "http://10.8.0.3:11435",
    "cell_id_prefix": "pavilion:weeyuga",
    "execution_shape": "per-model-block",
    "phase_plan": "hello+5q",
    "models_planned": [
      "qwen3.5:4b",
      "qwen3.5:35b-a3b-uncensored-iq1m",
      "qwen3.5:35b-a3b-iq2s",
      "qwen3.5:9b-q6k",
      "qwen3.5:9b-q4km",
      "qwen3.5:2b",
      "qwen3.5:0.8b",
      "qwen3.5:9b",
      "qwen2.5-coder:14b",
      "qwen2.5-coder:3b",
      "qwen3:14b",
      "qwen3:8b",
      "qwen3:4b",
      "qwen2.5:3b",
      "qwen2.5-coder:1.5b",
      "qwen2.5-coder:0.5b"
    ],
    "canonical_options": {
      "temperature": 0.1,
      "num_ctx": 4096,
      "num_predict": 2048
    },
    "timeout_seconds": 360,
    "suite_5q_path": "/Users/slobodan/Documents/MyServers/instances/vps-81-17-99-14/telemetry/small_model_eval_questions.json",
    "suite_20q_path": "/Users/slobodan/Documents/MyServers/instances/vps-81-17-99-14/telemetry/python_task_suite_questions.json",
    "env_inference_route": null,
    "env_llamacpp_url": null
  },
  "packaged_at": "2026-05-06T12:28:24Z"
}