{ "suite_name": "small-model-coding-eval-v1", "version": "1.0", "purpose": "A deterministic five-question coding and DevOps comparison suite for smaller Ollama models on vps50.", "models": [ { "model": "qwen2.5-coder:3b", "display_name": "Qwen2.5 Coder 3B", "size_label": "3b" }, { "model": "qwen2.5-coder:1.5b", "display_name": "Qwen2.5 Coder 1.5B", "size_label": "1.5b" }, { "model": "qwen2.5:3b", "display_name": "Qwen2.5 3B", "size_label": "3b" }, { "model": "llama3.2:3b", "display_name": "Llama 3.2 3B", "size_label": "3b" }, { "model": "phi3", "display_name": "Phi-3 Mini", "size_label": "3.8b" } ], "questions": [ { "id": "disk_guard_bash", "title": "Disk Guard Script", "category": "shell", "prompt": "Return only Bash code. Write a script that checks disk usage for /, prints a human-readable warning, and exits with status 1 when usage is above 85 percent. Requirements: include a shebang, use df -P /, parse the numeric percentage, and keep the script production-safe.", "required_markers": ["#!/usr/bin/env bash", "df -P /", "85", "exit 1"], "format_rule": "bash_code" }, { "id": "ipv4_python_tests", "title": "IPv4 Validator", "category": "python", "prompt": "Return only Python code. Write a function named is_valid_ipv4(value: str) -> bool and include exactly three pytest tests that cover a valid address, an out-of-range octet, and a non-numeric input.", "required_markers": ["def is_valid_ipv4", "def test_", "assert", "split('.')"], "format_rule": "python_code" }, { "id": "nginx_safe_reload", "title": "Nginx Safe Reload", "category": "ops", "prompt": "Return only Bash commands, one per line. Back up /etc/nginx/nginx.conf, validate nginx config, and reload nginx only if validation passes.", "required_markers": ["cp /etc/nginx/nginx.conf", "nginx -t", "systemctl reload nginx", "&&"], "format_rule": "shell_lines" }, { "id": "yaml_cli_plan", "title": "YAML Validator Plan", "category": "planning", "prompt": "Return exactly four numbered steps. Plan a Python CLI that scans a git repo for changed YAML files, validates them against a JSON schema, and exits nonzero on failure.", "required_markers": ["1.", "2.", "3.", "4.", "JSON schema", "git"], "format_rule": "four_numbered_steps" }, { "id": "ssh_lockout_triage", "title": "SSH Lockout Triage", "category": "debugging", "prompt": "Return exactly five bullet points. After hardening, SSH started returning Permission denied (publickey,password). List the safest first checks before changing config. Mention sshd_config, authorized_keys, journalctl, rollback, and PasswordAuthentication.", "required_markers": ["sshd_config", "authorized_keys", "journalctl", "rollback", "PasswordAuthentication"], "format_rule": "five_bullets" } ] }