{ "generated_at": "2026-04-11T19:00:02Z", "suite_name": "parallel-qwen-same-model-20q-v1", "version": "1.0", "purpose": "Run the shared 20-question Python benchmark in two-question batches against one model at a time. Questions 1+2 run together, then 3+4, and so on, while Ollama stays on one loaded model with two parallel request slots and a 32K request context.", "run_mode": "same_model_pairs", "question_batch_size": 2, "question_assignment": "same_model_receives_both_questions_in_each_batch", "ollama_runtime": { "max_loaded_models": 1, "num_parallel": 2, "num_ctx": 32768, "keep_alive": "24h" }, "lanes": [ { "lane_id": "qwen2_5_coder_0_5b_same_model", "display_name": "Qwen2.5 Coder 0.5B same-model 2-up", "kind": "same_model", "models": [ { "role": "shared", "model": "qwen2.5-coder:0.5b", "display_name": "Qwen2.5 Coder 0.5B", "family": "qwen2.5-coder", "size_label": "0.5B", "max_context_tokens": 32768, "requested_num_ctx": 32768, "mode": "same_model" } ] }, { "lane_id": "qwen2_5_coder_1_5b_same_model", "display_name": "Qwen2.5 Coder 1.5B same-model 2-up", "kind": "same_model", "models": [ { "role": "shared", "model": "qwen2.5-coder:1.5b", "display_name": "Qwen2.5 Coder 1.5B", "family": "qwen2.5-coder", "size_label": "1.5B", "max_context_tokens": 32768, "requested_num_ctx": 32768, "mode": "same_model" } ] }, { "lane_id": "qwen2_5_coder_3b_same_model", "display_name": "Qwen2.5 Coder 3B same-model 2-up", "kind": "same_model", "models": [ { "role": "shared", "model": "qwen2.5-coder:3b", "display_name": "Qwen2.5 Coder 3B", "family": "qwen2.5-coder", "size_label": "3B", "max_context_tokens": 32768, "requested_num_ctx": 32768, "mode": "same_model" } ] }, { "lane_id": "qwen3_5_0_8b_same_model", "display_name": "Qwen3.5 0.8B same-model 2-up", "kind": "same_model", "models": [ { "role": "shared", "model": "qwen3.5:0.8b", "display_name": "Qwen3.5 0.8B", "family": "qwen3.5", "size_label": "0.8B", "max_context_tokens": 262144, "requested_num_ctx": 32768, "mode": "same_model" } ] } ], "questions": [ { "id": "py_csv_parse", "title": "CSV Parser", "category": "parsing", "prompt": "Return only Python code. Write a function that reads CSV text, skips blank lines, and returns a list of dicts keyed by the header row.", "required_markers": [ "csv", "Dict", "reader", "header" ], "format_rule": "python_code" }, { "id": "py_file_scan", "title": "File Scanner", "category": "file_io", "prompt": "Return only Python code. Write a script that walks a directory tree and prints the paths of files larger than 5 MB.", "required_markers": [ "os.walk", "5 * 1024 * 1024", "print", "path" ], "format_rule": "python_code" }, { "id": "py_cli_args", "title": "CLI Arguments", "category": "cli", "prompt": "Return only Python code. Build a small argparse CLI with one required path argument and one optional verbose flag.", "required_markers": [ "argparse", "--verbose", "ArgumentParser", "path" ], "format_rule": "python_code" }, { "id": "py_typing_dataclass", "title": "Typed Dataclass", "category": "typing", "prompt": "Return only Python code. Define a typed dataclass for a job record with id, name, created_at, and is_active fields.", "required_markers": [ "@dataclass", "created_at", "is_active", "str" ], "format_rule": "python_code" }, { "id": "py_pytest_fixture", "title": "Pytest Fixture", "category": "tests", "prompt": "Return only Python code. Write a pytest fixture and one test that uses it to verify a function converting Celsius to Fahrenheit.", "required_markers": [ "@pytest.fixture", "def test_", "assert", "fahrenheit" ], "format_rule": "python_code" }, { "id": "py_async_fetch", "title": "Async Fetch", "category": "async", "prompt": "Return only Python code. Write an async function that fetches two URLs concurrently with asyncio.gather and returns both bodies.", "required_markers": [ "async def", "asyncio.gather", "await", "aiohttp" ], "format_rule": "python_code" }, { "id": "py_http_retry", "title": "HTTP Retry", "category": "http", "prompt": "Return only Python code. Write a requests wrapper that retries HTTP 429 with exponential backoff and a maximum attempt count.", "required_markers": [ "requests", "429", "backoff", "max_attempts" ], "format_rule": "python_code" }, { "id": "py_json_validate", "title": "JSON Validation", "category": "validation", "prompt": "Return only Python code. Validate a JSON object against a schema and raise ValueError when required keys are missing.", "required_markers": [ "jsonschema", "ValueError", "required", "schema" ], "format_rule": "python_code" }, { "id": "py_sqlite_store", "title": "SQLite Store", "category": "sqlite", "prompt": "Return only Python code. Create a SQLite table for events and write a function that inserts one event row safely.", "required_markers": [ "sqlite3", "CREATE TABLE", "INSERT INTO", "commit" ], "format_rule": "python_code" }, { "id": "py_fastapi_handler", "title": "FastAPI Handler", "category": "web", "prompt": "Return only Python code. Write a FastAPI route that returns a JSON health response with status and version fields.", "required_markers": [ "FastAPI", "@app.get", "status", "version" ], "format_rule": "python_code" }, { "id": "py_config_dataclass", "title": "Config Dataclass", "category": "config", "prompt": "Return only Python code. Build a dataclass-based config loader that reads environment variables and supplies defaults.", "required_markers": [ "dataclass", "os.environ", "default", "load" ], "format_rule": "python_code" }, { "id": "py_logging_setup", "title": "Logging Setup", "category": "logging", "prompt": "Return only Python code. Configure structured logging with a timestamped formatter and a reusable setup function.", "required_markers": [ "logging", "Formatter", "timestamp", "basicConfig" ], "format_rule": "python_code" }, { "id": "py_thread_pool", "title": "Thread Pool", "category": "concurrency", "prompt": "Return only Python code. Use concurrent.futures to run a small CPU-bound function across a list of inputs and collect results.", "required_markers": [ "concurrent.futures", "ThreadPoolExecutor", "map", "results" ], "format_rule": "python_code" }, { "id": "py_package_layout", "title": "Package Layout", "category": "package", "prompt": "Return only Python code. Show a minimal package layout with __init__.py and a helper module that can be imported from tests.", "required_markers": [ "__init__.py", "import", "helper", "tests" ], "format_rule": "python_code" }, { "id": "py_debug_stacktrace", "title": "Debug Stacktrace", "category": "debugging", "prompt": "Return only Python code. Fix a function that crashes on None input by adding an early return and a clear exception message.", "required_markers": [ "None", "return", "raise", "message" ], "format_rule": "python_code" }, { "id": "py_refactor_split", "title": "Refactor Split", "category": "refactor", "prompt": "Return only Python code. Refactor a large function into two smaller helpers while preserving behavior.", "required_markers": [ "def", "helper", "return", "preserve" ], "format_rule": "python_code" }, { "id": "py_csv_summary", "title": "CSV Summary", "category": "analysis", "prompt": "Return only Python code. Read a CSV file and produce a summary with row count and a count of unique values in one column.", "required_markers": [ "csv", "row_count", "unique", "Counter" ], "format_rule": "python_code" }, { "id": "py_pathlib_clean", "title": "Pathlib Cleaner", "category": "filesystem", "prompt": "Return only Python code. Use pathlib to remove empty files from a directory tree and print each deleted path.", "required_markers": [ "pathlib", "rglob", "unlink", "print" ], "format_rule": "python_code" }, { "id": "py_pydantic_model", "title": "Pydantic Model", "category": "validation", "prompt": "Return only Python code. Define a Pydantic model for a user profile with email validation and an age field.", "required_markers": [ "BaseModel", "EmailStr", "age", "validation" ], "format_rule": "python_code" }, { "id": "py_regex_log_parser", "title": "Regex Log Parser", "category": "parsing", "prompt": "Return only Python code. Parse web server log lines with regex and return a list of status codes and request paths.", "required_markers": [ "re", "status", "path", "findall" ], "format_rule": "python_code" } ] }