weeyuga-benchmarks-public/runs/79afe366-1055-4e45-adf6-593864a530e8/run.log

[09:44:24Z] === predator-a3b-ngl6-ctx run_id=79afe366-1055-4e45-adf6-593864a530e8 ===
[09:44:24Z] Plan: NGL=6 × ctx={64k, 96k, 131k}
[09:44:24Z] Creating run-mirror dir on Predator: D:\WeeyugaBench\runs\79afe366-1055-4e45-adf6-593864a530e8
[09:44:34Z] >>> CELL: ngl6-ctx64k (NGL=6 ctx=65536)
[09:44:34Z] Stopping llama-server on Predator...
[09:44:41Z] Starting llama-server (key=ngl6-ctx64k, NGL=6, ctx=65536)...
[09:44:48Z] Waiting for llama-server (key=ngl6-ctx64k, up to 15 min)...
[09:45:12Z]   llama-server up after 17s
[09:45:16Z] Running llama-bench for ngl6-ctx64k (NGL=6)...
[09:46:31Z] Running prompts for ngl6-ctx64k (cold + 3 warm; max_tokens 512/1024/2048)...
[09:46:31Z]   prompt=hello cold (max_tokens=512)
[09:47:09Z]   prompt=hello warm 1
[09:47:25Z]   prompt=hello warm 2
[09:47:39Z]   prompt=hello warm 3
[09:48:01Z]   prompt=P-MEDIUM cold (max_tokens=1024)
[09:49:23Z]   prompt=P-MEDIUM warm 1
[09:50:34Z]   prompt=P-MEDIUM warm 2
[09:51:34Z]   prompt=P-MEDIUM warm 3
[09:52:50Z]   prompt=P-HARD cold (max_tokens=2048)
[09:55:17Z]   prompt=P-HARD warm 1
[09:57:55Z]   prompt=P-HARD warm 2
[10:00:21Z]   prompt=P-HARD warm 3
[10:02:48Z] >>> CELL: ngl6-ctx96k (NGL=6 ctx=98304)
[10:02:48Z] Stopping llama-server on Predator...
[10:02:52Z] Starting llama-server (key=ngl6-ctx96k, NGL=6, ctx=98304)...
[10:02:55Z] Waiting for llama-server (key=ngl6-ctx96k, up to 15 min)...
[10:03:02Z]   llama-server up after 7s
[10:03:05Z] Running llama-bench for ngl6-ctx96k (NGL=6)...
[10:04:19Z] Running prompts for ngl6-ctx96k (cold + 3 warm; max_tokens 512/1024/2048)...
[10:04:19Z]   prompt=hello cold (max_tokens=512)
[10:04:36Z]   prompt=hello warm 1
[10:04:54Z]   prompt=hello warm 2
[10:05:13Z]   prompt=hello warm 3
[10:05:30Z]   prompt=P-MEDIUM cold (max_tokens=1024)
[10:07:21Z]   prompt=P-MEDIUM warm 1
[10:09:12Z]   prompt=P-MEDIUM warm 2
[10:11:04Z]   prompt=P-MEDIUM warm 3
[10:12:10Z]   prompt=P-HARD cold (max_tokens=2048)
[10:14:42Z]   prompt=P-HARD warm 1
[10:17:15Z]   prompt=P-HARD warm 2
[10:18:43Z]   prompt=P-HARD warm 3
[10:21:15Z] >>> CELL: ngl6-ctx131k (NGL=6 ctx=131072)
[10:21:15Z] Stopping llama-server on Predator...
[10:21:19Z] Starting llama-server (key=ngl6-ctx131k, NGL=6, ctx=131072)...
[10:21:22Z] Waiting for llama-server (key=ngl6-ctx131k, up to 15 min)...
[10:21:30Z]   llama-server up after 7s
[10:21:32Z] Running llama-bench for ngl6-ctx131k (NGL=6)...
[10:23:00Z] Running prompts for ngl6-ctx131k (cold + 3 warm; max_tokens 512/1024/2048)...
[10:23:00Z]   prompt=hello cold (max_tokens=512)
[10:23:34Z]   prompt=hello warm 1
[10:23:52Z]   prompt=hello warm 2
[10:24:33Z]   prompt=hello warm 3
[10:25:17Z]   prompt=P-MEDIUM cold (max_tokens=1024)
[10:27:18Z]   prompt=P-MEDIUM warm 1
[10:28:31Z]   prompt=P-MEDIUM warm 2
[10:30:15Z]   prompt=P-MEDIUM warm 3
[10:31:52Z]   prompt=P-HARD cold (max_tokens=2048)
[10:34:39Z]   prompt=P-HARD warm 1
[10:37:05Z]   prompt=P-HARD warm 2
[10:38:45Z]   prompt=P-HARD warm 3
[10:41:22Z] Stopping llama-server on Predator...
[10:41:26Z] === bench complete ===
[10:41:26Z] Mirroring run-dir to Predator...