diff options
| author | Frank <[email protected]> | 2025-12-28 15:33:18 -0500 |
|---|---|---|
| committer | Frank <[email protected]> | 2025-12-28 15:33:18 -0500 |
| commit | 9d61370ac458627c394195ef761669ba70c93237 (patch) | |
| tree | 4f69f2eb550b50202f850f41b10a069b5ba0251d | |
| parent | f3febd6e395458698fceca2f175f59e0cf68c1d6 (diff) | |
| download | opencode-9d61370ac458627c394195ef761669ba70c93237.tar.gz opencode-9d61370ac458627c394195ef761669ba70c93237.zip | |
sync
| -rw-r--r-- | packages/console/app/src/routes/bench/index.tsx | 92 |
1 files changed, 90 insertions, 2 deletions
diff --git a/packages/console/app/src/routes/bench/index.tsx b/packages/console/app/src/routes/bench/index.tsx index 9d99393cf..6339c8017 100644 --- a/packages/console/app/src/routes/bench/index.tsx +++ b/packages/console/app/src/routes/bench/index.tsx @@ -10,11 +10,37 @@ interface TaskSource { to: string } +interface ScoreDetail { + criterion: string + weight: number + average: number +} + +interface Run { + task: string + model: string + agent: string + score: { + final: number + base: number + penalty: number + } + scoreDetails: ScoreDetail[] +} + +interface Prompt { + commit: string + prompt: string +} + interface Task { averageScore: number + summary?: string + runs?: Run[] task: { id: string source: TaskSource + prompts?: Prompt[] } } @@ -70,7 +96,7 @@ export default function Bench() { <tr> <th>Agent</th> <th>Model</th> - <th>Average Score</th> + <th>Final Score</th> <For each={taskIds()}>{(id) => <th>{id}</th>}</For> </tr> </thead> @@ -127,7 +153,7 @@ export default function Bench() { }} onClick={(e) => e.stopPropagation()} > - <div style={{ "margin-bottom": "1rem" }}> + <div style={{ "margin-bottom": "1rem", color: "#000" }}> <div> <strong>Repo: </strong> <a @@ -162,6 +188,68 @@ export default function Bench() { </a> </div> </div> + <Show when={modalTask()?.task.prompts && modalTask()!.task.prompts!.length > 0}> + <div style={{ "margin-bottom": "1rem", color: "#000" }}> + <strong>Prompt:</strong> + <For each={modalTask()!.task.prompts}> + {(p) => ( + <div style={{ "margin-top": "0.5rem" }}> + <div style={{ "font-size": "0.875rem", color: "#666" }}>Commit: {p.commit.slice(0, 7)}</div> + <p style={{ "margin-top": "0.25rem", "white-space": "pre-wrap" }}>{p.prompt}</p> + </div> + )} + </For> + </div> + </Show> + <Show when={modalTask()?.runs && modalTask()!.runs!.length > 0}> + <div style={{ "margin-bottom": "1rem", color: "#000" }}> + <strong>Runs:</strong> + <table style={{ "margin-top": "0.5rem", "border-collapse": "collapse", width: "100%" }}> + <thead> + <tr> + <th style={{ border: "1px solid #ccc", padding: "0.5rem", "text-align": "left" }}>Run</th> + <th style={{ border: "1px solid #ccc", padding: "0.5rem", "text-align": "left" }}>Final</th> + <th style={{ border: "1px solid #ccc", padding: "0.5rem", "text-align": "left" }}>Base</th> + <th style={{ border: "1px solid #ccc", padding: "0.5rem", "text-align": "left" }}>Penalty</th> + <For each={modalTask()!.runs![0]?.scoreDetails}> + {(detail) => ( + <th style={{ border: "1px solid #ccc", padding: "0.5rem", "text-align": "left" }}> + {detail.criterion} ({detail.weight}) + </th> + )} + </For> + </tr> + </thead> + <tbody> + <For each={modalTask()!.runs}> + {(run, index) => ( + <tr> + <td style={{ border: "1px solid #ccc", padding: "0.5rem" }}>{index() + 1}</td> + <td style={{ border: "1px solid #ccc", padding: "0.5rem" }}>{run.score.final.toFixed(3)}</td> + <td style={{ border: "1px solid #ccc", padding: "0.5rem" }}>{run.score.base.toFixed(3)}</td> + <td style={{ border: "1px solid #ccc", padding: "0.5rem" }}> + {run.score.penalty.toFixed(3)} + </td> + <For each={run.scoreDetails}> + {(detail) => ( + <td style={{ border: "1px solid #ccc", padding: "0.5rem" }}> + {detail.average.toFixed(3)} + </td> + )} + </For> + </tr> + )} + </For> + </tbody> + </table> + </div> + </Show> + <Show when={modalTask()?.summary}> + <div style={{ "margin-bottom": "1rem", color: "#000" }}> + <strong>Summary:</strong> + <p style={{ "margin-top": "0.5rem", "white-space": "pre-wrap" }}>{modalTask()!.summary}</p> + </div> + </Show> <pre style={{ color: "#000" }}>{JSON.stringify(modalTask(), null, 2)}</pre> </div> </div> |
