summary | refs | log | tree | commit | diff | homepage
diff options
context:
space:
mode:
author: Frank <[email protected]> 2025-12-28 15:33:18 -0500
committer: Frank <[email protected]> 2025-12-28 15:33:18 -0500
commit: 9d61370ac458627c394195ef761669ba70c93237 (patch)
tree: 4f69f2eb550b50202f850f41b10a069b5ba0251d
parent: f3febd6e395458698fceca2f175f59e0cf68c1d6 (diff)
download: opencode-9d61370ac458627c394195ef761669ba70c93237.tar.gz
download: opencode-9d61370ac458627c394195ef761669ba70c93237.zip
sync
-rw-r--r-- packages/console/app/src/routes/bench/index.tsx | 92
1 files changed, 90 insertions, 2 deletions
diff --git a/packages/console/app/src/routes/bench/index.tsx b/packages/console/app/src/routes/bench/index.tsx
index 9d99393cf..6339c8017 100644
--- a/packages/console/app/src/routes/bench/index.tsx
+++ b/packages/console/app/src/routes/bench/index.tsx
@@ -10,11 +10,37 @@ interface TaskSource {
to: string
}
+interface ScoreDetail {
+ criterion: string
+ weight: number
+ average: number
+}
+
+interface Run {
+ task: string
+ model: string
+ agent: string
+ score: {
+ final: number
+ base: number
+ penalty: number
+ }
+ scoreDetails: ScoreDetail[]
+}
+
+interface Prompt {
+ commit: string
+ prompt: string
+}
+
interface Task {
averageScore: number
+ summary?: string
+ runs?: Run[]
task: {
id: string
source: TaskSource
+ prompts?: Prompt[]
}
}
@@ -70,7 +96,7 @@ export default function Bench() {
<tr>
<th>Agent</th>
<th>Model</th>
- <th>Average Score</th>
+ <th>Final Score</th>
<For each={taskIds()}>{(id) => <th>{id}</th>}</For>
</tr>
</thead>
@@ -127,7 +153,7 @@ export default function Bench() {
}}
onClick={(e) => e.stopPropagation()}
>
- <div style={{ "margin-bottom": "1rem" }}>
+ <div style={{ "margin-bottom": "1rem", color: "#000" }}>
<div>
<strong>Repo: </strong>
<a
@@ -162,6 +188,68 @@ export default function Bench() {
</a>
</div>
</div>
+ <Show when={modalTask()?.task.prompts && modalTask()!.task.prompts!.length > 0}>
+ <div style={{ "margin-bottom": "1rem", color: "#000" }}>
+ <strong>Prompt:</strong>
+ <For each={modalTask()!.task.prompts}>
+ {(p) => (
+ <div style={{ "margin-top": "0.5rem" }}>
+ <div style={{ "font-size": "0.875rem", color: "#666" }}>Commit: {p.commit.slice(0, 7)}</div>
+ <p style={{ "margin-top": "0.25rem", "white-space": "pre-wrap" }}>{p.prompt}</p>
+ </div>
+ )}
+ </For>
+ </div>
+ </Show>
+ <Show when={modalTask()?.runs && modalTask()!.runs!.length > 0}>
+ <div style={{ "margin-bottom": "1rem", color: "#000" }}>
+ <strong>Runs:</strong>
+ <table style={{ "margin-top": "0.5rem", "border-collapse": "collapse", width: "100%" }}>
+ <thead>
+ <tr>
+ <th style={{ border: "1px solid #ccc", padding: "0.5rem", "text-align": "left" }}>Run</th>
+ <th style={{ border: "1px solid #ccc", padding: "0.5rem", "text-align": "left" }}>Final</th>
+ <th style={{ border: "1px solid #ccc", padding: "0.5rem", "text-align": "left" }}>Base</th>
+ <th style={{ border: "1px solid #ccc", padding: "0.5rem", "text-align": "left" }}>Penalty</th>
+ <For each={modalTask()!.runs![0]?.scoreDetails}>
+ {(detail) => (
+ <th style={{ border: "1px solid #ccc", padding: "0.5rem", "text-align": "left" }}>
+ {detail.criterion} ({detail.weight})
+ </th>
+ )}
+ </For>
+ </tr>
+ </thead>
+ <tbody>
+ <For each={modalTask()!.runs}>
+ {(run, index) => (
+ <tr>
+ <td style={{ border: "1px solid #ccc", padding: "0.5rem" }}>{index() + 1}</td>
+ <td style={{ border: "1px solid #ccc", padding: "0.5rem" }}>{run.score.final.toFixed(3)}</td>
+ <td style={{ border: "1px solid #ccc", padding: "0.5rem" }}>{run.score.base.toFixed(3)}</td>
+ <td style={{ border: "1px solid #ccc", padding: "0.5rem" }}>
+ {run.score.penalty.toFixed(3)}
+ </td>
+ <For each={run.scoreDetails}>
+ {(detail) => (
+ <td style={{ border: "1px solid #ccc", padding: "0.5rem" }}>
+ {detail.average.toFixed(3)}
+ </td>
+ )}
+ </For>
+ </tr>
+ )}
+ </For>
+ </tbody>
+ </table>
+ </div>
+ </Show>
+ <Show when={modalTask()?.summary}>
+ <div style={{ "margin-bottom": "1rem", color: "#000" }}>
+ <strong>Summary:</strong>
+ <p style={{ "margin-top": "0.5rem", "white-space": "pre-wrap" }}>{modalTask()!.summary}</p>
+ </div>
+ </Show>
<pre style={{ color: "#000" }}>{JSON.stringify(modalTask(), null, 2)}</pre>
</div>
</div>