summary | refs | log | tree | commit | diff | homepage
path: root/packages/console/app/src
diff options
context:
space:
mode:
author: Frank <[email protected]> 2025-12-28 14:55:03 -0500
committer: Frank <[email protected]> 2025-12-28 14:55:05 -0500
commit: f3febd6e395458698fceca2f175f59e0cf68c1d6 (patch)
tree: a56c817da250af0342d6eb2874ab3d7e27a5c388 /packages/console/app/src
parent: f12d55bf1e69785f779e545a67b6fc045d163cdb (diff)
download: opencode-f3febd6e395458698fceca2f175f59e0cf68c1d6.tar.gz
download: opencode-f3febd6e395458698fceca2f175f59e0cf68c1d6.zip
wip: benchmark
Diffstat (limited to 'packages/console/app/src')
-rw-r--r-- packages/console/app/src/routes/bench/index.tsx 125
1 files changed, 122 insertions, 3 deletions
diff --git a/packages/console/app/src/routes/bench/index.tsx b/packages/console/app/src/routes/bench/index.tsx
index adf31fbc6..9d99393cf 100644
--- a/packages/console/app/src/routes/bench/index.tsx
+++ b/packages/console/app/src/routes/bench/index.tsx
@@ -1,20 +1,47 @@
import { Title } from "@solidjs/meta"
import { createAsync, query } from "@solidjs/router"
-import { For } from "solid-js"
+import { createMemo, createSignal, For, Show } from "solid-js"
import { Database, desc } from "@opencode-ai/console-core/drizzle/index.js"
import { BenchmarkTable } from "@opencode-ai/console-core/schema/benchmark.sql.js"
+interface TaskSource {
+ repo: string
+ from: string
+ to: string
+}
+
+interface Task {
+ averageScore: number
+ task: {
+ id: string
+ source: TaskSource
+ }
+}
+
+interface BenchmarkResult {
+ averageScore: number
+ tasks: Task[]
+}
+
async function getBenchmarks() {
"use server"
const rows = await Database.use((tx) =>
tx.select().from(BenchmarkTable).orderBy(desc(BenchmarkTable.timeCreated)).limit(100),
)
return rows.map((row) => {
- const parsed = JSON.parse(row.result) as { averageScore: number }
+ const parsed = JSON.parse(row.result) as BenchmarkResult
+ const taskScores: Record<string, number> = {}
+ const taskData: Record<string, Task> = {}
+ for (const t of parsed.tasks) {
+ taskScores[t.task.id] = t.averageScore
+ taskData[t.task.id] = t
+ }
return {
agent: row.agent,
model: row.model,
averageScore: parsed.averageScore,
+ taskScores,
+ taskData,
}
})
}
@@ -23,6 +50,17 @@ const queryBenchmarks = query(getBenchmarks, "benchmarks.list")
export default function Bench() {
const benchmarks = createAsync(() => queryBenchmarks())
+ const [modalTask, setModalTask] = createSignal<Task | null>(null)
+
+ const taskIds = createMemo(() => {
+ const ids = new Set<string>()
+ for (const row of benchmarks() ?? []) {
+ for (const id of Object.keys(row.taskScores)) {
+ ids.add(id)
+ }
+ }
+ return [...ids].sort()
+ })
return (
<main data-page="bench">
@@ -33,6 +71,7 @@ export default function Bench() {
<th>Agent</th>
<th>Model</th>
<th>Average Score</th>
+ <For each={taskIds()}>{(id) => <th>{id}</th>}</For>
</tr>
</thead>
<tbody>
@@ -41,12 +80,92 @@ export default function Bench() {
<tr>
<td>{row.agent}</td>
<td>{row.model}</td>
- <td>{row.averageScore}</td>
+ <td>{row.averageScore.toFixed(3)}</td>
+ <For each={taskIds()}>
+ {(id) => (
+ <td>
+ <Show when={row.taskData[id]} fallback={row.taskScores[id]?.toFixed(3) ?? ""}>
+ <span
+ style={{ cursor: "pointer", "text-decoration": "underline" }}
+ onClick={() => setModalTask(row.taskData[id])}
+ >
+ {row.taskScores[id]?.toFixed(3)}
+ </span>
+ </Show>
+ </td>
+ )}
+ </For>
</tr>
)}
</For>
</tbody>
</table>
+
+ <Show when={modalTask()}>
+ <div
+ data-component="modal-overlay"
+ style={{
+ position: "fixed",
+ inset: "0",
+ background: "rgba(0, 0, 0, 0.5)",
+ display: "flex",
+ "align-items": "center",
+ "justify-content": "center",
+ "z-index": "1000",
+ }}
+ onClick={() => setModalTask(null)}
+ >
+ <div
+ data-component="modal"
+ style={{
+ background: "var(--color-background, #fff)",
+ padding: "1rem",
+ "border-radius": "8px",
+ "max-width": "80vw",
+ "max-height": "80vh",
+ overflow: "auto",
+ }}
+ onClick={(e) => e.stopPropagation()}
+ >
+ <div style={{ "margin-bottom": "1rem" }}>
+ <div>
+ <strong>Repo: </strong>
+ <a
+ href={`https://github.com/${modalTask()!.task.source.repo}`}
+ target="_blank"
+ rel="noopener noreferrer"
+ style={{ color: "#0066cc" }}
+ >
+ {modalTask()!.task.source.repo}
+ </a>
+ </div>
+ <div>
+ <strong>From: </strong>
+ <a
+ href={`https://github.com/${modalTask()!.task.source.repo}/commit/${modalTask()!.task.source.from}`}
+ target="_blank"
+ rel="noopener noreferrer"
+ style={{ color: "#0066cc" }}
+ >
+ {modalTask()!.task.source.from.slice(0, 7)}
+ </a>
+ </div>
+ <div>
+ <strong>To: </strong>
+ <a
+ href={`https://github.com/${modalTask()!.task.source.repo}/commit/${modalTask()!.task.source.to}`}
+ target="_blank"
+ rel="noopener noreferrer"
+ style={{ color: "#0066cc" }}
+ >
+ {modalTask()!.task.source.to.slice(0, 7)}
+ </a>
+ </div>
+ </div>
+ <pre style={{ color: "#000" }}>{JSON.stringify(modalTask(), null, 2)}</pre>
+ </div>
+ </div>
+ </Show>
</main>
)
}