diff options
| author | adamdottv <[email protected]> | 2025-05-29 15:18:47 -0500 |
|---|---|---|
| committer | adamdottv <[email protected]> | 2025-05-29 15:18:47 -0500 |
| commit | 005d6e0bde9a42e2bebee7b712b0fe9a7be23499 (patch) | |
| tree | 3e667d3237d99a95a123b8659c4f1a5c370b9e5e /internal/llm | |
| parent | 37c0c1f358cadbc918319500cd2b1b3fcbe41a9e (diff) | |
| download | opencode-005d6e0bde9a42e2bebee7b712b0fe9a7be23499.tar.gz opencode-005d6e0bde9a42e2bebee7b712b0fe9a7be23499.zip | |
wip: refactoring tui
Diffstat (limited to 'internal/llm')
24 files changed, 0 insertions, 3906 deletions
diff --git a/internal/llm/models/anthropic.go b/internal/llm/models/anthropic.go deleted file mode 100644 index f67a74842..000000000 --- a/internal/llm/models/anthropic.go +++ /dev/null @@ -1,97 +0,0 @@ -package models - -const ( - ProviderAnthropic ModelProvider = "anthropic" - - // Models - Claude35Sonnet ModelID = "claude-3.5-sonnet" - Claude3Haiku ModelID = "claude-3-haiku" - Claude37Sonnet ModelID = "claude-3.7-sonnet" - Claude35Haiku ModelID = "claude-3.5-haiku" - Claude3Opus ModelID = "claude-3-opus" - Claude4Sonnet ModelID = "claude-4-sonnet" -) - -// https://docs.anthropic.com/en/docs/about-claude/models/all-models -var AnthropicModels = map[ModelID]Model{ - Claude35Sonnet: { - ID: Claude35Sonnet, - Name: "Claude 3.5 Sonnet", - Provider: ProviderAnthropic, - APIModel: "claude-3-5-sonnet-latest", - CostPer1MIn: 3.0, - CostPer1MInCached: 3.75, - CostPer1MOutCached: 0.30, - CostPer1MOut: 15.0, - ContextWindow: 200000, - DefaultMaxTokens: 5000, - SupportsAttachments: true, - }, - Claude3Haiku: { - ID: Claude3Haiku, - Name: "Claude 3 Haiku", - Provider: ProviderAnthropic, - APIModel: "claude-3-haiku-20240307", // doesn't support "-latest" - CostPer1MIn: 0.25, - CostPer1MInCached: 0.30, - CostPer1MOutCached: 0.03, - CostPer1MOut: 1.25, - ContextWindow: 200000, - DefaultMaxTokens: 4096, - SupportsAttachments: true, - }, - Claude37Sonnet: { - ID: Claude37Sonnet, - Name: "Claude 3.7 Sonnet", - Provider: ProviderAnthropic, - APIModel: "claude-3-7-sonnet-latest", - CostPer1MIn: 3.0, - CostPer1MInCached: 3.75, - CostPer1MOutCached: 0.30, - CostPer1MOut: 15.0, - ContextWindow: 200000, - DefaultMaxTokens: 50000, - CanReason: true, - SupportsAttachments: true, - }, - Claude4Sonnet: { - ID: Claude4Sonnet, - Name: "Claude 4 Sonnet", - Provider: ProviderAnthropic, - APIModel: "claude-sonnet-4-20250514", - CostPer1MIn: 3.0, - CostPer1MInCached: 3.75, - CostPer1MOutCached: 0.30, - CostPer1MOut: 15.0, - ContextWindow: 200000, - DefaultMaxTokens: 50000, - CanReason: true, - SupportsAttachments: true, - }, - Claude35Haiku: { - ID: Claude35Haiku, - Name: "Claude 3.5 Haiku", - Provider: ProviderAnthropic, - APIModel: "claude-3-5-haiku-latest", - CostPer1MIn: 0.80, - CostPer1MInCached: 1.0, - CostPer1MOutCached: 0.08, - CostPer1MOut: 4.0, - ContextWindow: 200000, - DefaultMaxTokens: 4096, - SupportsAttachments: true, - }, - Claude3Opus: { - ID: Claude3Opus, - Name: "Claude 3 Opus", - Provider: ProviderAnthropic, - APIModel: "claude-3-opus-latest", - CostPer1MIn: 15.0, - CostPer1MInCached: 18.75, - CostPer1MOutCached: 1.50, - CostPer1MOut: 75.0, - ContextWindow: 200000, - DefaultMaxTokens: 4096, - SupportsAttachments: true, - }, -} diff --git a/internal/llm/models/azure.go b/internal/llm/models/azure.go deleted file mode 100644 index 416597302..000000000 --- a/internal/llm/models/azure.go +++ /dev/null @@ -1,168 +0,0 @@ -package models - -const ProviderAzure ModelProvider = "azure" - -const ( - AzureGPT41 ModelID = "azure.gpt-4.1" - AzureGPT41Mini ModelID = "azure.gpt-4.1-mini" - AzureGPT41Nano ModelID = "azure.gpt-4.1-nano" - AzureGPT45Preview ModelID = "azure.gpt-4.5-preview" - AzureGPT4o ModelID = "azure.gpt-4o" - AzureGPT4oMini ModelID = "azure.gpt-4o-mini" - AzureO1 ModelID = "azure.o1" - AzureO1Mini ModelID = "azure.o1-mini" - AzureO3 ModelID = "azure.o3" - AzureO3Mini ModelID = "azure.o3-mini" - AzureO4Mini ModelID = "azure.o4-mini" -) - -var AzureModels = map[ModelID]Model{ - AzureGPT41: { - ID: AzureGPT41, - Name: "Azure OpenAI – GPT 4.1", - Provider: ProviderAzure, - APIModel: "gpt-4.1", - CostPer1MIn: OpenAIModels[GPT41].CostPer1MIn, - CostPer1MInCached: OpenAIModels[GPT41].CostPer1MInCached, - CostPer1MOut: OpenAIModels[GPT41].CostPer1MOut, - CostPer1MOutCached: OpenAIModels[GPT41].CostPer1MOutCached, - ContextWindow: OpenAIModels[GPT41].ContextWindow, - DefaultMaxTokens: OpenAIModels[GPT41].DefaultMaxTokens, - SupportsAttachments: true, - }, - AzureGPT41Mini: { - ID: AzureGPT41Mini, - Name: "Azure OpenAI – GPT 4.1 mini", - Provider: ProviderAzure, - APIModel: "gpt-4.1-mini", - CostPer1MIn: OpenAIModels[GPT41Mini].CostPer1MIn, - CostPer1MInCached: OpenAIModels[GPT41Mini].CostPer1MInCached, - CostPer1MOut: OpenAIModels[GPT41Mini].CostPer1MOut, - CostPer1MOutCached: OpenAIModels[GPT41Mini].CostPer1MOutCached, - ContextWindow: OpenAIModels[GPT41Mini].ContextWindow, - DefaultMaxTokens: OpenAIModels[GPT41Mini].DefaultMaxTokens, - SupportsAttachments: true, - }, - AzureGPT41Nano: { - ID: AzureGPT41Nano, - Name: "Azure OpenAI – GPT 4.1 nano", - Provider: ProviderAzure, - APIModel: "gpt-4.1-nano", - CostPer1MIn: OpenAIModels[GPT41Nano].CostPer1MIn, - CostPer1MInCached: OpenAIModels[GPT41Nano].CostPer1MInCached, - CostPer1MOut: OpenAIModels[GPT41Nano].CostPer1MOut, - CostPer1MOutCached: OpenAIModels[GPT41Nano].CostPer1MOutCached, - ContextWindow: OpenAIModels[GPT41Nano].ContextWindow, - DefaultMaxTokens: OpenAIModels[GPT41Nano].DefaultMaxTokens, - SupportsAttachments: true, - }, - AzureGPT45Preview: { - ID: AzureGPT45Preview, - Name: "Azure OpenAI – GPT 4.5 preview", - Provider: ProviderAzure, - APIModel: "gpt-4.5-preview", - CostPer1MIn: OpenAIModels[GPT45Preview].CostPer1MIn, - CostPer1MInCached: OpenAIModels[GPT45Preview].CostPer1MInCached, - CostPer1MOut: OpenAIModels[GPT45Preview].CostPer1MOut, - CostPer1MOutCached: OpenAIModels[GPT45Preview].CostPer1MOutCached, - ContextWindow: OpenAIModels[GPT45Preview].ContextWindow, - DefaultMaxTokens: OpenAIModels[GPT45Preview].DefaultMaxTokens, - SupportsAttachments: true, - }, - AzureGPT4o: { - ID: AzureGPT4o, - Name: "Azure OpenAI – GPT-4o", - Provider: ProviderAzure, - APIModel: "gpt-4o", - CostPer1MIn: OpenAIModels[GPT4o].CostPer1MIn, - CostPer1MInCached: OpenAIModels[GPT4o].CostPer1MInCached, - CostPer1MOut: OpenAIModels[GPT4o].CostPer1MOut, - CostPer1MOutCached: OpenAIModels[GPT4o].CostPer1MOutCached, - ContextWindow: OpenAIModels[GPT4o].ContextWindow, - DefaultMaxTokens: OpenAIModels[GPT4o].DefaultMaxTokens, - SupportsAttachments: true, - }, - AzureGPT4oMini: { - ID: AzureGPT4oMini, - Name: "Azure OpenAI – GPT-4o mini", - Provider: ProviderAzure, - APIModel: "gpt-4o-mini", - CostPer1MIn: OpenAIModels[GPT4oMini].CostPer1MIn, - CostPer1MInCached: OpenAIModels[GPT4oMini].CostPer1MInCached, - CostPer1MOut: OpenAIModels[GPT4oMini].CostPer1MOut, - CostPer1MOutCached: OpenAIModels[GPT4oMini].CostPer1MOutCached, - ContextWindow: OpenAIModels[GPT4oMini].ContextWindow, - DefaultMaxTokens: OpenAIModels[GPT4oMini].DefaultMaxTokens, - SupportsAttachments: true, - }, - AzureO1: { - ID: AzureO1, - Name: "Azure OpenAI – O1", - Provider: ProviderAzure, - APIModel: "o1", - CostPer1MIn: OpenAIModels[O1].CostPer1MIn, - CostPer1MInCached: OpenAIModels[O1].CostPer1MInCached, - CostPer1MOut: OpenAIModels[O1].CostPer1MOut, - CostPer1MOutCached: OpenAIModels[O1].CostPer1MOutCached, - ContextWindow: OpenAIModels[O1].ContextWindow, - DefaultMaxTokens: OpenAIModels[O1].DefaultMaxTokens, - CanReason: OpenAIModels[O1].CanReason, - SupportsAttachments: true, - }, - AzureO1Mini: { - ID: AzureO1Mini, - Name: "Azure OpenAI – O1 mini", - Provider: ProviderAzure, - APIModel: "o1-mini", - CostPer1MIn: OpenAIModels[O1Mini].CostPer1MIn, - CostPer1MInCached: OpenAIModels[O1Mini].CostPer1MInCached, - CostPer1MOut: OpenAIModels[O1Mini].CostPer1MOut, - CostPer1MOutCached: OpenAIModels[O1Mini].CostPer1MOutCached, - ContextWindow: OpenAIModels[O1Mini].ContextWindow, - DefaultMaxTokens: OpenAIModels[O1Mini].DefaultMaxTokens, - CanReason: OpenAIModels[O1Mini].CanReason, - SupportsAttachments: true, - }, - AzureO3: { - ID: AzureO3, - Name: "Azure OpenAI – O3", - Provider: ProviderAzure, - APIModel: "o3", - CostPer1MIn: OpenAIModels[O3].CostPer1MIn, - CostPer1MInCached: OpenAIModels[O3].CostPer1MInCached, - CostPer1MOut: OpenAIModels[O3].CostPer1MOut, - CostPer1MOutCached: OpenAIModels[O3].CostPer1MOutCached, - ContextWindow: OpenAIModels[O3].ContextWindow, - DefaultMaxTokens: OpenAIModels[O3].DefaultMaxTokens, - CanReason: OpenAIModels[O3].CanReason, - SupportsAttachments: true, - }, - AzureO3Mini: { - ID: AzureO3Mini, - Name: "Azure OpenAI – O3 mini", - Provider: ProviderAzure, - APIModel: "o3-mini", - CostPer1MIn: OpenAIModels[O3Mini].CostPer1MIn, - CostPer1MInCached: OpenAIModels[O3Mini].CostPer1MInCached, - CostPer1MOut: OpenAIModels[O3Mini].CostPer1MOut, - CostPer1MOutCached: OpenAIModels[O3Mini].CostPer1MOutCached, - ContextWindow: OpenAIModels[O3Mini].ContextWindow, - DefaultMaxTokens: OpenAIModels[O3Mini].DefaultMaxTokens, - CanReason: OpenAIModels[O3Mini].CanReason, - SupportsAttachments: false, - }, - AzureO4Mini: { - ID: AzureO4Mini, - Name: "Azure OpenAI – O4 mini", - Provider: ProviderAzure, - APIModel: "o4-mini", - CostPer1MIn: OpenAIModels[O4Mini].CostPer1MIn, - CostPer1MInCached: OpenAIModels[O4Mini].CostPer1MInCached, - CostPer1MOut: OpenAIModels[O4Mini].CostPer1MOut, - CostPer1MOutCached: OpenAIModels[O4Mini].CostPer1MOutCached, - ContextWindow: OpenAIModels[O4Mini].ContextWindow, - DefaultMaxTokens: OpenAIModels[O4Mini].DefaultMaxTokens, - CanReason: OpenAIModels[O4Mini].CanReason, - SupportsAttachments: true, - }, -} diff --git a/internal/llm/models/bedrock.go b/internal/llm/models/bedrock.go deleted file mode 100644 index 06f825654..000000000 --- a/internal/llm/models/bedrock.go +++ /dev/null @@ -1,25 +0,0 @@ -package models - -const ( - ProviderBedrock ModelProvider = "bedrock" - - // Models - BedrockClaude37Sonnet ModelID = "bedrock.claude-3.7-sonnet" -) - -var BedrockModels = map[ModelID]Model{ - BedrockClaude37Sonnet: { - ID: BedrockClaude37Sonnet, - Name: "Bedrock: Claude 3.7 Sonnet", - Provider: ProviderBedrock, - APIModel: "anthropic.claude-3-7-sonnet-20250219-v1:0", - CostPer1MIn: 3.0, - CostPer1MInCached: 3.75, - CostPer1MOutCached: 0.30, - CostPer1MOut: 15.0, - ContextWindow: 200_000, - DefaultMaxTokens: 50_000, - CanReason: true, - SupportsAttachments: true, - }, -} diff --git a/internal/llm/models/gemini.go b/internal/llm/models/gemini.go deleted file mode 100644 index f73910166..000000000 --- a/internal/llm/models/gemini.go +++ /dev/null @@ -1,67 +0,0 @@ -package models - -const ( - ProviderGemini ModelProvider = "gemini" - - // Models - Gemini25Flash ModelID = "gemini-2.5-flash" - Gemini25 ModelID = "gemini-2.5" - Gemini20Flash ModelID = "gemini-2.0-flash" - Gemini20FlashLite ModelID = "gemini-2.0-flash-lite" -) - -var GeminiModels = map[ModelID]Model{ - Gemini25Flash: { - ID: Gemini25Flash, - Name: "Gemini 2.5 Flash", - Provider: ProviderGemini, - APIModel: "gemini-2.5-flash-preview-04-17", - CostPer1MIn: 0.15, - CostPer1MInCached: 0, - CostPer1MOutCached: 0, - CostPer1MOut: 0.60, - ContextWindow: 1000000, - DefaultMaxTokens: 50000, - SupportsAttachments: true, - }, - Gemini25: { - ID: Gemini25, - Name: "Gemini 2.5 Pro", - Provider: ProviderGemini, - APIModel: "gemini-2.5-pro-preview-03-25", - CostPer1MIn: 1.25, - CostPer1MInCached: 0, - CostPer1MOutCached: 0, - CostPer1MOut: 10, - ContextWindow: 1000000, - DefaultMaxTokens: 50000, - SupportsAttachments: true, - }, - - Gemini20Flash: { - ID: Gemini20Flash, - Name: "Gemini 2.0 Flash", - Provider: ProviderGemini, - APIModel: "gemini-2.0-flash", - CostPer1MIn: 0.10, - CostPer1MInCached: 0, - CostPer1MOutCached: 0, - CostPer1MOut: 0.40, - ContextWindow: 1000000, - DefaultMaxTokens: 6000, - SupportsAttachments: true, - }, - Gemini20FlashLite: { - ID: Gemini20FlashLite, - Name: "Gemini 2.0 Flash Lite", - Provider: ProviderGemini, - APIModel: "gemini-2.0-flash-lite", - CostPer1MIn: 0.05, - CostPer1MInCached: 0, - CostPer1MOutCached: 0, - CostPer1MOut: 0.30, - ContextWindow: 1000000, - DefaultMaxTokens: 6000, - SupportsAttachments: true, - }, -} diff --git a/internal/llm/models/groq.go b/internal/llm/models/groq.go deleted file mode 100644 index 0a54053de..000000000 --- a/internal/llm/models/groq.go +++ /dev/null @@ -1,89 +0,0 @@ -package models - -const ( - ProviderGROQ ModelProvider = "groq" - - // GROQ - QWENQwq ModelID = "qwen-qwq" - - // GROQ preview models - Llama4Scout ModelID = "meta-llama/llama-4-scout-17b-16e-instruct" - Llama4Maverick ModelID = "meta-llama/llama-4-maverick-17b-128e-instruct" - Llama3_3_70BVersatile ModelID = "llama-3.3-70b-versatile" - DeepseekR1DistillLlama70b ModelID = "deepseek-r1-distill-llama-70b" -) - -var GroqModels = map[ModelID]Model{ - // - // GROQ - QWENQwq: { - ID: QWENQwq, - Name: "Qwen Qwq", - Provider: ProviderGROQ, - APIModel: "qwen-qwq-32b", - CostPer1MIn: 0.29, - CostPer1MInCached: 0.275, - CostPer1MOutCached: 0.0, - CostPer1MOut: 0.39, - ContextWindow: 128_000, - DefaultMaxTokens: 50000, - // for some reason, the groq api doesn't like the reasoningEffort parameter - CanReason: false, - SupportsAttachments: false, - }, - - Llama4Scout: { - ID: Llama4Scout, - Name: "Llama4Scout", - Provider: ProviderGROQ, - APIModel: "meta-llama/llama-4-scout-17b-16e-instruct", - CostPer1MIn: 0.11, - CostPer1MInCached: 0, - CostPer1MOutCached: 0, - CostPer1MOut: 0.34, - DefaultMaxTokens: 8192, - ContextWindow: 128_000, // 10M when? - SupportsAttachments: true, - }, - - Llama4Maverick: { - ID: Llama4Maverick, - Name: "Llama4Maverick", - Provider: ProviderGROQ, - APIModel: "meta-llama/llama-4-maverick-17b-128e-instruct", - CostPer1MIn: 0.20, - CostPer1MInCached: 0, - CostPer1MOutCached: 0, - CostPer1MOut: 0.20, - DefaultMaxTokens: 8192, - ContextWindow: 128_000, - SupportsAttachments: true, - }, - - Llama3_3_70BVersatile: { - ID: Llama3_3_70BVersatile, - Name: "Llama3_3_70BVersatile", - Provider: ProviderGROQ, - APIModel: "llama-3.3-70b-versatile", - CostPer1MIn: 0.59, - CostPer1MInCached: 0, - CostPer1MOutCached: 0, - CostPer1MOut: 0.79, - ContextWindow: 128_000, - SupportsAttachments: false, - }, - - DeepseekR1DistillLlama70b: { - ID: DeepseekR1DistillLlama70b, - Name: "DeepseekR1DistillLlama70b", - Provider: ProviderGROQ, - APIModel: "deepseek-r1-distill-llama-70b", - CostPer1MIn: 0.75, - CostPer1MInCached: 0, - CostPer1MOutCached: 0, - CostPer1MOut: 0.99, - ContextWindow: 128_000, - CanReason: true, - SupportsAttachments: false, - }, -} diff --git a/internal/llm/models/models.go b/internal/llm/models/models.go deleted file mode 100644 index bfdd0d2d8..000000000 --- a/internal/llm/models/models.go +++ /dev/null @@ -1,54 +0,0 @@ -package models - -import "maps" - -type ( - ModelID string - ModelProvider string -) - -type Model struct { - ID ModelID `json:"id"` - Name string `json:"name"` - Provider ModelProvider `json:"provider"` - APIModel string `json:"api_model"` - CostPer1MIn float64 `json:"cost_per_1m_in"` - CostPer1MOut float64 `json:"cost_per_1m_out"` - CostPer1MInCached float64 `json:"cost_per_1m_in_cached"` - CostPer1MOutCached float64 `json:"cost_per_1m_out_cached"` - ContextWindow int64 `json:"context_window"` - DefaultMaxTokens int64 `json:"default_max_tokens"` - CanReason bool `json:"can_reason"` - SupportsAttachments bool `json:"supports_attachments"` -} - -const ( - // ForTests - ProviderMock ModelProvider = "__mock" -) - -// Providers in order of popularity -var ProviderPopularity = map[ModelProvider]int{ - ProviderAnthropic: 1, - ProviderOpenAI: 2, - ProviderGemini: 3, - ProviderGROQ: 4, - ProviderOpenRouter: 5, - ProviderBedrock: 6, - ProviderAzure: 7, - ProviderVertexAI: 8, -} - -var SupportedModels = map[ModelID]Model{} - -func init() { - maps.Copy(SupportedModels, AnthropicModels) - maps.Copy(SupportedModels, BedrockModels) - maps.Copy(SupportedModels, OpenAIModels) - maps.Copy(SupportedModels, GeminiModels) - maps.Copy(SupportedModels, GroqModels) - maps.Copy(SupportedModels, AzureModels) - maps.Copy(SupportedModels, OpenRouterModels) - maps.Copy(SupportedModels, XAIModels) - maps.Copy(SupportedModels, VertexAIGeminiModels) -} diff --git a/internal/llm/models/openai.go b/internal/llm/models/openai.go deleted file mode 100644 index fdca5bed3..000000000 --- a/internal/llm/models/openai.go +++ /dev/null @@ -1,196 +0,0 @@ -package models - -const ( - ProviderOpenAI ModelProvider = "openai" - - CodexMini ModelID = "codex-mini" - GPT41 ModelID = "gpt-4.1" - GPT41Mini ModelID = "gpt-4.1-mini" - GPT41Nano ModelID = "gpt-4.1-nano" - GPT45Preview ModelID = "gpt-4.5-preview" - GPT4o ModelID = "gpt-4o" - GPT4oMini ModelID = "gpt-4o-mini" - O1 ModelID = "o1" - O1Pro ModelID = "o1-pro" - O1Mini ModelID = "o1-mini" - O3 ModelID = "o3" - O3Mini ModelID = "o3-mini" - O4Mini ModelID = "o4-mini" -) - -var OpenAIModels = map[ModelID]Model{ - CodexMini: { - ID: CodexMini, - Name: "Codex Mini", - Provider: ProviderOpenAI, - APIModel: "codex-mini-latest", - CostPer1MIn: 1.50, - CostPer1MInCached: 0.375, - CostPer1MOutCached: 0.0, - CostPer1MOut: 6.00, - ContextWindow: 200_000, - DefaultMaxTokens: 100_000, - CanReason: true, - SupportsAttachments: true, - }, - GPT41: { - ID: GPT41, - Name: "GPT 4.1", - Provider: ProviderOpenAI, - APIModel: "gpt-4.1", - CostPer1MIn: 2.00, - CostPer1MInCached: 0.50, - CostPer1MOutCached: 0.0, - CostPer1MOut: 8.00, - ContextWindow: 1_047_576, - DefaultMaxTokens: 20000, - SupportsAttachments: true, - }, - GPT41Mini: { - ID: GPT41Mini, - Name: "GPT 4.1 mini", - Provider: ProviderOpenAI, - APIModel: "gpt-4.1", - CostPer1MIn: 0.40, - CostPer1MInCached: 0.10, - CostPer1MOutCached: 0.0, - CostPer1MOut: 1.60, - ContextWindow: 200_000, - DefaultMaxTokens: 20000, - SupportsAttachments: true, - }, - GPT41Nano: { - ID: GPT41Nano, - Name: "GPT 4.1 nano", - Provider: ProviderOpenAI, - APIModel: "gpt-4.1-nano", - CostPer1MIn: 0.10, - CostPer1MInCached: 0.025, - CostPer1MOutCached: 0.0, - CostPer1MOut: 0.40, - ContextWindow: 1_047_576, - DefaultMaxTokens: 20000, - SupportsAttachments: true, - }, - GPT45Preview: { - ID: GPT45Preview, - Name: "GPT 4.5 preview", - Provider: ProviderOpenAI, - APIModel: "gpt-4.5-preview", - CostPer1MIn: 75.00, - CostPer1MInCached: 37.50, - CostPer1MOutCached: 0.0, - CostPer1MOut: 150.00, - ContextWindow: 128_000, - DefaultMaxTokens: 15000, - SupportsAttachments: true, - }, - GPT4o: { - ID: GPT4o, - Name: "GPT 4o", - Provider: ProviderOpenAI, - APIModel: "gpt-4o", - CostPer1MIn: 2.50, - CostPer1MInCached: 1.25, - CostPer1MOutCached: 0.0, - CostPer1MOut: 10.00, - ContextWindow: 128_000, - DefaultMaxTokens: 4096, - SupportsAttachments: true, - }, - GPT4oMini: { - ID: GPT4oMini, - Name: "GPT 4o mini", - Provider: ProviderOpenAI, - APIModel: "gpt-4o-mini", - CostPer1MIn: 0.15, - CostPer1MInCached: 0.075, - CostPer1MOutCached: 0.0, - CostPer1MOut: 0.60, - ContextWindow: 128_000, - SupportsAttachments: true, - }, - O1: { - ID: O1, - Name: "O1", - Provider: ProviderOpenAI, - APIModel: "o1", - CostPer1MIn: 15.00, - CostPer1MInCached: 7.50, - CostPer1MOutCached: 0.0, - CostPer1MOut: 60.00, - ContextWindow: 200_000, - DefaultMaxTokens: 50000, - CanReason: true, - SupportsAttachments: true, - }, - O1Pro: { - ID: O1Pro, - Name: "o1 pro", - Provider: ProviderOpenAI, - APIModel: "o1-pro", - CostPer1MIn: 150.00, - CostPer1MInCached: 0.0, - CostPer1MOutCached: 0.0, - CostPer1MOut: 600.00, - ContextWindow: 200_000, - DefaultMaxTokens: 50000, - CanReason: true, - SupportsAttachments: true, - }, - O1Mini: { - ID: O1Mini, - Name: "o1 mini", - Provider: ProviderOpenAI, - APIModel: "o1-mini", - CostPer1MIn: 1.10, - CostPer1MInCached: 0.55, - CostPer1MOutCached: 0.0, - CostPer1MOut: 4.40, - ContextWindow: 128_000, - DefaultMaxTokens: 50000, - CanReason: true, - SupportsAttachments: true, - }, - O3: { - ID: O3, - Name: "o3", - Provider: ProviderOpenAI, - APIModel: "o3", - CostPer1MIn: 10.00, - CostPer1MInCached: 2.50, - CostPer1MOutCached: 0.0, - CostPer1MOut: 40.00, - ContextWindow: 200_000, - CanReason: true, - SupportsAttachments: true, - }, - O3Mini: { - ID: O3Mini, - Name: "o3 mini", - Provider: ProviderOpenAI, - APIModel: "o3-mini", - CostPer1MIn: 1.10, - CostPer1MInCached: 0.55, - CostPer1MOutCached: 0.0, - CostPer1MOut: 4.40, - ContextWindow: 200_000, - DefaultMaxTokens: 50000, - CanReason: true, - SupportsAttachments: false, - }, - O4Mini: { - ID: O4Mini, - Name: "o4 mini", - Provider: ProviderOpenAI, - APIModel: "o4-mini", - CostPer1MIn: 1.10, - CostPer1MInCached: 0.275, - CostPer1MOutCached: 0.0, - CostPer1MOut: 4.40, - ContextWindow: 128_000, - DefaultMaxTokens: 50000, - CanReason: true, - SupportsAttachments: true, - }, -} diff --git a/internal/llm/models/openrouter.go b/internal/llm/models/openrouter.go deleted file mode 100644 index faee734a0..000000000 --- a/internal/llm/models/openrouter.go +++ /dev/null @@ -1,327 +0,0 @@ -package models - -const ( - ProviderOpenRouter ModelProvider = "openrouter" - - OpenRouterGPT41 ModelID = "openrouter.gpt-4.1" - OpenRouterGPT41Mini ModelID = "openrouter.gpt-4.1-mini" - OpenRouterGPT41Nano ModelID = "openrouter.gpt-4.1-nano" - OpenRouterGPT45Preview ModelID = "openrouter.gpt-4.5-preview" - OpenRouterGPT4o ModelID = "openrouter.gpt-4o" - OpenRouterGPT4oMini ModelID = "openrouter.gpt-4o-mini" - OpenRouterO1 ModelID = "openrouter.o1" - OpenRouterO1Pro ModelID = "openrouter.o1-pro" - OpenRouterO1Mini ModelID = "openrouter.o1-mini" - OpenRouterO3 ModelID = "openrouter.o3" - OpenRouterO3Mini ModelID = "openrouter.o3-mini" - OpenRouterO4Mini ModelID = "openrouter.o4-mini" - OpenRouterGemini25Flash ModelID = "openrouter.gemini-2.5-flash" - OpenRouterGemini25 ModelID = "openrouter.gemini-2.5" - OpenRouterClaude35Sonnet ModelID = "openrouter.claude-3.5-sonnet" - OpenRouterClaude3Haiku ModelID = "openrouter.claude-3-haiku" - OpenRouterClaude37Sonnet ModelID = "openrouter.claude-3.7-sonnet" - OpenRouterClaude35Haiku ModelID = "openrouter.claude-3.5-haiku" - OpenRouterClaude3Opus ModelID = "openrouter.claude-3-opus" - OpenRouterQwen235B ModelID = "openrouter.qwen-3-235b" - OpenRouterQwen32B ModelID = "openrouter.qwen-3-32b" - OpenRouterQwen30B ModelID = "openrouter.qwen-3-30b" - OpenRouterQwen14B ModelID = "openrouter.qwen-3-14b" - OpenRouterQwen8B ModelID = "openrouter.qwen-3-8b" -) - -var OpenRouterModels = map[ModelID]Model{ - OpenRouterGPT41: { - ID: OpenRouterGPT41, - Name: "OpenRouter: GPT 4.1", - Provider: ProviderOpenRouter, - APIModel: "openai/gpt-4.1", - CostPer1MIn: OpenAIModels[GPT41].CostPer1MIn, - CostPer1MInCached: OpenAIModels[GPT41].CostPer1MInCached, - CostPer1MOut: OpenAIModels[GPT41].CostPer1MOut, - CostPer1MOutCached: OpenAIModels[GPT41].CostPer1MOutCached, - ContextWindow: OpenAIModels[GPT41].ContextWindow, - DefaultMaxTokens: OpenAIModels[GPT41].DefaultMaxTokens, - }, - OpenRouterGPT41Mini: { - ID: OpenRouterGPT41Mini, - Name: "OpenRouter: GPT 4.1 mini", - Provider: ProviderOpenRouter, - APIModel: "openai/gpt-4.1-mini", - CostPer1MIn: OpenAIModels[GPT41Mini].CostPer1MIn, - CostPer1MInCached: OpenAIModels[GPT41Mini].CostPer1MInCached, - CostPer1MOut: OpenAIModels[GPT41Mini].CostPer1MOut, - CostPer1MOutCached: OpenAIModels[GPT41Mini].CostPer1MOutCached, - ContextWindow: OpenAIModels[GPT41Mini].ContextWindow, - DefaultMaxTokens: OpenAIModels[GPT41Mini].DefaultMaxTokens, - }, - OpenRouterGPT41Nano: { - ID: OpenRouterGPT41Nano, - Name: "OpenRouter: GPT 4.1 nano", - Provider: ProviderOpenRouter, - APIModel: "openai/gpt-4.1-nano", - CostPer1MIn: OpenAIModels[GPT41Nano].CostPer1MIn, - CostPer1MInCached: OpenAIModels[GPT41Nano].CostPer1MInCached, - CostPer1MOut: OpenAIModels[GPT41Nano].CostPer1MOut, - CostPer1MOutCached: OpenAIModels[GPT41Nano].CostPer1MOutCached, - ContextWindow: OpenAIModels[GPT41Nano].ContextWindow, - DefaultMaxTokens: OpenAIModels[GPT41Nano].DefaultMaxTokens, - }, - OpenRouterGPT45Preview: { - ID: OpenRouterGPT45Preview, - Name: "OpenRouter: GPT 4.5 preview", - Provider: ProviderOpenRouter, - APIModel: "openai/gpt-4.5-preview", - CostPer1MIn: OpenAIModels[GPT45Preview].CostPer1MIn, - CostPer1MInCached: OpenAIModels[GPT45Preview].CostPer1MInCached, - CostPer1MOut: OpenAIModels[GPT45Preview].CostPer1MOut, - CostPer1MOutCached: OpenAIModels[GPT45Preview].CostPer1MOutCached, - ContextWindow: OpenAIModels[GPT45Preview].ContextWindow, - DefaultMaxTokens: OpenAIModels[GPT45Preview].DefaultMaxTokens, - }, - OpenRouterGPT4o: { - ID: OpenRouterGPT4o, - Name: "OpenRouter: GPT 4o", - Provider: ProviderOpenRouter, - APIModel: "openai/gpt-4o", - CostPer1MIn: OpenAIModels[GPT4o].CostPer1MIn, - CostPer1MInCached: OpenAIModels[GPT4o].CostPer1MInCached, - CostPer1MOut: OpenAIModels[GPT4o].CostPer1MOut, - CostPer1MOutCached: OpenAIModels[GPT4o].CostPer1MOutCached, - ContextWindow: OpenAIModels[GPT4o].ContextWindow, - DefaultMaxTokens: OpenAIModels[GPT4o].DefaultMaxTokens, - }, - OpenRouterGPT4oMini: { - ID: OpenRouterGPT4oMini, - Name: "OpenRouter: GPT 4o mini", - Provider: ProviderOpenRouter, - APIModel: "openai/gpt-4o-mini", - CostPer1MIn: OpenAIModels[GPT4oMini].CostPer1MIn, - CostPer1MInCached: OpenAIModels[GPT4oMini].CostPer1MInCached, - CostPer1MOut: OpenAIModels[GPT4oMini].CostPer1MOut, - CostPer1MOutCached: OpenAIModels[GPT4oMini].CostPer1MOutCached, - ContextWindow: OpenAIModels[GPT4oMini].ContextWindow, - }, - OpenRouterO1: { - ID: OpenRouterO1, - Name: "OpenRouter: O1", - Provider: ProviderOpenRouter, - APIModel: "openai/o1", - CostPer1MIn: OpenAIModels[O1].CostPer1MIn, - CostPer1MInCached: OpenAIModels[O1].CostPer1MInCached, - CostPer1MOut: OpenAIModels[O1].CostPer1MOut, - CostPer1MOutCached: OpenAIModels[O1].CostPer1MOutCached, - ContextWindow: OpenAIModels[O1].ContextWindow, - DefaultMaxTokens: OpenAIModels[O1].DefaultMaxTokens, - CanReason: OpenAIModels[O1].CanReason, - }, - OpenRouterO1Pro: { - ID: OpenRouterO1Pro, - Name: "OpenRouter: o1 pro", - Provider: ProviderOpenRouter, - APIModel: "openai/o1-pro", - CostPer1MIn: OpenAIModels[O1Pro].CostPer1MIn, - CostPer1MInCached: OpenAIModels[O1Pro].CostPer1MInCached, - CostPer1MOut: OpenAIModels[O1Pro].CostPer1MOut, - CostPer1MOutCached: OpenAIModels[O1Pro].CostPer1MOutCached, - ContextWindow: OpenAIModels[O1Pro].ContextWindow, - DefaultMaxTokens: OpenAIModels[O1Pro].DefaultMaxTokens, - CanReason: OpenAIModels[O1Pro].CanReason, - }, - OpenRouterO1Mini: { - ID: OpenRouterO1Mini, - Name: "OpenRouter: o1 mini", - Provider: ProviderOpenRouter, - APIModel: "openai/o1-mini", - CostPer1MIn: OpenAIModels[O1Mini].CostPer1MIn, - CostPer1MInCached: OpenAIModels[O1Mini].CostPer1MInCached, - CostPer1MOut: OpenAIModels[O1Mini].CostPer1MOut, - CostPer1MOutCached: OpenAIModels[O1Mini].CostPer1MOutCached, - ContextWindow: OpenAIModels[O1Mini].ContextWindow, - DefaultMaxTokens: OpenAIModels[O1Mini].DefaultMaxTokens, - CanReason: OpenAIModels[O1Mini].CanReason, - }, - OpenRouterO3: { - ID: OpenRouterO3, - Name: "OpenRouter: o3", - Provider: ProviderOpenRouter, - APIModel: "openai/o3", - CostPer1MIn: OpenAIModels[O3].CostPer1MIn, - CostPer1MInCached: OpenAIModels[O3].CostPer1MInCached, - CostPer1MOut: OpenAIModels[O3].CostPer1MOut, - CostPer1MOutCached: OpenAIModels[O3].CostPer1MOutCached, - ContextWindow: OpenAIModels[O3].ContextWindow, - DefaultMaxTokens: OpenAIModels[O3].DefaultMaxTokens, - CanReason: OpenAIModels[O3].CanReason, - }, - OpenRouterO3Mini: { - ID: OpenRouterO3Mini, - Name: "OpenRouter: o3 mini", - Provider: ProviderOpenRouter, - APIModel: "openai/o3-mini-high", - CostPer1MIn: OpenAIModels[O3Mini].CostPer1MIn, - CostPer1MInCached: OpenAIModels[O3Mini].CostPer1MInCached, - CostPer1MOut: OpenAIModels[O3Mini].CostPer1MOut, - CostPer1MOutCached: OpenAIModels[O3Mini].CostPer1MOutCached, - ContextWindow: OpenAIModels[O3Mini].ContextWindow, - DefaultMaxTokens: OpenAIModels[O3Mini].DefaultMaxTokens, - CanReason: OpenAIModels[O3Mini].CanReason, - }, - OpenRouterO4Mini: { - ID: OpenRouterO4Mini, - Name: "OpenRouter: o4 mini", - Provider: ProviderOpenRouter, - APIModel: "openai/o4-mini-high", - CostPer1MIn: OpenAIModels[O4Mini].CostPer1MIn, - CostPer1MInCached: OpenAIModels[O4Mini].CostPer1MInCached, - CostPer1MOut: OpenAIModels[O4Mini].CostPer1MOut, - CostPer1MOutCached: OpenAIModels[O4Mini].CostPer1MOutCached, - ContextWindow: OpenAIModels[O4Mini].ContextWindow, - DefaultMaxTokens: OpenAIModels[O4Mini].DefaultMaxTokens, - CanReason: OpenAIModels[O4Mini].CanReason, - }, - OpenRouterGemini25Flash: { - ID: OpenRouterGemini25Flash, - Name: "OpenRouter: Gemini 2.5 Flash", - Provider: ProviderOpenRouter, - APIModel: "google/gemini-2.5-flash-preview:thinking", - CostPer1MIn: GeminiModels[Gemini25Flash].CostPer1MIn, - CostPer1MInCached: GeminiModels[Gemini25Flash].CostPer1MInCached, - CostPer1MOut: GeminiModels[Gemini25Flash].CostPer1MOut, - CostPer1MOutCached: GeminiModels[Gemini25Flash].CostPer1MOutCached, - ContextWindow: GeminiModels[Gemini25Flash].ContextWindow, - DefaultMaxTokens: GeminiModels[Gemini25Flash].DefaultMaxTokens, - }, - OpenRouterGemini25: { - ID: OpenRouterGemini25, - Name: "OpenRouter: Gemini 2.5 Pro", - Provider: ProviderOpenRouter, - APIModel: "google/gemini-2.5-pro-preview-03-25", - CostPer1MIn: GeminiModels[Gemini25].CostPer1MIn, - CostPer1MInCached: GeminiModels[Gemini25].CostPer1MInCached, - CostPer1MOut: GeminiModels[Gemini25].CostPer1MOut, - CostPer1MOutCached: GeminiModels[Gemini25].CostPer1MOutCached, - ContextWindow: GeminiModels[Gemini25].ContextWindow, - DefaultMaxTokens: GeminiModels[Gemini25].DefaultMaxTokens, - }, - OpenRouterClaude35Sonnet: { - ID: OpenRouterClaude35Sonnet, - Name: "OpenRouter: Claude 3.5 Sonnet", - Provider: ProviderOpenRouter, - APIModel: "anthropic/claude-3.5-sonnet", - CostPer1MIn: AnthropicModels[Claude35Sonnet].CostPer1MIn, - CostPer1MInCached: AnthropicModels[Claude35Sonnet].CostPer1MInCached, - CostPer1MOut: AnthropicModels[Claude35Sonnet].CostPer1MOut, - CostPer1MOutCached: AnthropicModels[Claude35Sonnet].CostPer1MOutCached, - ContextWindow: AnthropicModels[Claude35Sonnet].ContextWindow, - DefaultMaxTokens: AnthropicModels[Claude35Sonnet].DefaultMaxTokens, - }, - OpenRouterClaude3Haiku: { - ID: OpenRouterClaude3Haiku, - Name: "OpenRouter: Claude 3 Haiku", - Provider: ProviderOpenRouter, - APIModel: "anthropic/claude-3-haiku", - CostPer1MIn: AnthropicModels[Claude3Haiku].CostPer1MIn, - CostPer1MInCached: AnthropicModels[Claude3Haiku].CostPer1MInCached, - CostPer1MOut: AnthropicModels[Claude3Haiku].CostPer1MOut, - CostPer1MOutCached: AnthropicModels[Claude3Haiku].CostPer1MOutCached, - ContextWindow: AnthropicModels[Claude3Haiku].ContextWindow, - DefaultMaxTokens: AnthropicModels[Claude3Haiku].DefaultMaxTokens, - }, - OpenRouterClaude37Sonnet: { - ID: OpenRouterClaude37Sonnet, - Name: "OpenRouter: Claude 3.7 Sonnet", - Provider: ProviderOpenRouter, - APIModel: "anthropic/claude-3.7-sonnet", - CostPer1MIn: AnthropicModels[Claude37Sonnet].CostPer1MIn, - CostPer1MInCached: AnthropicModels[Claude37Sonnet].CostPer1MInCached, - CostPer1MOut: AnthropicModels[Claude37Sonnet].CostPer1MOut, - CostPer1MOutCached: AnthropicModels[Claude37Sonnet].CostPer1MOutCached, - ContextWindow: AnthropicModels[Claude37Sonnet].ContextWindow, - DefaultMaxTokens: AnthropicModels[Claude37Sonnet].DefaultMaxTokens, - CanReason: AnthropicModels[Claude37Sonnet].CanReason, - }, - OpenRouterClaude35Haiku: { - ID: OpenRouterClaude35Haiku, - Name: "OpenRouter: Claude 3.5 Haiku", - Provider: ProviderOpenRouter, - APIModel: "anthropic/claude-3.5-haiku", - CostPer1MIn: AnthropicModels[Claude35Haiku].CostPer1MIn, - CostPer1MInCached: AnthropicModels[Claude35Haiku].CostPer1MInCached, - CostPer1MOut: AnthropicModels[Claude35Haiku].CostPer1MOut, - CostPer1MOutCached: AnthropicModels[Claude35Haiku].CostPer1MOutCached, - ContextWindow: AnthropicModels[Claude35Haiku].ContextWindow, - DefaultMaxTokens: AnthropicModels[Claude35Haiku].DefaultMaxTokens, - }, - OpenRouterClaude3Opus: { - ID: OpenRouterClaude3Opus, - Name: "OpenRouter: Claude 3 Opus", - Provider: ProviderOpenRouter, - APIModel: "anthropic/claude-3-opus", - CostPer1MIn: AnthropicModels[Claude3Opus].CostPer1MIn, - CostPer1MInCached: AnthropicModels[Claude3Opus].CostPer1MInCached, - CostPer1MOut: AnthropicModels[Claude3Opus].CostPer1MOut, - CostPer1MOutCached: AnthropicModels[Claude3Opus].CostPer1MOutCached, - ContextWindow: AnthropicModels[Claude3Opus].ContextWindow, - DefaultMaxTokens: AnthropicModels[Claude3Opus].DefaultMaxTokens, - }, - OpenRouterQwen235B: { - ID: OpenRouterQwen235B, - Name: "OpenRouter: Qwen3 235B A22B", - Provider: ProviderOpenRouter, - APIModel: "qwen/qwen3-235b-a22b", - CostPer1MIn: 0.1, - CostPer1MInCached: 0.1, - CostPer1MOut: 0.1, - CostPer1MOutCached: 0.1, - ContextWindow: 40960, - DefaultMaxTokens: 4096, - }, - OpenRouterQwen32B: { - ID: OpenRouterQwen32B, - Name: "OpenRouter: Qwen3 32B", - Provider: ProviderOpenRouter, - APIModel: "qwen/qwen3-32b", - CostPer1MIn: 0.1, - CostPer1MInCached: 0.1, - CostPer1MOut: 0.3, - CostPer1MOutCached: 0.3, - ContextWindow: 40960, - DefaultMaxTokens: 4096, - }, - OpenRouterQwen30B: { - ID: OpenRouterQwen30B, - Name: "OpenRouter: Qwen3 30B A3B", - Provider: ProviderOpenRouter, - APIModel: "qwen/qwen3-30b-a3b", - CostPer1MIn: 0.1, - CostPer1MInCached: 0.1, - CostPer1MOut: 0.3, - CostPer1MOutCached: 0.3, - ContextWindow: 40960, - DefaultMaxTokens: 4096, - }, - OpenRouterQwen14B: { - ID: OpenRouterQwen14B, - Name: "OpenRouter: Qwen3 14B", - Provider: ProviderOpenRouter, - APIModel: "qwen/qwen3-14b", - CostPer1MIn: 0.7, - CostPer1MInCached: 0.7, - CostPer1MOut: 0.24, - CostPer1MOutCached: 0.24, - ContextWindow: 40960, - DefaultMaxTokens: 4096, - }, - OpenRouterQwen8B: { - ID: OpenRouterQwen8B, - Name: "OpenRouter: Qwen3 8B", - Provider: ProviderOpenRouter, - APIModel: "qwen/qwen3-8b", - CostPer1MIn: 0.35, - CostPer1MInCached: 0.35, - CostPer1MOut: 0.138, - CostPer1MOutCached: 0.138, - ContextWindow: 128000, - DefaultMaxTokens: 4096, - }, -} diff --git a/internal/llm/models/vertexai.go b/internal/llm/models/vertexai.go deleted file mode 100644 index d71dfc0be..000000000 --- a/internal/llm/models/vertexai.go +++ /dev/null @@ -1,38 +0,0 @@ -package models - -const ( - ProviderVertexAI ModelProvider = "vertexai" - - // Models - VertexAIGemini25Flash ModelID = "vertexai.gemini-2.5-flash" - VertexAIGemini25 ModelID = "vertexai.gemini-2.5" -) - -var VertexAIGeminiModels = map[ModelID]Model{ - VertexAIGemini25Flash: { - ID: VertexAIGemini25Flash, - Name: "VertexAI: Gemini 2.5 Flash", - Provider: ProviderVertexAI, - APIModel: "gemini-2.5-flash-preview-04-17", - CostPer1MIn: GeminiModels[Gemini25Flash].CostPer1MIn, - CostPer1MInCached: GeminiModels[Gemini25Flash].CostPer1MInCached, - CostPer1MOut: GeminiModels[Gemini25Flash].CostPer1MOut, - CostPer1MOutCached: GeminiModels[Gemini25Flash].CostPer1MOutCached, - ContextWindow: GeminiModels[Gemini25Flash].ContextWindow, - DefaultMaxTokens: GeminiModels[Gemini25Flash].DefaultMaxTokens, - SupportsAttachments: true, - }, - VertexAIGemini25: { - ID: VertexAIGemini25, - Name: "VertexAI: Gemini 2.5 Pro", - Provider: ProviderVertexAI, - APIModel: "gemini-2.5-pro-preview-03-25", - CostPer1MIn: GeminiModels[Gemini25].CostPer1MIn, - CostPer1MInCached: GeminiModels[Gemini25].CostPer1MInCached, - CostPer1MOut: GeminiModels[Gemini25].CostPer1MOut, - CostPer1MOutCached: GeminiModels[Gemini25].CostPer1MOutCached, - ContextWindow: GeminiModels[Gemini25].ContextWindow, - DefaultMaxTokens: GeminiModels[Gemini25].DefaultMaxTokens, - SupportsAttachments: true, - }, -} diff --git a/internal/llm/models/xai.go b/internal/llm/models/xai.go deleted file mode 100644 index 00caf3b89..000000000 --- a/internal/llm/models/xai.go +++ /dev/null @@ -1,61 +0,0 @@ -package models - -const ( - ProviderXAI ModelProvider = "xai" - - XAIGrok3Beta ModelID = "grok-3-beta" - XAIGrok3MiniBeta ModelID = "grok-3-mini-beta" - XAIGrok3FastBeta ModelID = "grok-3-fast-beta" - XAiGrok3MiniFastBeta ModelID = "grok-3-mini-fast-beta" -) - -var XAIModels = map[ModelID]Model{ - XAIGrok3Beta: { - ID: XAIGrok3Beta, - Name: "Grok3 Beta", - Provider: ProviderXAI, - APIModel: "grok-3-beta", - CostPer1MIn: 3.0, - CostPer1MInCached: 0, - CostPer1MOut: 15, - CostPer1MOutCached: 0, - ContextWindow: 131_072, - DefaultMaxTokens: 20_000, - }, - XAIGrok3MiniBeta: { - ID: XAIGrok3MiniBeta, - Name: "Grok3 Mini Beta", - Provider: ProviderXAI, - APIModel: "grok-3-mini-beta", - CostPer1MIn: 0.3, - CostPer1MInCached: 0, - CostPer1MOut: 0.5, - CostPer1MOutCached: 0, - ContextWindow: 131_072, - DefaultMaxTokens: 20_000, - }, - XAIGrok3FastBeta: { - ID: XAIGrok3FastBeta, - Name: "Grok3 Fast Beta", - Provider: ProviderXAI, - APIModel: "grok-3-fast-beta", - CostPer1MIn: 5, - CostPer1MInCached: 0, - CostPer1MOut: 25, - CostPer1MOutCached: 0, - ContextWindow: 131_072, - DefaultMaxTokens: 20_000, - }, - XAiGrok3MiniFastBeta: { - ID: XAiGrok3MiniFastBeta, - Name: "Grok3 Mini Fast Beta", - Provider: ProviderXAI, - APIModel: "grok-3-mini-fast-beta", - CostPer1MIn: 0.6, - CostPer1MInCached: 0, - CostPer1MOut: 4.0, - CostPer1MOutCached: 0, - ContextWindow: 131_072, - DefaultMaxTokens: 20_000, - }, -} diff --git a/internal/llm/prompt/primary.go b/internal/llm/prompt/primary.go deleted file mode 100644 index 8efacf275..000000000 --- a/internal/llm/prompt/primary.go +++ /dev/null @@ -1,222 +0,0 @@ -package prompt - -import ( - "context" - "fmt" - "os" - "path/filepath" - "runtime" - "time" - - "github.com/sst/opencode/internal/config" - "github.com/sst/opencode/internal/llm/models" - "github.com/sst/opencode/internal/llm/tools" -) - -func PrimaryPrompt(provider models.ModelProvider) string { - basePrompt := baseAnthropicPrimaryPrompt - switch provider { - case models.ProviderOpenAI: - basePrompt = baseOpenAIPrimaryPrompt - } - envInfo := getEnvironmentInfo() - - return fmt.Sprintf("%s\n\n%s\n%s", basePrompt, envInfo, lspInformation()) -} - -const baseOpenAIPrimaryPrompt = ` -You are operating as and within the OpenCode CLI, a terminal-based agentic coding assistant built by OpenAI. It wraps OpenAI models to enable natural language interaction with a local codebase. You are expected to be precise, safe, and helpful. - -You can: -- Receive user prompts, project context, and files. -- Stream responses and emit function calls (e.g., shell commands, code edits). -- Apply patches, run commands, and manage user approvals based on policy. -- Work inside a sandboxed, git-backed workspace with rollback support. -- Log telemetry so sessions can be replayed or inspected later. -- More details on your functionality are available at "opencode --help" - - -You are an agent - please keep going until the user's query is completely resolved, before ending your turn and yielding back to the user. Only terminate your turn when you are sure that the problem is solved. If you are not sure about file content or codebase structure pertaining to the user's request, use your tools to read files and gather the relevant information: do NOT guess or make up an answer. - -Please resolve the user's task by editing and testing the code files in your current code execution session. You are a deployed coding agent. Your session allows for you to modify and run code. The repo(s) are already cloned in your working directory, and you must fully solve the problem for your answer to be considered correct. - -You MUST adhere to the following criteria when executing the task: -- Working on the repo(s) in the current environment is allowed, even if they are proprietary. -- Analyzing code for vulnerabilities is allowed. -- Showing user code and tool call details is allowed. -- User instructions may overwrite the *CODING GUIDELINES* section in this developer message. -- If completing the user's task requires writing or modifying files: - - Your code and final answer should follow these *CODING GUIDELINES*: - - Fix the problem at the root cause rather than applying surface-level patches, when possible. - - Avoid unneeded complexity in your solution. - - Ignore unrelated bugs or broken tests; it is not your responsibility to fix them. - - Update documentation as necessary. - - Keep changes consistent with the style of the existing codebase. Changes should be minimal and focused on the task. - - Use "git log" and "git blame" to search the history of the codebase if additional context is required; internet access is disabled. - - NEVER add copyright or license headers unless specifically requested. - - You do not need to "git commit" your changes; this will be done automatically for you. - - Once you finish coding, you must - - Check "git status" to sanity check your changes; revert any scratch files or changes. - - Remove all inline comments you added as much as possible, even if they look normal. Check using "git diff". Inline comments must be generally avoided, unless active maintainers of the repo, after long careful study of the code and the issue, will still misinterpret the code without the comments. - - Check if you accidentally add copyright or license headers. If so, remove them. - - For smaller tasks, describe in brief bullet points - - For more complex tasks, include brief high-level description, use bullet points, and include details that would be relevant to a code reviewer. -- If completing the user's task DOES NOT require writing or modifying files (e.g., the user asks a question about the code base): - - Respond in a friendly tune as a remote teammate, who is knowledgeable, capable and eager to help with coding. -- When your task involves writing or modifying files: - - Do NOT tell the user to "save the file" or "copy the code into a file" if you already created or modified the file using "apply_patch". Instead, reference the file as already saved. - - Do NOT show the full contents of large files you have already written, unless the user explicitly asks for them. -- When doing things with paths, always use use the full path, if the working directory is /abc/xyz and you want to edit the file abc.go in the working dir refer to it as /abc/xyz/abc.go. -- If you send a path not including the working dir, the working dir will be prepended to it. -- Remember the user does not see the full output of tools -` - -const baseAnthropicPrimaryPrompt = `You are OpenCode, an interactive CLI tool that helps users with software engineering tasks. Use the instructions below and the tools available to you to assist the user. - -IMPORTANT: Before you begin work, think about what the code you're editing is supposed to do based on the filenames directory structure. - -# Memory -If the current working directory contains a file called OpenCode.md, it will be automatically added to your context. This file serves multiple purposes: -1. Storing frequently used bash commands (build, test, lint, etc.) so you can use them without searching each time -2. Recording the user's code style preferences (naming conventions, preferred libraries, etc.) -3. Maintaining useful information about the codebase structure and organization - -When you spend time searching for commands to typecheck, lint, build, or test, you should ask the user if it's okay to add those commands to CONTEXT.md. Similarly, when learning about code style preferences or important codebase information, ask if it's okay to add that to CONTEXT.md so you can remember it for next time. - -# Tone and style -You should be concise, direct, and to the point. When you run a non-trivial bash command, you should explain what the command does and why you are running it, to make sure the user understands what you are doing (this is especially important when you are running a command that will make changes to the user's system). -Remember that your output will be displayed on a command line interface. Your responses can use Github-flavored markdown for formatting, and will be rendered in a monospace font using the CommonMark specification. -Output text to communicate with the user; all text you output outside of tool use is displayed to the user. Only use tools to complete tasks. Never use tools like Bash or code comments as means to communicate with the user during the session. -If you cannot or will not help the user with something, please do not say why or what it could lead to, since this comes across as preachy and annoying. Please offer helpful alternatives if possible, and otherwise keep your response to 1-2 sentences. -IMPORTANT: You should minimize output tokens as much as possible while maintaining helpfulness, quality, and accuracy. Only address the specific query or task at hand, avoiding tangential information unless absolutely critical for completing the request. If you can answer in 1-3 sentences or a short paragraph, please do. -IMPORTANT: You should NOT answer with unnecessary preamble or postamble (such as explaining your code or summarizing your action), unless the user asks you to. -IMPORTANT: Keep your responses short, since they will be displayed on a command line interface. You MUST answer concisely with fewer than 4 lines (not including tool use or code generation), unless user asks for detail. Answer the user's question directly, without elaboration, explanation, or details. One word answers are best. Avoid introductions, conclusions, and explanations. You MUST avoid text before/after your response, such as "The answer is <answer>.", "Here is the content of the file..." or "Based on the information provided, the answer is..." or "Here is what I will do next...". Here are some examples to demonstrate appropriate verbosity: -<example> -user: 2 + 2 -assistant: 4 -</example> - -<example> -user: what is 2+2? -assistant: 4 -</example> - -<example> -user: is 11 a prime number? -assistant: yes -</example> - -<example> -user: what command should I run to list files in the current directory? -assistant: ls -</example> - -<example> -user: what command should I run to watch files in the current directory? -assistant: [use the ls tool to list the files in the current directory, then read docs/commands in the relevant file to find out how to watch files] -npm run dev -</example> - -<example> -user: How many golf balls fit inside a jetta? -assistant: 150000 -</example> - -<example> -user: what files are in the directory src/? -assistant: [runs ls and sees foo.c, bar.c, baz.c] -user: which file contains the implementation of foo? -assistant: src/foo.c -</example> - -<example> -user: write tests for new feature -assistant: [uses grep and glob search tools to find where similar tests are defined, uses concurrent read file tool use blocks in one tool call to read relevant files at the same time, uses edit/patch file tool to write new tests] -</example> - -# Proactiveness -You are allowed to be proactive, but only when the user asks you to do something. You should strive to strike a balance between: -1. Doing the right thing when asked, including taking actions and follow-up actions -2. Not surprising the user with actions you take without asking -For example, if the user asks you how to approach something, you should do your best to answer their question first, and not immediately jump into taking actions. -3. Do not add additional code explanation summary unless requested by the user. After working on a file, just stop, rather than providing an explanation of what you did. - -# Following conventions -When making changes to files, first understand the file's code conventions. Mimic code style, use existing libraries and utilities, and follow existing patterns. -- NEVER assume that a given library is available, even if it is well known. Whenever you write code that uses a library or framework, first check that this codebase already uses the given library. For example, you might look at neighboring files, or check the package.json (or cargo.toml, and so on depending on the language). -- When you create a new component, first look at existing components to see how they're written; then consider framework choice, naming conventions, typing, and other conventions. -- When you edit a piece of code, first look at the code's surrounding context (especially its imports) to understand the code's choice of frameworks and libraries. Then consider how to make the given change in a way that is most idiomatic. -- Always follow security best practices. Never introduce code that exposes or logs secrets and keys. Never commit secrets or keys to the repository. - -# Code style -- Do not add comments to the code you write, unless the user asks you to, or the code is complex and requires additional context. - -# Doing tasks -The user will primarily request you perform software engineering tasks. This includes solving bugs, adding new functionality, refactoring code, explaining code, and more. For these tasks the following steps are recommended: -1. Use the available search tools to understand the codebase and the user's query. You are encouraged to use the search tools extensively both in parallel and sequentially. -2. Implement the solution using all tools available to you -3. Verify the solution if possible with tests. NEVER assume specific test framework or test script. Check the README or search codebase to determine the testing approach. -4. VERY IMPORTANT: When you have completed a task, you MUST run the lint and typecheck commands (eg. npm run lint, npm run typecheck, ruff, etc.) if they were provided to you to ensure your code is correct. If you are unable to find the correct command, ask the user for the command to run and if they supply it, proactively suggest writing it to opencode.md so that you will know to run it next time. - -NEVER commit changes unless the user explicitly asks you to. It is VERY IMPORTANT to only commit when explicitly asked, otherwise the user will feel that you are being too proactive. - -# Tool usage policy -- When doing file search, prefer to use the Agent tool in order to reduce context usage. -- If you intend to call multiple tools and there are no dependencies between the calls, make all of the independent calls in the same function_calls block. -- IMPORTANT: The user does not see the full output of the tool responses, so if you need the output of the tool for the response make sure to summarize it for the user. - -You MUST answer concisely with fewer than 4 lines of text (not including tool use or code generation), unless user asks for detail.` - -func getEnvironmentInfo() string { - cwd := config.WorkingDirectory() - isGit := isGitRepo(cwd) - platform := runtime.GOOS - date := time.Now().Format("1/2/2006") - ls := tools.NewLsTool() - r, _ := ls.Run(context.Background(), tools.ToolCall{ - Input: `{"path":"."}`, - }) - return fmt.Sprintf(`Here is useful information about the environment you are running in: -<env> -Working directory: %s -Is directory a git repo: %s -Platform: %s -Today's date: %s -</env> -<project> -%s -</project> - `, cwd, boolToYesNo(isGit), platform, date, r.Content) -} - -func isGitRepo(dir string) bool { - _, err := os.Stat(filepath.Join(dir, ".git")) - return err == nil -} - -func lspInformation() string { - cfg := config.Get() - hasLSP := false - for _, v := range cfg.LSP { - if !v.Disabled { - hasLSP = true - break - } - } - if !hasLSP { - return "" - } - return `# LSP Information -Tools that support it will also include useful diagnostics such as linting and typechecking. -- These diagnostics will be automatically enabled when you run the tool, and will be displayed in the output at the bottom within the <file_diagnostics></file_diagnostics> and <project_diagnostics></project_diagnostics> tags. -- Take necessary actions to fix the issues. -- You should ignore diagnostics of files that you did not change or are not related or caused by your changes unless the user explicitly asks you to fix them. -` -} - -func boolToYesNo(b bool) string { - if b { - return "Yes" - } - return "No" -} diff --git a/internal/llm/prompt/prompt.go b/internal/llm/prompt/prompt.go deleted file mode 100644 index 003da344b..000000000 --- a/internal/llm/prompt/prompt.go +++ /dev/null @@ -1,135 +0,0 @@ -package prompt - -import ( - "fmt" - "os" - "path/filepath" - "strings" - "sync" - - "github.com/sst/opencode/internal/config" - "github.com/sst/opencode/internal/llm/models" - "log/slog" -) - -func GetAgentPrompt(agentName config.AgentName, provider models.ModelProvider) string { - basePrompt := "" - switch agentName { - case config.AgentPrimary: - basePrompt = PrimaryPrompt(provider) - case config.AgentTitle: - basePrompt = TitlePrompt(provider) - case config.AgentTask: - basePrompt = TaskPrompt(provider) - default: - basePrompt = "You are a helpful assistant" - } - - if agentName == config.AgentPrimary || agentName == config.AgentTask { - // Add context from project-specific instruction files if they exist - contextContent := getContextFromPaths() - slog.Debug("Context content", "Context", contextContent) - if contextContent != "" { - return fmt.Sprintf("%s\n\n# Project-Specific Context\n Make sure to follow the instructions in the context below\n%s", basePrompt, contextContent) - } - } - return basePrompt -} - -var ( - onceContext sync.Once - contextContent string -) - -func getContextFromPaths() string { - onceContext.Do(func() { - var ( - cfg = config.Get() - workDir = cfg.WorkingDir - contextPaths = cfg.ContextPaths - ) - - contextContent = processContextPaths(workDir, contextPaths) - }) - - return contextContent -} - -func processContextPaths(workDir string, paths []string) string { - var ( - wg sync.WaitGroup - resultCh = make(chan string) - ) - - // Track processed files to avoid duplicates - processedFiles := make(map[string]bool) - var processedMutex sync.Mutex - - for _, path := range paths { - wg.Add(1) - go func(p string) { - defer wg.Done() - - if strings.HasSuffix(p, "/") { - filepath.WalkDir(filepath.Join(workDir, p), func(path string, d os.DirEntry, err error) error { - if err != nil { - return err - } - if !d.IsDir() { - // Check if we've already processed this file (case-insensitive) - processedMutex.Lock() - lowerPath := strings.ToLower(path) - if !processedFiles[lowerPath] { - processedFiles[lowerPath] = true - processedMutex.Unlock() - - if result := processFile(path); result != "" { - resultCh <- result - } - } else { - processedMutex.Unlock() - } - } - return nil - }) - } else { - fullPath := filepath.Join(workDir, p) - - // Check if we've already processed this file (case-insensitive) - processedMutex.Lock() - lowerPath := strings.ToLower(fullPath) - if !processedFiles[lowerPath] { - processedFiles[lowerPath] = true - processedMutex.Unlock() - - result := processFile(fullPath) - if result != "" { - resultCh <- result - } - } else { - processedMutex.Unlock() - } - } - }(path) - } - - go func() { - wg.Wait() - close(resultCh) - }() - - results := make([]string, 0) - for result := range resultCh { - results = append(results, result) - } - - return strings.Join(results, "\n") -} - -func processFile(filePath string) string { - content, err := os.ReadFile(filePath) - if err != nil { - return "" - } - return "# From:" + filePath + "\n" + string(content) -} diff --git a/internal/llm/prompt/prompt_test.go b/internal/llm/prompt/prompt_test.go deleted file mode 100644 index 3e21638f0..000000000 --- a/internal/llm/prompt/prompt_test.go +++ /dev/null @@ -1,61 +0,0 @@ -package prompt - -import ( - "fmt" - "log/slog" - "os" - "path/filepath" - "testing" - - "github.com/sst/opencode/internal/config" - "github.com/stretchr/testify/assert" - "github.com/stretchr/testify/require" -) - -func TestGetContextFromPaths(t *testing.T) { - t.Parallel() - - lvl := new(slog.LevelVar) - lvl.Set(slog.LevelDebug) - - tmpDir := t.TempDir() - _, err := config.Load(tmpDir, false, lvl) - if err != nil { - t.Fatalf("Failed to load config: %v", err) - } - cfg := config.Get() - cfg.WorkingDir = tmpDir - cfg.ContextPaths = []string{ - "file.txt", - "directory/", - } - testFiles := []string{ - "file.txt", - "directory/file_a.txt", - "directory/file_b.txt", - "directory/file_c.txt", - } - - createTestFiles(t, tmpDir, testFiles) - - context := getContextFromPaths() - expectedContext := fmt.Sprintf("# From:%s/file.txt\nfile.txt: test content\n# From:%s/directory/file_a.txt\ndirectory/file_a.txt: test content\n# From:%s/directory/file_b.txt\ndirectory/file_b.txt: test content\n# From:%s/directory/file_c.txt\ndirectory/file_c.txt: test content", tmpDir, tmpDir, tmpDir, tmpDir) - assert.Equal(t, expectedContext, context) -} - -func createTestFiles(t *testing.T, tmpDir string, testFiles []string) { - t.Helper() - for _, path := range testFiles { - fullPath := filepath.Join(tmpDir, path) - if path[len(path)-1] == '/' { - err := os.MkdirAll(fullPath, 0755) - require.NoError(t, err) - } else { - dir := filepath.Dir(fullPath) - err := os.MkdirAll(dir, 0755) - require.NoError(t, err) - err = os.WriteFile(fullPath, []byte(path+": test content"), 0644) - require.NoError(t, err) - } - } -} diff --git a/internal/llm/prompt/task.go b/internal/llm/prompt/task.go deleted file mode 100644 index 78ffbfd2e..000000000 --- a/internal/llm/prompt/task.go +++ /dev/null @@ -1,17 +0,0 @@ -package prompt - -import ( - "fmt" - - "github.com/sst/opencode/internal/llm/models" -) - -func TaskPrompt(_ models.ModelProvider) string { - agentPrompt := `You are an agent for OpenCode. Given the user's prompt, you should use the tools available to you to answer the user's question. -Notes: -1. IMPORTANT: You should be concise, direct, and to the point, since your responses will be displayed on a command line interface. Answer the user's question directly, without elaboration, explanation, or details. One word answers are best. Avoid introductions, conclusions, and explanations. You MUST avoid text before/after your response, such as "The answer is <answer>.", "Here is the content of the file..." or "Based on the information provided, the answer is..." or "Here is what I will do next...". -2. When relevant, share file names and code snippets relevant to the query -3. Any file paths you return in your final response MUST be absolute. DO NOT use relative paths.` - - return fmt.Sprintf("%s\n%s\n", agentPrompt, getEnvironmentInfo()) -} diff --git a/internal/llm/prompt/title.go b/internal/llm/prompt/title.go deleted file mode 100644 index 9daa5ee0f..000000000 --- a/internal/llm/prompt/title.go +++ /dev/null @@ -1,13 +0,0 @@ -package prompt - -import "github.com/sst/opencode/internal/llm/models" - -func TitlePrompt(_ models.ModelProvider) string { - return `you will generate a short title based on the first message a user begins a conversation with -- ensure it is not more than 50 characters long -- the title should be a summary of the user's message -- it should be one line long -- do not use quotes or colons -- the entire text you return will be used as the title -- never return anything that is more than one sentence (one line) long` -} diff --git a/internal/llm/provider/anthropic.go b/internal/llm/provider/anthropic.go deleted file mode 100644 index 24bcb48fb..000000000 --- a/internal/llm/provider/anthropic.go +++ /dev/null @@ -1,472 +0,0 @@ -package provider - -import ( - "context" - "encoding/json" - "errors" - "fmt" - "io" - "strings" - "time" - - "github.com/anthropics/anthropic-sdk-go" - "github.com/anthropics/anthropic-sdk-go/bedrock" - "github.com/anthropics/anthropic-sdk-go/option" - "github.com/sst/opencode/internal/config" - "github.com/sst/opencode/internal/llm/models" - "github.com/sst/opencode/internal/llm/tools" - "github.com/sst/opencode/internal/message" - "github.com/sst/opencode/internal/status" - "log/slog" -) - -type anthropicOptions struct { - useBedrock bool - disableCache bool - shouldThink func(userMessage string) bool -} - -type AnthropicOption func(*anthropicOptions) - -type anthropicClient struct { - providerOptions providerClientOptions - options anthropicOptions - client anthropic.Client -} - -type AnthropicClient ProviderClient - -func newAnthropicClient(opts providerClientOptions) AnthropicClient { - anthropicOpts := anthropicOptions{} - for _, o := range opts.anthropicOptions { - o(&anthropicOpts) - } - - anthropicClientOptions := []option.RequestOption{} - if opts.apiKey != "" { - anthropicClientOptions = append(anthropicClientOptions, option.WithAPIKey(opts.apiKey)) - } - if anthropicOpts.useBedrock { - anthropicClientOptions = append(anthropicClientOptions, bedrock.WithLoadDefaultConfig(context.Background())) - } - - client := anthropic.NewClient(anthropicClientOptions...) - return &anthropicClient{ - providerOptions: opts, - options: anthropicOpts, - client: client, - } -} - -func (a *anthropicClient) convertMessages(messages []message.Message) (anthropicMessages []anthropic.MessageParam) { - for i, msg := range messages { - cache := false - if i > len(messages)-3 { - cache = true - } - switch msg.Role { - case message.User: - content := anthropic.NewTextBlock(msg.Content().String()) - if cache && !a.options.disableCache { - content.OfRequestTextBlock.CacheControl = anthropic.CacheControlEphemeralParam{ - Type: "ephemeral", - } - } - var contentBlocks []anthropic.ContentBlockParamUnion - contentBlocks = append(contentBlocks, content) - for _, binaryContent := range msg.BinaryContent() { - base64Image := binaryContent.String(models.ProviderAnthropic) - imageBlock := anthropic.NewImageBlockBase64(binaryContent.MIMEType, base64Image) - contentBlocks = append(contentBlocks, imageBlock) - } - anthropicMessages = append(anthropicMessages, anthropic.NewUserMessage(contentBlocks...)) - - case message.Assistant: - blocks := []anthropic.ContentBlockParamUnion{} - - if msg.Content() != nil { - content := msg.Content().String() - if strings.TrimSpace(content) != "" { - block := anthropic.NewTextBlock(content) - if cache && !a.options.disableCache { - block.OfRequestTextBlock.CacheControl = anthropic.CacheControlEphemeralParam{ - Type: "ephemeral", - } - } - blocks = append(blocks, block) - } - } - - for _, toolCall := range msg.ToolCalls() { - var inputMap map[string]any - err := json.Unmarshal([]byte(toolCall.Input), &inputMap) - if err != nil { - continue - } - blocks = append(blocks, anthropic.ContentBlockParamOfRequestToolUseBlock(toolCall.ID, inputMap, toolCall.Name)) - } - - if len(blocks) == 0 { - slog.Warn("There is a message without content, investigate, this should not happen") - continue - } - anthropicMessages = append(anthropicMessages, anthropic.NewAssistantMessage(blocks...)) - - case message.Tool: - results := make([]anthropic.ContentBlockParamUnion, len(msg.ToolResults())) - for i, toolResult := range msg.ToolResults() { - results[i] = anthropic.NewToolResultBlock(toolResult.ToolCallID, toolResult.Content, toolResult.IsError) - } - anthropicMessages = append(anthropicMessages, anthropic.NewUserMessage(results...)) - } - } - return -} - -func (a *anthropicClient) convertTools(tools []tools.BaseTool) []anthropic.ToolUnionParam { - anthropicTools := make([]anthropic.ToolUnionParam, len(tools)) - - for i, tool := range tools { - info := tool.Info() - toolParam := anthropic.ToolParam{ - Name: info.Name, - Description: anthropic.String(info.Description), - InputSchema: anthropic.ToolInputSchemaParam{ - Properties: info.Parameters, - // TODO: figure out how we can tell claude the required fields? - }, - } - - if i == len(tools)-1 && !a.options.disableCache { - toolParam.CacheControl = anthropic.CacheControlEphemeralParam{ - Type: "ephemeral", - } - } - - anthropicTools[i] = anthropic.ToolUnionParam{OfTool: &toolParam} - } - - return anthropicTools -} - -func (a *anthropicClient) finishReason(reason string) message.FinishReason { - switch reason { - case "end_turn": - return message.FinishReasonEndTurn - case "max_tokens": - return message.FinishReasonMaxTokens - case "tool_use": - return message.FinishReasonToolUse - case "stop_sequence": - return message.FinishReasonEndTurn - default: - return message.FinishReasonUnknown - } -} - -func (a *anthropicClient) preparedMessages(messages []anthropic.MessageParam, tools []anthropic.ToolUnionParam) anthropic.MessageNewParams { - var thinkingParam anthropic.ThinkingConfigParamUnion - lastMessage := messages[len(messages)-1] - isUser := lastMessage.Role == anthropic.MessageParamRoleUser - messageContent := "" - temperature := anthropic.Float(0) - if isUser { - for _, m := range lastMessage.Content { - if m.OfRequestTextBlock != nil && m.OfRequestTextBlock.Text != "" { - messageContent = m.OfRequestTextBlock.Text - } - } - if messageContent != "" && a.options.shouldThink != nil && a.options.shouldThink(messageContent) { - thinkingParam = anthropic.ThinkingConfigParamUnion{ - OfThinkingConfigEnabled: &anthropic.ThinkingConfigEnabledParam{ - BudgetTokens: int64(float64(a.providerOptions.maxTokens) * 0.8), - Type: "enabled", - }, - } - temperature = anthropic.Float(1) - } - } - - return anthropic.MessageNewParams{ - Model: anthropic.Model(a.providerOptions.model.APIModel), - MaxTokens: a.providerOptions.maxTokens, - Temperature: temperature, - Messages: messages, - Tools: tools, - Thinking: thinkingParam, - System: []anthropic.TextBlockParam{ - { - Text: a.providerOptions.systemMessage, - CacheControl: anthropic.CacheControlEphemeralParam{ - Type: "ephemeral", - }, - }, - }, - } -} - -func (a *anthropicClient) send(ctx context.Context, messages []message.Message, tools []tools.BaseTool) (resposne *ProviderResponse, err error) { - preparedMessages := a.preparedMessages(a.convertMessages(messages), a.convertTools(tools)) - cfg := config.Get() - if cfg.Debug { - jsonData, _ := json.Marshal(preparedMessages) - slog.Debug("Prepared messages", "messages", string(jsonData)) - } - - attempts := 0 - for { - attempts++ - anthropicResponse, err := a.client.Messages.New( - ctx, - preparedMessages, - ) - // If there is an error we are going to see if we can retry the call - if err != nil { - slog.Error("Error in Anthropic API call", "error", err) - retry, after, retryErr := a.shouldRetry(attempts, err) - duration := time.Duration(after) * time.Millisecond - if retryErr != nil { - return nil, retryErr - } - if retry { - status.Warn(fmt.Sprintf("Retrying due to rate limit... attempt %d of %d", attempts, maxRetries), status.WithDuration(duration)) - select { - case <-ctx.Done(): - return nil, ctx.Err() - case <-time.After(duration): - continue - } - } - return nil, retryErr - } - - content := "" - for _, block := range anthropicResponse.Content { - if text, ok := block.AsAny().(anthropic.TextBlock); ok { - content += text.Text - } - } - - return &ProviderResponse{ - Content: content, - ToolCalls: a.toolCalls(*anthropicResponse), - Usage: a.usage(*anthropicResponse), - }, nil - } -} - -func (a *anthropicClient) stream(ctx context.Context, messages []message.Message, tools []tools.BaseTool) <-chan ProviderEvent { - preparedMessages := a.preparedMessages(a.convertMessages(messages), a.convertTools(tools)) - cfg := config.Get() - if cfg.Debug { - jsonData, _ := json.Marshal(preparedMessages) - slog.Debug("Prepared messages", "messages", string(jsonData)) - } - attempts := 0 - eventChan := make(chan ProviderEvent) - go func() { - for { - attempts++ - anthropicStream := a.client.Messages.NewStreaming( - ctx, - preparedMessages, - ) - accumulatedMessage := anthropic.Message{} - - currentToolCallID := "" - for anthropicStream.Next() { - event := anthropicStream.Current() - err := accumulatedMessage.Accumulate(event) - if err != nil { - slog.Warn("Error accumulating message", "error", err) - continue - } - - switch event := event.AsAny().(type) { - case anthropic.ContentBlockStartEvent: - if event.ContentBlock.Type == "text" { - eventChan <- ProviderEvent{Type: EventContentStart} - } else if event.ContentBlock.Type == "tool_use" { - currentToolCallID = event.ContentBlock.ID - eventChan <- ProviderEvent{ - Type: EventToolUseStart, - ToolCall: &message.ToolCall{ - ID: event.ContentBlock.ID, - Name: event.ContentBlock.Name, - Finished: false, - }, - } - } - - case anthropic.ContentBlockDeltaEvent: - if event.Delta.Type == "thinking_delta" && event.Delta.Thinking != "" { - eventChan <- ProviderEvent{ - Type: EventThinkingDelta, - Thinking: event.Delta.Thinking, - } - } else if event.Delta.Type == "text_delta" && event.Delta.Text != "" { - eventChan <- ProviderEvent{ - Type: EventContentDelta, - Content: event.Delta.Text, - } - } else if event.Delta.Type == "input_json_delta" { - if currentToolCallID != "" { - eventChan <- ProviderEvent{ - Type: EventToolUseDelta, - ToolCall: &message.ToolCall{ - ID: currentToolCallID, - Finished: false, - Input: event.Delta.JSON.PartialJSON.Raw(), - }, - } - } - } - case anthropic.ContentBlockStopEvent: - if currentToolCallID != "" { - eventChan <- ProviderEvent{ - Type: EventToolUseStop, - ToolCall: &message.ToolCall{ - ID: currentToolCallID, - }, - } - currentToolCallID = "" - } else { - eventChan <- ProviderEvent{Type: EventContentStop} - } - - case anthropic.MessageStopEvent: - content := "" - for _, block := range accumulatedMessage.Content { - if text, ok := block.AsAny().(anthropic.TextBlock); ok { - content += text.Text - } - } - - eventChan <- ProviderEvent{ - Type: EventComplete, - Response: &ProviderResponse{ - Content: content, - ToolCalls: a.toolCalls(accumulatedMessage), - Usage: a.usage(accumulatedMessage), - FinishReason: a.finishReason(string(accumulatedMessage.StopReason)), - }, - } - } - } - - err := anthropicStream.Err() - if err == nil || errors.Is(err, io.EOF) { - close(eventChan) - return - } - // If there is an error we are going to see if we can retry the call - retry, after, retryErr := a.shouldRetry(attempts, err) - duration := time.Duration(after) * time.Millisecond - if retryErr != nil { - eventChan <- ProviderEvent{Type: EventError, Error: retryErr} - close(eventChan) - return - } - if retry { - status.Warn(fmt.Sprintf("Retrying due to rate limit... attempt %d of %d", attempts, maxRetries), status.WithDuration(duration)) - select { - case <-ctx.Done(): - // context cancelled - if ctx.Err() != nil { - eventChan <- ProviderEvent{Type: EventError, Error: ctx.Err()} - } - close(eventChan) - return - case <-time.After(duration): - continue - } - } - if ctx.Err() != nil { - eventChan <- ProviderEvent{Type: EventError, Error: ctx.Err()} - } - - close(eventChan) - return - } - }() - return eventChan -} - -func (a *anthropicClient) shouldRetry(attempts int, err error) (bool, int64, error) { - var apierr *anthropic.Error - if !errors.As(err, &apierr) { - return false, 0, err - } - - if apierr.StatusCode != 429 && apierr.StatusCode != 529 { - return false, 0, err - } - - if attempts > maxRetries { - return false, 0, fmt.Errorf("maximum retry attempts reached for rate limit: %d retries", maxRetries) - } - - retryMs := 0 - retryAfterValues := apierr.Response.Header.Values("Retry-After") - - backoffMs := 2000 * (1 << (attempts - 1)) - jitterMs := int(float64(backoffMs) * 0.2) - retryMs = backoffMs + jitterMs - if len(retryAfterValues) > 0 { - if _, err := fmt.Sscanf(retryAfterValues[0], "%d", &retryMs); err == nil { - retryMs = retryMs * 1000 - } - } - return true, int64(retryMs), nil -} - -func (a *anthropicClient) toolCalls(msg anthropic.Message) []message.ToolCall { - var toolCalls []message.ToolCall - - for _, block := range msg.Content { - switch variant := block.AsAny().(type) { - case anthropic.ToolUseBlock: - toolCall := message.ToolCall{ - ID: variant.ID, - Name: variant.Name, - Input: string(variant.Input), - Type: string(variant.Type), - Finished: true, - } - toolCalls = append(toolCalls, toolCall) - } - } - - return toolCalls -} - -func (a *anthropicClient) usage(msg anthropic.Message) TokenUsage { - return TokenUsage{ - InputTokens: msg.Usage.InputTokens, - OutputTokens: msg.Usage.OutputTokens, - CacheCreationTokens: msg.Usage.CacheCreationInputTokens, - CacheReadTokens: msg.Usage.CacheReadInputTokens, - } -} - -func WithAnthropicBedrock(useBedrock bool) AnthropicOption { - return func(options *anthropicOptions) { - options.useBedrock = useBedrock - } -} - -func WithAnthropicDisableCache() AnthropicOption { - return func(options *anthropicOptions) { - options.disableCache = true - } -} - -func DefaultShouldThinkFn(s string) bool { - return strings.Contains(strings.ToLower(s), "think") -} - -func WithAnthropicShouldThinkFn(fn func(string) bool) AnthropicOption { - return func(options *anthropicOptions) { - options.shouldThink = fn - } -} diff --git a/internal/llm/provider/azure.go b/internal/llm/provider/azure.go deleted file mode 100644 index 6368a181c..000000000 --- a/internal/llm/provider/azure.go +++ /dev/null @@ -1,47 +0,0 @@ -package provider - -import ( - "os" - - "github.com/Azure/azure-sdk-for-go/sdk/azidentity" - "github.com/openai/openai-go" - "github.com/openai/openai-go/azure" - "github.com/openai/openai-go/option" -) - -type azureClient struct { - *openaiClient -} - -type AzureClient ProviderClient - -func newAzureClient(opts providerClientOptions) AzureClient { - - endpoint := os.Getenv("AZURE_OPENAI_ENDPOINT") // ex: https://foo.openai.azure.com - apiVersion := os.Getenv("AZURE_OPENAI_API_VERSION") // ex: 2025-04-01-preview - - if endpoint == "" || apiVersion == "" { - return &azureClient{openaiClient: newOpenAIClient(opts).(*openaiClient)} - } - - reqOpts := []option.RequestOption{ - azure.WithEndpoint(endpoint, apiVersion), - } - - if opts.apiKey != "" || os.Getenv("AZURE_OPENAI_API_KEY") != "" { - key := opts.apiKey - if key == "" { - key = os.Getenv("AZURE_OPENAI_API_KEY") - } - reqOpts = append(reqOpts, azure.WithAPIKey(key)) - } else if cred, err := azidentity.NewDefaultAzureCredential(nil); err == nil { - reqOpts = append(reqOpts, azure.WithTokenCredential(cred)) - } - - base := &openaiClient{ - providerOptions: opts, - client: openai.NewClient(reqOpts...), - } - - return &azureClient{openaiClient: base} -} diff --git a/internal/llm/provider/bedrock.go b/internal/llm/provider/bedrock.go deleted file mode 100644 index 4622ae5ff..000000000 --- a/internal/llm/provider/bedrock.go +++ /dev/null @@ -1,100 +0,0 @@ -package provider - -import ( - "context" - "errors" - "fmt" - "os" - "strings" - - "github.com/sst/opencode/internal/llm/tools" - "github.com/sst/opencode/internal/message" -) - -type bedrockOptions struct { - // Bedrock specific options can be added here -} - -type BedrockOption func(*bedrockOptions) - -type bedrockClient struct { - providerOptions providerClientOptions - options bedrockOptions - childProvider ProviderClient -} - -type BedrockClient ProviderClient - -func newBedrockClient(opts providerClientOptions) BedrockClient { - bedrockOpts := bedrockOptions{} - // Apply bedrock specific options if they are added in the future - - // Get AWS region from environment - region := os.Getenv("AWS_REGION") - if region == "" { - region = os.Getenv("AWS_DEFAULT_REGION") - } - - if region == "" { - region = "us-east-1" // default region - } - if len(region) < 2 { - return &bedrockClient{ - providerOptions: opts, - options: bedrockOpts, - childProvider: nil, // Will cause an error when used - } - } - - // Prefix the model name with region - regionPrefix := region[:2] - modelName := opts.model.APIModel - opts.model.APIModel = fmt.Sprintf("%s.%s", regionPrefix, modelName) - - // Determine which provider to use based on the model - if strings.Contains(string(opts.model.APIModel), "anthropic") { - // Create Anthropic client with Bedrock configuration - anthropicOpts := opts - anthropicOpts.anthropicOptions = append(anthropicOpts.anthropicOptions, - WithAnthropicBedrock(true), - WithAnthropicDisableCache(), - ) - return &bedrockClient{ - providerOptions: opts, - options: bedrockOpts, - childProvider: newAnthropicClient(anthropicOpts), - } - } - - // Return client with nil childProvider if model is not supported - // This will cause an error when used - return &bedrockClient{ - providerOptions: opts, - options: bedrockOpts, - childProvider: nil, - } -} - -func (b *bedrockClient) send(ctx context.Context, messages []message.Message, tools []tools.BaseTool) (*ProviderResponse, error) { - if b.childProvider == nil { - return nil, errors.New("unsupported model for bedrock provider") - } - return b.childProvider.send(ctx, messages, tools) -} - -func (b *bedrockClient) stream(ctx context.Context, messages []message.Message, tools []tools.BaseTool) <-chan ProviderEvent { - eventChan := make(chan ProviderEvent) - - if b.childProvider == nil { - go func() { - eventChan <- ProviderEvent{ - Type: EventError, - Error: errors.New("unsupported model for bedrock provider"), - } - close(eventChan) - }() - return eventChan - } - - return b.childProvider.stream(ctx, messages, tools) -} diff --git a/internal/llm/provider/gemini.go b/internal/llm/provider/gemini.go deleted file mode 100644 index 8b8e33698..000000000 --- a/internal/llm/provider/gemini.go +++ /dev/null @@ -1,555 +0,0 @@ -package provider - -import ( - "context" - "encoding/json" - "errors" - "fmt" - "io" - "strings" - "time" - - "github.com/google/uuid" - "github.com/sst/opencode/internal/config" - "github.com/sst/opencode/internal/llm/tools" - "github.com/sst/opencode/internal/message" - "github.com/sst/opencode/internal/status" - "google.golang.org/genai" - "log/slog" -) - -type geminiOptions struct { - disableCache bool -} - -type GeminiOption func(*geminiOptions) - -type geminiClient struct { - providerOptions providerClientOptions - options geminiOptions - client *genai.Client -} - -type GeminiClient ProviderClient - -func newGeminiClient(opts providerClientOptions) GeminiClient { - geminiOpts := geminiOptions{} - for _, o := range opts.geminiOptions { - o(&geminiOpts) - } - - client, err := genai.NewClient(context.Background(), &genai.ClientConfig{APIKey: opts.apiKey, Backend: genai.BackendGeminiAPI}) - if err != nil { - slog.Error("Failed to create Gemini client", "error", err) - return nil - } - - return &geminiClient{ - providerOptions: opts, - options: geminiOpts, - client: client, - } -} - -func (g *geminiClient) convertMessages(messages []message.Message) []*genai.Content { - var history []*genai.Content - for _, msg := range messages { - switch msg.Role { - case message.User: - var parts []*genai.Part - parts = append(parts, &genai.Part{Text: msg.Content().String()}) - for _, binaryContent := range msg.BinaryContent() { - imageFormat := strings.Split(binaryContent.MIMEType, "/") - parts = append(parts, &genai.Part{InlineData: &genai.Blob{ - MIMEType: imageFormat[1], - Data: binaryContent.Data, - }}) - } - history = append(history, &genai.Content{ - Parts: parts, - Role: "user", - }) - case message.Assistant: - content := &genai.Content{ - Role: "model", - Parts: []*genai.Part{}, - } - - if msg.Content().String() != "" { - content.Parts = append(content.Parts, &genai.Part{Text: msg.Content().String()}) - } - - if len(msg.ToolCalls()) > 0 { - for _, call := range msg.ToolCalls() { - args, _ := parseJsonToMap(call.Input) - content.Parts = append(content.Parts, &genai.Part{ - FunctionCall: &genai.FunctionCall{ - Name: call.Name, - Args: args, - }, - }) - } - } - - history = append(history, content) - - case message.Tool: - for _, result := range msg.ToolResults() { - response := map[string]interface{}{"result": result.Content} - parsed, err := parseJsonToMap(result.Content) - if err == nil { - response = parsed - } - - var toolCall message.ToolCall - for _, m := range messages { - if m.Role == message.Assistant { - for _, call := range m.ToolCalls() { - if call.ID == result.ToolCallID { - toolCall = call - break - } - } - } - } - - history = append(history, &genai.Content{ - Parts: []*genai.Part{ - { - FunctionResponse: &genai.FunctionResponse{ - Name: toolCall.Name, - Response: response, - }, - }, - }, - Role: "function", - }) - } - } - } - - return history -} - -func (g *geminiClient) convertTools(tools []tools.BaseTool) []*genai.Tool { - geminiTool := &genai.Tool{} - geminiTool.FunctionDeclarations = make([]*genai.FunctionDeclaration, 0, len(tools)) - - for _, tool := range tools { - info := tool.Info() - declaration := &genai.FunctionDeclaration{ - Name: info.Name, - Description: info.Description, - Parameters: &genai.Schema{ - Type: genai.TypeObject, - Properties: convertSchemaProperties(info.Parameters), - Required: info.Required, - }, - } - - geminiTool.FunctionDeclarations = append(geminiTool.FunctionDeclarations, declaration) - } - - return []*genai.Tool{geminiTool} -} - -func (g *geminiClient) finishReason(reason genai.FinishReason) message.FinishReason { - switch { - case reason == genai.FinishReasonStop: - return message.FinishReasonEndTurn - case reason == genai.FinishReasonMaxTokens: - return message.FinishReasonMaxTokens - default: - return message.FinishReasonUnknown - } -} - -func (g *geminiClient) send(ctx context.Context, messages []message.Message, tools []tools.BaseTool) (*ProviderResponse, error) { - // Convert messages - geminiMessages := g.convertMessages(messages) - - cfg := config.Get() - if cfg.Debug { - jsonData, _ := json.Marshal(geminiMessages) - slog.Debug("Prepared messages", "messages", string(jsonData)) - } - - history := geminiMessages[:len(geminiMessages)-1] // All but last message - lastMsg := geminiMessages[len(geminiMessages)-1] - config := &genai.GenerateContentConfig{ - MaxOutputTokens: int32(g.providerOptions.maxTokens), - SystemInstruction: &genai.Content{ - Parts: []*genai.Part{{Text: g.providerOptions.systemMessage}}, - }, - } - if len(tools) > 0 { - config.Tools = g.convertTools(tools) - } - chat, _ := g.client.Chats.Create(ctx, g.providerOptions.model.APIModel, config, history) - - attempts := 0 - for { - attempts++ - var toolCalls []message.ToolCall - - var lastMsgParts []genai.Part - for _, part := range lastMsg.Parts { - lastMsgParts = append(lastMsgParts, *part) - } - resp, err := chat.SendMessage(ctx, lastMsgParts...) - // If there is an error we are going to see if we can retry the call - if err != nil { - retry, after, retryErr := g.shouldRetry(attempts, err) - duration := time.Duration(after) * time.Millisecond - if retryErr != nil { - return nil, retryErr - } - if retry { - status.Warn(fmt.Sprintf("Retrying due to rate limit... attempt %d of %d", attempts, maxRetries), status.WithDuration(duration)) - select { - case <-ctx.Done(): - return nil, ctx.Err() - case <-time.After(duration): - continue - } - } - return nil, retryErr - } - - content := "" - - if len(resp.Candidates) > 0 && resp.Candidates[0].Content != nil { - for _, part := range resp.Candidates[0].Content.Parts { - switch { - case part.Text != "": - content = string(part.Text) - case part.FunctionCall != nil: - id := "call_" + uuid.New().String() - args, _ := json.Marshal(part.FunctionCall.Args) - toolCalls = append(toolCalls, message.ToolCall{ - ID: id, - Name: part.FunctionCall.Name, - Input: string(args), - Type: "function", - Finished: true, - }) - } - } - } - finishReason := message.FinishReasonEndTurn - if len(resp.Candidates) > 0 { - finishReason = g.finishReason(resp.Candidates[0].FinishReason) - } - if len(toolCalls) > 0 { - finishReason = message.FinishReasonToolUse - } - - return &ProviderResponse{ - Content: content, - ToolCalls: toolCalls, - Usage: g.usage(resp), - FinishReason: finishReason, - }, nil - } -} - -func (g *geminiClient) stream(ctx context.Context, messages []message.Message, tools []tools.BaseTool) <-chan ProviderEvent { - // Convert messages - geminiMessages := g.convertMessages(messages) - - cfg := config.Get() - if cfg.Debug { - jsonData, _ := json.Marshal(geminiMessages) - slog.Debug("Prepared messages", "messages", string(jsonData)) - } - - history := geminiMessages[:len(geminiMessages)-1] // All but last message - lastMsg := geminiMessages[len(geminiMessages)-1] - config := &genai.GenerateContentConfig{ - MaxOutputTokens: int32(g.providerOptions.maxTokens), - SystemInstruction: &genai.Content{ - Parts: []*genai.Part{{Text: g.providerOptions.systemMessage}}, - }, - } - if len(tools) > 0 { - config.Tools = g.convertTools(tools) - } - chat, _ := g.client.Chats.Create(ctx, g.providerOptions.model.APIModel, config, history) - - attempts := 0 - eventChan := make(chan ProviderEvent) - - go func() { - defer close(eventChan) - - for { - attempts++ - - currentContent := "" - toolCalls := []message.ToolCall{} - var finalResp *genai.GenerateContentResponse - - eventChan <- ProviderEvent{Type: EventContentStart} - - var lastMsgParts []genai.Part - - for _, part := range lastMsg.Parts { - lastMsgParts = append(lastMsgParts, *part) - } - for resp, err := range chat.SendMessageStream(ctx, lastMsgParts...) { - if err != nil { - retry, after, retryErr := g.shouldRetry(attempts, err) - duration := time.Duration(after) * time.Millisecond - if retryErr != nil { - eventChan <- ProviderEvent{Type: EventError, Error: retryErr} - return - } - if retry { - status.Warn(fmt.Sprintf("Retrying due to rate limit... attempt %d of %d", attempts, maxRetries), status.WithDuration(duration)) - select { - case <-ctx.Done(): - if ctx.Err() != nil { - eventChan <- ProviderEvent{Type: EventError, Error: ctx.Err()} - } - - return - case <-time.After(duration): - break - } - } else { - eventChan <- ProviderEvent{Type: EventError, Error: err} - return - } - } - - finalResp = resp - - if len(resp.Candidates) > 0 && resp.Candidates[0].Content != nil { - for _, part := range resp.Candidates[0].Content.Parts { - switch { - case part.Text != "": - delta := string(part.Text) - if delta != "" { - eventChan <- ProviderEvent{ - Type: EventContentDelta, - Content: delta, - } - currentContent += delta - } - case part.FunctionCall != nil: - id := "call_" + uuid.New().String() - args, _ := json.Marshal(part.FunctionCall.Args) - newCall := message.ToolCall{ - ID: id, - Name: part.FunctionCall.Name, - Input: string(args), - Type: "function", - Finished: true, - } - - isNew := true - for _, existing := range toolCalls { - if existing.Name == newCall.Name && existing.Input == newCall.Input { - isNew = false - break - } - } - - if isNew { - toolCalls = append(toolCalls, newCall) - } - } - } - } - } - - eventChan <- ProviderEvent{Type: EventContentStop} - - if finalResp != nil { - - finishReason := message.FinishReasonEndTurn - if len(finalResp.Candidates) > 0 { - finishReason = g.finishReason(finalResp.Candidates[0].FinishReason) - } - if len(toolCalls) > 0 { - finishReason = message.FinishReasonToolUse - } - eventChan <- ProviderEvent{ - Type: EventComplete, - Response: &ProviderResponse{ - Content: currentContent, - ToolCalls: toolCalls, - Usage: g.usage(finalResp), - FinishReason: finishReason, - }, - } - return - } - - } - }() - - return eventChan -} - -func (g *geminiClient) shouldRetry(attempts int, err error) (bool, int64, error) { - // Check if error is a rate limit error - if attempts > maxRetries { - return false, 0, fmt.Errorf("maximum retry attempts reached for rate limit: %d retries", maxRetries) - } - - // Gemini doesn't have a standard error type we can check against - // So we'll check the error message for rate limit indicators - if errors.Is(err, io.EOF) { - return false, 0, err - } - - errMsg := err.Error() - isRateLimit := false - - // Check for common rate limit error messages - if contains(errMsg, "rate limit", "quota exceeded", "too many requests") { - isRateLimit = true - } - - if !isRateLimit { - return false, 0, err - } - - // Calculate backoff with jitter - backoffMs := 2000 * (1 << (attempts - 1)) - jitterMs := int(float64(backoffMs) * 0.2) - retryMs := backoffMs + jitterMs - - return true, int64(retryMs), nil -} - -func (g *geminiClient) toolCalls(resp *genai.GenerateContentResponse) []message.ToolCall { - var toolCalls []message.ToolCall - - if len(resp.Candidates) > 0 && resp.Candidates[0].Content != nil { - for _, part := range resp.Candidates[0].Content.Parts { - if part.FunctionCall != nil { - id := "call_" + uuid.New().String() - args, _ := json.Marshal(part.FunctionCall.Args) - toolCalls = append(toolCalls, message.ToolCall{ - ID: id, - Name: part.FunctionCall.Name, - Input: string(args), - Type: "function", - }) - } - } - } - - return toolCalls -} - -func (g *geminiClient) usage(resp *genai.GenerateContentResponse) TokenUsage { - if resp == nil || resp.UsageMetadata == nil { - return TokenUsage{} - } - - return TokenUsage{ - InputTokens: int64(resp.UsageMetadata.PromptTokenCount), - OutputTokens: int64(resp.UsageMetadata.CandidatesTokenCount), - CacheCreationTokens: 0, // Not directly provided by Gemini - CacheReadTokens: int64(resp.UsageMetadata.CachedContentTokenCount), - } -} - -func WithGeminiDisableCache() GeminiOption { - return func(options *geminiOptions) { - options.disableCache = true - } -} - -// Helper functions -func parseJsonToMap(jsonStr string) (map[string]interface{}, error) { - var result map[string]interface{} - err := json.Unmarshal([]byte(jsonStr), &result) - return result, err -} - -func convertSchemaProperties(parameters map[string]interface{}) map[string]*genai.Schema { - properties := make(map[string]*genai.Schema) - - for name, param := range parameters { - properties[name] = convertToSchema(param) - } - - return properties -} - -func convertToSchema(param interface{}) *genai.Schema { - schema := &genai.Schema{Type: genai.TypeString} - - paramMap, ok := param.(map[string]interface{}) - if !ok { - return schema - } - - if desc, ok := paramMap["description"].(string); ok { - schema.Description = desc - } - - typeVal, hasType := paramMap["type"] - if !hasType { - return schema - } - - typeStr, ok := typeVal.(string) - if !ok { - return schema - } - - schema.Type = mapJSONTypeToGenAI(typeStr) - - switch typeStr { - case "array": - schema.Items = processArrayItems(paramMap) - case "object": - if props, ok := paramMap["properties"].(map[string]interface{}); ok { - schema.Properties = convertSchemaProperties(props) - } - } - - return schema -} - -func processArrayItems(paramMap map[string]interface{}) *genai.Schema { - items, ok := paramMap["items"].(map[string]interface{}) - if !ok { - return nil - } - - return convertToSchema(items) -} - -func mapJSONTypeToGenAI(jsonType string) genai.Type { - switch jsonType { - case "string": - return genai.TypeString - case "number": - return genai.TypeNumber - case "integer": - return genai.TypeInteger - case "boolean": - return genai.TypeBoolean - case "array": - return genai.TypeArray - case "object": - return genai.TypeObject - default: - return genai.TypeString // Default to string for unknown types - } -} - -func contains(s string, substrs ...string) bool { - for _, substr := range substrs { - if strings.Contains(strings.ToLower(s), strings.ToLower(substr)) { - return true - } - } - return false -} diff --git a/internal/llm/provider/openai.go b/internal/llm/provider/openai.go deleted file mode 100644 index db77a3844..000000000 --- a/internal/llm/provider/openai.go +++ /dev/null @@ -1,149 +0,0 @@ -package provider - -import ( - "context" - "errors" - "fmt" - "log/slog" - "github.com/openai/openai-go" - "github.com/openai/openai-go/option" - "github.com/sst/opencode/internal/llm/models" - "github.com/sst/opencode/internal/llm/tools" - "github.com/sst/opencode/internal/message" -) - -type openaiOptions struct { - baseURL string - disableCache bool - reasoningEffort string - extraHeaders map[string]string -} - -type OpenAIOption func(*openaiOptions) - -type openaiClient struct { - providerOptions providerClientOptions - options openaiOptions - client openai.Client -} - -type OpenAIClient ProviderClient - -func newOpenAIClient(opts providerClientOptions) OpenAIClient { - openaiOpts := openaiOptions{ - reasoningEffort: "medium", - } - for _, o := range opts.openaiOptions { - o(&openaiOpts) - } - - openaiClientOptions := []option.RequestOption{} - if opts.apiKey != "" { - openaiClientOptions = append(openaiClientOptions, option.WithAPIKey(opts.apiKey)) - } - if openaiOpts.baseURL != "" { - openaiClientOptions = append(openaiClientOptions, option.WithBaseURL(openaiOpts.baseURL)) - } - - if openaiOpts.extraHeaders != nil { - for key, value := range openaiOpts.extraHeaders { - openaiClientOptions = append(openaiClientOptions, option.WithHeader(key, value)) - } - } - - client := openai.NewClient(openaiClientOptions...) - return &openaiClient{ - providerOptions: opts, - options: openaiOpts, - client: client, - } -} - -func (o *openaiClient) send(ctx context.Context, messages []message.Message, tools []tools.BaseTool) (response *ProviderResponse, err error) { - if o.providerOptions.model.ID == models.OpenAIModels[models.CodexMini].ID || o.providerOptions.model.ID == models.OpenAIModels[models.O1Pro].ID { - return o.sendResponseMessages(ctx, messages, tools) - } - return o.sendChatcompletionMessage(ctx, messages, tools) -} - -func (o *openaiClient) stream(ctx context.Context, messages []message.Message, tools []tools.BaseTool) <-chan ProviderEvent { - if o.providerOptions.model.ID == models.OpenAIModels[models.CodexMini].ID || o.providerOptions.model.ID == models.OpenAIModels[models.O1Pro].ID { - return o.streamResponseMessages(ctx, messages, tools) - } - return o.streamChatCompletionMessages(ctx, messages, tools) -} - - -func (o *openaiClient) finishReason(reason string) message.FinishReason { - switch reason { - case "stop": - return message.FinishReasonEndTurn - case "length": - return message.FinishReasonMaxTokens - case "tool_calls": - return message.FinishReasonToolUse - default: - return message.FinishReasonUnknown - } -} - - -func (o *openaiClient) shouldRetry(attempts int, err error) (bool, int64, error) { - var apierr *openai.Error - if !errors.As(err, &apierr) { - return false, 0, err - } - - if apierr.StatusCode != 429 && apierr.StatusCode != 500 { - return false, 0, err - } - - if attempts > maxRetries { - return false, 0, fmt.Errorf("maximum retry attempts reached for rate limit: %d retries", maxRetries) - } - - retryMs := 0 - retryAfterValues := apierr.Response.Header.Values("Retry-After") - - backoffMs := 2000 * (1 << (attempts - 1)) - jitterMs := int(float64(backoffMs) * 0.2) - retryMs = backoffMs + jitterMs - if len(retryAfterValues) > 0 { - if _, err := fmt.Sscanf(retryAfterValues[0], "%d", &retryMs); err == nil { - retryMs = retryMs * 1000 - } - } - return true, int64(retryMs), nil -} - - -func WithOpenAIBaseURL(baseURL string) OpenAIOption { - return func(options *openaiOptions) { - options.baseURL = baseURL - } -} - -func WithOpenAIExtraHeaders(headers map[string]string) OpenAIOption { - return func(options *openaiOptions) { - options.extraHeaders = headers - } -} - -func WithOpenAIDisableCache() OpenAIOption { - return func(options *openaiOptions) { - options.disableCache = true - } -} - -func WithReasoningEffort(effort string) OpenAIOption { - return func(options *openaiOptions) { - defaultReasoningEffort := "medium" - switch effort { - case "low", "medium", "high": - defaultReasoningEffort = effort - default: - slog.Warn("Invalid reasoning effort, using default: medium") - } - options.reasoningEffort = defaultReasoningEffort - } -} diff --git a/internal/llm/provider/openai_completion.go b/internal/llm/provider/openai_completion.go deleted file mode 100644 index e3b837231..000000000 --- a/internal/llm/provider/openai_completion.go +++ /dev/null @@ -1,317 +0,0 @@ -package provider - -import ( - "context" - "encoding/json" - "errors" - "fmt" - "io" - "log/slog" - "time" - - "github.com/openai/openai-go" - "github.com/openai/openai-go/shared" - "github.com/sst/opencode/internal/config" - "github.com/sst/opencode/internal/llm/models" - "github.com/sst/opencode/internal/llm/tools" - "github.com/sst/opencode/internal/message" - "github.com/sst/opencode/internal/status" -) - -func (o *openaiClient) convertMessagesToChatCompletionMessages(messages []message.Message) (openaiMessages []openai.ChatCompletionMessageParamUnion) { - // Add system message first - openaiMessages = append(openaiMessages, openai.SystemMessage(o.providerOptions.systemMessage)) - - for _, msg := range messages { - switch msg.Role { - case message.User: - var content []openai.ChatCompletionContentPartUnionParam - textBlock := openai.ChatCompletionContentPartTextParam{Text: msg.Content().String()} - content = append(content, openai.ChatCompletionContentPartUnionParam{OfText: &textBlock}) - for _, binaryContent := range msg.BinaryContent() { - imageURL := openai.ChatCompletionContentPartImageImageURLParam{URL: binaryContent.String(models.ProviderOpenAI)} - imageBlock := openai.ChatCompletionContentPartImageParam{ImageURL: imageURL} - - content = append(content, openai.ChatCompletionContentPartUnionParam{OfImageURL: &imageBlock}) - } - - openaiMessages = append(openaiMessages, openai.UserMessage(content)) - - case message.Assistant: - assistantMsg := openai.ChatCompletionAssistantMessageParam{ - Role: "assistant", - } - - if msg.Content().String() != "" { - assistantMsg.Content = openai.ChatCompletionAssistantMessageParamContentUnion{ - OfString: openai.String(msg.Content().String()), - } - } - - if len(msg.ToolCalls()) > 0 { - assistantMsg.ToolCalls = make([]openai.ChatCompletionMessageToolCallParam, len(msg.ToolCalls())) - for i, call := range msg.ToolCalls() { - assistantMsg.ToolCalls[i] = openai.ChatCompletionMessageToolCallParam{ - ID: call.ID, - Type: "function", - Function: openai.ChatCompletionMessageToolCallFunctionParam{ - Name: call.Name, - Arguments: call.Input, - }, - } - } - } - - openaiMessages = append(openaiMessages, openai.ChatCompletionMessageParamUnion{ - OfAssistant: &assistantMsg, - }) - - case message.Tool: - for _, result := range msg.ToolResults() { - openaiMessages = append(openaiMessages, - openai.ToolMessage(result.Content, result.ToolCallID), - ) - } - } - } - - return -} - -func (o *openaiClient) convertToChatCompletionTools(tools []tools.BaseTool) []openai.ChatCompletionToolParam { - openaiTools := make([]openai.ChatCompletionToolParam, len(tools)) - - for i, tool := range tools { - info := tool.Info() - openaiTools[i] = openai.ChatCompletionToolParam{ - Function: openai.FunctionDefinitionParam{ - Name: info.Name, - Description: openai.String(info.Description), - Parameters: openai.FunctionParameters{ - "type": "object", - "properties": info.Parameters, - "required": info.Required, - }, - }, - } - } - - return openaiTools -} - -func (o *openaiClient) preparedChatCompletionParams(messages []openai.ChatCompletionMessageParamUnion, tools []openai.ChatCompletionToolParam) openai.ChatCompletionNewParams { - params := openai.ChatCompletionNewParams{ - Model: openai.ChatModel(o.providerOptions.model.APIModel), - Messages: messages, - Tools: tools, - } - if o.providerOptions.model.CanReason == true { - params.MaxCompletionTokens = openai.Int(o.providerOptions.maxTokens) - switch o.options.reasoningEffort { - case "low": - params.ReasoningEffort = shared.ReasoningEffortLow - case "medium": - params.ReasoningEffort = shared.ReasoningEffortMedium - case "high": - params.ReasoningEffort = shared.ReasoningEffortHigh - default: - params.ReasoningEffort = shared.ReasoningEffortMedium - } - } else { - params.MaxTokens = openai.Int(o.providerOptions.maxTokens) - } - - if o.providerOptions.model.Provider == models.ProviderOpenRouter { - params.WithExtraFields(map[string]any{ - "provider": map[string]any{ - "require_parameters": true, - }, - }) - } - - return params -} - -func (o *openaiClient) sendChatcompletionMessage(ctx context.Context, messages []message.Message, tools []tools.BaseTool) (response *ProviderResponse, err error) { - params := o.preparedChatCompletionParams(o.convertMessagesToChatCompletionMessages(messages), o.convertToChatCompletionTools(tools)) - cfg := config.Get() - if cfg.Debug { - jsonData, _ := json.Marshal(params) - slog.Debug("Prepared messages", "messages", string(jsonData)) - } - attempts := 0 - for { - attempts++ - openaiResponse, err := o.client.Chat.Completions.New( - ctx, - params, - ) - // If there is an error we are going to see if we can retry the call - if err != nil { - retry, after, retryErr := o.shouldRetry(attempts, err) - duration := time.Duration(after) * time.Millisecond - if retryErr != nil { - return nil, retryErr - } - if retry { - status.Warn(fmt.Sprintf("Retrying due to rate limit... attempt %d of %d", attempts, maxRetries), status.WithDuration(duration)) - select { - case <-ctx.Done(): - return nil, ctx.Err() - case <-time.After(duration): - continue - } - } - return nil, retryErr - } - - content := "" - if openaiResponse.Choices[0].Message.Content != "" { - content = openaiResponse.Choices[0].Message.Content - } - - toolCalls := o.chatCompletionToolCalls(*openaiResponse) - finishReason := o.finishReason(string(openaiResponse.Choices[0].FinishReason)) - - if len(toolCalls) > 0 { - finishReason = message.FinishReasonToolUse - } - - return &ProviderResponse{ - Content: content, - ToolCalls: toolCalls, - Usage: o.usage(*openaiResponse), - FinishReason: finishReason, - }, nil - } -} - -func (o *openaiClient) streamChatCompletionMessages(ctx context.Context, messages []message.Message, tools []tools.BaseTool) <-chan ProviderEvent { - params := o.preparedChatCompletionParams(o.convertMessagesToChatCompletionMessages(messages), o.convertToChatCompletionTools(tools)) - params.StreamOptions = openai.ChatCompletionStreamOptionsParam{ - IncludeUsage: openai.Bool(true), - } - - cfg := config.Get() - if cfg.Debug { - jsonData, _ := json.Marshal(params) - slog.Debug("Prepared messages", "messages", string(jsonData)) - } - - attempts := 0 - eventChan := make(chan ProviderEvent) - - go func() { - for { - attempts++ - openaiStream := o.client.Chat.Completions.NewStreaming( - ctx, - params, - ) - - acc := openai.ChatCompletionAccumulator{} - currentContent := "" - toolCalls := make([]message.ToolCall, 0) - - for openaiStream.Next() { - chunk := openaiStream.Current() - acc.AddChunk(chunk) - - for _, choice := range chunk.Choices { - if choice.Delta.Content != "" { - eventChan <- ProviderEvent{ - Type: EventContentDelta, - Content: choice.Delta.Content, - } - currentContent += choice.Delta.Content - } - } - } - - err := openaiStream.Err() - if err == nil || errors.Is(err, io.EOF) { - // Stream completed successfully - finishReason := o.finishReason(string(acc.ChatCompletion.Choices[0].FinishReason)) - if len(acc.ChatCompletion.Choices[0].Message.ToolCalls) > 0 { - toolCalls = append(toolCalls, o.chatCompletionToolCalls(acc.ChatCompletion)...) - } - if len(toolCalls) > 0 { - finishReason = message.FinishReasonToolUse - } - - eventChan <- ProviderEvent{ - Type: EventComplete, - Response: &ProviderResponse{ - Content: currentContent, - ToolCalls: toolCalls, - Usage: o.usage(acc.ChatCompletion), - FinishReason: finishReason, - }, - } - close(eventChan) - return - } - - // If there is an error we are going to see if we can retry the call - retry, after, retryErr := o.shouldRetry(attempts, err) - duration := time.Duration(after) * time.Millisecond - if retryErr != nil { - eventChan <- ProviderEvent{Type: EventError, Error: retryErr} - close(eventChan) - return - } - if retry { - status.Warn(fmt.Sprintf("Retrying due to rate limit... attempt %d of %d", attempts, maxRetries), status.WithDuration(duration)) - select { - case <-ctx.Done(): - // context cancelled - if ctx.Err() == nil { - eventChan <- ProviderEvent{Type: EventError, Error: ctx.Err()} - } - close(eventChan) - return - case <-time.After(duration): - continue - } - } - eventChan <- ProviderEvent{Type: EventError, Error: retryErr} - close(eventChan) - return - } - }() - - return eventChan -} - - -func (o *openaiClient) chatCompletionToolCalls(completion openai.ChatCompletion) []message.ToolCall { - var toolCalls []message.ToolCall - - if len(completion.Choices) > 0 && len(completion.Choices[0].Message.ToolCalls) > 0 { - for _, call := range completion.Choices[0].Message.ToolCalls { - toolCall := message.ToolCall{ - ID: call.ID, - Name: call.Function.Name, - Input: call.Function.Arguments, - Type: "function", - Finished: true, - } - toolCalls = append(toolCalls, toolCall) - } - } - - return toolCalls -} - -func (o *openaiClient) usage(completion openai.ChatCompletion) TokenUsage { - cachedTokens := completion.Usage.PromptTokensDetails.CachedTokens - inputTokens := completion.Usage.PromptTokens - cachedTokens - - return TokenUsage{ - InputTokens: inputTokens, - OutputTokens: completion.Usage.CompletionTokens, - CacheCreationTokens: 0, // OpenAI doesn't provide this directly - CacheReadTokens: cachedTokens, - } -} - diff --git a/internal/llm/provider/openai_response.go b/internal/llm/provider/openai_response.go deleted file mode 100644 index 96a61c4db..000000000 --- a/internal/llm/provider/openai_response.go +++ /dev/null @@ -1,393 +0,0 @@ -package provider - - -import ( - "github.com/openai/openai-go" - "github.com/openai/openai-go/responses" - "github.com/sst/opencode/internal/llm/models" - "github.com/sst/opencode/internal/llm/tools" - "github.com/sst/opencode/internal/message" - "context" - "encoding/json" - "errors" - "fmt" - "io" - "time" - - "log/slog" - - "github.com/openai/openai-go/shared" - "github.com/sst/opencode/internal/config" - "github.com/sst/opencode/internal/status" -) - -func (o *openaiClient) convertMessagesToResponseParams(messages []message.Message) responses.ResponseInputParam { - inputItems := responses.ResponseInputParam{} - - inputItems = append(inputItems, responses.ResponseInputItemUnionParam{ - OfMessage: &responses.EasyInputMessageParam{ - Content: responses.EasyInputMessageContentUnionParam{OfString: openai.String(o.providerOptions.systemMessage)}, - Role: responses.EasyInputMessageRoleSystem, - }, - }) - - for _, msg := range messages { - switch msg.Role { - case message.User: - inputItemContentList := responses.ResponseInputMessageContentListParam{ - responses.ResponseInputContentUnionParam{ - OfInputText: &responses.ResponseInputTextParam{ - Text: msg.Content().String(), - }, - }, - } - - for _, binaryContent := range msg.BinaryContent() { - inputItemContentList = append(inputItemContentList, responses.ResponseInputContentUnionParam{ - OfInputImage: &responses.ResponseInputImageParam{ - ImageURL: openai.String(binaryContent.String(models.ProviderOpenAI)), - }, - }) - } - - userMsg := responses.ResponseInputItemUnionParam{ - OfInputMessage: &responses.ResponseInputItemMessageParam{ - Content: inputItemContentList, - Role: string(responses.ResponseInputMessageItemRoleUser), - }, - } - inputItems = append(inputItems, userMsg) - - case message.Assistant: - if msg.Content().String() != "" { - assistantMsg := responses.ResponseInputItemUnionParam{ - OfOutputMessage: &responses.ResponseOutputMessageParam{ - Content: []responses.ResponseOutputMessageContentUnionParam{{ - OfOutputText: &responses.ResponseOutputTextParam{ - Text: msg.Content().String(), - }, - }}, - }, - } - inputItems = append(inputItems, assistantMsg) - } - - if len(msg.ToolCalls()) > 0 { - for _, call := range msg.ToolCalls() { - toolMsg := responses.ResponseInputItemUnionParam{ - OfFunctionCall: &responses.ResponseFunctionToolCallParam{ - CallID: call.ID, - Name: call.Name, - Arguments: call.Input, - }, - } - inputItems = append(inputItems, toolMsg) - } - } - - case message.Tool: - for _, result := range msg.ToolResults() { - toolMsg := responses.ResponseInputItemUnionParam{ - OfFunctionCallOutput: &responses.ResponseInputItemFunctionCallOutputParam{ - Output: result.Content, - CallID: result.ToolCallID, - }, - } - inputItems = append(inputItems, toolMsg) - } - } - } - - return inputItems -} - -func (o *openaiClient) convertToResponseTools(tools []tools.BaseTool) []responses.ToolUnionParam { - outputTools := make([]responses.ToolUnionParam, len(tools)) - - for i, tool := range tools { - info := tool.Info() - outputTools[i] = responses.ToolUnionParam{ - OfFunction: &responses.FunctionToolParam{ - Name: info.Name, - Description: openai.String(info.Description), - Parameters: map[string]any{ - "type": "object", - "properties": info.Parameters, - "required": info.Required, - }, - }, - } - } - - return outputTools -} - - -func (o *openaiClient) preparedResponseParams(input responses.ResponseInputParam, tools []responses.ToolUnionParam) responses.ResponseNewParams { - params := responses.ResponseNewParams{ - Model: shared.ResponsesModel(o.providerOptions.model.APIModel), - Input: responses.ResponseNewParamsInputUnion{OfInputItemList: input}, - Tools: tools, - } - - params.MaxOutputTokens = openai.Int(o.providerOptions.maxTokens) - - if o.providerOptions.model.CanReason == true { - switch o.options.reasoningEffort { - case "low": - params.Reasoning.Effort = shared.ReasoningEffortLow - case "medium": - params.Reasoning.Effort = shared.ReasoningEffortMedium - case "high": - params.Reasoning.Effort = shared.ReasoningEffortHigh - default: - params.Reasoning.Effort = shared.ReasoningEffortMedium - } - } - - if o.providerOptions.model.Provider == models.ProviderOpenRouter { - params.WithExtraFields(map[string]any{ - "provider": map[string]any{ - "require_parameters": true, - }, - }) - } - - return params -} - -func (o *openaiClient) sendResponseMessages(ctx context.Context, messages []message.Message, tools []tools.BaseTool) (response *ProviderResponse, err error) { - params := o.preparedResponseParams(o.convertMessagesToResponseParams(messages), o.convertToResponseTools(tools)) - cfg := config.Get() - if cfg.Debug { - jsonData, _ := json.Marshal(params) - slog.Debug("Prepared messages", "messages", string(jsonData)) - } - attempts := 0 - for { - attempts++ - openaiResponse, err := o.client.Responses.New( - ctx, - params, - ) - // If there is an error we are going to see if we can retry the call - if err != nil { - retry, after, retryErr := o.shouldRetry(attempts, err) - duration := time.Duration(after) * time.Millisecond - if retryErr != nil { - return nil, retryErr - } - if retry { - status.Warn(fmt.Sprintf("Retrying due to rate limit... attempt %d of %d", attempts, maxRetries), status.WithDuration(duration)) - select { - case <-ctx.Done(): - return nil, ctx.Err() - case <-time.After(duration): - continue - } - } - return nil, retryErr - } - - content := "" - if openaiResponse.OutputText() != "" { - content = openaiResponse.OutputText() - } - - toolCalls := o.responseToolCalls(*openaiResponse) - finishReason := o.finishReason("stop") - - if len(toolCalls) > 0 { - finishReason = message.FinishReasonToolUse - } - - return &ProviderResponse{ - Content: content, - ToolCalls: toolCalls, - Usage: o.responseUsage(*openaiResponse), - FinishReason: finishReason, - }, nil - } -} - -func (o *openaiClient) streamResponseMessages(ctx context.Context, messages []message.Message, tools []tools.BaseTool) <-chan ProviderEvent { - eventChan := make(chan ProviderEvent) - - params := o.preparedResponseParams(o.convertMessagesToResponseParams(messages), o.convertToResponseTools(tools)) - - cfg := config.Get() - if cfg.Debug { - jsonData, _ := json.Marshal(params) - slog.Debug("Prepared messages", "messages", string(jsonData)) - } - - attempts := 0 - - go func() { - for { - attempts++ - stream := o.client.Responses.NewStreaming(ctx, params) - outputText := "" - currentToolCallID := "" - for stream.Next() { - event := stream.Current() - - switch event := event.AsAny().(type) { - case responses.ResponseCompletedEvent: - toolCalls := o.responseToolCalls(event.Response) - finishReason := o.finishReason("stop") - - if len(toolCalls) > 0 { - finishReason = message.FinishReasonToolUse - } - - eventChan <- ProviderEvent{ - Type: EventComplete, - Response: &ProviderResponse{ - Content: outputText, - ToolCalls: toolCalls, - Usage: o.responseUsage(event.Response), - FinishReason: finishReason, - }, - } - close(eventChan) - return - - case responses.ResponseTextDeltaEvent: - outputText += event.Delta - eventChan <- ProviderEvent{ - Type: EventContentDelta, - Content: event.Delta, - } - - case responses.ResponseTextDoneEvent: - eventChan <- ProviderEvent{ - Type: EventContentStop, - Content: outputText, - } - close(eventChan) - return - - case responses.ResponseOutputItemAddedEvent: - if event.Item.Type == "function_call" { - currentToolCallID = event.Item.ID - eventChan <- ProviderEvent{ - Type: EventToolUseStart, - ToolCall: &message.ToolCall{ - ID: event.Item.ID, - Name: event.Item.Name, - Finished: false, - }, - } - } - - case responses.ResponseFunctionCallArgumentsDeltaEvent: - if event.ItemID == currentToolCallID { - eventChan <- ProviderEvent{ - Type: EventToolUseDelta, - ToolCall: &message.ToolCall{ - ID: currentToolCallID, - Finished: false, - Input: event.Delta, - }, - } - } - - case responses.ResponseFunctionCallArgumentsDoneEvent: - if event.ItemID == currentToolCallID { - eventChan <- ProviderEvent{ - Type: EventToolUseStop, - ToolCall: &message.ToolCall{ - ID: currentToolCallID, - Input: event.Arguments, - }, - } - currentToolCallID = "" - } - - case responses.ResponseOutputItemDoneEvent: - if event.Item.Type == "function_call" { - eventChan <- ProviderEvent{ - Type: EventToolUseStop, - ToolCall: &message.ToolCall{ - ID: event.Item.ID, - Name: event.Item.Name, - Input: event.Item.Arguments, - Finished: true, - }, - } - currentToolCallID = "" - } - - } - } - - err := stream.Err() - if err == nil || errors.Is(err, io.EOF) { - close(eventChan) - return - } - - // If there is an error we are going to see if we can retry the call - retry, after, retryErr := o.shouldRetry(attempts, err) - duration := time.Duration(after) * time.Millisecond - if retryErr != nil { - eventChan <- ProviderEvent{Type: EventError, Error: retryErr} - close(eventChan) - return - } - if retry { - status.Warn(fmt.Sprintf("Retrying due to rate limit... attempt %d of %d", attempts, maxRetries), status.WithDuration(duration)) - select { - case <-ctx.Done(): - // context cancelled - if ctx.Err() == nil { - eventChan <- ProviderEvent{Type: EventError, Error: ctx.Err()} - } - close(eventChan) - return - case <-time.After(duration): - continue - } - } - eventChan <- ProviderEvent{Type: EventError, Error: retryErr} - close(eventChan) - return - } - }() - - return eventChan -} - - -func (o *openaiClient) responseToolCalls(response responses.Response) []message.ToolCall { - var toolCalls []message.ToolCall - - for _, output := range response.Output { - if output.Type == "function_call" { - call := output.AsFunctionCall() - toolCall := message.ToolCall{ - ID: call.ID, - Name: call.Name, - Input: call.Arguments, - Type: "function", - Finished: true, - } - toolCalls = append(toolCalls, toolCall) - } - } - - return toolCalls -} - -func (o *openaiClient) responseUsage(response responses.Response) TokenUsage { - cachedTokens := response.Usage.InputTokensDetails.CachedTokens - inputTokens := response.Usage.InputTokens - cachedTokens - - return TokenUsage{ - InputTokens: inputTokens, - OutputTokens: response.Usage.OutputTokens, - CacheCreationTokens: 0, // OpenAI doesn't provide this directly - CacheReadTokens: cachedTokens, - } -} diff --git a/internal/llm/provider/provider.go b/internal/llm/provider/provider.go deleted file mode 100644 index adcbfdbf7..000000000 --- a/internal/llm/provider/provider.go +++ /dev/null @@ -1,269 +0,0 @@ -package provider - -import ( - "context" - "fmt" - "log/slog" - - "github.com/sst/opencode/internal/llm/models" - "github.com/sst/opencode/internal/llm/tools" - "github.com/sst/opencode/internal/message" -) - -type EventType string - -const maxRetries = 8 - -const ( - EventContentStart EventType = "content_start" - EventToolUseStart EventType = "tool_use_start" - EventToolUseDelta EventType = "tool_use_delta" - EventToolUseStop EventType = "tool_use_stop" - EventContentDelta EventType = "content_delta" - EventThinkingDelta EventType = "thinking_delta" - EventContentStop EventType = "content_stop" - EventComplete EventType = "complete" - EventError EventType = "error" - EventWarning EventType = "warning" -) - -type TokenUsage struct { - InputTokens int64 - OutputTokens int64 - CacheCreationTokens int64 - CacheReadTokens int64 -} - -type ProviderResponse struct { - Content string - ToolCalls []message.ToolCall - Usage TokenUsage - FinishReason message.FinishReason -} - -type ProviderEvent struct { - Type EventType - - Content string - Thinking string - Response *ProviderResponse - ToolCall *message.ToolCall - Error error -} -type Provider interface { - SendMessages(ctx context.Context, messages []message.Message, tools []tools.BaseTool) (*ProviderResponse, error) - - StreamResponse(ctx context.Context, messages []message.Message, tools []tools.BaseTool) <-chan ProviderEvent - - Model() models.Model - - MaxTokens() int64 -} - -type providerClientOptions struct { - apiKey string - model models.Model - maxTokens int64 - systemMessage string - - anthropicOptions []AnthropicOption - openaiOptions []OpenAIOption - geminiOptions []GeminiOption - bedrockOptions []BedrockOption -} - -type ProviderClientOption func(*providerClientOptions) - -type ProviderClient interface { - send(ctx context.Context, messages []message.Message, tools []tools.BaseTool) (*ProviderResponse, error) - stream(ctx context.Context, messages []message.Message, tools []tools.BaseTool) <-chan ProviderEvent -} - -type baseProvider[C ProviderClient] struct { - options providerClientOptions - client C -} - -func NewProvider(providerName models.ModelProvider, opts ...ProviderClientOption) (Provider, error) { - clientOptions := providerClientOptions{} - for _, o := range opts { - o(&clientOptions) - } - switch providerName { - case models.ProviderAnthropic: - return &baseProvider[AnthropicClient]{ - options: clientOptions, - client: newAnthropicClient(clientOptions), - }, nil - case models.ProviderOpenAI: - return &baseProvider[OpenAIClient]{ - options: clientOptions, - client: newOpenAIClient(clientOptions), - }, nil - case models.ProviderGemini: - return &baseProvider[GeminiClient]{ - options: clientOptions, - client: newGeminiClient(clientOptions), - }, nil - case models.ProviderBedrock: - return &baseProvider[BedrockClient]{ - options: clientOptions, - client: newBedrockClient(clientOptions), - }, nil - case models.ProviderGROQ: - clientOptions.openaiOptions = append(clientOptions.openaiOptions, - WithOpenAIBaseURL("https://api.groq.com/openai/v1"), - ) - return &baseProvider[OpenAIClient]{ - options: clientOptions, - client: newOpenAIClient(clientOptions), - }, nil - case models.ProviderAzure: - return &baseProvider[AzureClient]{ - options: clientOptions, - client: newAzureClient(clientOptions), - }, nil - case models.ProviderVertexAI: - return &baseProvider[VertexAIClient]{ - options: clientOptions, - client: newVertexAIClient(clientOptions), - }, nil - case models.ProviderOpenRouter: - clientOptions.openaiOptions = append(clientOptions.openaiOptions, - WithOpenAIBaseURL("https://openrouter.ai/api/v1"), - WithOpenAIExtraHeaders(map[string]string{ - "HTTP-Referer": "opencode.ai", - "X-Title": "OpenCode", - }), - ) - return &baseProvider[OpenAIClient]{ - options: clientOptions, - client: newOpenAIClient(clientOptions), - }, nil - case models.ProviderXAI: - clientOptions.openaiOptions = append(clientOptions.openaiOptions, - WithOpenAIBaseURL("https://api.x.ai/v1"), - ) - return &baseProvider[OpenAIClient]{ - options: clientOptions, - client: newOpenAIClient(clientOptions), - }, nil - - case models.ProviderMock: - // TODO: implement mock client for test - panic("not implemented") - } - return nil, fmt.Errorf("provider not supported: %s", providerName) -} - -func (p *baseProvider[C]) cleanMessages(messages []message.Message) (cleaned []message.Message) { - for _, msg := range messages { - // The message has no content - if len(msg.Parts) == 0 { - continue - } - cleaned = append(cleaned, msg) - } - return -} - -func (p *baseProvider[C]) SendMessages(ctx context.Context, messages []message.Message, tools []tools.BaseTool) (*ProviderResponse, error) { - messages = p.cleanMessages(messages) - response, err := p.client.send(ctx, messages, tools) - if err == nil && response != nil { - slog.Debug("API request token usage", - "model", p.options.model.Name, - "input_tokens", response.Usage.InputTokens, - "output_tokens", response.Usage.OutputTokens, - "cache_creation_tokens", response.Usage.CacheCreationTokens, - "cache_read_tokens", response.Usage.CacheReadTokens, - "total_tokens", response.Usage.InputTokens+response.Usage.OutputTokens) - } - return response, err -} - -func (p *baseProvider[C]) Model() models.Model { - return p.options.model -} - -func (p *baseProvider[C]) MaxTokens() int64 { - return p.options.maxTokens -} - -func (p *baseProvider[C]) StreamResponse(ctx context.Context, messages []message.Message, tools []tools.BaseTool) <-chan ProviderEvent { - messages = p.cleanMessages(messages) - eventChan := p.client.stream(ctx, messages, tools) - - // Create a new channel to intercept events - wrappedChan := make(chan ProviderEvent) - - go func() { - defer close(wrappedChan) - - for event := range eventChan { - // Pass the event through - wrappedChan <- event - - // Log token usage when we get the complete event - if event.Type == EventComplete && event.Response != nil { - slog.Debug("API streaming request token usage", - "model", p.options.model.Name, - "input_tokens", event.Response.Usage.InputTokens, - "output_tokens", event.Response.Usage.OutputTokens, - "cache_creation_tokens", event.Response.Usage.CacheCreationTokens, - "cache_read_tokens", event.Response.Usage.CacheReadTokens, - "total_tokens", event.Response.Usage.InputTokens+event.Response.Usage.OutputTokens) - } - } - }() - - return wrappedChan -} - -func WithAPIKey(apiKey string) ProviderClientOption { - return func(options *providerClientOptions) { - options.apiKey = apiKey - } -} - -func WithModel(model models.Model) ProviderClientOption { - return func(options *providerClientOptions) { - options.model = model - } -} - -func WithMaxTokens(maxTokens int64) ProviderClientOption { - return func(options *providerClientOptions) { - options.maxTokens = maxTokens - } -} - -func WithSystemMessage(systemMessage string) ProviderClientOption { - return func(options *providerClientOptions) { - options.systemMessage = systemMessage - } -} - -func WithAnthropicOptions(anthropicOptions ...AnthropicOption) ProviderClientOption { - return func(options *providerClientOptions) { - options.anthropicOptions = anthropicOptions - } -} - -func WithOpenAIOptions(openaiOptions ...OpenAIOption) ProviderClientOption { - return func(options *providerClientOptions) { - options.openaiOptions = openaiOptions - } -} - -func WithGeminiOptions(geminiOptions ...GeminiOption) ProviderClientOption { - return func(options *providerClientOptions) { - options.geminiOptions = geminiOptions - } -} - -func WithBedrockOptions(bedrockOptions ...BedrockOption) ProviderClientOption { - return func(options *providerClientOptions) { - options.bedrockOptions = bedrockOptions - } -} diff --git a/internal/llm/provider/vertexai.go b/internal/llm/provider/vertexai.go deleted file mode 100644 index 328d213fe..000000000 --- a/internal/llm/provider/vertexai.go +++ /dev/null @@ -1,34 +0,0 @@ -package provider - -import ( - "context" - "log/slog" - "os" - - "google.golang.org/genai" -) - -type VertexAIClient ProviderClient - -func newVertexAIClient(opts providerClientOptions) VertexAIClient { - geminiOpts := geminiOptions{} - for _, o := range opts.geminiOptions { - o(&geminiOpts) - } - - client, err := genai.NewClient(context.Background(), &genai.ClientConfig{ - Project: os.Getenv("VERTEXAI_PROJECT"), - Location: os.Getenv("VERTEXAI_LOCATION"), - Backend: genai.BackendVertexAI, - }) - if err != nil { - slog.Error("Failed to create VertexAI client", "error", err) - return nil - } - - return &geminiClient{ - providerOptions: opts, - options: geminiOpts, - client: client, - } -} |
