diff --git a/packages/types/src/providers/deepseek.ts b/packages/types/src/providers/deepseek.ts index 40722471cb..890e965238 100644 --- a/packages/types/src/providers/deepseek.ts +++ b/packages/types/src/providers/deepseek.ts @@ -32,6 +32,30 @@ export const deepSeekModels = { cacheReadsPrice: 0.028, // $0.028 per million tokens (cache hit) - Updated Dec 9, 2025 description: `DeepSeek-V3.2 (Thinking Mode) achieves performance comparable to OpenAI-o1 across math, code, and reasoning tasks. Supports Chain of Thought reasoning with up to 8K output tokens. Supports JSON output, tool calls, and chat prefix completion (beta).`, }, + "deepseek-v4-flash": { + maxTokens: 8192, + contextWindow: 128_000, + supportsImages: false, + supportsPromptCache: true, + preserveReasoning: true, + inputPrice: 0.28, + outputPrice: 0.42, + cacheWritesPrice: 0.28, + cacheReadsPrice: 0.028, + description: `DeepSeek V4 Flash with thinking/reasoning support. Requires reasoning_content to be passed back during tool call sequences.`, + }, + "deepseek-v4-pro": { + maxTokens: 8192, + contextWindow: 128_000, + supportsImages: false, + supportsPromptCache: true, + preserveReasoning: true, + inputPrice: 0.28, + outputPrice: 0.42, + cacheWritesPrice: 0.28, + cacheReadsPrice: 0.028, + description: `DeepSeek V4 Pro with thinking/reasoning support. 
Requires reasoning_content to be passed back during tool call sequences.`, + }, } as const satisfies Record<string, ModelInfo> // https://api-docs.deepseek.com/quick_start/parameter_settings diff --git a/src/api/providers/__tests__/deepseek.spec.ts b/src/api/providers/__tests__/deepseek.spec.ts index cbbc61ad4d..e159b14364 100644 --- a/src/api/providers/__tests__/deepseek.spec.ts +++ b/src/api/providers/__tests__/deepseek.spec.ts @@ -30,7 +30,8 @@ vi.mock("openai", () => { } // Check if this is a reasoning_content test by looking at model - const isReasonerModel = options.model?.includes("deepseek-reasoner") + const isReasonerModel = + options.model?.includes("deepseek-reasoner") || options.model?.includes("deepseek-v4") const isToolCallTest = options.tools?.length > 0 // Return async iterator for streaming @@ -240,6 +241,24 @@ describe("DeepSeekHandler", () => { expect((model.info as ModelInfo).preserveReasoning).toBe(true) }) + it("should have preserveReasoning enabled for deepseek-v4-flash", () => { + const handlerV4Flash = new DeepSeekHandler({ + ...mockOptions, + apiModelId: "deepseek-v4-flash", + }) + const model = handlerV4Flash.getModel() + expect((model.info as ModelInfo).preserveReasoning).toBe(true) + }) + + it("should have preserveReasoning enabled for deepseek-v4-pro", () => { + const handlerV4Pro = new DeepSeekHandler({ + ...mockOptions, + apiModelId: "deepseek-v4-pro", + }) + const model = handlerV4Pro.getModel() + expect((model.info as ModelInfo).preserveReasoning).toBe(true) + }) + it("should NOT have preserveReasoning enabled for deepseek-chat", () => { // deepseek-chat doesn't use thinking mode, so no need to preserve reasoning const model = handler.getModel() @@ -459,6 +478,44 @@ describe("DeepSeekHandler", () => { ) }) + it("should pass thinking parameter for deepseek-v4-flash model", async () => { + const v4FlashHandler = new DeepSeekHandler({ + ...mockOptions, + apiModelId: "deepseek-v4-flash", + }) + + const stream = v4FlashHandler.createMessage(systemPrompt, 
messages) + for await (const _chunk of stream) { + // Consume the stream + } + + expect(mockCreate).toHaveBeenCalledWith( + expect.objectContaining({ + thinking: { type: "enabled" }, + }), + {}, + ) + }) + + it("should pass thinking parameter for deepseek-v4-pro model", async () => { + const v4ProHandler = new DeepSeekHandler({ + ...mockOptions, + apiModelId: "deepseek-v4-pro", + }) + + const stream = v4ProHandler.createMessage(systemPrompt, messages) + for await (const _chunk of stream) { + // Consume the stream + } + + expect(mockCreate).toHaveBeenCalledWith( + expect.objectContaining({ + thinking: { type: "enabled" }, + }), + {}, + ) + }) + it("should NOT pass thinking parameter for deepseek-chat model", async () => { const chatHandler = new DeepSeekHandler({ ...mockOptions, diff --git a/src/api/providers/deepseek.ts b/src/api/providers/deepseek.ts index 84cd557de0..b135f6c31d 100644 --- a/src/api/providers/deepseek.ts +++ b/src/api/providers/deepseek.ts @@ -55,8 +55,9 @@ export class DeepSeekHandler extends OpenAiHandler { const modelId = this.options.apiModelId ?? deepSeekDefaultModelId const { info: modelInfo } = this.getModel() - // Check if this is a thinking-enabled model (deepseek-reasoner) - const isThinkingModel = modelId.includes("deepseek-reasoner") + // Check if this is a thinking-enabled model by looking at the model info's preserveReasoning flag. + // This covers deepseek-reasoner and newer models like deepseek-v4-flash/deepseek-v4-pro. 
+ const isThinkingModel = "preserveReasoning" in modelInfo && modelInfo.preserveReasoning === true // Convert messages to R1 format (merges consecutive same-role messages) // This is required for DeepSeek which does not support successive messages with the same role diff --git a/src/api/providers/openai.ts b/src/api/providers/openai.ts index 33b29abcaf..d8dda1712d 100644 --- a/src/api/providers/openai.ts +++ b/src/api/providers/openai.ts @@ -89,7 +89,8 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl const modelId = this.options.openAiModelId ?? "" const enabledR1Format = this.options.openAiR1FormatEnabled ?? false const isAzureAiInference = this._isAzureAiInference(modelUrl) - const deepseekReasoner = modelId.includes("deepseek-reasoner") || enabledR1Format + const deepseekReasoner = + modelId.includes("deepseek-reasoner") || modelId.includes("deepseek-v4") || enabledR1Format if (modelId.includes("o1") || modelId.includes("o3") || modelId.includes("o4")) { yield* this.handleO3FamilyMessage(modelId, systemPrompt, messages, metadata)