Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 24 additions & 0 deletions packages/types/src/providers/deepseek.ts
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,30 @@ export const deepSeekModels = {
cacheReadsPrice: 0.028, // $0.028 per million tokens (cache hit) - Updated Dec 9, 2025
description: `DeepSeek-V3.2 (Thinking Mode) achieves performance comparable to OpenAI-o1 across math, code, and reasoning tasks. Supports Chain of Thought reasoning with up to 8K output tokens. Supports JSON output, tool calls, and chat prefix completion (beta).`,
},
"deepseek-v4-flash": {
maxTokens: 8192,
contextWindow: 128_000,
supportsImages: false,
supportsPromptCache: true,
preserveReasoning: true,
inputPrice: 0.28,
outputPrice: 0.42,
cacheWritesPrice: 0.28,
cacheReadsPrice: 0.028,
description: `DeepSeek V4 Flash with thinking/reasoning support. Requires reasoning_content to be passed back during tool call sequences.`,
},
"deepseek-v4-pro": {
maxTokens: 8192,
contextWindow: 128_000,
supportsImages: false,
supportsPromptCache: true,
preserveReasoning: true,
inputPrice: 0.28,
outputPrice: 0.42,
cacheWritesPrice: 0.28,
cacheReadsPrice: 0.028,
description: `DeepSeek V4 Pro with thinking/reasoning support. Requires reasoning_content to be passed back during tool call sequences.`,
},
} as const satisfies Record<string, ModelInfo>

// https://api-docs.deepseek.com/quick_start/parameter_settings
Expand Down
59 changes: 58 additions & 1 deletion src/api/providers/__tests__/deepseek.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,8 @@ vi.mock("openai", () => {
}

// Check if this is a reasoning_content test by looking at model
const isReasonerModel = options.model?.includes("deepseek-reasoner")
const isReasonerModel =
options.model?.includes("deepseek-reasoner") || options.model?.includes("deepseek-v4")
const isToolCallTest = options.tools?.length > 0

// Return async iterator for streaming
Expand Down Expand Up @@ -240,6 +241,24 @@ describe("DeepSeekHandler", () => {
expect((model.info as ModelInfo).preserveReasoning).toBe(true)
})

it("should have preserveReasoning enabled for deepseek-v4-flash", () => {
	// Resolve the model metadata through a handler pinned to the V4 Flash model id.
	const { info } = new DeepSeekHandler({ ...mockOptions, apiModelId: "deepseek-v4-flash" }).getModel()

	// The flag must be exactly `true`, not merely truthy.
	expect((info as ModelInfo).preserveReasoning).toBe(true)
})

it("should have preserveReasoning enabled for deepseek-v4-pro", () => {
	// Resolve the model metadata through a handler pinned to the V4 Pro model id.
	const { info } = new DeepSeekHandler({ ...mockOptions, apiModelId: "deepseek-v4-pro" }).getModel()

	// The flag must be exactly `true`, not merely truthy.
	expect((info as ModelInfo).preserveReasoning).toBe(true)
})

it("should NOT have preserveReasoning enabled for deepseek-chat", () => {
// deepseek-chat doesn't use thinking mode, so no need to preserve reasoning
const model = handler.getModel()
Expand Down Expand Up @@ -459,6 +478,44 @@ describe("DeepSeekHandler", () => {
)
})

it("should pass thinking parameter for deepseek-v4-flash model", async () => {
	const flashHandler = new DeepSeekHandler({ ...mockOptions, apiModelId: "deepseek-v4-flash" })

	// Drain the stream so the request to the mocked client is actually issued.
	for await (const _ignored of flashHandler.createMessage(systemPrompt, messages)) {
		// Chunks themselves are irrelevant to this assertion.
	}

	// The request payload must carry the thinking toggle; the second argument is an empty options object.
	const expectedRequest = expect.objectContaining({ thinking: { type: "enabled" } })
	expect(mockCreate).toHaveBeenCalledWith(expectedRequest, {})
})

it("should pass thinking parameter for deepseek-v4-pro model", async () => {
	const proHandler = new DeepSeekHandler({ ...mockOptions, apiModelId: "deepseek-v4-pro" })

	// Drain the stream so the request to the mocked client is actually issued.
	for await (const _ignored of proHandler.createMessage(systemPrompt, messages)) {
		// Chunks themselves are irrelevant to this assertion.
	}

	// The request payload must carry the thinking toggle; the second argument is an empty options object.
	const expectedRequest = expect.objectContaining({ thinking: { type: "enabled" } })
	expect(mockCreate).toHaveBeenCalledWith(expectedRequest, {})
})

it("should NOT pass thinking parameter for deepseek-chat model", async () => {
const chatHandler = new DeepSeekHandler({
...mockOptions,
Expand Down
5 changes: 3 additions & 2 deletions src/api/providers/deepseek.ts
Original file line number Diff line number Diff line change
Expand Up @@ -55,8 +55,9 @@ export class DeepSeekHandler extends OpenAiHandler {
const modelId = this.options.apiModelId ?? deepSeekDefaultModelId
const { info: modelInfo } = this.getModel()

// Check if this is a thinking-enabled model (deepseek-reasoner)
const isThinkingModel = modelId.includes("deepseek-reasoner")
// Check if this is a thinking-enabled model by looking at the model info's preserveReasoning flag.
// This covers deepseek-reasoner and newer models like deepseek-v4-flash/deepseek-v4-pro.
const isThinkingModel = "preserveReasoning" in modelInfo && modelInfo.preserveReasoning === true

// Convert messages to R1 format (merges consecutive same-role messages)
// This is required for DeepSeek which does not support successive messages with the same role
Expand Down
3 changes: 2 additions & 1 deletion src/api/providers/openai.ts
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,8 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl
const modelId = this.options.openAiModelId ?? ""
const enabledR1Format = this.options.openAiR1FormatEnabled ?? false
const isAzureAiInference = this._isAzureAiInference(modelUrl)
const deepseekReasoner = modelId.includes("deepseek-reasoner") || enabledR1Format
const deepseekReasoner =
modelId.includes("deepseek-reasoner") || modelId.includes("deepseek-v4") || enabledR1Format

if (modelId.includes("o1") || modelId.includes("o3") || modelId.includes("o4")) {
yield* this.handleO3FamilyMessage(modelId, systemPrompt, messages, metadata)
Expand Down
Loading