From 4ef7e801025aef660d14d282d33be54ac1b35a21 Mon Sep 17 00:00:00 2001
From: James Grugett
Date: Tue, 12 May 2026 12:05:26 -0700
Subject: [PATCH] skip token-count request for deepseek flash freebuff

---
 common/src/__tests__/free-agents.test.ts     | 34 +++++++++++
 common/src/constants/free-agents.ts          | 22 +++++++
 packages/agent-runtime/src/run-agent-step.ts | 58 +++++++++++--------
 3 files changed, 91 insertions(+), 23 deletions(-)

diff --git a/common/src/__tests__/free-agents.test.ts b/common/src/__tests__/free-agents.test.ts
index ed52eb0a5e..d45d612b70 100644
--- a/common/src/__tests__/free-agents.test.ts
+++ b/common/src/__tests__/free-agents.test.ts
@@ -12,6 +12,7 @@ import {
   getFreebuffRootAgentIdForModel,
   isFreebuffGeminiThinkerAgent,
   isFreeModeAllowedAgentModel,
+  shouldUseLocalTokenCountForFreebuffDeepseekFlash,
 } from '../constants/free-agents'
 
 describe('free mode agent model allowlist', () => {
@@ -168,4 +169,37 @@ describe('free mode agent model allowlist', () => {
       ),
     ).toBe(false)
   })
+
+  test('uses local token count only for the DeepSeek Flash freebuff root', () => {
+    expect(
+      shouldUseLocalTokenCountForFreebuffDeepseekFlash({
+        agentId: 'base2-free-deepseek-flash',
+        model: FREEBUFF_DEEPSEEK_V4_FLASH_MODEL_ID,
+      }),
+    ).toBe(true)
+    expect(
+      shouldUseLocalTokenCountForFreebuffDeepseekFlash({
+        agentId: 'codebuff/base2-free-deepseek-flash@0.0.1',
+        model: FREEBUFF_DEEPSEEK_V4_FLASH_MODEL_ID,
+      }),
+    ).toBe(true)
+    expect(
+      shouldUseLocalTokenCountForFreebuffDeepseekFlash({
+        agentId: 'base2-free-deepseek',
+        model: FREEBUFF_DEEPSEEK_V4_FLASH_MODEL_ID,
+      }),
+    ).toBe(false)
+    expect(
+      shouldUseLocalTokenCountForFreebuffDeepseekFlash({
+        agentId: 'base2-free-deepseek-flash',
+        model: FREEBUFF_DEEPSEEK_V4_PRO_MODEL_ID,
+      }),
+    ).toBe(false)
+    expect(
+      shouldUseLocalTokenCountForFreebuffDeepseekFlash({
+        agentId: 'other/base2-free-deepseek-flash@0.0.1',
+        model: FREEBUFF_DEEPSEEK_V4_FLASH_MODEL_ID,
+      }),
+    ).toBe(false)
+  })
 })
diff --git a/common/src/constants/free-agents.ts b/common/src/constants/free-agents.ts
index 535056331d..2d1a55c7ff 100644
--- a/common/src/constants/free-agents.ts
+++ b/common/src/constants/free-agents.ts
@@ -161,6 +161,28 @@ export function isFreebuffGeminiThinkerAgent(fullAgentId: string): boolean {
   return agentId === FREEBUFF_GEMINI_THINKER_AGENT_ID
 }
 
+/**
+ * Whether a run should estimate its context size locally instead of calling
+ * the token-count API.
+ *
+ * Only true for the `base2-free-deepseek-flash` root agent (any version) on
+ * the DeepSeek V4 Flash freebuff model. An id without a publisher is accepted;
+ * any publisher other than `codebuff` is rejected.
+ */
+export function shouldUseLocalTokenCountForFreebuffDeepseekFlash(params: {
+  agentId: string | undefined
+  model: string | undefined
+}): boolean {
+  const { agentId: fullAgentId, model } = params
+  if (!fullAgentId || model !== FREEBUFF_DEEPSEEK_V4_FLASH_MODEL_ID) {
+    return false
+  }
+
+  const { publisherId, agentId } = parseAgentId(fullAgentId)
+  if (publisherId && publisherId !== 'codebuff') return false
+  return agentId === 'base2-free-deepseek-flash'
+}
+
 /**
  * Check if a specific agent is allowed to use a specific model in FREE mode.
  * This is the strictest check - validates both the agent AND model combination.
diff --git a/packages/agent-runtime/src/run-agent-step.ts b/packages/agent-runtime/src/run-agent-step.ts
index 38af3ae650..40c858d632 100644
--- a/packages/agent-runtime/src/run-agent-step.ts
+++ b/packages/agent-runtime/src/run-agent-step.ts
@@ -1,4 +1,5 @@
 import { AnalyticsEvent } from '@codebuff/common/constants/analytics-events'
+import { shouldUseLocalTokenCountForFreebuffDeepseekFlash } from '@codebuff/common/constants/free-agents'
 import { supportsCacheControl } from '@codebuff/common/old-constants'
 import { TOOLS_WHICH_WONT_FORCE_NEXT_STEP } from '@codebuff/common/tools/constants'
 import { buildArray } from '@codebuff/common/util/array'
@@ -864,29 +865,40 @@ export async function loopAgentSteps(
         }),
       )
 
-      // Check context token count via Anthropic API
-      const tokenCountResult = await callTokenCountAPI({
-        messages: messagesWithStepPrompt,
-        system,
-        model: agentTemplate.model,
-        tools: toolsForTokenCount,
-        fetch,
-        logger,
-        env: { clientEnv, ciEnv },
-      })
-      if (tokenCountResult.inputTokens !== undefined) {
-        currentAgentState.contextTokenCount = tokenCountResult.inputTokens
-      } else if (tokenCountResult.error) {
-        logger.warn(
-          { error: tokenCountResult.error },
-          'Failed to get token count from Anthropic API',
-        )
-        // Fall back to local estimate
-        const estimatedTokens =
-          countTokensJson(currentAgentState.messageHistory) +
-          countTokensJson(system) +
-          countTokensJson(toolDefinitions)
-        currentAgentState.contextTokenCount = estimatedTokens
+      // Local estimate of the same payload the token-count API would measure.
+      const estimateContextTokensLocally = () =>
+        countTokensJson(messagesWithStepPrompt) +
+        countTokensJson(system) +
+        countTokensJson(toolsForTokenCount)
+
+      if (
+        shouldUseLocalTokenCountForFreebuffDeepseekFlash({
+          agentId: agentTemplate.id,
+          model: agentTemplate.model,
+        })
+      ) {
+        currentAgentState.contextTokenCount = estimateContextTokensLocally()
+      } else {
+        // Check context token count via the web API.
+        const tokenCountResult = await callTokenCountAPI({
+          messages: messagesWithStepPrompt,
+          system,
+          model: agentTemplate.model,
+          tools: toolsForTokenCount,
+          fetch,
+          logger,
+          env: { clientEnv, ciEnv },
+        })
+        if (tokenCountResult.inputTokens !== undefined) {
+          currentAgentState.contextTokenCount = tokenCountResult.inputTokens
+        } else if (tokenCountResult.error) {
+          logger.warn(
+            { error: tokenCountResult.error },
+            'Failed to get token count from web API',
+          )
+          // Fall back to the local estimate of the same request payload.
+          currentAgentState.contextTokenCount = estimateContextTokensLocally()
+        }
       }
 
       // 1. Run programmatic step first if it exists