-
Notifications
You must be signed in to change notification settings - Fork 52
fix: avoid local completion budget truncation #85
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
77b64d9
efd8128
2c6a196
72e1548
3319815
c93400d
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,6 @@ | ||
| --- | ||
| "@moonshot-ai/agent-core": patch | ||
| "@moonshot-ai/kimi-code": patch | ||
| --- | ||
|
|
||
| Avoid overly small local completion caps that can truncate reasoning before summaries are produced. |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,28 +1,14 @@ | ||
| import type { | ||
| ChatProvider, | ||
| Message, | ||
| ModelCapability, | ||
| Tool, | ||
| } from '@moonshot-ai/kosong'; | ||
|
|
||
| import { | ||
| estimateTokens, | ||
| estimateTokensForMessages, | ||
| estimateTokensForTools, | ||
| } from './tokens'; | ||
| import type { ChatProvider, ModelCapability } from '@moonshot-ai/kosong'; | ||
|
|
||
| /** Completion-token budget for the next LLM request. */ | ||
| export interface CompletionBudgetConfig { | ||
| /** Explicit user-configured maximum. */ | ||
| readonly hardCap?: number; | ||
| /** Conservative cap for providers/models whose context window is unknown. */ | ||
| readonly fallback?: number; | ||
| /** Tokens kept out of the output budget to absorb estimation drift. */ | ||
| readonly safetyMargin?: number; | ||
| } | ||
|
|
||
| const MIN_FLOOR = 1; | ||
| const DEFAULT_SAFETY_MARGIN = 1024; | ||
| const DEFAULT_UNKNOWN_CONTEXT_FALLBACK = 32000; | ||
|
|
||
| /** | ||
|
|
@@ -59,36 +45,20 @@ function parseEnvBudget(raw: string | undefined): EnvBudget { | |
| } | ||
|
|
||
| /** | ||
| * Compute the effective `max_completion_tokens` cap. Known-context requests | ||
| * use the remaining window unless a hard cap is configured. | ||
| * Compute the effective `max_completion_tokens` cap. | ||
| */ | ||
| export function computeCompletionBudgetCap(args: { | ||
| readonly budget: CompletionBudgetConfig; | ||
| readonly capability: ModelCapability | undefined; | ||
| readonly messages: readonly Message[]; | ||
| readonly systemPrompt?: string; | ||
| readonly tools?: readonly Tool[]; | ||
| }): number { | ||
| const safetyMargin = args.budget.safetyMargin ?? DEFAULT_SAFETY_MARGIN; | ||
| const maxCtx = args.capability?.max_context_tokens ?? 0; | ||
| if (maxCtx <= 0) { | ||
| return Math.max( | ||
| MIN_FLOOR, | ||
| args.budget.hardCap ?? args.budget.fallback ?? DEFAULT_UNKNOWN_CONTEXT_FALLBACK, | ||
| ); | ||
| } | ||
| const input = | ||
| estimateTokensForMessages([...args.messages]) + | ||
| estimateTokens(args.systemPrompt ?? '') + | ||
| estimateTokensForTools(args.tools ?? []); | ||
| const remaining = maxCtx - input - safetyMargin; | ||
| if (remaining <= 0) { | ||
| return MIN_FLOOR; | ||
| } | ||
| if (args.budget.hardCap === undefined) { | ||
| return Math.max(MIN_FLOOR, remaining); | ||
| } | ||
| return Math.max(MIN_FLOOR, Math.min(args.budget.hardCap, remaining)); | ||
| // The provider backend computes the safe request-specific value from the | ||
| // serialized prompt. Locally using the largest cap avoids cutting off | ||
| // thinking before the model produces a summary. | ||
| const cap = | ||
| args.budget.hardCap ?? | ||
| (maxCtx > 0 ? maxCtx : args.budget.fallback ?? DEFAULT_UNKNOWN_CONTEXT_FALLBACK); | ||
|
Comment on lines
+58
to
+60
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Useful? React with 👍 / 👎. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Setting the cap to full Useful? React with 👍 / 👎. |
||
| return Math.max(MIN_FLOOR, cap); | ||
| } | ||
|
|
||
| /** | ||
|
|
@@ -105,18 +75,12 @@ export function applyCompletionBudget(args: { | |
| readonly provider: ChatProvider; | ||
| readonly budget: CompletionBudgetConfig | undefined; | ||
| readonly capability: ModelCapability | undefined; | ||
| readonly messages: readonly Message[]; | ||
| readonly systemPrompt?: string; | ||
| readonly tools?: readonly Tool[]; | ||
| }): ChatProvider { | ||
| if (args.budget === undefined) return args.provider; | ||
| if (args.provider.withMaxCompletionTokens === undefined) return args.provider; | ||
| const cap = computeCompletionBudgetCap({ | ||
| budget: args.budget, | ||
| capability: args.capability, | ||
| messages: args.messages, | ||
| systemPrompt: args.systemPrompt, | ||
| tools: args.tools, | ||
| }); | ||
| return args.provider.withMaxCompletionTokens(cap); | ||
| } | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
computeCompletionBudgetCapnow prefersbudget.hardCapverbatim, so ifKIMI_MODEL_MAX_COMPLETION_TOKENS/KIMI_MODEL_MAX_TOKENSis set above the model’s context window, every request can carry an impossible completion limit and fail with provider-side invalid-request/context-limit errors. Before this commit, explicit caps were still clamped by available context, so this change introduces hard failures for oversized but previously tolerable operator configuration.Useful? React with 👍 / 👎.