Skip to content

Commit d9783f1

Browse files
authored
Improve freebuff request throughput (#651)
1 parent 0ecc385 commit d9783f1

4 files changed

Lines changed: 329 additions & 131 deletions

File tree

packages/billing/src/balance-calculator.ts

Lines changed: 127 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,30 @@ export interface CreditConsumptionResult {
3838
fromPurchased: number
3939
}
4040

41+
/**
 * Input accepted by `recordMessageWithoutBilling`: identifiers, model
 * output, cost/credit figures and token counts for one message, plus the
 * logger used to report insert failures.
 *
 * `finishedAt` and `latencyMs` are optional; `recordMessageWithoutBilling`
 * derives them when they are omitted.
 */
export type MessageRecordParams = {
42+
messageId: string
43+
userId: string
44+
agentId: string
45+
clientId: string | null
46+
clientRequestId: string | null
47+
startTime: Date
48+
model: string
49+
reasoningText: string
50+
response: string
51+
cost: number
52+
credits: number
53+
byok: boolean
54+
inputTokens: number
55+
cacheCreationInputTokens: number | null
56+
cacheReadInputTokens: number
57+
reasoningTokens: number | null
58+
outputTokens: number
59+
ttftMs: number | null
60+
logger: Logger
61+
// Optional: recordMessageWithoutBilling falls back to `new Date()`.
finishedAt?: Date
62+
// Optional: recordMessageWithoutBilling falls back to finishedAt - startTime.
latencyMs?: number
63+
}
64+
4165
// Add a minimal structural type that both `db` and `tx` satisfy
4266
type DbConn = Pick<
4367
typeof db,
@@ -148,7 +172,14 @@ export async function updateGrantBalance(params: {
148172
tx: DbConn
149173
logger: Logger
150174
}) {
151-
const { userId: _userId, grant, consumed: _consumed, newBalance, tx, logger: _logger } = params
175+
const {
176+
userId: _userId,
177+
grant,
178+
consumed: _consumed,
179+
newBalance,
180+
tx,
181+
logger: _logger,
182+
} = params
152183
await tx
153184
.update(schema.creditLedger)
154185
.set({ balance: newBalance })
@@ -282,8 +313,14 @@ export async function calculateUsageAndBalance(
282313
includeSubscriptionCredits: false,
283314
...params,
284315
}
285-
const { userId, quotaResetDate, now, isPersonalContext, includeSubscriptionCredits, logger } =
286-
withDefaults
316+
const {
317+
userId,
318+
quotaResetDate,
319+
now,
320+
isPersonalContext,
321+
includeSubscriptionCredits,
322+
logger,
323+
} = withDefaults
287324

288325
// Get all relevant grants in one query, using the provided connection
289326
const grants = await getOrderedActiveGrants(withDefaults)
@@ -328,7 +365,11 @@ export async function calculateUsageAndBalance(
328365
// Skip subscription credits for personal context unless explicitly included
329366
// (subscription credits are shown separately in the CLI with progress bars,
330367
// but need to be included for credit gating after ensureSubscriberBlockGrant)
331-
if (isPersonalContext && grantType === 'subscription' && !includeSubscriptionCredits) {
368+
if (
369+
isPersonalContext &&
370+
grantType === 'subscription' &&
371+
!includeSubscriptionCredits
372+
) {
332373
continue
333374
}
334375

@@ -506,6 +547,78 @@ function extractPostgresErrorDetails(error: unknown): Record<string, unknown> {
506547
return details
507548
}
508549

550+
/**
 * Inserts one row into `schema.message` for the given message. The body
 * performs only this insert; no credit-consumption call appears here.
 *
 * - Returns immediately, writing nothing, when `userId` is `TEST_USER_ID`.
 * - `finishedAt` defaults to the current time and `latencyMs` to
 *   `finishedAt - startTime` when the caller does not supply them.
 * - A row whose `id` already exists is left untouched (conflict on
 *   `schema.message.id` is ignored).
 * - Insert failures are logged through `params.logger` and NOT rethrown, so
 *   the returned promise resolves even when the row could not be written.
 *
 * @param params - Message fields to persist; see `MessageRecordParams`.
 * @returns A promise that resolves once the insert attempt has finished.
 */
export async function recordMessageWithoutBilling(
551+
params: MessageRecordParams,
552+
): Promise<void> {
553+
const {
554+
messageId,
555+
userId,
556+
agentId,
557+
clientId,
558+
clientRequestId,
559+
startTime,
560+
model,
561+
reasoningText,
562+
response,
563+
cost,
564+
credits,
565+
byok,
566+
inputTokens,
567+
cacheCreationInputTokens,
568+
cacheReadInputTokens,
569+
reasoningTokens,
570+
outputTokens,
571+
ttftMs,
572+
logger,
573+
} = params
574+
575+
// Nothing is written for the test user.
if (userId === TEST_USER_ID) {
576+
return
577+
}
578+
579+
// Caller-supplied values win; otherwise derive them here.
const finishedAt = params.finishedAt ?? new Date()
580+
const latencyMs =
581+
params.latencyMs ?? finishedAt.getTime() - startTime.getTime()
582+
583+
try {
584+
await db
585+
.insert(schema.message)
586+
.values({
587+
id: messageId,
588+
agent_id: agentId,
589+
finished_at: finishedAt,
590+
client_id: clientId,
591+
client_request_id: clientRequestId,
592+
model,
593+
reasoning_text: reasoningText,
594+
response,
595+
input_tokens: inputTokens,
596+
cache_creation_input_tokens: cacheCreationInputTokens,
597+
cache_read_input_tokens: cacheReadInputTokens,
598+
reasoning_tokens: reasoningTokens,
599+
output_tokens: outputTokens,
600+
// The numeric cost is passed to the insert as a string.
cost: cost.toString(),
601+
credits,
602+
byok,
603+
latency_ms: latencyMs,
604+
ttft_ms: ttftMs,
605+
user_id: userId,
606+
})
607+
// An existing row with the same id is kept as-is.
.onConflictDoNothing({ target: schema.message.id })
608+
} catch (error) {
609+
// Best-effort write: log the failure with context and swallow it.
logger.error(
610+
{
611+
messageId,
612+
userId,
613+
agentId,
614+
error: getErrorObject(error),
615+
pgDetails: extractPostgresErrorDetails(error),
616+
},
617+
'Failed to insert message row',
618+
)
619+
}
620+
}
621+
509622
export async function consumeCreditsAndAddAgentStep(params: {
510623
messageId: string
511624
userId: string
@@ -704,51 +817,21 @@ export async function consumeCreditsAndAddAgentStep(params: {
704817
// Always record the message row. If billing failed, mark credits=0 so the
705818
// audit row still exists — the row being absent is how OR costs leaked before.
706819
const recordedCredits = billingError === null ? credits : 0
707-
708-
try {
709-
await db
710-
.insert(schema.message)
711-
.values({
712-
id: messageId,
713-
agent_id: agentId,
714-
finished_at: new Date(),
715-
client_id: clientId,
716-
client_request_id: clientRequestId,
717-
model,
718-
reasoning_text: reasoningText,
719-
response,
720-
input_tokens: inputTokens,
721-
cache_creation_input_tokens: cacheCreationInputTokens,
722-
cache_read_input_tokens: cacheReadInputTokens,
723-
reasoning_tokens: reasoningTokens,
724-
output_tokens: outputTokens,
725-
cost: cost.toString(),
726-
credits: recordedCredits,
727-
byok,
728-
latency_ms: latencyMs,
729-
ttft_ms: ttftMs,
730-
user_id: userId,
731-
})
732-
.onConflictDoNothing({ target: schema.message.id })
733-
} catch (error) {
734-
logger.error(
735-
{
736-
messageId,
737-
userId,
738-
agentId,
739-
error: getErrorObject(error),
740-
pgDetails: extractPostgresErrorDetails(error),
741-
},
742-
'Failed to insert message row',
743-
)
744-
}
820+
await recordMessageWithoutBilling({
821+
...params,
822+
credits: recordedCredits,
823+
finishedAt,
824+
latencyMs,
825+
})
745826

746827
if (billingError) {
747828
return failure(billingError)
748829
}
749830

750-
const finalResult: CreditConsumptionResult =
751-
consumeResult ?? { consumed: 0, fromPurchased: 0 }
831+
const finalResult: CreditConsumptionResult = consumeResult ?? {
832+
consumed: 0,
833+
fromPurchased: 0,
834+
}
752835

753836
logger.info(
754837
{

web/src/app/api/v1/chat/completions/__tests__/completions.test.ts

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -566,6 +566,51 @@ describe('/api/v1/chat/completions POST endpoint', () => {
566566
FETCH_PATH_TEST_TIMEOUT_MS,
567567
)
568568

569+
// Sends a non-streaming free-mode POST (cost_mode 'free', with a
// freebuff_instance_id) carrying 'cf-ipcountry': 'T1', and injects a
// checkSessionAdmissible stub that always admits the session as 'active'.
// Expects a 200 response and that mockGetUserUsageData is never called.
// NOTE(review): 'T1' is presumably a country value the normal check would
// reject — confirm against the route's country gating.
it(
570+
'skips duplicate country checks when an active freebuff session gate admits the request',
571+
async () => {
572+
const req = new NextRequest(
573+
'http://localhost:3000/api/v1/chat/completions',
574+
{
575+
method: 'POST',
576+
headers: {
577+
Authorization: 'Bearer test-api-key-new-free',
578+
'cf-ipcountry': 'T1',
579+
'x-forwarded-for': '8.8.8.8',
580+
},
581+
body: JSON.stringify({
582+
model: 'minimax/minimax-m2.7',
583+
stream: false,
584+
codebuff_metadata: {
585+
run_id: 'run-free',
586+
client_id: 'test-client-id-123',
587+
cost_mode: 'free',
588+
freebuff_instance_id: 'active-instance-123',
589+
},
590+
}),
591+
},
592+
)
593+
594+
const response = await postChatCompletions({
595+
req,
596+
getUserInfoFromApiKey: mockGetUserInfoFromApiKey,
597+
logger: mockLogger,
598+
trackEvent: mockTrackEvent,
599+
getUserUsageData: mockGetUserUsageData,
600+
getAgentRunFromId: mockGetAgentRunFromId,
601+
fetch: mockFetch,
602+
insertMessageBigquery: mockInsertMessageBigquery,
603+
loggerWithContext: mockLoggerWithContext,
604+
// Stubbed session gate: always reports an admitted, active session.
checkSessionAdmissible: async () =>
605+
({ ok: true, reason: 'active', remainingMs: 60_000 }) as const,
606+
})
607+
608+
expect(response.status).toBe(200)
609+
expect(mockGetUserUsageData).not.toHaveBeenCalled()
610+
},
611+
FETCH_PATH_TEST_TIMEOUT_MS,
612+
)
613+
569614
it(
570615
'lets a BYOK free-tier new account through the paid-plan gate',
571616
async () => {

0 commit comments

Comments
 (0)