-
-
Notifications
You must be signed in to change notification settings - Fork 66
fix(streaming): normalize non-SSE upstream chat completions #412
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
+242
−3
Merged
Changes from all commits
Commits
Show all changes
4 commits
Select commit
Hold shift + click to select a range
c999793
fix(streaming): normalize non-SSE upstream chat completions
SantiagoDePolonia a88abd9
fix(streaming): widen non-SSE normalization and avoid passthrough stalls
SantiagoDePolonia ee8ef7f
fix(streaming): avoid tainted size arithmetic in SSE buffer build
SantiagoDePolonia de658e3
fix(streaming): guard nil xai request and drop dead empty-body branch
SantiagoDePolonia File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,113 @@ | ||
| package providers | ||
|
|
||
| import ( | ||
| "bufio" | ||
| "bytes" | ||
| "encoding/json" | ||
| "io" | ||
| ) | ||
|
|
||
| // chatDonePayload terminates a chat completions SSE stream. | ||
| var chatDonePayload = []byte("data: [DONE]\n\n") | ||
|
|
||
| // peekForNonSSE inspects up to this many leading bytes to classify the upstream | ||
| // response. SSE payloads begin with a field name (data:, event:, id:, retry:) or | ||
| // a ':' comment; a buffered JSON completion begins with '{'. 512 bytes comfortably | ||
| // clears any leading whitespace or comment lines without buffering real streams. | ||
| const peekForNonSSE = 512 | ||
|
|
||
| // EnsureChatCompletionSSE normalizes a chat completions stream so the client | ||
| // always receives well-formed Server-Sent Events terminated by data: [DONE]. | ||
| // | ||
| // Some OpenAI-compatible upstreams ignore stream:true and reply with a single | ||
| // buffered application/json completion (no data: framing, no [DONE]). Forwarding | ||
| // that verbatim under a text/event-stream content type leaves SSE clients waiting | ||
| // forever for an end-of-stream marker that never arrives. When the upstream body | ||
| // is detected as a buffered JSON object it is re-emitted as one SSE chunk plus a | ||
| // terminal [DONE]; genuine SSE streams pass through untouched with no buffering. | ||
| func EnsureChatCompletionSSE(stream io.ReadCloser) io.ReadCloser { | ||
| if stream == nil { | ||
| return nil | ||
| } | ||
|
|
||
| reader := bufio.NewReaderSize(stream, peekForNonSSE) | ||
| if firstNonSpaceByte(reader, peekForNonSSE) != '{' { | ||
| // Genuine SSE (or empty): stream through unchanged, no buffering. | ||
| return &bufferedReadCloser{Reader: reader, closer: stream} | ||
| } | ||
|
|
||
| // The '{' that classified this body is already buffered, so io.ReadAll | ||
| // always returns at least that byte; a mid-read failure still yields the | ||
| // partial bytes. Either way bufferedCompletionToSSE forwards what arrived | ||
| // (raw when the JSON is truncated) and appends [DONE], so generated content | ||
| // is never dropped and the client always receives a terminator. | ||
| body, _ := io.ReadAll(reader) | ||
| _ = stream.Close() //nolint:errcheck | ||
| return io.NopCloser(bytes.NewReader(bufferedCompletionToSSE(body))) | ||
| } | ||
|
|
||
| // firstNonSpaceByte reports the first non-whitespace byte buffered by reader, | ||
| // peeking one byte further at a time so a genuine SSE stream is classified from | ||
| // its first token without blocking until a full buffer fills. It never consumes | ||
| // input, so a passed-through stream is forwarded byte-for-byte. Returns 0 when | ||
| // the stream ends, errors, or yields only whitespace within max bytes. | ||
| func firstNonSpaceByte(r *bufio.Reader, max int) byte { | ||
| for i := 1; i <= max; i++ { | ||
| prefix, err := r.Peek(i) | ||
| if len(prefix) < i { | ||
| _ = err // EOF or error before any non-space byte was found | ||
| return 0 | ||
| } | ||
| switch b := prefix[i-1]; b { | ||
| case ' ', '\t', '\r', '\n': | ||
| continue | ||
| default: | ||
| return b | ||
| } | ||
| } | ||
| return 0 | ||
| } | ||
|
|
||
| // bufferedCompletionToSSE wraps a buffered chat completion JSON object as a | ||
| // single SSE chunk followed by the terminal [DONE] marker. The object field is | ||
| // rewritten to chat.completion.chunk and each choice's message is moved to delta | ||
| // so OpenAI SSE clients parse it as a streaming chunk. If the body does not parse | ||
| // as a JSON object it is forwarded as-is so no data is lost, still followed by | ||
| // [DONE] so the client stops waiting. | ||
| func bufferedCompletionToSSE(body []byte) []byte { | ||
| payload := body | ||
| var obj map[string]any | ||
| if err := json.Unmarshal(body, &obj); err == nil { | ||
| obj["object"] = "chat.completion.chunk" | ||
| if choices, ok := obj["choices"].([]any); ok { | ||
| for _, c := range choices { | ||
| choice, ok := c.(map[string]any) | ||
| if !ok { | ||
| continue | ||
| } | ||
| if msg, ok := choice["message"]; ok { | ||
| choice["delta"] = msg | ||
| delete(choice, "message") | ||
| } | ||
| } | ||
| } | ||
| if encoded, err := json.Marshal(obj); err == nil { | ||
| payload = encoded | ||
| } | ||
| } | ||
|
|
||
| var out bytes.Buffer | ||
| out.WriteString("data: ") | ||
| out.Write(payload) | ||
| out.WriteString("\n\n") | ||
| out.Write(chatDonePayload) | ||
| return out.Bytes() | ||
| } | ||
|
|
||
| // bufferedReadCloser pairs a buffered reader with the underlying stream's Close. | ||
| type bufferedReadCloser struct { | ||
| *bufio.Reader | ||
| closer io.Closer | ||
| } | ||
|
|
||
| func (b *bufferedReadCloser) Close() error { return b.closer.Close() } |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,111 @@ | ||
| package providers | ||
|
|
||
| import ( | ||
| "io" | ||
| "strings" | ||
| "testing" | ||
| ) | ||
|
|
||
| // errAfterReadCloser yields its data once, then fails — simulating a connection | ||
| // that drops mid-body after some bytes have arrived. | ||
| type errAfterReadCloser struct { | ||
| data []byte | ||
| err error | ||
| done bool | ||
| } | ||
|
|
||
| func (r *errAfterReadCloser) Read(p []byte) (int, error) { | ||
| if r.done { | ||
| return 0, r.err | ||
| } | ||
| n := copy(p, r.data) | ||
| r.data = r.data[n:] | ||
| if len(r.data) == 0 { | ||
| r.done = true | ||
| } | ||
| return n, nil | ||
| } | ||
|
|
||
| func (r *errAfterReadCloser) Close() error { return nil } | ||
|
|
||
| func TestEnsureChatCompletionSSE_ConvertsBufferedJSON(t *testing.T) { | ||
| // Upstream ignored stream:true and returned a buffered, non-SSE completion. | ||
| body := `{"id":"x","object":"chat.completion","choices":[{"finish_reason":"stop","message":{"role":"assistant","content":"Hi there"}}]}` | ||
| stream := io.NopCloser(strings.NewReader(body)) | ||
|
|
||
| got, err := io.ReadAll(EnsureChatCompletionSSE(stream)) | ||
| if err != nil { | ||
| t.Fatalf("read stream: %v", err) | ||
| } | ||
|
|
||
| out := string(got) | ||
| if !strings.HasPrefix(out, "data: {") { | ||
| t.Fatalf("expected SSE data framing, got %q", out) | ||
| } | ||
| if !strings.HasSuffix(out, "data: [DONE]\n\n") { | ||
| t.Fatalf("expected terminal done marker, got %q", out) | ||
| } | ||
| if !strings.Contains(out, `"object":"chat.completion.chunk"`) { | ||
| t.Fatalf("expected object rewritten to chunk, got %q", out) | ||
| } | ||
| if !strings.Contains(out, `"delta":`) || strings.Contains(out, `"message":`) { | ||
| t.Fatalf("expected message rewritten to delta, got %q", out) | ||
| } | ||
| } | ||
|
|
||
| func TestEnsureChatCompletionSSE_PassesThroughRealSSE(t *testing.T) { | ||
| chunks := [][]byte{ | ||
| []byte("data: {\"choices\":[{\"delta\":{\"content\":\"Hi\"}}]}\n\n"), | ||
| []byte("data: {\"choices\":[{\"delta\":{\"content\":\" there\"}}]}\n\n"), | ||
| []byte("data: [DONE]\n\n"), | ||
| } | ||
| original := strings.Join([]string{string(chunks[0]), string(chunks[1]), string(chunks[2])}, "") | ||
| stream := &chunkedReadCloser{chunks: chunks} | ||
|
|
||
| got, err := io.ReadAll(EnsureChatCompletionSSE(stream)) | ||
| if err != nil { | ||
| t.Fatalf("read stream: %v", err) | ||
| } | ||
| if string(got) != original { | ||
| t.Fatalf("expected genuine SSE passed through unchanged.\n got: %q\nwant: %q", string(got), original) | ||
| } | ||
| } | ||
|
|
||
| func TestEnsureChatCompletionSSE_PassesThroughSSEWithLeadingComment(t *testing.T) { | ||
| // Providers like OpenRouter emit a leading ": ... PROCESSING" comment line. | ||
| body := ": OPENROUTER PROCESSING\n\ndata: {\"choices\":[{\"delta\":{\"content\":\"Hi\"}}]}\n\ndata: [DONE]\n\n" | ||
| stream := io.NopCloser(strings.NewReader(body)) | ||
|
|
||
| got, err := io.ReadAll(EnsureChatCompletionSSE(stream)) | ||
| if err != nil { | ||
| t.Fatalf("read stream: %v", err) | ||
| } | ||
| if string(got) != body { | ||
| t.Fatalf("expected comment-prefixed SSE unchanged, got %q", string(got)) | ||
| } | ||
| } | ||
|
|
||
| func TestEnsureChatCompletionSSE_PreservesPartialBodyOnReadError(t *testing.T) { | ||
| // Upstream began a buffered JSON body, then the connection dropped mid-read. | ||
| // The partial content must still reach the client, followed by [DONE]. | ||
| partial := `{"id":"x","choices":[{"message":{"content":"Hel` | ||
| stream := &errAfterReadCloser{data: []byte(partial), err: io.ErrUnexpectedEOF} | ||
|
|
||
| got, err := io.ReadAll(EnsureChatCompletionSSE(stream)) | ||
| if err != nil { | ||
| t.Fatalf("read stream: %v", err) | ||
| } | ||
| out := string(got) | ||
| if !strings.Contains(out, "Hel") { | ||
| t.Fatalf("expected partial content preserved, got %q", out) | ||
| } | ||
| if !strings.HasSuffix(out, "data: [DONE]\n\n") { | ||
| t.Fatalf("expected terminal done marker, got %q", out) | ||
| } | ||
| } | ||
|
|
||
| func TestEnsureChatCompletionSSE_NilStream(t *testing.T) { | ||
| if EnsureChatCompletionSSE(nil) != nil { | ||
| t.Fatal("expected nil for nil stream") | ||
| } | ||
| } |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Uh oh!
There was an error while loading. Please reload this page.