@@ -13,6 +13,7 @@ import {
1313import {
1414 fileDateCanContainRangeEvent ,
1515 isTimestampInRange ,
16+ parseTelemetryTimestampMs ,
1617 type TelemetryDateRange ,
1718} from "./range" ;
1819
@@ -25,7 +26,18 @@ interface TelemetryLogFile {
2526 readonly part : number ;
2627}
2728
28- /** Log files that could contain events in `range`, in chronological order. */
/** A parsed telemetry event together with where it was read from. */
interface TelemetryEventEntry {
  /** The parsed event itself. */
  readonly event: TelemetryEvent;
  /** The log file the event's line was read from. */
  readonly file: TelemetryLogFile;
  /** Line number of the event within `file`, as tracked by the reader. */
  readonly lineNumber: number;
}
34+
/** One slot of the merge frontier: a buffered entry plus the iterator it came from. */
interface EventCursor {
  /** The entry most recently pulled from `iterator` and not yet emitted. */
  readonly entry: TelemetryEventEntry;
  /** Source of the entries that follow `entry`. */
  readonly iterator: AsyncIterator<TelemetryEventEntry>;
}
39+
40+ /** Log files whose dates could overlap `range`. */
2941export async function listTelemetryFilesForRange (
3042 telemetryDir : string ,
3143 range : TelemetryDateRange ,
@@ -41,7 +53,7 @@ export async function listTelemetryFilesForRange(
4153 }
4254
4355 return names
44- . map ( ( name ) => parseLogFilename ( telemetryDir , name ) )
56+ . map ( ( name ) => parseLogFilePath ( path . join ( telemetryDir , name ) ) )
4557 . filter (
4658 ( file ) : file is TelemetryLogFile =>
4759 file !== undefined && fileDateCanContainRangeEvent ( file . date , range ) ,
@@ -50,17 +62,44 @@ export async function listTelemetryFilesForRange(
5062 . map ( ( { path : filePath } ) => filePath ) ;
5163}
5264
53- /**
54- * Yields events from `filePaths` in order, keeping only those whose timestamp
55- * falls inside `range`. Reads line-by-line so memory stays flat on big files.
56- */
57- export async function * streamTelemetryEvents (
/**
 * Merges per-session append streams by timestamp, buffering one event per
 * session.
 *
 * `filePaths` are grouped by session and each group is read as one sequential
 * stream. The head entry of every stream forms the frontier; the smallest head
 * (per `compareEventEntries`) is emitted and then replaced by the next entry
 * from the same stream.
 *
 * Every per-session iterator that was started is sent `return()` when this
 * generator finishes for any reason (exhaustion, the consumer stopping early,
 * or a read error), so suspended readers get to run their own cleanup.
 *
 * @param filePaths Log file paths to read; grouping is done internally.
 * @param range Only events whose timestamp falls inside this range are yielded.
 * @returns Events in merged order.
 */
export async function* streamTelemetryEventsSorted(
  filePaths: readonly string[],
  range: TelemetryDateRange,
): AsyncIterable<TelemetryEvent> {
  const frontier: EventCursor[] = [];
  // Iterators that were started and have not yet reported `done`. Tracked
  // separately from `frontier` because the cursor being emitted is temporarily
  // absent from the frontier while this generator is suspended at `yield`.
  const open = new Set<AsyncIterator<TelemetryEventEntry>>();

  try {
    for (const files of groupLogFilesBySession(filePaths)) {
      const iterator = streamTelemetryEventEntries(files, range)[
        Symbol.asyncIterator
      ]();
      open.add(iterator);
      const next = await iterator.next();
      if (next.done) {
        open.delete(iterator);
      } else {
        frontier.push({ entry: next.value, iterator });
      }
    }

    while (frontier.length > 0) {
      // The frontier holds at most one cursor per session, so re-sorting on
      // every step stays cheap.
      frontier.sort((a, b) => compareEventEntries(a.entry, b.entry));
      const cursor = frontier.shift();
      if (!cursor) {
        return;
      }
      yield cursor.entry.event;

      const next = await cursor.iterator.next();
      if (next.done) {
        open.delete(cursor.iterator);
      } else {
        frontier.push({ entry: next.value, iterator: cursor.iterator });
      }
    }
  } finally {
    // Best-effort close of whatever is still open. `allSettled` keeps a
    // failing `return()` from masking the error (if any) that got us here.
    await Promise.allSettled(
      [...open].map(async (iterator) => {
        await iterator.return?.();
      }),
    );
  }
}
95+
96+ async function * streamTelemetryEventEntries (
97+ files : readonly TelemetryLogFile [ ] ,
98+ range : TelemetryDateRange ,
99+ ) : AsyncIterable < TelemetryEventEntry > {
100+ for ( const file of files ) {
101+ const name = path . basename ( file . path ) ;
102+ const stream = createReadStream ( file . path , { encoding : "utf8" } ) ;
64103 const lines = readline . createInterface ( {
65104 input : stream ,
66105 crlfDelay : Infinity ,
@@ -74,7 +113,7 @@ export async function* streamTelemetryEvents(
74113 }
75114 const event = parseTelemetryEventLine ( line , name , lineNumber ) ;
76115 if ( isTimestampInRange ( event . timestamp , range ) ) {
77- yield event ;
116+ yield { event, file , lineNumber } ;
78117 }
79118 }
80119 } catch ( err ) {
@@ -96,15 +135,31 @@ export async function* streamTelemetryEvents(
96135 }
97136}
98137
99- function parseLogFilename (
100- dir : string ,
101- name : string ,
102- ) : TelemetryLogFile | undefined {
103- const parsed = localJsonlFiles . parseFileName ( name ) ;
104- if ( ! parsed ) {
105- return undefined ;
/**
 * Buckets the parseable log files in `filePaths` by their session.
 *
 * Files are ordered with `compareLogFiles` before bucketing, so each bucket
 * keeps that order. Buckets are returned in the order their session was first
 * encountered. Paths that `parseLogFilePaths` drops are not represented.
 */
function groupLogFilesBySession(
  filePaths: readonly string[],
): TelemetryLogFile[][] {
  const ordered = parseLogFilePaths(filePaths).sort(compareLogFiles);
  const bySession = new Map<string, TelemetryLogFile[]>();

  for (const logFile of ordered) {
    let bucket = bySession.get(logFile.session);
    if (!bucket) {
      bucket = [];
      bySession.set(logFile.session, bucket);
    }
    bucket.push(logFile);
  }

  return Array.from(bySession.values());
}
152+
/**
 * Parses each path with `parseLogFilePath`, keeping input order and silently
 * skipping paths that do not parse.
 */
function parseLogFilePaths(filePaths: readonly string[]): TelemetryLogFile[] {
  const parsedFiles: TelemetryLogFile[] = [];
  for (const filePath of filePaths) {
    const parsed = parseLogFilePath(filePath);
    if (parsed) {
      parsedFiles.push(parsed);
    }
  }
  return parsedFiles;
}
159+
/**
 * Builds a `TelemetryLogFile` from a path by parsing its base name.
 *
 * @returns The parsed fields plus the original `filePath`, or `undefined` when
 *   `localJsonlFiles.parseFileName` does not recognise the base name.
 */
function parseLogFilePath(filePath: string): TelemetryLogFile | undefined {
  const fields = localJsonlFiles.parseFileName(path.basename(filePath));
  if (!fields) {
    return undefined;
  }
  return { path: filePath, ...fields };
}
109164
110165function compareLogFiles ( a : TelemetryLogFile , b : TelemetryLogFile ) : number {
@@ -114,3 +169,20 @@ function compareLogFiles(a: TelemetryLogFile, b: TelemetryLogFile): number {
114169 a . part - b . part
115170 ) ;
116171}
172+
/**
 * Sort comparator for event entries.
 *
 * Orders by parsed event timestamp first. Ties are broken, in turn, by the
 * event's context session id, the file's session, the file's date, the file's
 * part number, and finally the line number.
 */
function compareEventEntries(
  a: TelemetryEventEntry,
  b: TelemetryEventEntry,
): number {
  const byTimestamp =
    parseTelemetryTimestampMs(a.event.timestamp) -
    parseTelemetryTimestampMs(b.event.timestamp);
  if (byTimestamp) {
    return byTimestamp;
  }

  const byEventSession = a.event.context.sessionId.localeCompare(
    b.event.context.sessionId,
  );
  if (byEventSession) {
    return byEventSession;
  }

  const byFileSession = a.file.session.localeCompare(b.file.session);
  if (byFileSession) {
    return byFileSession;
  }

  const byDate = a.file.date.localeCompare(b.file.date);
  if (byDate) {
    return byDate;
  }

  const byPart = a.file.part - b.file.part;
  if (byPart) {
    return byPart;
  }

  return a.lineNumber - b.lineNumber;
}
0 commit comments