diff --git a/tok/bleve.go b/tok/bleve.go
index 05de89e30cc..2c00ed38f4f 100644
--- a/tok/bleve.go
+++ b/tok/bleve.go
@@ -61,7 +61,7 @@ func setupBleve() {
 
 // uniqueTerms takes a token stream and returns a string slice of unique terms.
 func uniqueTerms(tokens analysis.TokenStream) []string {
-	var terms []string
+	terms := make([]string, 0, len(tokens))
 	for i := range tokens {
 		terms = append(terms, string(tokens[i].Term))
 	}
diff --git a/tok/tok_test.go b/tok/tok_test.go
index 4c95094e577..a97e9523601 100644
--- a/tok/tok_test.go
+++ b/tok/tok_test.go
@@ -6,6 +6,7 @@
 package tok
 
 import (
+	"fmt"
 	"math"
 	"sort"
 	"strings"
@@ -653,5 +654,24 @@ func TestNGramTokenizerNonStringInput(t *testing.T) {
 }
 
+// BenchmarkTermTokenizer measures TermTokenizer.Tokens on a few fixed inputs,
+// running one sub-benchmark per input, named after the input's byte length.
 func BenchmarkTermTokenizer(b *testing.B) {
-	b.Skip() // tmp
+	t := TermTokenizer{}
+	for _, text := range []string{
+		"the quick brown fox jumps over the lazy dog",
+		"Lorem ipsum dolor sit amet, consectetur adipiscing elit",
+		"a b c d e f g h i j k l m n o p q r s t u v w x y z",
+	} {
+		b.Run(fmt.Sprintf("len=%d", len(text)), func(b *testing.B) {
+			// ReportAllocs only affects the *testing.B it is called on, so it
+			// must be enabled on the sub-benchmark, not on the parent.
+			b.ReportAllocs()
+			for i := 0; i < b.N; i++ {
+				_, err := t.Tokens(text)
+				if err != nil {
+					b.Fatal(err)
+				}
+			}
+		})
+	}
 }