// TokenCount is a utility that uses a bufio.SplitFunc to count the number of
// recognized tokens in the given buffer.
//
// It is like collecting every token and taking the length of the result, but
// it does not retain the tokens themselves. buf is only read, never modified:
// the scanner works on its own internal copy of the data.
//
// The returned error is whatever the scanner reports once scanning stops,
// which is nil when the end of the buffer was reached normally. If the split
// function fails part-way through, the returned count is the number of tokens
// seen before the failure.
func TokenCount(buf []byte, splitFunc bufio.SplitFunc) (int, error) {
	scanner := bufio.NewScanner(bytes.NewReader(buf))
	scanner.Split(splitFunc)

	// The whole input is already in memory, so the scanner's default token
	// limit (bufio.MaxScanTokenSize) would only produce spurious
	// bufio.ErrTooLong failures for inputs containing a very long token.
	// Allow the scanner's buffer to grow to one byte more than the input:
	// that is enough to hold a token spanning all of buf while still leaving
	// room for the read that lets the scanner observe EOF.
	if len(buf) >= bufio.MaxScanTokenSize {
		scanner.Buffer(nil, len(buf)+1)
	}

	var count int
	for scanner.Scan() {
		count++
	}
	return count, scanner.Err()
}