diff --git a/output.go b/output.go index 61d3f0c..e87032b 100644 --- a/output.go +++ b/output.go @@ -30,19 +30,19 @@ func writeOutput(root string, files []string, outputPath string) (count int64, e tc := &TokenCounter{w: bw} - tc.Printf("Project Path: %s\n\n", filepath.Base(root)) - tc.Println("Source Tree:") - tc.Println("") + fmt.Fprintf(tc, "Project Path: %s\n\n", filepath.Base(root)) + fmt.Fprintln(tc, "Source Tree:") + fmt.Fprintln(tc, "") - tc.Println("```txt") - tc.Println(filepath.Base(root)) + fmt.Fprintln(tc, "```txt") + fmt.Fprintln(tc, filepath.Base(root)) if err := writeTree(tc, files); err != nil { return 0, err } - tc.Println("```") - tc.Println("") + fmt.Fprintln(tc, "```") + fmt.Fprintln(tc, "") for _, file := range files { if file == outputPath || filepath.Base(file) == outputPath { @@ -52,7 +52,7 @@ func writeOutput(root string, files []string, outputPath string) (count int64, e fullPath := filepath.Join(root, file) content, err := os.ReadFile(fullPath) if err != nil { - tc.Printf("Error reading %s: %v\n", file, err) + fmt.Fprintf(tc, "Error reading %s: %v\n", file, err) continue } @@ -61,8 +61,8 @@ func writeOutput(root string, files []string, outputPath string) (count int64, e ext = "txt" } - tc.Printf("`%s`:\n\n", file) - tc.Printf("```%s\n", ext) + fmt.Fprintf(tc, "`%s`:\n\n", file) + fmt.Fprintf(tc, "```%s\n", ext) if _, err := tc.Write(content); err != nil { return 0, err @@ -73,8 +73,8 @@ func writeOutput(root string, files []string, outputPath string) (count int64, e return 0, err } } - tc.Println("```") - tc.Println("") + fmt.Fprintln(tc, "```") + fmt.Fprintln(tc, "") } return tc.Count, tc.Err @@ -116,9 +116,9 @@ func printNode(w io.Writer, node map[string]any, prefix string) error { children := node[key].(map[string]any) if len(children) > 0 { - childPrefix := prefix + "│ " + childPrefix := prefix + "│   " if isLast { - childPrefix = prefix + " " + childPrefix = prefix + "    " } if err := printNode(w, children, childPrefix); err != nil { return err diff --git a/token.go b/token.go index e670abb..b364986 100644 --- a/token.go +++ b/token.go @@ -4,14 +4,17 @@ import ( "fmt" "io" "unicode" + "unicode/utf8" ) type TokenCounter struct { - w io.Writer - Count int64 - Err error - inWord bool - inSpace bool + w io.Writer + Count int64 + Err error + + leftover []byte + inWord bool + wordLen int } func (tc *TokenCounter) Write(p []byte) (int, error) { @@ -19,8 +22,29 @@ func (tc *TokenCounter) Write(p []byte) (int, error) { return 0, tc.Err } - for _, b := range p { - r := rune(b) + data := p + if len(tc.leftover) > 0 { + data = make([]byte, len(tc.leftover)+len(p)) + copy(data, tc.leftover) + copy(data[len(tc.leftover):], p) + } + + totalProcessed := 0 + + for len(data) > 0 { + r, size := utf8.DecodeRune(data) + + if r == utf8.RuneError && size == 1 { + if len(data) < utf8.UTFMax { + tc.leftover = data + break + } + } + + data = data[size:] + totalProcessed += size + tc.leftover = nil + isSpace := unicode.IsSpace(r) isAlpha := unicode.IsLetter(r) || unicode.IsNumber(r) || r == '_' @@ -28,24 +52,32 @@ func (tc *TokenCounter) Write(p []byte) (int, error) { if !tc.inWord { tc.Count++ tc.inWord = true - tc.inSpace = false + tc.wordLen = 1 + } else { + tc.wordLen++ + if tc.wordLen > 4 { + tc.Count++ + tc.wordLen = 1 + } } } else if isSpace { - if !tc.inSpace { - tc.Count++ - tc.inSpace = true - tc.inWord = false - } + tc.inWord = false + tc.wordLen = 0 } else { tc.Count++ tc.inWord = false - tc.inSpace = false + tc.wordLen = 0 } } - n, err := tc.w.Write(p) - tc.Err = err - return n, err + var n int + if tc.w != nil { + n, tc.Err = tc.w.Write(p) + } else { + n = len(p) + } + + return n, tc.Err } func (tc *TokenCounter) WriteByte(c byte) error { @@ -57,12 +89,5 @@ func (tc *TokenCounter) Printf(format string, a ...any) { if tc.Err != nil { return } - _, _ = fmt.Fprintf(tc, format, a...) -} - -func (tc *TokenCounter) Println(a ...any) { - if tc.Err != nil { - return - } - _, _ = fmt.Fprintln(tc, a...) + fmt.Fprintf(tc, format, a...) }