Refine TokenCounter handling and output

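Decode UTF-8 across Write calls, buffering partial runes that are split
between calls. Approximate tokenization by splitting long alphanumeric
runs (more than 4 characters) into multiple tokens and counting each
punctuation character as a token; whitespace now only separates tokens.
Remove the TokenCounter Println helper and update callers to use
fmt.Fprintf/Fprintln directly (Printf is kept, but callers no longer use
it). Skip the write when the underlying writer is nil.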
This commit is contained in:
2026-01-31 03:24:17 +01:00
parent 4453856fd1
commit 14dfacea29
2 changed files with 64 additions and 39 deletions
+14 -14
View File
@@ -30,19 +30,19 @@ func writeOutput(root string, files []string, outputPath string) (count int64, e
tc := &TokenCounter{w: bw} tc := &TokenCounter{w: bw}
tc.Printf("Project Path: %s\n\n", filepath.Base(root)) fmt.Fprintf(tc, "Project Path: %s\n\n", filepath.Base(root))
tc.Println("Source Tree:") fmt.Fprintln(tc, "Source Tree:")
tc.Println("") fmt.Fprintln(tc, "")
tc.Println("```txt") fmt.Fprintln(tc, "```txt")
tc.Println(filepath.Base(root)) fmt.Fprintln(tc, filepath.Base(root))
if err := writeTree(tc, files); err != nil { if err := writeTree(tc, files); err != nil {
return 0, err return 0, err
} }
tc.Println("```") fmt.Fprintln(tc, "```")
tc.Println("") fmt.Fprintln(tc, "")
for _, file := range files { for _, file := range files {
if file == outputPath || filepath.Base(file) == outputPath { if file == outputPath || filepath.Base(file) == outputPath {
@@ -52,7 +52,7 @@ func writeOutput(root string, files []string, outputPath string) (count int64, e
fullPath := filepath.Join(root, file) fullPath := filepath.Join(root, file)
content, err := os.ReadFile(fullPath) content, err := os.ReadFile(fullPath)
if err != nil { if err != nil {
tc.Printf("Error reading %s: %v\n", file, err) fmt.Fprintf(tc, "Error reading %s: %v\n", file, err)
continue continue
} }
@@ -61,8 +61,8 @@ func writeOutput(root string, files []string, outputPath string) (count int64, e
ext = "txt" ext = "txt"
} }
tc.Printf("`%s`:\n\n", file) fmt.Fprintf(tc, "`%s`:\n\n", file)
tc.Printf("```%s\n", ext) fmt.Fprintf(tc, "```%s\n", ext)
if _, err := tc.Write(content); err != nil { if _, err := tc.Write(content); err != nil {
return 0, err return 0, err
@@ -73,8 +73,8 @@ func writeOutput(root string, files []string, outputPath string) (count int64, e
return 0, err return 0, err
} }
} }
tc.Println("```") fmt.Fprintln(tc, "```")
tc.Println("") fmt.Fprintln(tc, "")
} }
return tc.Count, tc.Err return tc.Count, tc.Err
@@ -116,9 +116,9 @@ func printNode(w io.Writer, node map[string]any, prefix string) error {
children := node[key].(map[string]any) children := node[key].(map[string]any)
if len(children) > 0 { if len(children) > 0 {
childPrefix := prefix + "│ " childPrefix := prefix + "│   "
if isLast { if isLast {
childPrefix = prefix + " " childPrefix = prefix + "    "
} }
if err := printNode(w, children, childPrefix); err != nil { if err := printNode(w, children, childPrefix); err != nil {
return err return err
+50 -25
View File
@@ -4,14 +4,17 @@ import (
"fmt" "fmt"
"io" "io"
"unicode" "unicode"
"unicode/utf8"
) )
type TokenCounter struct { type TokenCounter struct {
w io.Writer w io.Writer
Count int64 Count int64
Err error Err error
inWord bool
inSpace bool leftover []byte
inWord bool
wordLen int
} }
func (tc *TokenCounter) Write(p []byte) (int, error) { func (tc *TokenCounter) Write(p []byte) (int, error) {
@@ -19,8 +22,29 @@ func (tc *TokenCounter) Write(p []byte) (int, error) {
return 0, tc.Err return 0, tc.Err
} }
for _, b := range p { data := p
r := rune(b) if len(tc.leftover) > 0 {
data = make([]byte, len(tc.leftover)+len(p))
copy(data, tc.leftover)
copy(data[len(tc.leftover):], p)
}
totalProcessed := 0
for len(data) > 0 {
r, size := utf8.DecodeRune(data)
if r == utf8.RuneError && size == 1 {
if len(data) < utf8.UTFMax {
tc.leftover = data
break
}
}
data = data[size:]
totalProcessed += size
tc.leftover = nil
isSpace := unicode.IsSpace(r) isSpace := unicode.IsSpace(r)
isAlpha := unicode.IsLetter(r) || unicode.IsNumber(r) || r == '_' isAlpha := unicode.IsLetter(r) || unicode.IsNumber(r) || r == '_'
@@ -28,24 +52,32 @@ func (tc *TokenCounter) Write(p []byte) (int, error) {
if !tc.inWord { if !tc.inWord {
tc.Count++ tc.Count++
tc.inWord = true tc.inWord = true
tc.inSpace = false tc.wordLen = 1
} else {
tc.wordLen++
if tc.wordLen > 4 {
tc.Count++
tc.wordLen = 1
}
} }
} else if isSpace { } else if isSpace {
if !tc.inSpace { tc.inWord = false
tc.Count++ tc.wordLen = 0
tc.inSpace = true
tc.inWord = false
}
} else { } else {
tc.Count++ tc.Count++
tc.inWord = false tc.inWord = false
tc.inSpace = false tc.wordLen = 0
} }
} }
n, err := tc.w.Write(p) var n int
tc.Err = err if tc.w != nil {
return n, err n, tc.Err = tc.w.Write(p)
} else {
n = len(p)
}
return n, tc.Err
} }
func (tc *TokenCounter) WriteByte(c byte) error { func (tc *TokenCounter) WriteByte(c byte) error {
@@ -57,12 +89,5 @@ func (tc *TokenCounter) Printf(format string, a ...any) {
if tc.Err != nil { if tc.Err != nil {
return return
} }
_, _ = fmt.Fprintf(tc, format, a...) fmt.Fprintf(tc, format, a...)
}
func (tc *TokenCounter) Println(a ...any) {
if tc.Err != nil {
return
}
_, _ = fmt.Fprintln(tc, a...)
} }