mirror of
https://github.com/skidoodle/ctx.git
synced 2026-04-28 11:17:42 +02:00
Refine TokenCounter handling and output
Decode UTF-8 across Write calls and buffer partial runes. Approximate tokenization by splitting alphanumeric runs longer than 4 runes into 4-rune chunks, and properly count spaces and punctuation. Remove the TokenCounter Printf/Println helpers and update callers to use fmt.Fprintf/fmt.Fprintln. Avoid writing when the underlying writer is nil.
This commit is contained in:
@@ -30,19 +30,19 @@ func writeOutput(root string, files []string, outputPath string) (count int64, e
|
|||||||
|
|
||||||
tc := &TokenCounter{w: bw}
|
tc := &TokenCounter{w: bw}
|
||||||
|
|
||||||
tc.Printf("Project Path: %s\n\n", filepath.Base(root))
|
fmt.Fprintf(tc, "Project Path: %s\n\n", filepath.Base(root))
|
||||||
tc.Println("Source Tree:")
|
fmt.Fprintln(tc, "Source Tree:")
|
||||||
tc.Println("")
|
fmt.Fprintln(tc, "")
|
||||||
|
|
||||||
tc.Println("```txt")
|
fmt.Fprintln(tc, "```txt")
|
||||||
tc.Println(filepath.Base(root))
|
fmt.Fprintln(tc, filepath.Base(root))
|
||||||
|
|
||||||
if err := writeTree(tc, files); err != nil {
|
if err := writeTree(tc, files); err != nil {
|
||||||
return 0, err
|
return 0, err
|
||||||
}
|
}
|
||||||
|
|
||||||
tc.Println("```")
|
fmt.Fprintln(tc, "```")
|
||||||
tc.Println("")
|
fmt.Fprintln(tc, "")
|
||||||
|
|
||||||
for _, file := range files {
|
for _, file := range files {
|
||||||
if file == outputPath || filepath.Base(file) == outputPath {
|
if file == outputPath || filepath.Base(file) == outputPath {
|
||||||
@@ -52,7 +52,7 @@ func writeOutput(root string, files []string, outputPath string) (count int64, e
|
|||||||
fullPath := filepath.Join(root, file)
|
fullPath := filepath.Join(root, file)
|
||||||
content, err := os.ReadFile(fullPath)
|
content, err := os.ReadFile(fullPath)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
tc.Printf("Error reading %s: %v\n", file, err)
|
fmt.Fprintf(tc, "Error reading %s: %v\n", file, err)
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -61,8 +61,8 @@ func writeOutput(root string, files []string, outputPath string) (count int64, e
|
|||||||
ext = "txt"
|
ext = "txt"
|
||||||
}
|
}
|
||||||
|
|
||||||
tc.Printf("`%s`:\n\n", file)
|
fmt.Fprintf(tc, "`%s`:\n\n", file)
|
||||||
tc.Printf("```%s\n", ext)
|
fmt.Fprintf(tc, "```%s\n", ext)
|
||||||
|
|
||||||
if _, err := tc.Write(content); err != nil {
|
if _, err := tc.Write(content); err != nil {
|
||||||
return 0, err
|
return 0, err
|
||||||
@@ -73,8 +73,8 @@ func writeOutput(root string, files []string, outputPath string) (count int64, e
|
|||||||
return 0, err
|
return 0, err
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
tc.Println("```")
|
fmt.Fprintln(tc, "```")
|
||||||
tc.Println("")
|
fmt.Fprintln(tc, "")
|
||||||
}
|
}
|
||||||
|
|
||||||
return tc.Count, tc.Err
|
return tc.Count, tc.Err
|
||||||
@@ -116,9 +116,9 @@ func printNode(w io.Writer, node map[string]any, prefix string) error {
|
|||||||
|
|
||||||
children := node[key].(map[string]any)
|
children := node[key].(map[string]any)
|
||||||
if len(children) > 0 {
|
if len(children) > 0 {
|
||||||
childPrefix := prefix + "│ "
|
childPrefix := prefix + "│ "
|
||||||
if isLast {
|
if isLast {
|
||||||
childPrefix = prefix + " "
|
childPrefix = prefix + " "
|
||||||
}
|
}
|
||||||
if err := printNode(w, children, childPrefix); err != nil {
|
if err := printNode(w, children, childPrefix); err != nil {
|
||||||
return err
|
return err
|
||||||
|
|||||||
@@ -4,14 +4,17 @@ import (
|
|||||||
"fmt"
|
"fmt"
|
||||||
"io"
|
"io"
|
||||||
"unicode"
|
"unicode"
|
||||||
|
"unicode/utf8"
|
||||||
)
|
)
|
||||||
|
|
||||||
type TokenCounter struct {
|
type TokenCounter struct {
|
||||||
w io.Writer
|
w io.Writer
|
||||||
Count int64
|
Count int64
|
||||||
Err error
|
Err error
|
||||||
inWord bool
|
|
||||||
inSpace bool
|
leftover []byte
|
||||||
|
inWord bool
|
||||||
|
wordLen int
|
||||||
}
|
}
|
||||||
|
|
||||||
func (tc *TokenCounter) Write(p []byte) (int, error) {
|
func (tc *TokenCounter) Write(p []byte) (int, error) {
|
||||||
@@ -19,8 +22,29 @@ func (tc *TokenCounter) Write(p []byte) (int, error) {
|
|||||||
return 0, tc.Err
|
return 0, tc.Err
|
||||||
}
|
}
|
||||||
|
|
||||||
for _, b := range p {
|
data := p
|
||||||
r := rune(b)
|
if len(tc.leftover) > 0 {
|
||||||
|
data = make([]byte, len(tc.leftover)+len(p))
|
||||||
|
copy(data, tc.leftover)
|
||||||
|
copy(data[len(tc.leftover):], p)
|
||||||
|
}
|
||||||
|
|
||||||
|
totalProcessed := 0
|
||||||
|
|
||||||
|
for len(data) > 0 {
|
||||||
|
r, size := utf8.DecodeRune(data)
|
||||||
|
|
||||||
|
if r == utf8.RuneError && size == 1 {
|
||||||
|
if len(data) < utf8.UTFMax {
|
||||||
|
tc.leftover = data
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
data = data[size:]
|
||||||
|
totalProcessed += size
|
||||||
|
tc.leftover = nil
|
||||||
|
|
||||||
isSpace := unicode.IsSpace(r)
|
isSpace := unicode.IsSpace(r)
|
||||||
isAlpha := unicode.IsLetter(r) || unicode.IsNumber(r) || r == '_'
|
isAlpha := unicode.IsLetter(r) || unicode.IsNumber(r) || r == '_'
|
||||||
|
|
||||||
@@ -28,24 +52,32 @@ func (tc *TokenCounter) Write(p []byte) (int, error) {
|
|||||||
if !tc.inWord {
|
if !tc.inWord {
|
||||||
tc.Count++
|
tc.Count++
|
||||||
tc.inWord = true
|
tc.inWord = true
|
||||||
tc.inSpace = false
|
tc.wordLen = 1
|
||||||
|
} else {
|
||||||
|
tc.wordLen++
|
||||||
|
if tc.wordLen > 4 {
|
||||||
|
tc.Count++
|
||||||
|
tc.wordLen = 1
|
||||||
|
}
|
||||||
}
|
}
|
||||||
} else if isSpace {
|
} else if isSpace {
|
||||||
if !tc.inSpace {
|
tc.inWord = false
|
||||||
tc.Count++
|
tc.wordLen = 0
|
||||||
tc.inSpace = true
|
|
||||||
tc.inWord = false
|
|
||||||
}
|
|
||||||
} else {
|
} else {
|
||||||
tc.Count++
|
tc.Count++
|
||||||
tc.inWord = false
|
tc.inWord = false
|
||||||
tc.inSpace = false
|
tc.wordLen = 0
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
n, err := tc.w.Write(p)
|
var n int
|
||||||
tc.Err = err
|
if tc.w != nil {
|
||||||
return n, err
|
n, tc.Err = tc.w.Write(p)
|
||||||
|
} else {
|
||||||
|
n = len(p)
|
||||||
|
}
|
||||||
|
|
||||||
|
return n, tc.Err
|
||||||
}
|
}
|
||||||
|
|
||||||
func (tc *TokenCounter) WriteByte(c byte) error {
|
func (tc *TokenCounter) WriteByte(c byte) error {
|
||||||
@@ -57,12 +89,5 @@ func (tc *TokenCounter) Printf(format string, a ...any) {
|
|||||||
if tc.Err != nil {
|
if tc.Err != nil {
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
_, _ = fmt.Fprintf(tc, format, a...)
|
fmt.Fprintf(tc, format, a...)
|
||||||
}
|
|
||||||
|
|
||||||
func (tc *TokenCounter) Println(a ...any) {
|
|
||||||
if tc.Err != nil {
|
|
||||||
return
|
|
||||||
}
|
|
||||||
_, _ = fmt.Fprintln(tc, a...)
|
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user