2 Commits

Author SHA1 Message Date
x a32b7e4693 Use TokenCounter and correct unsafe pointer usage
Record write errors in TokenCounter.Printf and add Println method
to propagate write failures. Route file-read warnings to stderr
instead of mixing them into token output. Use unsafe.Add with a
base pointer when computing the drop target on Windows.
2026-01-31 03:33:36 +01:00
x 14dfacea29 Refine TokenCounter handling and output
Decode across Write calls and buffer partial runes. Approximate
tokenization by splitting alphanumeric runs longer than 4 runes and properly count
spaces and punctuation. Remove TokenCounter Printf/Println helpers and
update callers to use fmt.Fprintf/Fprintln. Avoid writing when the
underlying writer is nil.
2026-01-31 03:24:17 +01:00
3 changed files with 58 additions and 24 deletions
+4 -2
View File
@@ -62,11 +62,13 @@ func CopyFile(path string) error {
return fmt.Errorf("GlobalLock failed") return fmt.Errorf("GlobalLock failed")
} }
df := (*dropFiles)(unsafe.Pointer(ptrVal)) basePtr := unsafe.Pointer(ptrVal)
df := (*dropFiles)(basePtr)
df.pFiles = dropSize df.pFiles = dropSize
df.fWide = 1 df.fWide = 1
targetPtr := unsafe.Pointer(ptrVal + uintptr(dropSize)) targetPtr := unsafe.Add(basePtr, dropSize)
srcSlice := unsafe.Slice((*uint16)(unsafe.Pointer(&pathUTF16[0])), len(pathUTF16)) srcSlice := unsafe.Slice((*uint16)(unsafe.Pointer(&pathUTF16[0])), len(pathUTF16))
dstSlice := unsafe.Slice((*uint16)(targetPtr), len(pathUTF16)) dstSlice := unsafe.Slice((*uint16)(targetPtr), len(pathUTF16))
+3 -3
View File
@@ -52,7 +52,7 @@ func writeOutput(root string, files []string, outputPath string) (count int64, e
fullPath := filepath.Join(root, file) fullPath := filepath.Join(root, file)
content, err := os.ReadFile(fullPath) content, err := os.ReadFile(fullPath)
if err != nil { if err != nil {
tc.Printf("Error reading %s: %v\n", file, err) fmt.Fprintf(os.Stderr, "ctx: warning: skipping %s: %v\n", file, err)
continue continue
} }
@@ -116,9 +116,9 @@ func printNode(w io.Writer, node map[string]any, prefix string) error {
children := node[key].(map[string]any) children := node[key].(map[string]any)
if len(children) > 0 { if len(children) > 0 {
childPrefix := prefix + "│ " childPrefix := prefix + "│   "
if isLast { if isLast {
childPrefix = prefix + " " childPrefix = prefix + "    "
} }
if err := printNode(w, children, childPrefix); err != nil { if err := printNode(w, children, childPrefix); err != nil {
return err return err
+51 -19
View File
@@ -4,14 +4,17 @@ import (
"fmt" "fmt"
"io" "io"
"unicode" "unicode"
"unicode/utf8"
) )
type TokenCounter struct { type TokenCounter struct {
w io.Writer w io.Writer
Count int64 Count int64
Err error Err error
inWord bool
inSpace bool leftover []byte
inWord bool
wordLen int
} }
func (tc *TokenCounter) Write(p []byte) (int, error) { func (tc *TokenCounter) Write(p []byte) (int, error) {
@@ -19,8 +22,29 @@ func (tc *TokenCounter) Write(p []byte) (int, error) {
return 0, tc.Err return 0, tc.Err
} }
for _, b := range p { data := p
r := rune(b) if len(tc.leftover) > 0 {
data = make([]byte, len(tc.leftover)+len(p))
copy(data, tc.leftover)
copy(data[len(tc.leftover):], p)
}
totalProcessed := 0
for len(data) > 0 {
r, size := utf8.DecodeRune(data)
if r == utf8.RuneError && size == 1 {
if len(data) < utf8.UTFMax {
tc.leftover = data
break
}
}
data = data[size:]
totalProcessed += size
tc.leftover = nil
isSpace := unicode.IsSpace(r) isSpace := unicode.IsSpace(r)
isAlpha := unicode.IsLetter(r) || unicode.IsNumber(r) || r == '_' isAlpha := unicode.IsLetter(r) || unicode.IsNumber(r) || r == '_'
@@ -28,24 +52,32 @@ func (tc *TokenCounter) Write(p []byte) (int, error) {
if !tc.inWord { if !tc.inWord {
tc.Count++ tc.Count++
tc.inWord = true tc.inWord = true
tc.inSpace = false tc.wordLen = 1
} else {
tc.wordLen++
if tc.wordLen > 4 {
tc.Count++
tc.wordLen = 1
}
} }
} else if isSpace { } else if isSpace {
if !tc.inSpace { tc.inWord = false
tc.Count++ tc.wordLen = 0
tc.inSpace = true
tc.inWord = false
}
} else { } else {
tc.Count++ tc.Count++
tc.inWord = false tc.inWord = false
tc.inSpace = false tc.wordLen = 0
} }
} }
n, err := tc.w.Write(p) var n int
tc.Err = err if tc.w != nil {
return n, err n, tc.Err = tc.w.Write(p)
} else {
n = len(p)
}
return n, tc.Err
} }
func (tc *TokenCounter) WriteByte(c byte) error { func (tc *TokenCounter) WriteByte(c byte) error {
@@ -57,12 +89,12 @@ func (tc *TokenCounter) Printf(format string, a ...any) {
if tc.Err != nil { if tc.Err != nil {
return return
} }
_, _ = fmt.Fprintf(tc, format, a...) _, tc.Err = fmt.Fprintf(tc, format, a...)
} }
func (tc *TokenCounter) Println(a ...any) { func (tc *TokenCounter) Println(a ...any) {
if tc.Err != nil { if tc.Err != nil {
return return
} }
_, _ = fmt.Fprintln(tc, a...) _, tc.Err = fmt.Fprintln(tc, a...)
} }