init claude-code
This commit is contained in:
@@ -0,0 +1,230 @@
|
||||
import { feature } from 'bun:bundle'
|
||||
import { logEvent } from '../../services/analytics/index.js'
|
||||
import { logForDebugging } from '../debug.js'
|
||||
import {
|
||||
ensureParserInitialized,
|
||||
getParserModule,
|
||||
type TsNode,
|
||||
} from './bashParser.js'
|
||||
|
||||
/** Syntax-tree node type used throughout this module; an alias of `TsNode` from ./bashParser.js. */
export type Node = TsNode
|
||||
|
||||
/**
 * Value produced by a successful `parseCommand` call.
 */
export interface ParsedCommandData {
  /** Root of the syntax tree the parser returned for `originalCommand`. */
  rootNode: Node
  /**
   * Text of every `variable_assignment` child that appears before the command
   * name of `commandNode`. Empty when `commandNode` is null or is not a plain
   * `command` node.
   */
  envVars: string[]
  /** Node selected by `findCommandNode(rootNode, null)`, or null when none was found. */
  commandNode: Node | null
  /** The command string exactly as it was handed to `parseCommand`. */
  originalCommand: string
}
|
||||
|
||||
/** Commands whose `length` exceeds this are rejected (null) before any parsing is attempted. */
const MAX_COMMAND_LENGTH = 10000

/** Leading keywords that `extractCommandArguments` reports for a `declaration_command` node. */
const DECLARATION_COMMANDS: ReadonlySet<string> = new Set([
  'export',
  'declare',
  'typeset',
  'readonly',
  'local',
  'unset',
  'unsetenv',
])

/** Child node types that `extractCommandArguments` collects as arguments. */
const ARGUMENT_TYPES: ReadonlySet<string> = new Set([
  'word',
  'string',
  'raw_string',
  'number',
])

/** Child node types at which `extractCommandArguments` stops collecting. */
const SUBSTITUTION_TYPES: ReadonlySet<string> = new Set([
  'command_substitution',
  'process_substitution',
])

/** Node types that `findCommandNode` accepts as "the command". */
const COMMAND_TYPES: ReadonlySet<string> = new Set([
  'command',
  'declaration_command',
])
|
||||
|
||||
/** Set by the first `logLoadOnce` call so the load outcome is reported only once. */
let logged = false

/**
 * Reports whether the parser module was available, on the first call only.
 * Writes one debug line and fires the `tengu_tree_sitter_load` analytics
 * event; every later call is a no-op regardless of `success`.
 *
 * @param success - true when `getParserModule()` returned a module.
 */
function logLoadOnce(success: boolean): void {
  if (!logged) {
    // Flip the flag before logging so a throwing logger cannot cause a repeat.
    logged = true
    const message = success
      ? 'tree-sitter: native module loaded'
      : 'tree-sitter: unavailable'
    logForDebugging(message)
    logEvent('tengu_tree_sitter_load', { success })
  }
}
|
||||
|
||||
/**
 * Awaits parser initialization when either the `TREE_SITTER_BASH` or the
 * `TREE_SITTER_BASH_SHADOW` feature is enabled; otherwise resolves
 * immediately without touching the parser.
 *
 * The actual work is delegated to `ensureParserInitialized` in
 * ./bashParser.js. The original note on this function describes that as WASM
 * init (Parser.init + Language.load) and as idempotent — NOTE(review): both
 * properties live in bashParser.js and should be confirmed there.
 * `parseCommand` and `parseCommandRaw` also await initialization themselves,
 * so calling this first is a warm-up rather than a hard requirement.
 */
export async function ensureInitialized(): Promise<void> {
  // Keep the feature() calls directly in the `if` condition: the gate is a
  // bundler-level switch, not an ordinary runtime boolean.
  if (feature('TREE_SITTER_BASH') || feature('TREE_SITTER_BASH_SHADOW')) {
    await ensureParserInitialized()
  }
}
|
||||
|
||||
/**
 * Parses `command` and returns the tree together with the located command
 * node and its leading environment-variable assignments.
 *
 * @param command - shell command text to parse.
 * @returns the parsed data, or null when the command is empty or longer than
 *   MAX_COMMAND_LENGTH, the `TREE_SITTER_BASH` feature is off, the parser
 *   module is unavailable, the parser yields no tree, or parsing throws.
 */
export async function parseCommand(
  command: string,
): Promise<ParsedCommandData | null> {
  if (!command || command.length > MAX_COMMAND_LENGTH) return null

  // Gate: ant-only until pentest. External builds fall back to the legacy
  // regex/shell-quote path. Everything that touches the parser stays inside
  // the positive branch so Bun can dead-code-eliminate the NAPI import, and
  // so tengu_tree_sitter_load only fires when a load was genuinely attempted.
  if (feature('TREE_SITTER_BASH')) {
    await ensureParserInitialized()
    const parser = getParserModule()
    logLoadOnce(parser !== null)
    if (!parser) return null

    try {
      const rootNode = parser.parse(command)
      if (rootNode) {
        const commandNode = findCommandNode(rootNode, null)
        return {
          rootNode,
          envVars: extractEnvVars(commandNode),
          commandNode,
          originalCommand: command,
        }
      }
    } catch {
      // Any failure while parsing or walking is reported as "no result".
    }
  }

  return null
}
|
||||
|
||||
/**
 * SECURITY: sentinel meaning "the parser was loaded and a parse was
 * attempted, but it aborted" (timeout / node budget / Rust panic).
 *
 * This is deliberately distinct from `null`, which means the module was not
 * loaded. Adversarial input can force an abort while staying under
 * MAX_COMMAND_LENGTH: `(( a[0][0]... ))` with ~2800 subscripts hits
 * PARSE_TIMEOUT_MICROS.
 *
 * Callers MUST treat this value as fail-closed (too-complex) and MUST NOT
 * route the command to the legacy path.
 */
export const PARSE_ABORTED: unique symbol = Symbol('parse-aborted')
|
||||
|
||||
/**
 * Raw parse: returns the tree without running findCommandNode /
 * extractEnvVars, which the security walker in ast.ts does not need. Saves
 * one tree walk per bash command.
 *
 * @param command - shell command text to parse.
 * @returns one of:
 *   - a Node: the parse succeeded;
 *   - null: command empty or over MAX_COMMAND_LENGTH, both features off, or
 *     the parser module is not loaded;
 *   - PARSE_ABORTED: the module was loaded but the parse produced null or
 *     threw (timeout / panic).
 */
export async function parseCommandRaw(
  command: string,
): Promise<Node | null | typeof PARSE_ABORTED> {
  if (!command || command.length > MAX_COMMAND_LENGTH) return null

  if (feature('TREE_SITTER_BASH') || feature('TREE_SITTER_BASH_SHADOW')) {
    await ensureParserInitialized()
    const parser = getParserModule()
    logLoadOnce(parser !== null)
    if (!parser) return null

    const cmdLength = command.length
    try {
      const tree = parser.parse(command)
      if (tree !== null) return tree

      // SECURITY: the module is loaded, so null here is a timeout or
      // node-budget abort in bashParser.ts (PARSE_TIMEOUT_MS=50,
      // MAX_NODES=50_000). This used to collapse into `return null`
      // → parse-unavailable → legacy path, which lacks EVAL_LIKE_BUILTINS,
      // so `trap`, `enable`, `hash` leaked.
      // NOTE(review): the PARSE_ABORTED doc names PARSE_TIMEOUT_MICROS while
      // this note says PARSE_TIMEOUT_MS — confirm against bashParser.ts.
      logEvent('tengu_tree_sitter_parse_abort', { cmdLength, panic: false })
    } catch {
      // Also reached if the abort event above throws.
      logEvent('tengu_tree_sitter_parse_abort', { cmdLength, panic: true })
    }
    // Both abort flavours fail closed with the same sentinel.
    return PARSE_ABORTED
  }

  return null
}
|
||||
|
||||
/**
 * Locates the command node for `node`, searching depth-first.
 *
 * Rules, checked in order:
 *  1. A node whose type is in COMMAND_TYPES is returned as-is.
 *  2. A `variable_assignment` with a parent resolves to the first command
 *     sibling that starts after it, or null; its own children are not searched.
 *  3. A `redirected_statement` resolves to its first direct command child, or
 *     null; deeper descendants are not searched.
 *  4. Anything else (including `pipeline`) yields the first hit among its
 *     children, visited in order.
 *
 * @param node - node to inspect.
 * @param parent - the node `node` was reached from, or null at the root.
 * @returns the matching command node, or null when there is none.
 */
function findCommandNode(node: Node, parent: Node | null): Node | null {
  const kind = node.type

  if (COMMAND_TYPES.has(kind)) return node

  // Variable assignment followed by a command: look sideways, not down.
  if (kind === 'variable_assignment' && parent) {
    for (const sibling of parent.children) {
      if (
        COMMAND_TYPES.has(sibling.type) &&
        sibling.startIndex > node.startIndex
      ) {
        return sibling
      }
    }
    return null
  }

  // Redirected statement: only a direct command child counts.
  if (kind === 'redirected_statement') {
    for (const inner of node.children) {
      if (COMMAND_TYPES.has(inner.type)) return inner
    }
    return null
  }

  // Pipelines and every other node type: first match among the children
  // (a pipeline's first child may itself be a redirected_statement).
  for (const child of node.children) {
    const match = findCommandNode(child, node)
    if (match) return match
  }

  return null
}
|
||||
|
||||
/**
 * Collects the text of the `variable_assignment` children that precede the
 * command name of a `command` node (for example `FOO=bar` in `FOO=bar ls`).
 *
 * @param commandNode - node to inspect; may be null.
 * @returns the assignment texts in source order, or an empty array when
 *   `commandNode` is null or its type is not `command`.
 */
function extractEnvVars(commandNode: Node | null): string[] {
  if (!commandNode || commandNode.type !== 'command') return []

  const assignments: string[] = []
  for (const part of commandNode.children) {
    const kind = part.type
    // The command name (or a bare word) ends the assignment prefix.
    if (kind === 'command_name' || kind === 'word') break
    if (kind === 'variable_assignment') assignments.push(part.text)
  }
  return assignments
}
|
||||
|
||||
/**
 * Flattens a command node into `[commandName, ...arguments]`.
 *
 * For a `declaration_command` only the leading keyword is reported, and only
 * when it is one of DECLARATION_COMMANDS; otherwise the result is empty.
 *
 * For any other node the children are scanned in order:
 *  - `variable_assignment` children are skipped;
 *  - a `command_name` child, or the first `word` seen before any command
 *    name, is appended verbatim;
 *  - children whose type is in ARGUMENT_TYPES are appended with one layer of
 *    matching surrounding quotes removed;
 *  - the first child whose type is in SUBSTITUTION_TYPES ends the scan;
 *  - every other child type is ignored.
 *
 * @param commandNode - a `command` or `declaration_command` node.
 * @returns the collected strings in source order.
 */
export function extractCommandArguments(commandNode: Node): string[] {
  if (commandNode.type === 'declaration_command') {
    const keyword = commandNode.children[0]
    if (keyword && DECLARATION_COMMANDS.has(keyword.text)) {
      return [keyword.text]
    }
    return []
  }

  const collected: string[] = []
  let sawCommandName = false

  for (const part of commandNode.children) {
    const kind = part.type

    if (kind === 'variable_assignment') continue

    const isCommandName =
      kind === 'command_name' || (kind === 'word' && !sawCommandName)

    if (isCommandName) {
      // The command name is kept as written, quotes included.
      sawCommandName = true
      collected.push(part.text)
    } else if (ARGUMENT_TYPES.has(kind)) {
      collected.push(stripQuotes(part.text))
    } else if (SUBSTITUTION_TYPES.has(kind)) {
      // Stop at the first command/process substitution.
      break
    }
  }

  return collected
}
|
||||
|
||||
/**
 * Removes one layer of surrounding quotes when `text` both starts and ends
 * with the same quote character (`"` or `'`). Anything else — including a
 * lone quote character or mismatched quotes — is returned unchanged.
 *
 * @param text - raw node text.
 * @returns `text` without its outer quote pair, or `text` itself.
 */
function stripQuotes(text: string): string {
  // A single character cannot be both the opening and the closing quote.
  if (text.length < 2) return text

  const first = text[0]
  const last = text[text.length - 1]
  const opensWithQuote = first === '"' || first === "'"

  return opensWithQuote && first === last ? text.slice(1, -1) : text
}
|
||||
Reference in New Issue
Block a user