init claude-code

This commit is contained in:
2026-04-01 17:32:37 +02:00
commit 73b208c009
1902 changed files with 513237 additions and 0 deletions
+31
View File
@@ -0,0 +1,31 @@
/**
* CLI exit helpers for subcommand handlers.
*
* Consolidates the 4-5 line "print + lint-suppress + exit" block that was
* copy-pasted ~60 times across `claude mcp *` / `claude plugin *` handlers.
* The `: never` return type lets TypeScript narrow control flow at call sites
* without a trailing `return`.
*/
/* eslint-disable custom-rules/no-process-exit -- centralized CLI exit point */
// `return undefined as never` (not a post-exit throw) — tests spy on
// process.exit and let it return. Call sites write `return cliError(...)`
// where subsequent code would dereference narrowed-away values under mock.
// cliError uses console.error (tests spy on console.error); cliOk uses
// process.stdout.write (tests spy on process.stdout.write — Bun's console.log
// doesn't route through a spied process.stdout.write).
/** Write an error message to stderr (if given) and exit with code 1. */
export function cliError(msg?: string): never {
// biome-ignore lint/suspicious/noConsole: centralized CLI error output
if (msg) console.error(msg)
process.exit(1)
return undefined as never
}
/** Write a message to stdout (if given) and exit with code 0. */
/**
 * Print a message to stdout (when provided) and exit with status 0.
 * Uses process.stdout.write rather than console.log so test spies on
 * stdout observe the output.
 */
export function cliOk(msg?: string): never {
  if (msg) {
    process.stdout.write(`${msg}\n`)
  }
  process.exit(0)
  // Reached only when tests stub process.exit; keeps the `never` contract.
  return undefined as never
}
+70
View File
@@ -0,0 +1,70 @@
/**
* Agents subcommand handler — prints the list of configured agents.
* Dynamically imported only when `claude agents` runs.
*/
import {
AGENT_SOURCE_GROUPS,
compareAgentsByName,
getOverrideSourceLabel,
type ResolvedAgent,
resolveAgentModelDisplay,
resolveAgentOverrides,
} from '../../tools/AgentTool/agentDisplay.js'
import {
getActiveAgentsFromList,
getAgentDefinitionsWithOverrides,
} from '../../tools/AgentTool/loadAgentsDir.js'
import { getCwd } from '../../utils/cwd.js'
/**
 * Renders one agent as a single display line: agent type, optional model,
 * optional memory setting, separated by " · ".
 */
function formatAgent(agent: ResolvedAgent): string {
  const segments: string[] = [agent.agentType]
  const model = resolveAgentModelDisplay(agent)
  if (model) segments.push(model)
  if (agent.memory) segments.push(`${agent.memory} memory`)
  return segments.join(' · ')
}
/**
 * Prints all configured agents grouped by source. Shadowed agents are
 * annotated with the source of the winning override and excluded from the
 * "active" count.
 */
export async function agentsHandler(): Promise<void> {
  const { allAgents } = await getAgentDefinitionsWithOverrides(getCwd())
  const activeAgents = getActiveAgentsFromList(allAgents)
  const resolved = resolveAgentOverrides(allAgents, activeAgents)
  const output: string[] = []
  let activeCount = 0
  for (const { label, source } of AGENT_SOURCE_GROUPS) {
    const group = resolved
      .filter(agent => agent.source === source)
      .sort(compareAgentsByName)
    if (group.length === 0) continue
    output.push(`${label}:`)
    for (const agent of group) {
      if (agent.overriddenBy) {
        const winnerSource = getOverrideSourceLabel(agent.overriddenBy)
        output.push(` (shadowed by ${winnerSource}) ${formatAgent(agent)}`)
      } else {
        output.push(` ${formatAgent(agent)}`)
        activeCount++
      }
    }
    // Blank separator after each group; trimmed from the final output below.
    output.push('')
  }
  if (output.length === 0) {
    // biome-ignore lint/suspicious/noConsole: intentional console output
    console.log('No agents found.')
    return
  }
  // biome-ignore lint/suspicious/noConsole: intentional console output
  console.log(`${activeCount} active agents\n`)
  // biome-ignore lint/suspicious/noConsole: intentional console output
  console.log(output.join('\n').trimEnd())
}
+330
View File
@@ -0,0 +1,330 @@
/* eslint-disable custom-rules/no-process-exit -- CLI subcommand handler intentionally exits */
import {
clearAuthRelatedCaches,
performLogout,
} from '../../commands/logout/logout.js'
import {
type AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
logEvent,
} from '../../services/analytics/index.js'
import { getSSLErrorHint } from '../../services/api/errorUtils.js'
import { fetchAndStoreClaudeCodeFirstTokenDate } from '../../services/api/firstTokenDate.js'
import {
createAndStoreApiKey,
fetchAndStoreUserRoles,
refreshOAuthToken,
shouldUseClaudeAIAuth,
storeOAuthAccountInfo,
} from '../../services/oauth/client.js'
import { getOauthProfileFromOauthToken } from '../../services/oauth/getOauthProfile.js'
import { OAuthService } from '../../services/oauth/index.js'
import type { OAuthTokens } from '../../services/oauth/types.js'
import {
clearOAuthTokenCache,
getAnthropicApiKeyWithSource,
getAuthTokenSource,
getOauthAccountInfo,
getSubscriptionType,
isUsing3PServices,
saveOAuthTokensIfNeeded,
validateForceLoginOrg,
} from '../../utils/auth.js'
import { saveGlobalConfig } from '../../utils/config.js'
import { logForDebugging } from '../../utils/debug.js'
import { isRunningOnHomespace } from '../../utils/envUtils.js'
import { errorMessage } from '../../utils/errors.js'
import { logError } from '../../utils/log.js'
import { getAPIProvider } from '../../utils/model/providers.js'
import { getInitialSettings } from '../../utils/settings/settings.js'
import { jsonStringify } from '../../utils/slowOperations.js'
import {
buildAccountProperties,
buildAPIProviderProperties,
} from '../../utils/status.js'
/**
 * Shared post-token-acquisition logic. Saves tokens, fetches profile/roles,
 * and sets up the local auth state.
 *
 * Order matters: old state is cleared first, account info stored, tokens
 * persisted, then best-effort metadata fetches run against the new token.
 *
 * @param tokens - Freshly acquired OAuth tokens (browser flow or refresh
 *   exchange); may carry a pre-fetched profile and/or token-exchange account.
 * @throws For non-claude.ai (Console) scopes, when API key creation fails.
 */
export async function installOAuthTokens(tokens: OAuthTokens): Promise<void> {
  // Clear old state before saving new credentials
  await performLogout({ clearOnboarding: false })
  // Reuse pre-fetched profile if available, otherwise fetch fresh
  const profile =
    tokens.profile ?? (await getOauthProfileFromOauthToken(tokens.accessToken))
  if (profile) {
    storeOAuthAccountInfo({
      accountUuid: profile.account.uuid,
      emailAddress: profile.account.email,
      organizationUuid: profile.organization.uuid,
      // `||` (not `??`) so an empty display name also falls back to undefined.
      displayName: profile.account.display_name || undefined,
      hasExtraUsageEnabled:
        profile.organization.has_extra_usage_enabled ?? undefined,
      billingType: profile.organization.billing_type ?? undefined,
      subscriptionCreatedAt:
        profile.organization.subscription_created_at ?? undefined,
      accountCreatedAt: profile.account.created_at,
    })
  } else if (tokens.tokenAccount) {
    // Fallback to token exchange account data when profile endpoint fails
    storeOAuthAccountInfo({
      accountUuid: tokens.tokenAccount.uuid,
      emailAddress: tokens.tokenAccount.emailAddress,
      organizationUuid: tokens.tokenAccount.organizationUuid,
    })
  }
  const storageResult = saveOAuthTokensIfNeeded(tokens)
  // Invalidate any cached token so subsequent reads see the new credentials.
  clearOAuthTokenCache()
  if (storageResult.warning) {
    logEvent('tengu_oauth_storage_warning', {
      warning:
        storageResult.warning as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
    })
  }
  // Roles and first-token-date may fail for limited-scope tokens (e.g.
  // inference-only from setup-token). They're not required for core auth.
  await fetchAndStoreUserRoles(tokens.accessToken).catch(err =>
    logForDebugging(String(err), { level: 'error' }),
  )
  if (shouldUseClaudeAIAuth(tokens.scopes)) {
    await fetchAndStoreClaudeCodeFirstTokenDate().catch(err =>
      logForDebugging(String(err), { level: 'error' }),
    )
  } else {
    // API key creation is critical for Console users — let it throw.
    const apiKey = await createAndStoreApiKey(tokens.accessToken)
    if (!apiKey) {
      throw new Error(
        'Unable to create API key. The server accepted the request but did not return a key.',
      )
    }
  }
  await clearAuthRelatedCaches()
}
/**
 * `claude auth login` — authenticates either via the browser OAuth flow or,
 * when CLAUDE_CODE_OAUTH_REFRESH_TOKEN is set, by exchanging the refresh
 * token directly (non-interactive). Exits 0 on success, 1 on failure.
 *
 * @param email - Login hint forwarded to the OAuth flow.
 * @param sso - Force the SSO login method.
 * @param console - Select Console (API key) login; renamed to `useConsole`
 *   locally to avoid shadowing the global `console`.
 * @param claudeai - Select claude.ai subscription login.
 */
export async function authLogin({
  email,
  sso,
  console: useConsole,
  claudeai,
}: {
  email?: string
  sso?: boolean
  console?: boolean
  claudeai?: boolean
}): Promise<void> {
  if (useConsole && claudeai) {
    process.stderr.write(
      'Error: --console and --claudeai cannot be used together.\n',
    )
    process.exit(1)
  }
  const settings = getInitialSettings()
  // forceLoginMethod is a hard constraint (enterprise setting) — matches ConsoleOAuthFlow behavior.
  // Without it, --console selects Console; --claudeai (or no flag) selects claude.ai.
  const loginWithClaudeAi = settings.forceLoginMethod
    ? settings.forceLoginMethod === 'claudeai'
    : !useConsole
  const orgUUID = settings.forceLoginOrgUUID
  // Fast path: if a refresh token is provided via env var, skip the browser
  // OAuth flow and exchange it directly for tokens.
  const envRefreshToken = process.env.CLAUDE_CODE_OAUTH_REFRESH_TOKEN
  if (envRefreshToken) {
    const envScopes = process.env.CLAUDE_CODE_OAUTH_SCOPES
    if (!envScopes) {
      process.stderr.write(
        'CLAUDE_CODE_OAUTH_SCOPES is required when using CLAUDE_CODE_OAUTH_REFRESH_TOKEN.\n' +
          'Set it to the space-separated scopes the refresh token was issued with\n' +
          '(e.g. "user:inference" or "user:profile user:inference user:sessions:claude_code user:mcp_servers").\n',
      )
      process.exit(1)
    }
    const scopes = envScopes.split(/\s+/).filter(Boolean)
    try {
      logEvent('tengu_login_from_refresh_token', {})
      const tokens = await refreshOAuthToken(envRefreshToken, { scopes })
      await installOAuthTokens(tokens)
      const orgResult = await validateForceLoginOrg()
      if (!orgResult.valid) {
        process.stderr.write(orgResult.message + '\n')
        process.exit(1)
      }
      // Mark onboarding complete — interactive paths handle this via
      // the Onboarding component, but the env var path skips it.
      saveGlobalConfig(current => {
        if (current.hasCompletedOnboarding) return current
        return { ...current, hasCompletedOnboarding: true }
      })
      logEvent('tengu_oauth_success', {
        loginWithClaudeAi: shouldUseClaudeAIAuth(tokens.scopes),
      })
      process.stdout.write('Login successful.\n')
      process.exit(0)
    } catch (err) {
      logError(err)
      // Surface a certificate hint (e.g. corporate proxy) when applicable.
      const sslHint = getSSLErrorHint(err)
      process.stderr.write(
        `Login failed: ${errorMessage(err)}\n${sslHint ? sslHint + '\n' : ''}`,
      )
      process.exit(1)
    }
  }
  // Interactive path: browser-based OAuth flow.
  const resolvedLoginMethod = sso ? 'sso' : undefined
  const oauthService = new OAuthService()
  try {
    logEvent('tengu_oauth_flow_start', { loginWithClaudeAi })
    const result = await oauthService.startOAuthFlow(
      async url => {
        process.stdout.write('Opening browser to sign in…\n')
        process.stdout.write(`If the browser didn't open, visit: ${url}\n`)
      },
      {
        loginWithClaudeAi,
        loginHint: email,
        loginMethod: resolvedLoginMethod,
        orgUUID,
      },
    )
    await installOAuthTokens(result)
    const orgResult = await validateForceLoginOrg()
    if (!orgResult.valid) {
      process.stderr.write(orgResult.message + '\n')
      process.exit(1)
    }
    logEvent('tengu_oauth_success', { loginWithClaudeAi })
    process.stdout.write('Login successful.\n')
    process.exit(0)
  } catch (err) {
    logError(err)
    const sslHint = getSSLErrorHint(err)
    process.stderr.write(
      `Login failed: ${errorMessage(err)}\n${sslHint ? sslHint + '\n' : ''}`,
    )
    process.exit(1)
  } finally {
    // Always release the local OAuth callback resources.
    oauthService.cleanup()
  }
}
/**
 * `claude auth status` — reports the current authentication state.
 * With --text, prints human-readable account/provider properties; otherwise
 * emits a JSON object. Exits 0 when logged in, 1 when not.
 */
export async function authStatus(opts: {
  json?: boolean
  text?: boolean
}): Promise<void> {
  const { source: authTokenSource, hasToken } = getAuthTokenSource()
  const { source: apiKeySource } = getAnthropicApiKeyWithSource()
  const hasApiKeyEnvVar =
    !!process.env.ANTHROPIC_API_KEY && !isRunningOnHomespace()
  const oauthAccount = getOauthAccountInfo()
  const subscriptionType = getSubscriptionType()
  const using3P = isUsing3PServices()
  const loggedIn =
    hasToken || apiKeySource !== 'none' || hasApiKeyEnvVar || using3P
  // Resolve the auth-method label. Order matters: third-party services win,
  // then token sources, then API-key sources.
  const authMethod: string = (() => {
    if (using3P) return 'third_party'
    if (authTokenSource === 'claude.ai') return 'claude.ai'
    if (authTokenSource === 'apiKeyHelper') return 'api_key_helper'
    if (authTokenSource !== 'none') return 'oauth_token'
    if (apiKeySource === 'ANTHROPIC_API_KEY' || hasApiKeyEnvVar) {
      return 'api_key'
    }
    if (apiKeySource === '/login managed key') return 'claude.ai'
    return 'none'
  })()
  if (opts.text) {
    const properties = [
      ...buildAccountProperties(),
      ...buildAPIProviderProperties(),
    ]
    let printedAny = false
    for (const prop of properties) {
      // Stringify list values; anything else is unprintable here.
      let value: string | null = null
      if (typeof prop.value === 'string') {
        value = prop.value
      } else if (Array.isArray(prop.value)) {
        value = prop.value.join(', ')
      }
      if (value === null || value === 'none') continue
      printedAny = true
      process.stdout.write(
        prop.label ? `${prop.label}: ${value}\n` : `${value}\n`,
      )
    }
    // Nothing printable but an env-var key exists — still show it.
    if (!printedAny && hasApiKeyEnvVar) {
      process.stdout.write('API key: ANTHROPIC_API_KEY\n')
    }
    if (!loggedIn) {
      process.stdout.write(
        'Not logged in. Run claude auth login to authenticate.\n',
      )
    }
  } else {
    const apiProvider = getAPIProvider()
    let resolvedApiKeySource: string | null = null
    if (apiKeySource !== 'none') {
      resolvedApiKeySource = apiKeySource
    } else if (hasApiKeyEnvVar) {
      resolvedApiKeySource = 'ANTHROPIC_API_KEY'
    }
    const output: Record<string, string | boolean | null> = {
      loggedIn,
      authMethod,
      apiProvider,
    }
    if (resolvedApiKeySource) {
      output.apiKeySource = resolvedApiKeySource
    }
    // Account details only make sense for claude.ai subscription auth.
    if (authMethod === 'claude.ai') {
      output.email = oauthAccount?.emailAddress ?? null
      output.orgId = oauthAccount?.organizationUuid ?? null
      output.orgName = oauthAccount?.organizationName ?? null
      output.subscriptionType = subscriptionType ?? null
    }
    process.stdout.write(jsonStringify(output, null, 2) + '\n')
  }
  process.exit(loggedIn ? 0 : 1)
}
/**
 * `claude auth logout` — clears stored credentials.
 * Exits 0 on success, 1 when logout fails.
 */
export async function authLogout(): Promise<void> {
  let failed = false
  try {
    await performLogout({ clearOnboarding: false })
  } catch {
    failed = true
  }
  if (failed) {
    process.stderr.write('Failed to log out.\n')
    process.exit(1)
  }
  process.stdout.write('Successfully logged out from your Anthropic account.\n')
  process.exit(0)
}
+170
View File
@@ -0,0 +1,170 @@
/**
* Auto mode subcommand handlers — dump default/merged classifier rules and
* critique user-written rules. Dynamically imported when `claude auto-mode ...` runs.
*/
import { errorMessage } from '../../utils/errors.js'
import {
getMainLoopModel,
parseUserSpecifiedModel,
} from '../../utils/model/model.js'
import {
type AutoModeRules,
buildDefaultExternalSystemPrompt,
getDefaultExternalAutoModeRules,
} from '../../utils/permissions/yoloClassifier.js'
import { getAutoModeConfig } from '../../utils/settings/settings.js'
import { sideQuery } from '../../utils/sideQuery.js'
import { jsonStringify } from '../../utils/slowOperations.js'
/** Pretty-prints a rules object as 2-space-indented JSON to stdout. */
function writeRules(rules: AutoModeRules): void {
  process.stdout.write(`${jsonStringify(rules, null, 2)}\n`)
}
/** `claude auto-mode defaults` — dumps the built-in classifier rules. */
export function autoModeDefaultsHandler(): void {
  const defaults = getDefaultExternalAutoModeRules()
  writeRules(defaults)
}
/**
 * Dump the effective auto mode config: user settings where provided, external
 * defaults otherwise. Each section uses REPLACE semantics — matching how
 * buildYoloSystemPrompt resolves the external template: a non-empty user
 * section fully replaces that section's defaults, while an empty or absent
 * section falls back to the defaults.
 */
export function autoModeConfigHandler(): void {
  const user = getAutoModeConfig()
  const defaults = getDefaultExternalAutoModeRules()
  // Per-section REPLACE: a non-empty custom list wins outright.
  const pick = (custom: string[] | undefined, fallback: string[]): string[] =>
    custom && custom.length > 0 ? custom : fallback
  writeRules({
    allow: pick(user?.allow, defaults.allow),
    soft_deny: pick(user?.soft_deny, defaults.soft_deny),
    environment: pick(user?.environment, defaults.environment),
  })
}
// System prompt for the `auto-mode critique` side query: tells the reviewer
// model what auto mode classifier rules are (allow / soft_deny / environment)
// and the four dimensions to evaluate. Runtime string — do not reflow.
const CRITIQUE_SYSTEM_PROMPT =
  'You are an expert reviewer of auto mode classifier rules for Claude Code.\n' +
  '\n' +
  'Claude Code has an "auto mode" that uses an AI classifier to decide whether ' +
  'tool calls should be auto-approved or require user confirmation. Users can ' +
  'write custom rules in three categories:\n' +
  '\n' +
  '- **allow**: Actions the classifier should auto-approve\n' +
  '- **soft_deny**: Actions the classifier should block (require user confirmation)\n' +
  "- **environment**: Context about the user's setup that helps the classifier make decisions\n" +
  '\n' +
  "Your job is to critique the user's custom rules for clarity, completeness, " +
  'and potential issues. The classifier is an LLM that reads these rules as ' +
  'part of its system prompt.\n' +
  '\n' +
  'For each rule, evaluate:\n' +
  '1. **Clarity**: Is the rule unambiguous? Could the classifier misinterpret it?\n' +
  "2. **Completeness**: Are there gaps or edge cases the rule doesn't cover?\n" +
  '3. **Conflicts**: Do any of the rules conflict with each other?\n' +
  '4. **Actionability**: Is the rule specific enough for the classifier to act on?\n' +
  '\n' +
  'Be concise and constructive. Only comment on rules that could be improved. ' +
  'If all rules look good, say so.'
/**
 * `claude auto-mode critique` — sends the user's custom classifier rules to a
 * reviewer model and prints the critique. Prints guidance and returns early
 * when no custom rules are configured; sets exitCode 1 on query failure.
 */
export async function autoModeCritiqueHandler(options: {
  model?: string
}): Promise<void> {
  const config = getAutoModeConfig()
  const hasCustomRules = [
    config?.allow,
    config?.soft_deny,
    config?.environment,
  ].some(section => (section?.length ?? 0) > 0)
  if (!hasCustomRules) {
    process.stdout.write(
      'No custom auto mode rules found.\n\n' +
        'Add rules to your settings file under autoMode.{allow, soft_deny, environment}.\n' +
        'Run `claude auto-mode defaults` to see the default rules for reference.\n',
    )
    return
  }
  const model = options.model
    ? parseUserSpecifiedModel(options.model)
    : getMainLoopModel()
  const defaults = getDefaultExternalAutoModeRules()
  const classifierPrompt = buildDefaultExternalSystemPrompt()
  // One section per category; empty-user sections contribute nothing.
  const userRulesSummary = [
    formatRulesForCritique('allow', config?.allow ?? [], defaults.allow),
    formatRulesForCritique(
      'soft_deny',
      config?.soft_deny ?? [],
      defaults.soft_deny,
    ),
    formatRulesForCritique(
      'environment',
      config?.environment ?? [],
      defaults.environment,
    ),
  ].join('')
  process.stdout.write('Analyzing your auto mode rules…\n\n')
  let response
  try {
    response = await sideQuery({
      querySource: 'auto_mode_critique',
      model,
      system: CRITIQUE_SYSTEM_PROMPT,
      skipSystemPromptPrefix: true,
      max_tokens: 4096,
      messages: [
        {
          role: 'user',
          content:
            'Here is the full classifier system prompt that the auto mode classifier receives:\n\n' +
            `<classifier_system_prompt>\n${classifierPrompt}\n</classifier_system_prompt>\n\n` +
            "Here are the user's custom rules that REPLACE the corresponding default sections:\n\n" +
            `${userRulesSummary}\nPlease critique these custom rules.`,
        },
      ],
    })
  } catch (error) {
    process.stderr.write(`Failed to analyze rules: ${errorMessage(error)}\n`)
    process.exitCode = 1
    return
  }
  const textBlock = response.content.find(block => block.type === 'text')
  if (textBlock?.type === 'text') {
    process.stdout.write(`${textBlock.text}\n`)
  } else {
    process.stdout.write('No critique was generated. Please try again.\n')
  }
}
/**
 * Formats one rules section for the critique prompt: custom rules plus the
 * defaults they replace, each as a bulleted list.
 * Returns '' when the user has no custom rules in this section.
 */
function formatRulesForCritique(
  section: string,
  userRules: string[],
  defaultRules: string[],
): string {
  if (userRules.length === 0) return ''
  const bullet = (rule: string): string => '- ' + rule
  const customLines = userRules.map(bullet).join('\n')
  const defaultLines = defaultRules.map(bullet).join('\n')
  return (
    `## ${section} (custom rules replacing defaults)\n` +
    `Custom:\n${customLines}\n\n` +
    `Defaults being replaced:\n${defaultLines}\n\n`
  )
}
File diff suppressed because one or more lines are too long
+878
View File
@@ -0,0 +1,878 @@
/**
* Plugin and marketplace subcommand handlers — extracted from main.tsx for lazy loading.
* These are dynamically imported only when `claude plugin *` or `claude plugin marketplace *` runs.
*/
/* eslint-disable custom-rules/no-process-exit -- CLI subcommand handlers intentionally exit */
import figures from 'figures'
import { basename, dirname } from 'path'
import { setUseCoworkPlugins } from '../../bootstrap/state.js'
import {
type AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
type AnalyticsMetadata_I_VERIFIED_THIS_IS_PII_TAGGED,
logEvent,
} from '../../services/analytics/index.js'
import {
disableAllPlugins,
disablePlugin,
enablePlugin,
installPlugin,
uninstallPlugin,
updatePluginCli,
VALID_INSTALLABLE_SCOPES,
VALID_UPDATE_SCOPES,
} from '../../services/plugins/pluginCliCommands.js'
import { getPluginErrorMessage } from '../../types/plugin.js'
import { errorMessage } from '../../utils/errors.js'
import { logError } from '../../utils/log.js'
import { clearAllCaches } from '../../utils/plugins/cacheUtils.js'
import { getInstallCounts } from '../../utils/plugins/installCounts.js'
import {
isPluginInstalled,
loadInstalledPluginsV2,
} from '../../utils/plugins/installedPluginsManager.js'
import {
createPluginId,
loadMarketplacesWithGracefulDegradation,
} from '../../utils/plugins/marketplaceHelpers.js'
import {
addMarketplaceSource,
loadKnownMarketplacesConfig,
refreshAllMarketplaces,
refreshMarketplace,
removeMarketplaceSource,
saveMarketplaceToSettings,
} from '../../utils/plugins/marketplaceManager.js'
import { loadPluginMcpServers } from '../../utils/plugins/mcpPluginIntegration.js'
import { parseMarketplaceInput } from '../../utils/plugins/parseMarketplaceInput.js'
import {
parsePluginIdentifier,
scopeToSettingSource,
} from '../../utils/plugins/pluginIdentifier.js'
import { loadAllPlugins } from '../../utils/plugins/pluginLoader.js'
import type { PluginSource } from '../../utils/plugins/schemas.js'
import {
type ValidationResult,
validateManifest,
validatePluginContents,
} from '../../utils/plugins/validatePlugin.js'
import { jsonStringify } from '../../utils/slowOperations.js'
import { plural } from '../../utils/stringUtils.js'
import { cliError, cliOk } from '../exit.js'
// Re-export for main.tsx to reference in option definitions
export { VALID_INSTALLABLE_SCOPES, VALID_UPDATE_SCOPES }
/**
 * Logs a marketplace command failure and exits through cliError.
 * Never returns — `never` lets callers use it as a terminal statement.
 *
 * @param action - Verb phrase describing what failed, e.g. "add marketplace".
 */
export function handleMarketplaceError(error: unknown, action: string): never {
  logError(error)
  const detail = errorMessage(error)
  cliError(`${figures.cross} Failed to ${action}: ${detail}`)
}
/**
 * Prints a validation result's errors and warnings as bulleted lists,
 * each preceded by a count header. Prints nothing for an empty result.
 */
function printValidationResult(result: ValidationResult): void {
  const { errors, warnings } = result
  if (errors.length > 0) {
    // biome-ignore lint/suspicious/noConsole: intentional console output
    console.log(
      `${figures.cross} Found ${errors.length} ${plural(errors.length, 'error')}:\n`,
    )
    for (const issue of errors) {
      // biome-ignore lint/suspicious/noConsole: intentional console output
      console.log(` ${figures.pointer} ${issue.path}: ${issue.message}`)
    }
    // biome-ignore lint/suspicious/noConsole: intentional console output
    console.log('')
  }
  if (warnings.length > 0) {
    // biome-ignore lint/suspicious/noConsole: intentional console output
    console.log(
      `${figures.warning} Found ${warnings.length} ${plural(warnings.length, 'warning')}:\n`,
    )
    for (const issue of warnings) {
      // biome-ignore lint/suspicious/noConsole: intentional console output
      console.log(` ${figures.pointer} ${issue.path}: ${issue.message}`)
    }
    // biome-ignore lint/suspicious/noConsole: intentional console output
    console.log('')
  }
}
// plugin validate
/**
 * `claude plugin validate` — validates a plugin or marketplace manifest and,
 * for plugin manifests located inside a `.claude-plugin` directory, the
 * plugin's content files (skills, agents, commands, hooks) as well.
 * Exits 0 on success, 1 on validation failure, 2 on unexpected error.
 */
export async function pluginValidateHandler(
  manifestPath: string,
  options: { cowork?: boolean },
): Promise<void> {
  if (options.cowork) setUseCoworkPlugins(true)
  try {
    const manifestResult = await validateManifest(manifestPath)
    // biome-ignore lint/suspicious/noConsole: intentional console output
    console.log(
      `Validating ${manifestResult.fileType} manifest: ${manifestResult.filePath}\n`,
    )
    printValidationResult(manifestResult)
    // When the plugin manifest lives under .claude-plugin/, also validate
    // content files — regardless of whether the user passed the plugin
    // directory or the plugin.json path directly.
    let contentResults: ValidationResult[] = []
    if (manifestResult.fileType === 'plugin') {
      const manifestDir = dirname(manifestResult.filePath)
      if (basename(manifestDir) === '.claude-plugin') {
        contentResults = await validatePluginContents(dirname(manifestDir))
        for (const contentResult of contentResults) {
          // biome-ignore lint/suspicious/noConsole: intentional console output
          console.log(
            `Validating ${contentResult.fileType}: ${contentResult.filePath}\n`,
          )
          printValidationResult(contentResult)
        }
      }
    }
    const allSuccess =
      manifestResult.success && contentResults.every(r => r.success)
    const hasWarnings =
      manifestResult.warnings.length > 0 ||
      contentResults.some(r => r.warnings.length > 0)
    if (allSuccess) {
      cliOk(
        hasWarnings
          ? `${figures.tick} Validation passed with warnings`
          : `${figures.tick} Validation passed`,
      )
    } else {
      // biome-ignore lint/suspicious/noConsole: intentional console output
      console.log(`${figures.cross} Validation failed`)
      process.exit(1)
    }
  } catch (error) {
    logError(error)
    // biome-ignore lint/suspicious/noConsole: intentional console output
    console.error(
      `${figures.cross} Unexpected error during validation: ${errorMessage(error)}`,
    )
    // Exit 2 distinguishes tool failure from validation failure (1).
    process.exit(2)
  }
}
// plugin list (formerly lines 5217–5416 of main.tsx — TODO confirm range)
/**
 * `claude plugin list` — lists installed plugins, plus session-only plugins
 * loaded via --plugin-dir, as JSON (--json) or human-readable text. With
 * --available, the JSON output also includes marketplace plugins that are
 * not yet installed. Exits via cliOk.
 */
export async function pluginListHandler(options: {
  json?: boolean
  available?: boolean
  cowork?: boolean
}): Promise<void> {
  if (options.cowork) setUseCoworkPlugins(true)
  logEvent('tengu_plugin_list_command', {})
  const installedData = loadInstalledPluginsV2()
  // Dynamic import keeps the startup-check module out of the lazy-load path.
  const { getPluginEditableScopes } = await import(
    '../../utils/plugins/pluginStartupCheck.js'
  )
  const enabledPlugins = getPluginEditableScopes()
  const pluginIds = Object.keys(installedData.plugins)
  // Load all plugins once. The JSON and human paths both need:
  // - loadErrors (to show load failures per plugin)
  // - inline plugins (session-only via --plugin-dir, source='name@inline')
  // which are NOT in installedData.plugins (V2 bookkeeping) — they must
  // be surfaced separately or `plugin list` silently ignores --plugin-dir.
  const {
    enabled: loadedEnabled,
    disabled: loadedDisabled,
    errors: loadErrors,
  } = await loadAllPlugins()
  const allLoadedPlugins = [...loadedEnabled, ...loadedDisabled]
  const inlinePlugins = allLoadedPlugins.filter(p =>
    p.source.endsWith('@inline'),
  )
  // Path-level inline failures (dir doesn't exist, parse error before
  // manifest is read) use source='inline[N]'. Plugin-level errors after
  // manifest read use source='name@inline'. Collect both for the session
  // section — these are otherwise invisible since they have no pluginId.
  const inlineLoadErrors = loadErrors.filter(
    e => e.source.endsWith('@inline') || e.source.startsWith('inline['),
  )
  if (options.json) {
    // Create a map of plugin source to loaded plugin for quick lookup
    const loadedPluginMap = new Map(allLoadedPlugins.map(p => [p.source, p]))
    const plugins: Array<{
      id: string
      version: string
      scope: string
      enabled: boolean
      installPath: string
      installedAt?: string
      lastUpdated?: string
      projectPath?: string
      mcpServers?: Record<string, unknown>
      errors?: string[]
    }> = []
    for (const pluginId of pluginIds.sort()) {
      const installations = installedData.plugins[pluginId]
      if (!installations || installations.length === 0) continue
      // Find loading errors for this plugin
      const pluginName = parsePluginIdentifier(pluginId).name
      const pluginErrors = loadErrors
        .filter(
          e =>
            e.source === pluginId || ('plugin' in e && e.plugin === pluginName),
        )
        .map(getPluginErrorMessage)
      for (const installation of installations) {
        // Try to find the loaded plugin to get MCP servers
        const loadedPlugin = loadedPluginMap.get(pluginId)
        let mcpServers: Record<string, unknown> | undefined
        if (loadedPlugin) {
          // Load MCP servers if not already cached
          const servers =
            loadedPlugin.mcpServers ||
            (await loadPluginMcpServers(loadedPlugin))
          if (servers && Object.keys(servers).length > 0) {
            mcpServers = servers
          }
        }
        plugins.push({
          id: pluginId,
          version: installation.version || 'unknown',
          scope: installation.scope,
          enabled: enabledPlugins.has(pluginId),
          installPath: installation.installPath,
          installedAt: installation.installedAt,
          lastUpdated: installation.lastUpdated,
          projectPath: installation.projectPath,
          mcpServers,
          errors: pluginErrors.length > 0 ? pluginErrors : undefined,
        })
      }
    }
    // Session-only plugins: scope='session', no install metadata.
    // Filter from inlineLoadErrors (not loadErrors) so an installed plugin
    // with the same manifest name doesn't cross-contaminate via e.plugin.
    // The e.plugin fallback catches the dirName≠manifestName case:
    // createPluginFromPath tags errors with `${dirName}@inline` but
    // plugin.source is reassigned to `${manifest.name}@inline` afterward
    // (pluginLoader.ts loadInlinePlugins), so e.source !== p.source when
    // a dev checkout dir like ~/code/my-fork/ has manifest name 'cool-plugin'.
    for (const p of inlinePlugins) {
      const servers = p.mcpServers || (await loadPluginMcpServers(p))
      const pErrors = inlineLoadErrors
        .filter(
          e => e.source === p.source || ('plugin' in e && e.plugin === p.name),
        )
        .map(getPluginErrorMessage)
      plugins.push({
        id: p.source,
        version: p.manifest.version ?? 'unknown',
        scope: 'session',
        enabled: p.enabled !== false,
        installPath: p.path,
        mcpServers:
          servers && Object.keys(servers).length > 0 ? servers : undefined,
        errors: pErrors.length > 0 ? pErrors : undefined,
      })
    }
    // Path-level inline failures (--plugin-dir /nonexistent): no LoadedPlugin
    // exists so the loop above can't surface them. Mirror the human-path
    // handling so JSON consumers see the failure instead of silent omission.
    for (const e of inlineLoadErrors.filter(e =>
      e.source.startsWith('inline['),
    )) {
      plugins.push({
        id: e.source,
        version: 'unknown',
        scope: 'session',
        enabled: false,
        installPath: 'path' in e ? e.path : '',
        errors: [getPluginErrorMessage(e)],
      })
    }
    // If --available is set, also load available plugins from marketplaces
    if (options.available) {
      const available: Array<{
        pluginId: string
        name: string
        description?: string
        marketplaceName: string
        version?: string
        source: PluginSource
        installCount?: number
      }> = []
      try {
        const [config, installCounts] = await Promise.all([
          loadKnownMarketplacesConfig(),
          getInstallCounts(),
        ])
        const { marketplaces } =
          await loadMarketplacesWithGracefulDegradation(config)
        for (const {
          name: marketplaceName,
          data: marketplace,
        } of marketplaces) {
          if (marketplace) {
            for (const entry of marketplace.plugins) {
              const pluginId = createPluginId(entry.name, marketplaceName)
              // Only include plugins that are not already installed
              if (!isPluginInstalled(pluginId)) {
                available.push({
                  pluginId,
                  name: entry.name,
                  description: entry.description,
                  marketplaceName,
                  version: entry.version,
                  source: entry.source,
                  installCount: installCounts?.get(pluginId),
                })
              }
            }
          }
        }
      } catch {
        // Silently ignore marketplace loading errors
      }
      cliOk(jsonStringify({ installed: plugins, available }, null, 2))
    } else {
      cliOk(jsonStringify(plugins, null, 2))
    }
  }
  // Human-readable path below (JSON path above exits via cliOk).
  if (pluginIds.length === 0 && inlinePlugins.length === 0) {
    // inlineLoadErrors can exist with zero inline plugins (e.g. --plugin-dir
    // points at a nonexistent path). Don't early-exit over them — fall
    // through to the session section so the failure is visible.
    if (inlineLoadErrors.length === 0) {
      cliOk(
        'No plugins installed. Use `claude plugin install` to install a plugin.',
      )
    }
  }
  if (pluginIds.length > 0) {
    // biome-ignore lint/suspicious/noConsole: intentional console output
    console.log('Installed plugins:\n')
  }
  for (const pluginId of pluginIds.sort()) {
    const installations = installedData.plugins[pluginId]
    if (!installations || installations.length === 0) continue
    // Find loading errors for this plugin
    const pluginName = parsePluginIdentifier(pluginId).name
    const pluginErrors = loadErrors.filter(
      e => e.source === pluginId || ('plugin' in e && e.plugin === pluginName),
    )
    for (const installation of installations) {
      const isEnabled = enabledPlugins.has(pluginId)
      const status =
        pluginErrors.length > 0
          ? `${figures.cross} failed to load`
          : isEnabled
            ? `${figures.tick} enabled`
            : `${figures.cross} disabled`
      const version = installation.version || 'unknown'
      const scope = installation.scope
      // biome-ignore lint/suspicious/noConsole: intentional console output
      console.log(` ${figures.pointer} ${pluginId}`)
      // biome-ignore lint/suspicious/noConsole: intentional console output
      console.log(` Version: ${version}`)
      // biome-ignore lint/suspicious/noConsole: intentional console output
      console.log(` Scope: ${scope}`)
      // biome-ignore lint/suspicious/noConsole: intentional console output
      console.log(` Status: ${status}`)
      for (const error of pluginErrors) {
        // biome-ignore lint/suspicious/noConsole: intentional console output
        console.log(` Error: ${getPluginErrorMessage(error)}`)
      }
      // biome-ignore lint/suspicious/noConsole: intentional console output
      console.log('')
    }
  }
  if (inlinePlugins.length > 0 || inlineLoadErrors.length > 0) {
    // biome-ignore lint/suspicious/noConsole: intentional console output
    console.log('Session-only plugins (--plugin-dir):\n')
    for (const p of inlinePlugins) {
      // Same dirName≠manifestName fallback as the JSON path above — error
      // sources use the dir basename but p.source uses the manifest name.
      const pErrors = inlineLoadErrors.filter(
        e => e.source === p.source || ('plugin' in e && e.plugin === p.name),
      )
      const status =
        pErrors.length > 0
          ? `${figures.cross} loaded with errors`
          : `${figures.tick} loaded`
      // biome-ignore lint/suspicious/noConsole: intentional console output
      console.log(` ${figures.pointer} ${p.source}`)
      // biome-ignore lint/suspicious/noConsole: intentional console output
      console.log(` Version: ${p.manifest.version ?? 'unknown'}`)
      // biome-ignore lint/suspicious/noConsole: intentional console output
      console.log(` Path: ${p.path}`)
      // biome-ignore lint/suspicious/noConsole: intentional console output
      console.log(` Status: ${status}`)
      for (const e of pErrors) {
        // biome-ignore lint/suspicious/noConsole: intentional console output
        console.log(` Error: ${getPluginErrorMessage(e)}`)
      }
      // biome-ignore lint/suspicious/noConsole: intentional console output
      console.log('')
    }
    // Path-level failures: no LoadedPlugin object exists. Show them so
    // `--plugin-dir /typo` doesn't just silently produce nothing.
    for (const e of inlineLoadErrors.filter(e =>
      e.source.startsWith('inline['),
    )) {
      // biome-ignore lint/suspicious/noConsole: intentional console output
      console.log(
        ` ${figures.pointer} ${e.source}: ${figures.cross} ${getPluginErrorMessage(e)}\n`,
      )
    }
  }
  cliOk()
}
// marketplace add (lines 5433–5487)
export async function marketplaceAddHandler(
  source: string,
  options: { cowork?: boolean; sparse?: string[]; scope?: string },
): Promise<void> {
  if (options.cowork) setUseCoworkPlugins(true)
  try {
    const parsedSource = await parseMarketplaceInput(source)
    if (!parsedSource) {
      cliError(
        `${figures.cross} Invalid marketplace source format. Try: owner/repo, https://..., or ./path`,
      )
    }
    if ('error' in parsedSource) {
      cliError(`${figures.cross} ${parsedSource.error}`)
    }
    // Scope defaults to `user`; anything else must be one of the known three.
    const scope = options.scope ?? 'user'
    if (scope !== 'user' && scope !== 'project' && scope !== 'local') {
      cliError(
        `${figures.cross} Invalid scope '${scope}'. Use: user, project, or local`,
      )
    }
    const settingSource = scopeToSettingSource(scope)
    // --sparse only applies to git-materialized sources (github/git).
    let sourceSpec = parsedSource
    if (options.sparse && options.sparse.length > 0) {
      if (sourceSpec.source === 'github' || sourceSpec.source === 'git') {
        sourceSpec = {
          ...sourceSpec,
          sparsePaths: options.sparse,
        }
      } else {
        cliError(
          `${figures.cross} --sparse is only supported for github and git marketplace sources (got: ${sourceSpec.source})`,
        )
      }
    }
    // biome-ignore lint/suspicious/noConsole: intentional console output
    console.log('Adding marketplace...')
    const reportProgress = (message: string): void => {
      // biome-ignore lint/suspicious/noConsole: intentional console output
      console.log(message)
    }
    const { name, alreadyMaterialized, resolvedSource } =
      await addMarketplaceSource(sourceSpec, reportProgress)
    // Write intent to settings at the requested scope, then invalidate caches
    // so the new marketplace is visible to follow-up commands.
    saveMarketplaceToSettings(name, { source: resolvedSource }, settingSource)
    clearAllCaches()
    // NOTE(review): for github sources this logs the raw `owner/repo` string
    // into the general source_type column — confirm that is intended given
    // the _PROTO_* PII-tagged route used by the plugin handlers below.
    const sourceType =
      sourceSpec.source === 'github' ? sourceSpec.repo : sourceSpec.source
    logEvent('tengu_marketplace_added', {
      source_type:
        sourceType as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
    })
    cliOk(
      alreadyMaterialized
        ? `${figures.tick} Marketplace '${name}' already on disk — declared in ${scope} settings`
        : `${figures.tick} Successfully added marketplace: ${name} (declared in ${scope} settings)`,
    )
  } catch (error) {
    handleMarketplaceError(error, 'add marketplace')
  }
}
// marketplace list (lines 5497–5565)
export async function marketplaceListHandler(options: {
  json?: boolean
  cowork?: boolean
}): Promise<void> {
  if (options.cowork) setUseCoworkPlugins(true)
  try {
    const config = await loadKnownMarketplacesConfig()
    const names = Object.keys(config)
    if (options.json) {
      // Machine-readable listing: one flat record per marketplace, sorted by
      // name. Key order (name, source, repo/url/path, installLocation) is
      // preserved for stable JSON output.
      const records = names.sort().map(name => {
        const entry = config[name]
        const src = entry?.source
        return {
          name,
          source: src?.source,
          ...(src?.source === 'github' && { repo: src.repo }),
          ...(src?.source === 'git' && { url: src.url }),
          ...(src?.source === 'url' && { url: src.url }),
          ...(src?.source === 'directory' && { path: src.path }),
          ...(src?.source === 'file' && { path: src.path }),
          installLocation: entry?.installLocation,
        }
      })
      cliOk(jsonStringify(records, null, 2))
    }
    if (names.length === 0) {
      cliOk('No marketplaces configured')
    }
    // biome-ignore lint/suspicious/noConsole: intentional console output
    console.log('Configured marketplaces:\n')
    for (const name of names) {
      // biome-ignore lint/suspicious/noConsole: intentional console output
      console.log(`  ${figures.pointer} ${name}`)
      const src = config[name]?.source
      if (src) {
        let label: string | undefined
        switch (src.source) {
          case 'github':
            label = `GitHub (${src.repo})`
            break
          case 'git':
            label = `Git (${src.url})`
            break
          case 'url':
            label = `URL (${src.url})`
            break
          case 'directory':
            label = `Directory (${src.path})`
            break
          case 'file':
            label = `File (${src.path})`
            break
        }
        if (label !== undefined) {
          // biome-ignore lint/suspicious/noConsole: intentional console output
          console.log(`    Source: ${label}`)
        }
      }
      // biome-ignore lint/suspicious/noConsole: intentional console output
      console.log('')
    }
    cliOk()
  } catch (error) {
    handleMarketplaceError(error, 'list marketplaces')
  }
}
// marketplace remove (lines 5576–5598)
/**
 * `claude plugin marketplace remove <name>` — delete a configured
 * marketplace source, then invalidate caches so the removal is visible to
 * any follow-up command in the same process. Errors are routed through
 * handleMarketplaceError; success exits 0 via cliOk.
 */
export async function marketplaceRemoveHandler(
  name: string,
  options: { cowork?: boolean },
): Promise<void> {
  if (options.cowork) setUseCoworkPlugins(true)
  try {
    await removeMarketplaceSource(name)
    clearAllCaches()
    logEvent('tengu_marketplace_removed', {
      marketplace_name:
        name as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
    })
    cliOk(`${figures.tick} Successfully removed marketplace: ${name}`)
  } catch (error) {
    handleMarketplaceError(error, 'remove marketplace')
  }
}
// marketplace update (lines 5609–5672)
export async function marketplaceUpdateHandler(
  name: string | undefined,
  options: { cowork?: boolean },
): Promise<void> {
  if (options.cowork) setUseCoworkPlugins(true)
  try {
    if (!name) {
      // No name given: refresh every configured marketplace.
      const config = await loadKnownMarketplacesConfig()
      const marketplaceNames = Object.keys(config)
      if (marketplaceNames.length === 0) {
        cliOk('No marketplaces configured')
      }
      // biome-ignore lint/suspicious/noConsole: intentional console output
      console.log(`Updating ${marketplaceNames.length} marketplace(s)...`)
      await refreshAllMarketplaces()
      clearAllCaches()
      logEvent('tengu_marketplace_updated_all', {
        count:
          marketplaceNames.length as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
      })
      cliOk(
        `${figures.tick} Successfully updated ${marketplaceNames.length} marketplace(s)`,
      )
    } else {
      // A specific marketplace was named: refresh just that one, streaming
      // progress messages as they arrive.
      // biome-ignore lint/suspicious/noConsole: intentional console output
      console.log(`Updating marketplace: ${name}...`)
      await refreshMarketplace(name, message => {
        // biome-ignore lint/suspicious/noConsole: intentional console output
        console.log(message)
      })
      clearAllCaches()
      logEvent('tengu_marketplace_updated', {
        marketplace_name:
          name as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
      })
      cliOk(`${figures.tick} Successfully updated marketplace: ${name}`)
    }
  } catch (error) {
    handleMarketplaceError(error, 'update marketplace(s)')
  }
}
// plugin install (lines 5690–5721)
export async function pluginInstallHandler(
  plugin: string,
  options: { scope?: string; cowork?: boolean },
): Promise<void> {
  if (options.cowork) setUseCoworkPlugins(true)
  const scope = options.scope || 'user'
  // --cowork implies user-level plugin storage; other scopes are rejected.
  if (options.cowork && scope !== 'user') {
    cliError('--cowork can only be used with user scope')
  }
  const knownScopes: readonly string[] = VALID_INSTALLABLE_SCOPES
  if (!knownScopes.includes(scope)) {
    cliError(
      `Invalid scope: ${scope}. Must be one of: ${VALID_INSTALLABLE_SCOPES.join(', ')}.`,
    )
  }
  // _PROTO_* routes to PII-tagged plugin_name/marketplace_name BQ columns;
  // the unredacted plugin arg is deliberately NOT logged to general-access
  // metadata. marketplace may be undefined (fires before resolution).
  const { name, marketplace } = parsePluginIdentifier(plugin)
  logEvent('tengu_plugin_install_command', {
    _PROTO_plugin_name: name as AnalyticsMetadata_I_VERIFIED_THIS_IS_PII_TAGGED,
    ...(marketplace && {
      _PROTO_marketplace_name:
        marketplace as AnalyticsMetadata_I_VERIFIED_THIS_IS_PII_TAGGED,
    }),
    scope: scope as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
  })
  await installPlugin(plugin, scope as 'user' | 'project' | 'local')
}
// plugin uninstall (lines 5738–5769)
export async function pluginUninstallHandler(
  plugin: string,
  options: { scope?: string; cowork?: boolean; keepData?: boolean },
): Promise<void> {
  if (options.cowork) setUseCoworkPlugins(true)
  const scope = options.scope || 'user'
  // --cowork implies user-level plugin storage; other scopes are rejected.
  if (options.cowork && scope !== 'user') {
    cliError('--cowork can only be used with user scope')
  }
  const scopeIsValid = (VALID_INSTALLABLE_SCOPES as readonly string[]).includes(
    scope,
  )
  if (!scopeIsValid) {
    cliError(
      `Invalid scope: ${scope}. Must be one of: ${VALID_INSTALLABLE_SCOPES.join(', ')}.`,
    )
  }
  // Only the parsed name/marketplace reach analytics, via PII-tagged columns.
  const { name, marketplace } = parsePluginIdentifier(plugin)
  logEvent('tengu_plugin_uninstall_command', {
    _PROTO_plugin_name: name as AnalyticsMetadata_I_VERIFIED_THIS_IS_PII_TAGGED,
    ...(marketplace && {
      _PROTO_marketplace_name:
        marketplace as AnalyticsMetadata_I_VERIFIED_THIS_IS_PII_TAGGED,
    }),
    scope: scope as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
  })
  // keepData is forwarded as-is to the uninstall implementation.
  await uninstallPlugin(
    plugin,
    scope as 'user' | 'project' | 'local',
    options.keepData,
  )
}
// plugin enable (lines 5783–5818)
export async function pluginEnableHandler(
  plugin: string,
  options: { scope?: string; cowork?: boolean },
): Promise<void> {
  if (options.cowork) setUseCoworkPlugins(true)
  // Scope stays undefined unless --scope (or --cowork) picks one; it is
  // logged as 'auto' and forwarded to enablePlugin as-is.
  let scope: (typeof VALID_INSTALLABLE_SCOPES)[number] | undefined
  if (options.scope) {
    const candidates: readonly string[] = VALID_INSTALLABLE_SCOPES
    if (!candidates.includes(options.scope)) {
      cliError(
        `Invalid scope "${options.scope}". Valid scopes: ${VALID_INSTALLABLE_SCOPES.join(', ')}`,
      )
    }
    scope = options.scope as (typeof VALID_INSTALLABLE_SCOPES)[number]
  }
  if (options.cowork && scope !== undefined && scope !== 'user') {
    cliError('--cowork can only be used with user scope')
  }
  // --cowork always operates at user scope
  if (options.cowork && scope === undefined) {
    scope = 'user'
  }
  const { name, marketplace } = parsePluginIdentifier(plugin)
  logEvent('tengu_plugin_enable_command', {
    _PROTO_plugin_name: name as AnalyticsMetadata_I_VERIFIED_THIS_IS_PII_TAGGED,
    ...(marketplace && {
      _PROTO_marketplace_name:
        marketplace as AnalyticsMetadata_I_VERIFIED_THIS_IS_PII_TAGGED,
    }),
    scope: (scope ??
      'auto') as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
  })
  await enablePlugin(plugin, scope)
}
// plugin disable (lines 5833–5902)
export async function pluginDisableHandler(
  plugin: string | undefined,
  options: { scope?: string; cowork?: boolean; all?: boolean },
): Promise<void> {
  // Exactly one of `plugin` / --all must be provided.
  if (options.all && plugin) {
    cliError('Cannot use --all with a specific plugin')
  }
  if (!options.all && !plugin) {
    cliError('Please specify a plugin name or use --all to disable all plugins')
  }
  if (options.cowork) setUseCoworkPlugins(true)
  if (options.all) {
    if (options.scope) {
      cliError('Cannot use --scope with --all')
    }
    // No _PROTO_plugin_name here — --all disables all plugins.
    // Distinguishable from the specific-plugin branch by plugin_name IS NULL.
    logEvent('tengu_plugin_disable_command', {})
    await disableAllPlugins()
    return
  }
  let scope: (typeof VALID_INSTALLABLE_SCOPES)[number] | undefined
  if (options.scope) {
    const candidates: readonly string[] = VALID_INSTALLABLE_SCOPES
    if (!candidates.includes(options.scope)) {
      cliError(
        `Invalid scope "${options.scope}". Valid scopes: ${VALID_INSTALLABLE_SCOPES.join(', ')}`,
      )
    }
    scope = options.scope as (typeof VALID_INSTALLABLE_SCOPES)[number]
  }
  if (options.cowork && scope !== undefined && scope !== 'user') {
    cliError('--cowork can only be used with user scope')
  }
  // --cowork always operates at user scope
  if (options.cowork && scope === undefined) {
    scope = 'user'
  }
  // The guards above ensure `plugin` is set on this path, but TS cannot
  // correlate the two conditions — assert once instead of sprinkling `!`.
  const target = plugin as string
  const { name, marketplace } = parsePluginIdentifier(target)
  logEvent('tengu_plugin_disable_command', {
    _PROTO_plugin_name: name as AnalyticsMetadata_I_VERIFIED_THIS_IS_PII_TAGGED,
    ...(marketplace && {
      _PROTO_marketplace_name:
        marketplace as AnalyticsMetadata_I_VERIFIED_THIS_IS_PII_TAGGED,
    }),
    scope: (scope ??
      'auto') as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
  })
  await disablePlugin(target, scope)
}
// plugin update (lines 5918–5948)
export async function pluginUpdateHandler(
  plugin: string,
  options: { scope?: string; cowork?: boolean },
): Promise<void> {
  if (options.cowork) setUseCoworkPlugins(true)
  // Fired before scope validation (matches the original ordering).
  const { name, marketplace } = parsePluginIdentifier(plugin)
  logEvent('tengu_plugin_update_command', {
    _PROTO_plugin_name: name as AnalyticsMetadata_I_VERIFIED_THIS_IS_PII_TAGGED,
    ...(marketplace && {
      _PROTO_marketplace_name:
        marketplace as AnalyticsMetadata_I_VERIFIED_THIS_IS_PII_TAGGED,
    }),
  })
  // Update defaults to user scope when --scope is omitted.
  let scope: (typeof VALID_UPDATE_SCOPES)[number] = 'user'
  if (options.scope) {
    const candidates: readonly string[] = VALID_UPDATE_SCOPES
    if (!candidates.includes(options.scope)) {
      cliError(
        `Invalid scope "${options.scope}". Valid scopes: ${VALID_UPDATE_SCOPES.join(', ')}`,
      )
    }
    scope = options.scope as (typeof VALID_UPDATE_SCOPES)[number]
  }
  if (options.cowork && scope !== 'user') {
    cliError('--cowork can only be used with user scope')
  }
  await updatePluginCli(plugin, scope)
}
File diff suppressed because one or more lines are too long
+32
View File
@@ -0,0 +1,32 @@
import { jsonStringify } from '../utils/slowOperations.js'
// JSON.stringify emits U+2028/U+2029 raw (valid per ECMA-404). When the
// output is a single NDJSON line, any receiver that uses JavaScript
// line-terminator semantics (ECMA-262 §11.3 — \n \r U+2028 U+2029) to
// split the stream will cut the JSON mid-string. ProcessTransport now
// silently skips non-JSON lines rather than crashing (gh-28405), but
// the truncated fragment is still lost — the message is silently dropped.
//
// The \uXXXX form is equivalent JSON (parses to the same string) but
// can never be mistaken for a line terminator by ANY receiver. This is
// what ES2019's "Subsume JSON" proposal and Node's util.inspect do.
//
// Single regex with alternation: the callback's one dispatch per match
// is cheaper than two full-string scans.
const JS_LINE_TERMINATORS = /\u2028|\u2029/g
// Escape table for the two JavaScript-only line terminators.
const LINE_TERMINATOR_ESCAPES: Record<string, string> = {
  '\u2028': '\\u2028',
  '\u2029': '\\u2029',
}
// Replace each U+2028/U+2029 with its six-character \uXXXX escape; all other
// characters pass through untouched.
function escapeJsLineTerminators(json: string): string {
  return json.replace(JS_LINE_TERMINATORS, c => LINE_TERMINATOR_ESCAPES[c] ?? c)
}
/**
 * JSON.stringify for one-message-per-line transports. Escapes U+2028
 * LINE SEPARATOR and U+2029 PARAGRAPH SEPARATOR so the serialized output
 * cannot be broken by a line-splitting receiver. Output is still valid
 * JSON and parses to the same value.
 */
export function ndjsonSafeStringify(value: unknown): string {
  const serialized = jsonStringify(value)
  return escapeJsLineTerminators(serialized)
}
+5594
View File
File diff suppressed because it is too large Load Diff
+255
View File
@@ -0,0 +1,255 @@
import type { StdoutMessage } from 'src/entrypoints/sdk/controlTypes.js'
import { PassThrough } from 'stream'
import { URL } from 'url'
import { getSessionId } from '../bootstrap/state.js'
import { getPollIntervalConfig } from '../bridge/pollConfig.js'
import { registerCleanup } from '../utils/cleanupRegistry.js'
import { setCommandLifecycleListener } from '../utils/commandLifecycle.js'
import { isDebugMode, logForDebugging } from '../utils/debug.js'
import { logForDiagnosticsNoPII } from '../utils/diagLogs.js'
import { isEnvTruthy } from '../utils/envUtils.js'
import { errorMessage } from '../utils/errors.js'
import { gracefulShutdown } from '../utils/gracefulShutdown.js'
import { logError } from '../utils/log.js'
import { writeToStdout } from '../utils/process.js'
import { getSessionIngressAuthToken } from '../utils/sessionIngressAuth.js'
import {
setSessionMetadataChangedListener,
setSessionStateChangedListener,
} from '../utils/sessionState.js'
import {
setInternalEventReader,
setInternalEventWriter,
} from '../utils/sessionStorage.js'
import { ndjsonSafeStringify } from './ndjsonSafeStringify.js'
import { StructuredIO } from './structuredIO.js'
import { CCRClient, CCRInitError } from './transports/ccrClient.js'
import { SSETransport } from './transports/SSETransport.js'
import type { Transport } from './transports/Transport.js'
import { getTransportForUrl } from './transports/transportUtils.js'
/**
 * Bidirectional streaming for SDK mode with session tracking.
 *
 * The transport is selected from the stream URL's protocol via
 * getTransportForUrl() (WebSocket/SSE). Incoming transport data is piped into
 * the inherited StructuredIO input stream; outgoing messages go through the
 * transport (or the CCR v2 client when CLAUDE_CODE_USE_CCR_V2 is set).
 *
 * NOTE: the constructor's wiring ORDER is load-bearing — all data/event
 * callbacks (and the CCRClient, when enabled) must be registered before
 * transport.connect() is called. See the inline comments below.
 */
export class RemoteIO extends StructuredIO {
  // Parsed remote endpoint the transport connects to.
  private url: URL
  private transport: Transport
  // Feeds the inherited StructuredIO reader; transport data is written here.
  private inputStream: PassThrough
  // True when running under the bridge topology (CLAUDE_CODE_ENVIRONMENT_KIND).
  private readonly isBridge: boolean = false
  private readonly isDebug: boolean = false
  // Non-null only when CLAUDE_CODE_USE_CCR_V2 is truthy.
  private ccrClient: CCRClient | null = null
  // Bridge-only keep-alive interval handle; null when disabled or cleared.
  private keepAliveTimer: ReturnType<typeof setInterval> | null = null
  /**
   * @param streamUrl Remote endpoint; its protocol selects the transport.
   * @param initialPrompt Optional chunks forwarded into the input stream
   *   (newline-normalized) once construction completes.
   * @param replayUserMessages Forwarded to the StructuredIO base class.
   */
  constructor(
    streamUrl: string,
    initialPrompt?: AsyncIterable<string>,
    replayUserMessages?: boolean,
  ) {
    const inputStream = new PassThrough({ encoding: 'utf8' })
    super(inputStream, replayUserMessages)
    this.inputStream = inputStream
    this.url = new URL(streamUrl)
    // Prepare headers with session token if available
    const headers: Record<string, string> = {}
    const sessionToken = getSessionIngressAuthToken()
    if (sessionToken) {
      headers['Authorization'] = `Bearer ${sessionToken}`
    } else {
      logForDebugging('[remote-io] No session ingress token available', {
        level: 'error',
      })
    }
    // Add environment runner version if available (set by Environment Manager)
    const erVersion = process.env.CLAUDE_CODE_ENVIRONMENT_RUNNER_VERSION
    if (erVersion) {
      headers['x-environment-runner-version'] = erVersion
    }
    // Provide a callback that re-reads the session token dynamically.
    // When the parent process refreshes the token (via token file or env var),
    // the transport can pick it up on reconnection.
    const refreshHeaders = (): Record<string, string> => {
      const h: Record<string, string> = {}
      const freshToken = getSessionIngressAuthToken()
      if (freshToken) {
        h['Authorization'] = `Bearer ${freshToken}`
      }
      const freshErVersion = process.env.CLAUDE_CODE_ENVIRONMENT_RUNNER_VERSION
      if (freshErVersion) {
        h['x-environment-runner-version'] = freshErVersion
      }
      return h
    }
    // Get appropriate transport based on URL protocol
    this.transport = getTransportForUrl(
      this.url,
      headers,
      getSessionId(),
      refreshHeaders,
    )
    // Set up data callback: all transport data flows into the inherited
    // StructuredIO input stream. In bridge+debug mode it is also echoed to
    // stdout (newline-terminated) for the bridge parent to observe.
    this.isBridge = process.env.CLAUDE_CODE_ENVIRONMENT_KIND === 'bridge'
    this.isDebug = isDebugMode()
    this.transport.setOnData((data: string) => {
      this.inputStream.write(data)
      if (this.isBridge && this.isDebug) {
        writeToStdout(data.endsWith('\n') ? data : data + '\n')
      }
    })
    // Set up close callback to handle connection failures
    this.transport.setOnClose(() => {
      // End the input stream to trigger graceful shutdown
      this.inputStream.end()
    })
    // Initialize CCR v2 client (heartbeats, epoch, state reporting, event writes).
    // The CCRClient constructor wires the SSE received-ack handler
    // synchronously, so new CCRClient() MUST run before transport.connect() —
    // otherwise early SSE frames hit an unwired onEventCallback and their
    // 'received' delivery acks are silently dropped.
    if (isEnvTruthy(process.env.CLAUDE_CODE_USE_CCR_V2)) {
      // CCR v2 is SSE+POST by definition. getTransportForUrl returns
      // SSETransport under the same env var, but the two checks live in
      // different files — assert the invariant so a future decoupling
      // fails loudly here instead of confusingly inside CCRClient.
      if (!(this.transport instanceof SSETransport)) {
        throw new Error(
          'CCR v2 requires SSETransport; check getTransportForUrl',
        )
      }
      this.ccrClient = new CCRClient(this.transport, this.url)
      const init = this.ccrClient.initialize()
      // restoredWorkerState resolves to null on init failure so readers can
      // proceed; the failure itself is handled by the catch below.
      this.restoredWorkerState = init.catch(() => null)
      init.catch((error: unknown) => {
        logForDiagnosticsNoPII('error', 'cli_worker_lifecycle_init_failed', {
          reason: error instanceof CCRInitError ? error.reason : 'unknown',
        })
        logError(
          new Error(`CCRClient initialization failed: ${errorMessage(error)}`),
        )
        void gracefulShutdown(1, 'other')
      })
      registerCleanup(async () => this.ccrClient?.close())
      // Register internal event writer for transcript persistence.
      // When set, sessionStorage writes transcript messages as CCR v2
      // internal events instead of v1 Session Ingress.
      setInternalEventWriter((eventType, payload, options) =>
        this.ccrClient!.writeInternalEvent(eventType, payload, options),
      )
      // Register internal event readers for session resume.
      // When set, hydrateFromCCRv2InternalEvents() can fetch foreground
      // and subagent internal events to reconstruct conversation state.
      setInternalEventReader(
        () => this.ccrClient!.readInternalEvents(),
        () => this.ccrClient!.readSubagentInternalEvents(),
      )
      // Command lifecycle → CCR delivery-state mapping.
      const LIFECYCLE_TO_DELIVERY = {
        started: 'processing',
        completed: 'processed',
      } as const
      setCommandLifecycleListener((uuid, state) => {
        this.ccrClient?.reportDelivery(uuid, LIFECYCLE_TO_DELIVERY[state])
      })
      setSessionStateChangedListener((state, details) => {
        this.ccrClient?.reportState(state, details)
      })
      setSessionMetadataChangedListener(metadata => {
        this.ccrClient?.reportMetadata(metadata)
      })
    }
    // Start connection only after all callbacks are wired (setOnData above,
    // setOnEvent inside new CCRClient() when CCR v2 is enabled).
    void this.transport.connect()
    // Push a silent keep_alive frame on a fixed interval so upstream
    // proxies and the session-ingress layer don't GC an otherwise-idle
    // remote control session. The keep_alive type is filtered before
    // reaching any client UI (Query.ts drops it; structuredIO.ts drops it;
    // web/iOS/Android never see it in their message loop). Interval comes
    // from GrowthBook (tengu_bridge_poll_interval_config
    // session_keepalive_interval_v2_ms, default 120s); 0 = disabled.
    // Bridge-only: fixes Envoy idle timeout on bridge-topology sessions
    // (#21931). byoc workers ran without this before #21931 and do not
    // need it — different network path.
    const keepAliveIntervalMs =
      getPollIntervalConfig().session_keepalive_interval_v2_ms
    if (this.isBridge && keepAliveIntervalMs > 0) {
      this.keepAliveTimer = setInterval(() => {
        logForDebugging('[remote-io] keep_alive sent')
        void this.write({ type: 'keep_alive' }).catch(err => {
          logForDebugging(
            `[remote-io] keep_alive write failed: ${errorMessage(err)}`,
          )
        })
      }, keepAliveIntervalMs)
      // unref so the timer alone never keeps the process alive.
      this.keepAliveTimer.unref?.()
    }
    // Register for graceful shutdown cleanup
    registerCleanup(async () => this.close())
    // If initial prompt is provided, send it through the input stream
    if (initialPrompt) {
      // Convert the initial prompt to the input stream format.
      // Chunks from stdin may already contain trailing newlines, so strip
      // them before appending our own to avoid double-newline issues that
      // cause structuredIO to parse empty lines. String() handles both
      // string chunks and Buffer objects from process.stdin.
      const stream = this.inputStream
      void (async () => {
        for await (const chunk of initialPrompt) {
          stream.write(String(chunk).replace(/\n$/, '') + '\n')
        }
      })()
    }
  }
  /** Delegate to the CCR v2 client when present; resolved no-op otherwise. */
  override flushInternalEvents(): Promise<void> {
    return this.ccrClient?.flushInternalEvents() ?? Promise.resolve()
  }
  /** Queue depth of unflushed CCR internal events (0 without CCR v2). */
  override get internalEventsPending(): number {
    return this.ccrClient?.internalEventsPending ?? 0
  }
  /**
   * Send output to the transport.
   * In bridge mode, control_request messages are always echoed to stdout so the
   * bridge parent can detect permission requests. Other messages are echoed only
   * in debug mode.
   */
  async write(message: StdoutMessage): Promise<void> {
    if (this.ccrClient) {
      await this.ccrClient.writeEvent(message)
    } else {
      await this.transport.write(message)
    }
    if (this.isBridge) {
      if (message.type === 'control_request' || this.isDebug) {
        writeToStdout(ndjsonSafeStringify(message) + '\n')
      }
    }
  }
  /**
   * Clean up connections gracefully
   */
  close(): void {
    if (this.keepAliveTimer) {
      clearInterval(this.keepAliveTimer)
      this.keepAliveTimer = null
    }
    this.transport.close()
    this.inputStream.end()
  }
}
+859
View File
@@ -0,0 +1,859 @@
import { feature } from 'bun:bundle'
import type {
ElicitResult,
JSONRPCMessage,
} from '@modelcontextprotocol/sdk/types.js'
import { randomUUID } from 'crypto'
import type { AssistantMessage } from 'src//types/message.js'
import type {
HookInput,
HookJSONOutput,
PermissionUpdate,
SDKMessage,
SDKUserMessage,
} from 'src/entrypoints/agentSdkTypes.js'
import { SDKControlElicitationResponseSchema } from 'src/entrypoints/sdk/controlSchemas.js'
import type {
SDKControlRequest,
SDKControlResponse,
StdinMessage,
StdoutMessage,
} from 'src/entrypoints/sdk/controlTypes.js'
import type { CanUseToolFn } from 'src/hooks/useCanUseTool.js'
import type { Tool, ToolUseContext } from 'src/Tool.js'
import { type HookCallback, hookJSONOutputSchema } from 'src/types/hooks.js'
import { logForDebugging } from 'src/utils/debug.js'
import { logForDiagnosticsNoPII } from 'src/utils/diagLogs.js'
import { AbortError } from 'src/utils/errors.js'
import {
type Output as PermissionToolOutput,
permissionPromptToolResultToPermissionDecision,
outputSchema as permissionToolOutputSchema,
} from 'src/utils/permissions/PermissionPromptToolResultSchema.js'
import type {
PermissionDecision,
PermissionDecisionReason,
} from 'src/utils/permissions/PermissionResult.js'
import { hasPermissionsToUseTool } from 'src/utils/permissions/permissions.js'
import { writeToStdout } from 'src/utils/process.js'
import { jsonStringify } from 'src/utils/slowOperations.js'
import { z } from 'zod/v4'
import { notifyCommandLifecycle } from '../utils/commandLifecycle.js'
import { normalizeControlMessageKeys } from '../utils/controlMessageCompat.js'
import { executePermissionRequestHooks } from '../utils/hooks.js'
import {
applyPermissionUpdates,
persistPermissionUpdates,
} from '../utils/permissions/PermissionUpdate.js'
import {
notifySessionStateChanged,
type RequiresActionDetails,
type SessionExternalMetadata,
} from '../utils/sessionState.js'
import { jsonParse } from '../utils/slowOperations.js'
import { Stream } from '../utils/stream.js'
import { ndjsonSafeStringify } from './ndjsonSafeStringify.js'
/**
* Synthetic tool name used when forwarding sandbox network permission
* requests via the can_use_tool control_request protocol. SDK hosts
* see this as a normal tool permission prompt.
*/
export const SANDBOX_NETWORK_ACCESS_TOOL_NAME = 'SandboxNetworkAccess'
/**
 * Map a permission decision reason to the string forwarded to SDK consumers.
 * Returns undefined when the reason variant carries no exposable text.
 */
function serializeDecisionReason(
  reason: PermissionDecisionReason | undefined,
): string | undefined {
  if (!reason) {
    return undefined
  }
  // feature() comes from 'bun:bundle' — NOTE(review): presumably a build-time
  // flag, so this branch is compiled out when neither classifier feature is
  // enabled; confirm against the bundler configuration.
  if (
    (feature('BASH_CLASSIFIER') || feature('TRANSCRIPT_CLASSIFIER')) &&
    reason.type === 'classifier'
  ) {
    return reason.reason
  }
  switch (reason.type) {
    // These decision kinds expose no reason text to the consumer.
    case 'rule':
    case 'mode':
    case 'subcommandResults':
    case 'permissionPromptTool':
      return undefined
    // These variants carry a human-readable reason string; pass it through.
    case 'hook':
    case 'asyncAgent':
    case 'sandboxOverride':
    case 'workingDir':
    case 'safetyCheck':
    case 'other':
      return reason.reason
    // A 'classifier' reason not handled above falls through the switch and
    // yields undefined implicitly.
  }
}
/**
 * Assemble the details payload attached to a requires-action session state
 * for a pending tool permission request.
 */
function buildRequiresActionDetails(
  tool: Tool,
  input: Record<string, unknown>,
  toolUseID: string,
  requestId: string,
): RequiresActionDetails {
  // Preferred description sources, most specific first.
  const describe = (): string =>
    tool.getActivityDescription?.(input) ??
    tool.getToolUseSummary?.(input) ??
    tool.userFacingName(input)
  // Per-tool summary methods may throw on malformed input; fall back to the
  // bare tool name rather than letting a bad description break permissions.
  let description: string
  try {
    description = describe()
  } catch {
    description = tool.name
  }
  return {
    tool_name: tool.name,
    action_description: description,
    tool_use_id: toolUseID,
    request_id: requestId,
    input,
  }
}
// One in-flight control_request awaiting its control_response.
type PendingRequest<T> = {
  resolve: (result: T) => void
  reject: (error: unknown) => void
  // Optional zod schema used to validate the response payload before resolve.
  schema?: z.Schema
  // The original outbound request, kept for orphan/duplicate bookkeeping.
  request: SDKControlRequest
}
// Maximum number of resolved tool_use IDs to track. Once exceeded, the oldest
// entry is evicted. This bounds memory in very long sessions while keeping
// enough history to catch duplicate control_response deliveries.
const MAX_RESOLVED_TOOL_USE_IDS = 1000
/**
 * Provides a structured way to read and write SDK messages from stdio,
 * capturing the SDK protocol.
 */
export class StructuredIO {
readonly structuredInput: AsyncGenerator<StdinMessage | SDKMessage>
private readonly pendingRequests = new Map<string, PendingRequest<unknown>>()
// CCR external_metadata read back on worker start; null when the
// transport doesn't restore. Assigned by RemoteIO.
restoredWorkerState: Promise<SessionExternalMetadata | null> =
Promise.resolve(null)
private inputClosed = false
private unexpectedResponseCallback?: (
response: SDKControlResponse,
) => Promise<void>
// Tracks tool_use IDs that have been resolved through the normal permission
// flow (or aborted by a hook). When a duplicate control_response arrives
// after the original was already handled, this Set prevents the orphan
// handler from re-processing it — which would push duplicate assistant
// messages into mutableMessages and cause a 400 "tool_use ids must be unique"
// error from the API.
private readonly resolvedToolUseIds = new Set<string>()
private prependedLines: string[] = []
private onControlRequestSent?: (request: SDKControlRequest) => void
private onControlRequestResolved?: (requestId: string) => void
// sendRequest() and print.ts both enqueue here; the drain loop is the
// only writer. Prevents control_request from overtaking queued stream_events.
readonly outbound = new Stream<StdoutMessage>()
constructor(
private readonly input: AsyncIterable<string>,
private readonly replayUserMessages?: boolean,
) {
this.input = input
this.structuredInput = this.read()
}
/**
* Records a tool_use ID as resolved so that late/duplicate control_response
* messages for the same tool are ignored by the orphan handler.
*/
private trackResolvedToolUseId(request: SDKControlRequest): void {
if (request.request.subtype === 'can_use_tool') {
this.resolvedToolUseIds.add(request.request.tool_use_id)
if (this.resolvedToolUseIds.size > MAX_RESOLVED_TOOL_USE_IDS) {
// Evict the oldest entry (Sets iterate in insertion order)
const first = this.resolvedToolUseIds.values().next().value
if (first !== undefined) {
this.resolvedToolUseIds.delete(first)
}
}
}
}
  /**
   * Flush pending internal events. No-op (resolved promise) for non-remote
   * IO; overridden by RemoteIO to delegate to the CCR client.
   */
  flushInternalEvents(): Promise<void> {
    return Promise.resolve()
  }
  /** Internal-event queue depth. Overridden by RemoteIO; zero otherwise. */
  get internalEventsPending(): number {
    return 0
  }
/**
* Queue a user turn to be yielded before the next message from this.input.
* Works before iteration starts and mid-stream — read() re-checks
* prependedLines between each yielded message.
*/
prependUserMessage(content: string): void {
this.prependedLines.push(
jsonStringify({
type: 'user',
session_id: '',
message: { role: 'user', content },
parent_tool_use_id: null,
} satisfies SDKUserMessage) + '\n',
)
}
  /**
   * Split the raw input into newline-delimited lines, parse each via
   * processLine, and yield the resulting messages. Backs `structuredInput`.
   * On end of input, marks the stream closed and rejects every pending
   * control request.
   */
  private async *read() {
    let content = ''
    // Called once before for-await (an empty this.input otherwise skips the
    // loop body entirely), then again per block. prependedLines re-check is
    // inside the loop so a prepend pushed between two messages in the SAME
    // block still lands first.
    const splitAndProcess = async function* (this: StructuredIO) {
      for (;;) {
        if (this.prependedLines.length > 0) {
          content = this.prependedLines.join('') + content
          this.prependedLines = []
        }
        const newline = content.indexOf('\n')
        if (newline === -1) break
        const line = content.slice(0, newline)
        content = content.slice(newline + 1)
        const message = await this.processLine(line)
        if (message) {
          logForDiagnosticsNoPII('info', 'cli_stdin_message_parsed', {
            type: message.type,
          })
          yield message
        }
      }
    }.bind(this)
    yield* splitAndProcess()
    for await (const block of this.input) {
      content += block
      yield* splitAndProcess()
    }
    // Trailing bytes with no final newline still form one complete message.
    if (content) {
      const message = await this.processLine(content)
      if (message) {
        yield message
      }
    }
    this.inputClosed = true
    // Reject all pending requests: the input stream closed before their
    // responses arrived, so they can never be resolved.
    for (const request of this.pendingRequests.values()) {
      request.reject(
        new Error('Tool permission stream closed before response received'),
      )
    }
  }
/** Outstanding can_use_tool control_requests awaiting a response. */
getPendingPermissionRequests() {
  const entries = [...this.pendingRequests.values()]
  return entries
    .filter(entry => entry.request.request.subtype === 'can_use_tool')
    .map(entry => entry.request)
}
/**
 * Register a handler for control_responses that match no pending request
 * (orphans — see the duplicate/orphan path in processLine).
 */
setUnexpectedResponseCallback(
  callback: (response: SDKControlResponse) => Promise<void>,
): void {
  this.unexpectedResponseCallback = callback
}
/**
 * Resolve a pending permission request with an injected control_response.
 * The bridge uses this to feed permission responses from claude.ai into the
 * SDK permission flow.
 *
 * Also writes a control_cancel_request to the SDK consumer so its
 * canUseTool callback is aborted via the signal — without it, the callback
 * would hang forever.
 */
injectControlResponse(response: SDKControlResponse): void {
  const requestId = response.response?.request_id
  if (!requestId) return
  const pending = this.pendingRequests.get(requestId)
  if (!pending) return
  this.trackResolvedToolUseId(pending.request)
  this.pendingRequests.delete(requestId)
  // Abort the SDK consumer's canUseTool callback — the bridge won the race.
  void this.write({
    type: 'control_cancel_request',
    request_id: requestId,
  })
  if (response.response.subtype === 'error') {
    pending.reject(new Error(response.response.error))
    return
  }
  const payload = response.response.response
  if (!pending.schema) {
    pending.resolve({})
    return
  }
  try {
    pending.resolve(pending.schema.parse(payload))
  } catch (parseError) {
    pending.reject(parseError)
  }
}
/**
 * Register a callback invoked whenever a can_use_tool control_request
 * is written to stdout. Used by the bridge to forward permission
 * requests to claude.ai. Pass undefined to clear the callback.
 */
setOnControlRequestSent(
  callback: ((request: SDKControlRequest) => void) | undefined,
): void {
  this.onControlRequestSent = callback
}
/**
 * Register a callback invoked when a can_use_tool control_response arrives
 * from the SDK consumer (via stdin). Used by the bridge to cancel the
 * stale permission prompt on claude.ai when the SDK consumer wins the race.
 * Pass undefined to clear the callback.
 */
setOnControlRequestResolved(
  callback: ((requestId: string) => void) | undefined,
): void {
  this.onControlRequestResolved = callback
}
/**
 * Parse one NDJSON line from stdin and dispatch it.
 *
 * Returns a message for the caller to yield (user / assistant / system /
 * control_request, plus control_response when replayUserMessages is set),
 * or undefined for lines consumed internally (keep-alives, env-var updates,
 * control_responses resolved against pendingRequests). A malformed line is
 * fatal: it logs and exits the process.
 */
private async processLine(
  line: string,
): Promise<StdinMessage | SDKMessage | undefined> {
  // Skip empty lines (e.g. from double newlines in piped stdin)
  if (!line) {
    return undefined
  }
  try {
    const message = normalizeControlMessageKeys(jsonParse(line)) as
      | StdinMessage
      | SDKMessage
    if (message.type === 'keep_alive') {
      // Silently ignore keep-alive messages
      return undefined
    }
    if (message.type === 'update_environment_variables') {
      // Apply environment variable updates directly to process.env.
      // Used by bridge session runner for auth token refresh
      // (CLAUDE_CODE_SESSION_ACCESS_TOKEN) which must be readable
      // by the REPL process itself, not just child Bash commands.
      const keys = Object.keys(message.variables)
      for (const [key, value] of Object.entries(message.variables)) {
        process.env[key] = value
      }
      logForDebugging(
        `[structuredIO] applied update_environment_variables: ${keys.join(', ')}`,
      )
      return undefined
    }
    if (message.type === 'control_response') {
      // Close lifecycle for every control_response, including duplicates
      // and orphans — orphans don't yield to print.ts's main loop, so this
      // is the only path that sees them. uuid is server-injected into the
      // payload.
      const uuid =
        'uuid' in message && typeof message.uuid === 'string'
          ? message.uuid
          : undefined
      if (uuid) {
        notifyCommandLifecycle(uuid, 'completed')
      }
      const request = this.pendingRequests.get(message.response.request_id)
      if (!request) {
        // Check if this tool_use was already resolved through the normal
        // permission flow. Duplicate control_response deliveries (e.g. from
        // WebSocket reconnects) arrive after the original was handled, and
        // re-processing them would push duplicate assistant messages into
        // the conversation, causing API 400 errors.
        const responsePayload =
          message.response.subtype === 'success'
            ? message.response.response
            : undefined
        const toolUseID = responsePayload?.toolUseID
        if (
          typeof toolUseID === 'string' &&
          this.resolvedToolUseIds.has(toolUseID)
        ) {
          logForDebugging(
            `Ignoring duplicate control_response for already-resolved toolUseID=${toolUseID} request_id=${message.response.request_id}`,
          )
          return undefined
        }
        // Not a known duplicate — hand the orphan to the registered
        // handler (if any) so it isn't silently dropped.
        if (this.unexpectedResponseCallback) {
          await this.unexpectedResponseCallback(message)
        }
        return undefined // Ignore responses for requests we don't know about
      }
      this.trackResolvedToolUseId(request.request)
      this.pendingRequests.delete(message.response.request_id)
      // Notify the bridge when the SDK consumer resolves a can_use_tool
      // request, so it can cancel the stale permission prompt on claude.ai.
      if (
        request.request.request.subtype === 'can_use_tool' &&
        this.onControlRequestResolved
      ) {
        this.onControlRequestResolved(message.response.request_id)
      }
      if (message.response.subtype === 'error') {
        request.reject(new Error(message.response.error))
        return undefined
      }
      const result = message.response.response
      // Validate against the request's schema when one was supplied;
      // a parse failure rejects the awaiting sendRequest promise.
      if (request.schema) {
        try {
          request.resolve(request.schema.parse(result))
        } catch (error) {
          request.reject(error)
        }
      } else {
        request.resolve({})
      }
      // Propagate control responses when replay is enabled
      if (this.replayUserMessages) {
        return message
      }
      return undefined
    }
    if (
      message.type !== 'user' &&
      message.type !== 'control_request' &&
      message.type !== 'assistant' &&
      message.type !== 'system'
    ) {
      logForDebugging(`Ignoring unknown message type: ${message.type}`, {
        level: 'warn',
      })
      return undefined
    }
    if (message.type === 'control_request') {
      if (!message.request) {
        exitWithMessage(`Error: Missing request on control_request`)
      }
      return message
    }
    if (message.type === 'assistant' || message.type === 'system') {
      return message
    }
    if (message.message.role !== 'user') {
      exitWithMessage(
        `Error: Expected message role 'user', got '${message.message.role}'`,
      )
    }
    return message
  } catch (error) {
    // biome-ignore lint/suspicious/noConsole: intentional console output
    console.error(`Error parsing streaming input line: ${line}: ${error}`)
    // eslint-disable-next-line custom-rules/no-process-exit
    process.exit(1)
  }
}
async write(message: StdoutMessage): Promise<void> {
writeToStdout(ndjsonSafeStringify(message) + '\n')
}
/**
 * Write a control_request to the outbound queue and await its
 * control_response.
 *
 * The returned promise is resolved/rejected by processLine (normal flow),
 * injectControlResponse (bridge flow), or read()'s stream-closed sweep —
 * all via the pendingRequests map entry created here. Aborting `signal`
 * enqueues a control_cancel_request and rejects immediately with
 * AbortError without waiting for the host to acknowledge. The finally
 * block guarantees listener and map cleanup however the promise settles.
 *
 * @param requestId caller-supplied when the id must be known up front
 *   (e.g. createCanUseTool announces it via onPermissionPrompt).
 */
private async sendRequest<Response>(
  request: SDKControlRequest['request'],
  schema: z.Schema,
  signal?: AbortSignal,
  requestId: string = randomUUID(),
): Promise<Response> {
  const message: SDKControlRequest = {
    type: 'control_request',
    request_id: requestId,
    request,
  }
  // Fail fast: no response can ever arrive once input is closed, and an
  // already-aborted signal would never fire the 'abort' listener below.
  if (this.inputClosed) {
    throw new Error('Stream closed')
  }
  if (signal?.aborted) {
    throw new Error('Request aborted')
  }
  this.outbound.enqueue(message)
  if (request.subtype === 'can_use_tool' && this.onControlRequestSent) {
    this.onControlRequestSent(message)
  }
  const aborted = () => {
    this.outbound.enqueue({
      type: 'control_cancel_request',
      request_id: requestId,
    })
    // Immediately reject the outstanding promise, without
    // waiting for the host to acknowledge the cancellation.
    const request = this.pendingRequests.get(requestId)
    if (request) {
      // Track the tool_use ID as resolved before rejecting, so that a
      // late response from the host is ignored by the orphan handler.
      this.trackResolvedToolUseId(request.request)
      request.reject(new AbortError())
    }
  }
  if (signal) {
    signal.addEventListener('abort', aborted, {
      once: true,
    })
  }
  try {
    return await new Promise<Response>((resolve, reject) => {
      this.pendingRequests.set(requestId, {
        request: {
          type: 'control_request',
          request_id: requestId,
          request,
        },
        resolve: result => {
          resolve(result as Response)
        },
        reject,
        schema,
      })
    })
  } finally {
    if (signal) {
      signal.removeEventListener('abort', aborted)
    }
    this.pendingRequests.delete(requestId)
  }
}
/**
 * Build the CanUseToolFn used for SDK-hosted sessions.
 *
 * Evaluates base permissions first; when the result is neither allow nor
 * deny, races PermissionRequest hooks against the host's interactive
 * permission prompt (sent as a can_use_tool control_request). Whichever
 * settles first wins; the loser is aborted or ignored. Any failure in the
 * prompt path collapses to a deny decision.
 *
 * @param onPermissionPrompt invoked with prompt details just before the
 *   control_request is sent, so the host can surface UI immediately.
 */
createCanUseTool(
  onPermissionPrompt?: (details: RequiresActionDetails) => void,
): CanUseToolFn {
  return async (
    tool: Tool,
    input: { [key: string]: unknown },
    toolUseContext: ToolUseContext,
    assistantMessage: AssistantMessage,
    toolUseID: string,
    forceDecision?: PermissionDecision,
  ): Promise<PermissionDecision> => {
    const mainPermissionResult =
      forceDecision ??
      (await hasPermissionsToUseTool(
        tool,
        input,
        toolUseContext,
        assistantMessage,
        toolUseID,
      ))
    // If the tool is allowed or denied, return the result
    if (
      mainPermissionResult.behavior === 'allow' ||
      mainPermissionResult.behavior === 'deny'
    ) {
      return mainPermissionResult
    }
    // Run PermissionRequest hooks in parallel with the SDK permission
    // prompt. In the terminal CLI, hooks race against the interactive
    // prompt so that e.g. a hook with --delay 20 doesn't block the UI.
    // We need the same behavior here: the SDK host (VS Code, etc.) shows
    // its permission dialog immediately while hooks run in the background.
    // Whichever resolves first wins; the loser is cancelled/ignored.
    // AbortController used to cancel the SDK request if a hook decides first
    const hookAbortController = new AbortController()
    const parentSignal = toolUseContext.abortController.signal
    // Forward parent abort to our local controller
    const onParentAbort = () => hookAbortController.abort()
    parentSignal.addEventListener('abort', onParentAbort, { once: true })
    try {
      // Start the hook evaluation (runs in background)
      const hookPromise = executePermissionRequestHooksForSDK(
        tool.name,
        toolUseID,
        input,
        toolUseContext,
        mainPermissionResult.suggestions,
      ).then(decision => ({ source: 'hook' as const, decision }))
      // Start the SDK permission prompt immediately (don't wait for hooks)
      const requestId = randomUUID()
      onPermissionPrompt?.(
        buildRequiresActionDetails(tool, input, toolUseID, requestId),
      )
      const sdkPromise = this.sendRequest<PermissionToolOutput>(
        {
          subtype: 'can_use_tool',
          tool_name: tool.name,
          input,
          permission_suggestions: mainPermissionResult.suggestions,
          blocked_path: mainPermissionResult.blockedPath,
          decision_reason: serializeDecisionReason(
            mainPermissionResult.decisionReason,
          ),
          tool_use_id: toolUseID,
          agent_id: toolUseContext.agentId,
        },
        permissionToolOutputSchema(),
        hookAbortController.signal,
        requestId,
      ).then(result => ({ source: 'sdk' as const, result }))
      // Race: hook completion vs SDK prompt response.
      // The hook promise always resolves (never rejects), returning
      // undefined if no hook made a decision.
      const winner = await Promise.race([hookPromise, sdkPromise])
      if (winner.source === 'hook') {
        if (winner.decision) {
          // Hook decided — abort the pending SDK request.
          // Suppress the expected AbortError rejection from sdkPromise.
          sdkPromise.catch(() => {})
          hookAbortController.abort()
          return winner.decision
        }
        // Hook passed through (no decision) — wait for the SDK prompt
        const sdkResult = await sdkPromise
        return permissionPromptToolResultToPermissionDecision(
          sdkResult.result,
          tool,
          input,
          toolUseContext,
        )
      }
      // SDK prompt responded first — use its result (hook still running
      // in background but its result will be ignored)
      return permissionPromptToolResultToPermissionDecision(
        winner.result,
        tool,
        input,
        toolUseContext,
      )
    } catch (error) {
      // Any failure in the prompt path (stream closed, schema mismatch,
      // non-abort error) fails closed as a deny.
      return permissionPromptToolResultToPermissionDecision(
        {
          behavior: 'deny',
          message: `Tool permission request failed: ${error}`,
          toolUseID,
        },
        tool,
        input,
        toolUseContext,
      )
    } finally {
      // Only transition back to 'running' if no other permission prompts
      // are pending (concurrent tool execution can have multiple in-flight).
      if (this.getPendingPermissionRequests().length === 0) {
        notifySessionStateChanged('running')
      }
      parentSignal.removeEventListener('abort', onParentAbort)
    }
  }
}
/**
 * Build a HookCallback that forwards hook invocations to the SDK consumer
 * as hook_callback control_requests and returns the consumer's output.
 * Failures are logged and collapsed to `{}` so a broken host callback
 * never breaks the hook pipeline.
 */
createHookCallback(callbackId: string, timeout?: number): HookCallback {
  const callback = async (
    input: HookInput,
    toolUseID: string | null,
    abort: AbortSignal | undefined,
  ): Promise<HookJSONOutput> => {
    try {
      return await this.sendRequest<HookJSONOutput>(
        {
          subtype: 'hook_callback',
          callback_id: callbackId,
          input,
          tool_use_id: toolUseID || undefined,
        },
        hookJSONOutputSchema(),
        abort,
      )
    } catch (error) {
      // biome-ignore lint/suspicious/noConsole: intentional console output
      console.error(`Error in hook callback ${callbackId}:`, error)
      return {}
    }
  }
  return { type: 'callback', timeout, callback }
}
/**
 * Forward an MCP elicitation request to the SDK consumer and return its
 * response. Any failure (stream closed, abort, schema mismatch) is
 * reported to the caller as a cancellation.
 */
async handleElicitation(
  serverName: string,
  message: string,
  requestedSchema?: Record<string, unknown>,
  signal?: AbortSignal,
  mode?: 'form' | 'url',
  url?: string,
  elicitationId?: string,
): Promise<ElicitResult> {
  try {
    return await this.sendRequest<ElicitResult>(
      {
        subtype: 'elicitation',
        mcp_server_name: serverName,
        message,
        mode,
        url,
        elicitation_id: elicitationId,
        requested_schema: requestedSchema,
      },
      SDKControlElicitationResponseSchema(),
      signal,
    )
  } catch {
    return { action: 'cancel' as const }
  }
}
/**
 * Creates a SandboxAskCallback that forwards sandbox network permission
 * requests to the SDK host as can_use_tool control_requests.
 *
 * Reuses the existing can_use_tool protocol with a synthetic tool name so
 * SDK hosts (VS Code, CCR, etc.) can prompt the user for network access
 * without a new protocol subtype. Any request failure fails closed.
 */
createSandboxAskCallback(): (hostPattern: {
  host: string
  port?: number
}) => Promise<boolean> {
  return async (hostPattern): Promise<boolean> => {
    const { host } = hostPattern
    try {
      const decision = await this.sendRequest<PermissionToolOutput>(
        {
          subtype: 'can_use_tool',
          tool_name: SANDBOX_NETWORK_ACCESS_TOOL_NAME,
          input: { host },
          tool_use_id: randomUUID(),
          description: `Allow network connection to ${host}?`,
        },
        permissionToolOutputSchema(),
      )
      return decision.behavior === 'allow'
    } catch {
      // Stream closed, abort, etc. — deny the connection.
      return false
    }
  }
}
/**
 * Relay a JSON-RPC message to an SDK-hosted MCP server and return its reply.
 */
async sendMcpMessage(
  serverName: string,
  message: JSONRPCMessage,
): Promise<JSONRPCMessage> {
  const responseSchema = z.object({
    mcp_response: z.any() as z.Schema<JSONRPCMessage>,
  })
  const { mcp_response } = await this.sendRequest<{
    mcp_response: JSONRPCMessage
  }>(
    {
      subtype: 'mcp_message',
      server_name: serverName,
      message,
    },
    responseSchema,
  )
  return mcp_response
}
}
/** Print a fatal error to stderr and terminate the process with code 1. */
function exitWithMessage(message: string): never {
  // biome-ignore lint/suspicious/noConsole: intentional console output
  console.error(message)
  // eslint-disable-next-line custom-rules/no-process-exit
  process.exit(1)
}
/**
 * Execute PermissionRequest hooks and return the first allow/deny decision
 * one of them makes, or undefined if no hook decides. An allowing hook may
 * also carry permission updates ("always allow"), which are persisted and
 * applied to the app state before returning.
 */
async function executePermissionRequestHooksForSDK(
  toolName: string,
  toolUseID: string,
  input: Record<string, unknown>,
  toolUseContext: ToolUseContext,
  suggestions: PermissionUpdate[] | undefined,
): Promise<PermissionDecision | undefined> {
  const permissionMode =
    toolUseContext.getAppState().toolPermissionContext.mode
  // Iterate the generator directly rather than collecting with `all`.
  const hookResults = executePermissionRequestHooks(
    toolName,
    toolUseID,
    input,
    toolUseContext,
    permissionMode,
    suggestions,
    toolUseContext.abortController.signal,
  )
  for await (const result of hookResults) {
    const decision = result.permissionRequestResult
    // Anything other than an explicit allow/deny keeps iterating.
    if (!decision) continue
    if (decision.behavior === 'allow') {
      // Persist and apply any permission updates the hook provided
      // ("always allow") before returning.
      const permissionUpdates = decision.updatedPermissions ?? []
      if (permissionUpdates.length > 0) {
        persistPermissionUpdates(permissionUpdates)
        const updatedContext = applyPermissionUpdates(
          toolUseContext.getAppState().toolPermissionContext,
          permissionUpdates,
        )
        toolUseContext.setAppState(prev => {
          if (prev.toolPermissionContext === updatedContext) return prev
          return { ...prev, toolPermissionContext: updatedContext }
        })
      }
      return {
        behavior: 'allow',
        updatedInput: decision.updatedInput || input,
        userModified: false,
        decisionReason: {
          type: 'hook',
          hookName: 'PermissionRequest',
        },
      }
    }
    if (decision.behavior === 'deny') {
      return {
        behavior: 'deny',
        message:
          decision.message || 'Permission denied by PermissionRequest hook',
        decisionReason: {
          type: 'hook',
          hookName: 'PermissionRequest',
        },
      }
    }
  }
  return undefined
}
+282
View File
@@ -0,0 +1,282 @@
import axios, { type AxiosError } from 'axios'
import type { StdoutMessage } from 'src/entrypoints/sdk/controlTypes.js'
import { logForDebugging } from '../../utils/debug.js'
import { logForDiagnosticsNoPII } from '../../utils/diagLogs.js'
import { getSessionIngressAuthToken } from '../../utils/sessionIngressAuth.js'
import { SerialBatchEventUploader } from './SerialBatchEventUploader.js'
import {
WebSocketTransport,
type WebSocketTransportOptions,
} from './WebSocketTransport.js'
// How long stream_event deltas accumulate in streamEventBuffer before being
// enqueued as one batch (see HybridTransport.write).
const BATCH_FLUSH_INTERVAL_MS = 100
// Per-attempt POST timeout. Bounds how long a single stuck POST can block
// the serialized queue. Without this, a hung connection stalls all writes.
const POST_TIMEOUT_MS = 15_000
// Grace period for queued writes on close(). Covers a healthy POST (~100ms)
// plus headroom; best-effort, not a delivery guarantee under degraded network.
// Void-ed (nothing awaits it) so this is a last resort — replBridge teardown
// now closes AFTER archive so archive latency is the primary drain window.
// NOTE: gracefulShutdown's cleanup budget is 2s (not the 5s outer failsafe);
// 3s here exceeds it, but the process lives ~2s longer for hooks+analytics.
const CLOSE_GRACE_MS = 3000
/**
* Hybrid transport: WebSocket for reads, HTTP POST for writes.
*
* Write flow:
*
* write(stream_event) ─┐
* │ (100ms timer)
* │
* ▼
* write(other) ────► uploader.enqueue() (SerialBatchEventUploader)
* ▲ │
* writeBatch() ────────┘ │ serial, batched, retries indefinitely,
* │ backpressure at maxQueueSize
* ▼
* postOnce() (single HTTP POST, throws on retryable)
*
* stream_event messages accumulate in streamEventBuffer for up to 100ms
* before enqueue (reduces POST count for high-volume content deltas). A
* non-stream write flushes any buffered stream_events first to preserve order.
*
* Serialization + retry + backpressure are delegated to SerialBatchEventUploader
* (same primitive CCR uses). At most one POST in-flight; events arriving during
* a POST batch into the next one. On failure, the uploader re-queues and retries
* with exponential backoff + jitter. If the queue fills past maxQueueSize,
* enqueue() blocks — giving awaiting callers backpressure.
*
* Why serialize? Bridge mode fires writes via `void transport.write()`
* (fire-and-forget). Without this, concurrent POSTs → concurrent Firestore
* writes to the same document → collisions → retry storms → pages oncall.
*/
export class HybridTransport extends WebSocketTransport {
  // HTTP endpoint derived from the WebSocket URL (see convertWsUrlToPostUrl)
  private postUrl: string
  private uploader: SerialBatchEventUploader<StdoutMessage>
  // stream_event delay buffer — accumulates content deltas for up to
  // BATCH_FLUSH_INTERVAL_MS before enqueueing (reduces POST count)
  private streamEventBuffer: StdoutMessage[] = []
  private streamEventTimer: ReturnType<typeof setTimeout> | null = null
  constructor(
    url: URL,
    headers: Record<string, string> = {},
    sessionId?: string,
    refreshHeaders?: () => Record<string, string>,
    options?: WebSocketTransportOptions & {
      maxConsecutiveFailures?: number
      onBatchDropped?: (batchSize: number, failures: number) => void
    },
  ) {
    super(url, headers, sessionId, refreshHeaders, options)
    const { maxConsecutiveFailures, onBatchDropped } = options ?? {}
    this.postUrl = convertWsUrlToPostUrl(url)
    this.uploader = new SerialBatchEventUploader<StdoutMessage>({
      // Large cap — session-ingress accepts arbitrary batch sizes. Events
      // naturally batch during in-flight POSTs; this just bounds the payload.
      maxBatchSize: 500,
      // Bridge callers use `void transport.write()` — backpressure doesn't
      // apply (they don't await). A batch >maxQueueSize deadlocks (see
      // SerialBatchEventUploader backpressure check). So set it high enough
      // to be a memory bound only. Wire real backpressure in a follow-up
      // once callers await.
      maxQueueSize: 100_000,
      baseDelayMs: 500,
      maxDelayMs: 8000,
      jitterMs: 1000,
      // Optional cap so a persistently-failing server can't pin the drain
      // loop for the lifetime of the process. Undefined = indefinite retry.
      // replBridge sets this; the 1P transportUtils path does not.
      maxConsecutiveFailures,
      onBatchDropped: (batchSize, failures) => {
        logForDiagnosticsNoPII(
          'error',
          'cli_hybrid_batch_dropped_max_failures',
          {
            batchSize,
            failures,
          },
        )
        onBatchDropped?.(batchSize, failures)
      },
      send: batch => this.postOnce(batch),
    })
    logForDebugging(`HybridTransport: POST URL = ${this.postUrl}`)
    logForDiagnosticsNoPII('info', 'cli_hybrid_transport_initialized')
  }
  /**
   * Enqueue a message and wait for the queue to drain. Returning flush()
   * preserves the contract that `await write()` resolves after the event is
   * POSTed (relied on by tests and replBridge's initial flush). Fire-and-forget
   * callers (`void transport.write()`) are unaffected — they don't await,
   * so the later resolution doesn't add latency.
   */
  override async write(message: StdoutMessage): Promise<void> {
    if (message.type === 'stream_event') {
      // Delay: accumulate stream_events briefly before enqueueing.
      // Promise resolves immediately — callers don't await stream_events.
      this.streamEventBuffer.push(message)
      if (!this.streamEventTimer) {
        this.streamEventTimer = setTimeout(
          () => this.flushStreamEvents(),
          BATCH_FLUSH_INTERVAL_MS,
        )
      }
      return
    }
    // Immediate: flush any buffered stream_events (ordering), then this event.
    await this.uploader.enqueue([...this.takeStreamEvents(), message])
    return this.uploader.flush()
  }
  /** Enqueue several messages at once; resolves after the queue drains. */
  async writeBatch(messages: StdoutMessage[]): Promise<void> {
    await this.uploader.enqueue([...this.takeStreamEvents(), ...messages])
    return this.uploader.flush()
  }
  /** Snapshot before/after writeBatch() to detect silent drops. */
  get droppedBatchCount(): number {
    return this.uploader.droppedBatchCount
  }
  /**
   * Block until all pending events are POSTed. Used by bridge's initial
   * history flush so onStateChange('connected') fires after persistence.
   */
  flush(): Promise<void> {
    void this.uploader.enqueue(this.takeStreamEvents())
    return this.uploader.flush()
  }
  /** Take ownership of buffered stream_events and clear the delay timer. */
  private takeStreamEvents(): StdoutMessage[] {
    if (this.streamEventTimer) {
      clearTimeout(this.streamEventTimer)
      this.streamEventTimer = null
    }
    const buffered = this.streamEventBuffer
    this.streamEventBuffer = []
    return buffered
  }
  /** Delay timer fired — enqueue accumulated stream_events. */
  private flushStreamEvents(): void {
    this.streamEventTimer = null
    void this.uploader.enqueue(this.takeStreamEvents())
  }
  override close(): void {
    // NOTE: any stream_events still sitting in the delay buffer are dropped
    // here — only events already enqueued get the grace window below.
    if (this.streamEventTimer) {
      clearTimeout(this.streamEventTimer)
      this.streamEventTimer = null
    }
    this.streamEventBuffer = []
    // Grace period for queued writes — fallback. replBridge teardown now
    // awaits archive between write and close (see CLOSE_GRACE_MS), so
    // archive latency is the primary drain window and this is a last
    // resort. Keep close() sync (returns immediately) but defer
    // uploader.close() so any remaining queue gets a chance to finish.
    const uploader = this.uploader
    let graceTimer: ReturnType<typeof setTimeout> | undefined
    void Promise.race([
      uploader.flush(),
      new Promise<void>(r => {
        // eslint-disable-next-line no-restricted-syntax -- need timer ref for clearTimeout
        graceTimer = setTimeout(r, CLOSE_GRACE_MS)
      }),
    ]).finally(() => {
      clearTimeout(graceTimer)
      uploader.close()
    })
    super.close()
  }
  /**
   * Single-attempt POST. Throws on retryable failures (429, 5xx, network)
   * so SerialBatchEventUploader re-queues and retries. Returns on success
   * and on permanent failures (4xx non-429, no token) so the uploader moves on.
   */
  private async postOnce(events: StdoutMessage[]): Promise<void> {
    const sessionToken = getSessionIngressAuthToken()
    if (!sessionToken) {
      // No token = permanent for this attempt; return so the batch is
      // consumed rather than retried forever.
      logForDebugging('HybridTransport: No session token available for POST')
      logForDiagnosticsNoPII('warn', 'cli_hybrid_post_no_token')
      return
    }
    const headers: Record<string, string> = {
      Authorization: `Bearer ${sessionToken}`,
      'Content-Type': 'application/json',
    }
    let response
    try {
      response = await axios.post(
        this.postUrl,
        { events },
        {
          headers,
          // Status handling happens below; never throw on HTTP status.
          validateStatus: () => true,
          timeout: POST_TIMEOUT_MS,
        },
      )
    } catch (error) {
      const axiosError = error as AxiosError
      logForDebugging(`HybridTransport: POST error: ${axiosError.message}`)
      logForDiagnosticsNoPII('warn', 'cli_hybrid_post_network_error')
      throw error
    }
    if (response.status >= 200 && response.status < 300) {
      logForDebugging(`HybridTransport: POST success count=${events.length}`)
      return
    }
    // 4xx (except 429) are permanent — drop, don't retry.
    if (
      response.status >= 400 &&
      response.status < 500 &&
      response.status !== 429
    ) {
      logForDebugging(
        `HybridTransport: POST returned ${response.status} (permanent), dropping`,
      )
      logForDiagnosticsNoPII('warn', 'cli_hybrid_post_client_error', {
        status: response.status,
      })
      return
    }
    // 429 / 5xx — retryable. Throw so uploader re-queues and backs off.
    logForDebugging(
      `HybridTransport: POST returned ${response.status} (retryable)`,
    )
    logForDiagnosticsNoPII('warn', 'cli_hybrid_post_retryable_error', {
      status: response.status,
    })
    throw new Error(`POST failed with ${response.status}`)
  }
}
/**
 * Derive the HTTP POST endpoint from a session-ingress WebSocket URL.
 * From: wss://api.example.com/v2/session_ingress/ws/<session_id>
 * To:   https://api.example.com/v2/session_ingress/session/<session_id>/events
 */
function convertWsUrlToPostUrl(wsUrl: URL): string {
  const scheme = wsUrl.protocol === 'wss:' ? 'https:' : 'http:'
  // Swap the /ws/ path segment for /session/, then ensure an /events suffix.
  let path = wsUrl.pathname.replace('/ws/', '/session/')
  if (!path.endsWith('/events')) {
    path += path.endsWith('/') ? 'events' : '/events'
  }
  return `${scheme}//${wsUrl.host}${path}${wsUrl.search}`
}
+711
View File
@@ -0,0 +1,711 @@
import axios, { type AxiosError } from 'axios'
import type { StdoutMessage } from 'src/entrypoints/sdk/controlTypes.js'
import { logForDebugging } from '../../utils/debug.js'
import { logForDiagnosticsNoPII } from '../../utils/diagLogs.js'
import { errorMessage } from '../../utils/errors.js'
import { getSessionIngressAuthHeaders } from '../../utils/sessionIngressAuth.js'
import { sleep } from '../../utils/sleep.js'
import { jsonParse, jsonStringify } from '../../utils/slowOperations.js'
import { getClaudeCodeUserAgent } from '../../utils/userAgent.js'
import type { Transport } from './Transport.js'
// ---------------------------------------------------------------------------
// Configuration
// ---------------------------------------------------------------------------
// Bounds for the reconnection backoff delay (base grows toward max).
const RECONNECT_BASE_DELAY_MS = 1000
const RECONNECT_MAX_DELAY_MS = 30_000
/** Time budget for reconnection attempts before giving up (10 minutes). */
const RECONNECT_GIVE_UP_MS = 600_000
/** Server sends keepalives every 15s; treat connection as dead after 45s of silence. */
const LIVENESS_TIMEOUT_MS = 45_000
/**
 * HTTP status codes that indicate a permanent server-side rejection.
 * The transport transitions to 'closed' immediately without retrying.
 */
const PERMANENT_HTTP_CODES = new Set([401, 403, 404])
// POST retry configuration (matches HybridTransport's uploader settings)
const POST_MAX_RETRIES = 10
const POST_BASE_DELAY_MS = 500
const POST_MAX_DELAY_MS = 8000
/** Hoisted TextDecoder options to avoid per-chunk allocation in readStream. */
const STREAM_DECODE_OPTS: TextDecodeOptions = { stream: true }
/**
 * Hoisted axios validateStatus callback (avoids per-request closure
 * allocation): accept every HTTP status so callers inspect codes themselves.
 */
function alwaysValidStatus(): boolean {
  return true
}
// ---------------------------------------------------------------------------
// SSE Frame Parser
// ---------------------------------------------------------------------------
/** One parsed SSE frame: optional event name, last-event id, and data payload. */
type SSEFrame = {
  event?: string
  id?: string
  data?: string
}
/**
 * Incrementally parse SSE frames from a text buffer.
 * Returns parsed frames and the remaining (incomplete) buffer.
 *
 * Frames with no `data:` line are dropped unless they contain a comment
 * line (`:keepalive` etc.), which still resets liveness upstream.
 *
 * @internal exported for testing
 */
export function parseSSEFrames(buffer: string): {
  frames: SSEFrame[]
  remaining: string
} {
  const frames: SSEFrame[] = []
  let cursor = 0
  for (;;) {
    // Frames are delimited by a blank line (double newline).
    const boundary = buffer.indexOf('\n\n', cursor)
    if (boundary === -1) break
    const raw = buffer.slice(cursor, boundary)
    cursor = boundary + 2
    // Whitespace-only frames carry nothing.
    if (!raw.trim()) continue
    const frame: SSEFrame = {}
    let sawComment = false
    for (const line of raw.split('\n')) {
      if (line.startsWith(':')) {
        // SSE comment line (e.g., `:keepalive`)
        sawComment = true
        continue
      }
      const sep = line.indexOf(':')
      if (sep === -1) continue
      const field = line.slice(0, sep)
      // Per SSE spec, one leading space after the colon is not part of
      // the value.
      let value = line.slice(sep + 1)
      if (value.startsWith(' ')) {
        value = value.slice(1)
      }
      if (field === 'event') {
        frame.event = value
      } else if (field === 'id') {
        frame.id = value
      } else if (field === 'data') {
        // Per SSE spec, multiple data: lines are concatenated with \n
        frame.data = frame.data ? `${frame.data}\n${value}` : value
      }
      // All other fields (retry:, …) are ignored.
    }
    // Only emit frames that have data (or are pure comments which reset liveness)
    if (frame.data || sawComment) {
      frames.push(frame)
    }
  }
  return { frames, remaining: buffer.slice(cursor) }
}
// ---------------------------------------------------------------------------
// Types
// ---------------------------------------------------------------------------
/**
 * Transport lifecycle states. connect() is only legal from 'idle' or
 * 'reconnecting' (see the guard at the top of connect()).
 */
type SSETransportState =
  | 'idle'
  | 'connected'
  | 'reconnecting'
  | 'closing'
  | 'closed'
/**
 * Payload for `event: client_event` frames, matching the StreamClientEvent
 * proto message in session_stream.proto. This is the only event type sent
 * to worker subscribers — delivery_update, session_update, ephemeral_event,
 * and catch_up_truncated are client-channel-only (see notifier.go and
 * event_stream.go SubscriberClient guard).
 */
export type StreamClientEvent = {
  event_id: string
  sequence_num: number
  event_type: string
  source: string
  payload: Record<string, unknown>
  created_at: string
}
// ---------------------------------------------------------------------------
// SSETransport
// ---------------------------------------------------------------------------
/**
* Transport that uses SSE for reading and HTTP POST for writing.
*
* Reads events via Server-Sent Events from the CCR v2 event stream endpoint.
* Writes events via HTTP POST with retry logic (same pattern as HybridTransport).
*
* Each `event: client_event` frame carries a StreamClientEvent proto JSON
* directly in `data:`. The transport extracts `payload` and passes it to
* `onData` as newline-delimited JSON for StructuredIO consumers.
*
* Supports automatic reconnection with exponential backoff and Last-Event-ID
* for resumption after disconnection.
*/
export class SSETransport implements Transport {
private state: SSETransportState = 'idle'
private onData?: (data: string) => void
private onCloseCallback?: (closeCode?: number) => void
private onEventCallback?: (event: StreamClientEvent) => void
private headers: Record<string, string>
private sessionId?: string
private refreshHeaders?: () => Record<string, string>
private readonly getAuthHeaders: () => Record<string, string>
// SSE connection state
private abortController: AbortController | null = null
private lastSequenceNum = 0
private seenSequenceNums = new Set<number>()
// Reconnection state
private reconnectAttempts = 0
private reconnectStartTime: number | null = null
private reconnectTimer: NodeJS.Timeout | null = null
// Liveness detection
private livenessTimer: NodeJS.Timeout | null = null
// POST URL (derived from SSE URL)
private postUrl: string
// Runtime epoch for CCR v2 event format
/**
 * @param url SSE read endpoint; the POST write endpoint is derived from it.
 * @param headers extra headers for requests (stored; usage not shown here).
 * @param sessionId session identifier, stored for later use.
 * @param refreshHeaders optional refresher invoked to renew headers
 *   (stored; usage not shown in this view).
 * @param initialSequenceNum resume point from a previous transport instance
 *   (see getLastSequenceNum).
 */
constructor(
  private readonly url: URL,
  headers: Record<string, string> = {},
  sessionId?: string,
  refreshHeaders?: () => Record<string, string>,
  initialSequenceNum?: number,
  /**
   * Per-instance auth header source. Omit to read the process-wide
   * CLAUDE_CODE_SESSION_ACCESS_TOKEN (single-session callers). Required
   * for concurrent multi-session callers — the env-var path is a process
   * global and would stomp across sessions.
   */
  getAuthHeaders?: () => Record<string, string>,
) {
  this.headers = headers
  this.sessionId = sessionId
  this.refreshHeaders = refreshHeaders
  this.getAuthHeaders = getAuthHeaders ?? getSessionIngressAuthHeaders
  this.postUrl = convertSSEUrlToPostUrl(url)
  // Seed with a caller-provided high-water mark so the first connect()
  // sends from_sequence_num / Last-Event-ID. Without this, a fresh
  // SSETransport always asks the server to replay from sequence 0 —
  // the entire session history on every transport swap.
  if (initialSequenceNum !== undefined && initialSequenceNum > 0) {
    this.lastSequenceNum = initialSequenceNum
  }
  logForDebugging(`SSETransport: SSE URL = ${url.href}`)
  logForDebugging(`SSETransport: POST URL = ${this.postUrl}`)
  logForDiagnosticsNoPII('info', 'cli_sse_transport_initialized')
}
/**
* High-water mark of sequence numbers seen on this stream. Callers that
* recreate the transport (e.g. replBridge onWorkReceived) read this before
* close() and pass it as `initialSequenceNum` to the next instance so the
* server resumes from the right point instead of replaying everything.
*/
getLastSequenceNum(): number {
return this.lastSequenceNum
}
async connect(): Promise<void> {
if (this.state !== 'idle' && this.state !== 'reconnecting') {
logForDebugging(
`SSETransport: Cannot connect, current state is ${this.state}`,
{ level: 'error' },
)
logForDiagnosticsNoPII('error', 'cli_sse_connect_failed')
return
}
this.state = 'reconnecting'
const connectStartTime = Date.now()
// Build SSE URL with sequence number for resumption
const sseUrl = new URL(this.url.href)
if (this.lastSequenceNum > 0) {
sseUrl.searchParams.set('from_sequence_num', String(this.lastSequenceNum))
}
// Build headers -- use fresh auth headers (supports Cookie for session keys).
// Remove stale Authorization header from this.headers when Cookie auth is used,
// since sending both confuses the auth interceptor.
const authHeaders = this.getAuthHeaders()
const headers: Record<string, string> = {
...this.headers,
...authHeaders,
Accept: 'text/event-stream',
'anthropic-version': '2023-06-01',
'User-Agent': getClaudeCodeUserAgent(),
}
if (authHeaders['Cookie']) {
delete headers['Authorization']
}
if (this.lastSequenceNum > 0) {
headers['Last-Event-ID'] = String(this.lastSequenceNum)
}
logForDebugging(`SSETransport: Opening ${sseUrl.href}`)
logForDiagnosticsNoPII('info', 'cli_sse_connect_opening')
this.abortController = new AbortController()
try {
// eslint-disable-next-line eslint-plugin-n/no-unsupported-features/node-builtins
const response = await fetch(sseUrl.href, {
headers,
signal: this.abortController.signal,
})
if (!response.ok) {
const isPermanent = PERMANENT_HTTP_CODES.has(response.status)
logForDebugging(
`SSETransport: HTTP ${response.status}${isPermanent ? ' (permanent)' : ''}`,
{ level: 'error' },
)
logForDiagnosticsNoPII('error', 'cli_sse_connect_http_error', {
status: response.status,
})
if (isPermanent) {
this.state = 'closed'
this.onCloseCallback?.(response.status)
return
}
this.handleConnectionError()
return
}
if (!response.body) {
logForDebugging('SSETransport: No response body')
this.handleConnectionError()
return
}
// Successfully connected
const connectDuration = Date.now() - connectStartTime
logForDebugging('SSETransport: Connected')
logForDiagnosticsNoPII('info', 'cli_sse_connect_connected', {
duration_ms: connectDuration,
})
this.state = 'connected'
this.reconnectAttempts = 0
this.reconnectStartTime = null
this.resetLivenessTimer()
// Read the SSE stream
await this.readStream(response.body)
} catch (error) {
if (this.abortController?.signal.aborted) {
// Intentional close
return
}
logForDebugging(
`SSETransport: Connection error: ${errorMessage(error)}`,
{ level: 'error' },
)
logForDiagnosticsNoPII('error', 'cli_sse_connect_error')
this.handleConnectionError()
}
}
/**
* Read and process the SSE stream body.
*/
// eslint-disable-next-line eslint-plugin-n/no-unsupported-features/node-builtins
private async readStream(body: ReadableStream<Uint8Array>): Promise<void> {
const reader = body.getReader()
const decoder = new TextDecoder()
let buffer = ''
try {
while (true) {
const { done, value } = await reader.read()
if (done) break
buffer += decoder.decode(value, STREAM_DECODE_OPTS)
const { frames, remaining } = parseSSEFrames(buffer)
buffer = remaining
for (const frame of frames) {
// Any frame (including keepalive comments) proves the connection is alive
this.resetLivenessTimer()
if (frame.id) {
const seqNum = parseInt(frame.id, 10)
if (!isNaN(seqNum)) {
if (this.seenSequenceNums.has(seqNum)) {
logForDebugging(
`SSETransport: DUPLICATE frame seq=${seqNum} (lastSequenceNum=${this.lastSequenceNum}, seenCount=${this.seenSequenceNums.size})`,
{ level: 'warn' },
)
logForDiagnosticsNoPII('warn', 'cli_sse_duplicate_sequence')
} else {
this.seenSequenceNums.add(seqNum)
// Prevent unbounded growth: once we have many entries, prune
// old sequence numbers that are well below the high-water mark.
// Only sequence numbers near lastSequenceNum matter for dedup.
if (this.seenSequenceNums.size > 1000) {
const threshold = this.lastSequenceNum - 200
for (const s of this.seenSequenceNums) {
if (s < threshold) {
this.seenSequenceNums.delete(s)
}
}
}
}
if (seqNum > this.lastSequenceNum) {
this.lastSequenceNum = seqNum
}
}
}
if (frame.event && frame.data) {
this.handleSSEFrame(frame.event, frame.data)
} else if (frame.data) {
// data: without event: — server is emitting the old envelope format
// or a bug. Log so incidents show as a signal instead of silent drops.
logForDebugging(
'SSETransport: Frame has data: but no event: field — dropped',
{ level: 'warn' },
)
logForDiagnosticsNoPII('warn', 'cli_sse_frame_missing_event_field')
}
}
}
} catch (error) {
if (this.abortController?.signal.aborted) return
logForDebugging(
`SSETransport: Stream read error: ${errorMessage(error)}`,
{ level: 'error' },
)
logForDiagnosticsNoPII('error', 'cli_sse_stream_read_error')
} finally {
reader.releaseLock()
}
// Stream ended — reconnect unless we're closing
if (this.state !== 'closing' && this.state !== 'closed') {
logForDebugging('SSETransport: Stream ended, reconnecting')
this.handleConnectionError()
}
}
/**
* Handle a single SSE frame. The event: field names the variant; data:
* carries the inner proto JSON directly (no envelope).
*
* Worker subscribers only receive client_event frames (see notifier.go) —
* any other event type indicates a server-side change that CC doesn't yet
* understand. Log a diagnostic so we notice in telemetry.
*/
private handleSSEFrame(eventType: string, data: string): void {
if (eventType !== 'client_event') {
logForDebugging(
`SSETransport: Unexpected SSE event type '${eventType}' on worker stream`,
{ level: 'warn' },
)
logForDiagnosticsNoPII('warn', 'cli_sse_unexpected_event_type', {
event_type: eventType,
})
return
}
let ev: StreamClientEvent
try {
ev = jsonParse(data) as StreamClientEvent
} catch (error) {
logForDebugging(
`SSETransport: Failed to parse client_event data: ${errorMessage(error)}`,
{ level: 'error' },
)
return
}
const payload = ev.payload
if (payload && typeof payload === 'object' && 'type' in payload) {
const sessionLabel = this.sessionId ? ` session=${this.sessionId}` : ''
logForDebugging(
`SSETransport: Event seq=${ev.sequence_num} event_id=${ev.event_id} event_type=${ev.event_type} payload_type=${String(payload.type)}${sessionLabel}`,
)
logForDiagnosticsNoPII('info', 'cli_sse_message_received')
// Pass the unwrapped payload as newline-delimited JSON,
// matching the format that StructuredIO/WebSocketTransport consumers expect
this.onData?.(jsonStringify(payload) + '\n')
} else {
logForDebugging(
`SSETransport: Ignoring client_event with no type in payload: event_id=${ev.event_id}`,
)
}
this.onEventCallback?.(ev)
}
/**
* Handle connection errors with exponential backoff and time budget.
*/
private handleConnectionError(): void {
this.clearLivenessTimer()
if (this.state === 'closing' || this.state === 'closed') return
// Abort any in-flight SSE fetch
this.abortController?.abort()
this.abortController = null
const now = Date.now()
if (!this.reconnectStartTime) {
this.reconnectStartTime = now
}
const elapsed = now - this.reconnectStartTime
if (elapsed < RECONNECT_GIVE_UP_MS) {
// Clear any existing timer
if (this.reconnectTimer) {
clearTimeout(this.reconnectTimer)
this.reconnectTimer = null
}
// Refresh headers before reconnecting
if (this.refreshHeaders) {
const freshHeaders = this.refreshHeaders()
Object.assign(this.headers, freshHeaders)
logForDebugging('SSETransport: Refreshed headers for reconnect')
}
this.state = 'reconnecting'
this.reconnectAttempts++
const baseDelay = Math.min(
RECONNECT_BASE_DELAY_MS * Math.pow(2, this.reconnectAttempts - 1),
RECONNECT_MAX_DELAY_MS,
)
// Add ±25% jitter
const delay = Math.max(
0,
baseDelay + baseDelay * 0.25 * (2 * Math.random() - 1),
)
logForDebugging(
`SSETransport: Reconnecting in ${Math.round(delay)}ms (attempt ${this.reconnectAttempts}, ${Math.round(elapsed / 1000)}s elapsed)`,
)
logForDiagnosticsNoPII('error', 'cli_sse_reconnect_attempt', {
reconnectAttempts: this.reconnectAttempts,
})
this.reconnectTimer = setTimeout(() => {
this.reconnectTimer = null
void this.connect()
}, delay)
} else {
logForDebugging(
`SSETransport: Reconnection time budget exhausted after ${Math.round(elapsed / 1000)}s`,
{ level: 'error' },
)
logForDiagnosticsNoPII('error', 'cli_sse_reconnect_exhausted', {
reconnectAttempts: this.reconnectAttempts,
elapsedMs: elapsed,
})
this.state = 'closed'
this.onCloseCallback?.()
}
}
/**
* Bound timeout callback. Hoisted from an inline closure so that
* resetLivenessTimer (called per-frame) does not allocate a new closure
* on every SSE frame.
*/
private readonly onLivenessTimeout = (): void => {
this.livenessTimer = null
logForDebugging('SSETransport: Liveness timeout, reconnecting', {
level: 'error',
})
logForDiagnosticsNoPII('error', 'cli_sse_liveness_timeout')
this.abortController?.abort()
this.handleConnectionError()
}
/**
* Reset the liveness timer. If no SSE frame arrives within the timeout,
* treat the connection as dead and reconnect.
*/
private resetLivenessTimer(): void {
this.clearLivenessTimer()
this.livenessTimer = setTimeout(this.onLivenessTimeout, LIVENESS_TIMEOUT_MS)
}
private clearLivenessTimer(): void {
if (this.livenessTimer) {
clearTimeout(this.livenessTimer)
this.livenessTimer = null
}
}
// -----------------------------------------------------------------------
// Write (HTTP POST) — same pattern as HybridTransport
// -----------------------------------------------------------------------
async write(message: StdoutMessage): Promise<void> {
const authHeaders = this.getAuthHeaders()
if (Object.keys(authHeaders).length === 0) {
logForDebugging('SSETransport: No session token available for POST')
logForDiagnosticsNoPII('warn', 'cli_sse_post_no_token')
return
}
const headers: Record<string, string> = {
...authHeaders,
'Content-Type': 'application/json',
'anthropic-version': '2023-06-01',
'User-Agent': getClaudeCodeUserAgent(),
}
logForDebugging(
`SSETransport: POST body keys=${Object.keys(message as Record<string, unknown>).join(',')}`,
)
for (let attempt = 1; attempt <= POST_MAX_RETRIES; attempt++) {
try {
const response = await axios.post(this.postUrl, message, {
headers,
validateStatus: alwaysValidStatus,
})
if (response.status === 200 || response.status === 201) {
logForDebugging(`SSETransport: POST success type=${message.type}`)
return
}
logForDebugging(
`SSETransport: POST ${response.status} body=${jsonStringify(response.data).slice(0, 200)}`,
)
// 4xx errors (except 429) are permanent - don't retry
if (
response.status >= 400 &&
response.status < 500 &&
response.status !== 429
) {
logForDebugging(
`SSETransport: POST returned ${response.status} (client error), not retrying`,
)
logForDiagnosticsNoPII('warn', 'cli_sse_post_client_error', {
status: response.status,
})
return
}
// 429 or 5xx - retry
logForDebugging(
`SSETransport: POST returned ${response.status}, attempt ${attempt}/${POST_MAX_RETRIES}`,
)
logForDiagnosticsNoPII('warn', 'cli_sse_post_retryable_error', {
status: response.status,
attempt,
})
} catch (error) {
const axiosError = error as AxiosError
logForDebugging(
`SSETransport: POST error: ${axiosError.message}, attempt ${attempt}/${POST_MAX_RETRIES}`,
)
logForDiagnosticsNoPII('warn', 'cli_sse_post_network_error', {
attempt,
})
}
if (attempt === POST_MAX_RETRIES) {
logForDebugging(
`SSETransport: POST failed after ${POST_MAX_RETRIES} attempts, continuing`,
)
logForDiagnosticsNoPII('warn', 'cli_sse_post_retries_exhausted')
return
}
const delayMs = Math.min(
POST_BASE_DELAY_MS * Math.pow(2, attempt - 1),
POST_MAX_DELAY_MS,
)
await sleep(delayMs)
}
}
// -----------------------------------------------------------------------
// Transport interface
// -----------------------------------------------------------------------
isConnectedStatus(): boolean {
return this.state === 'connected'
}
isClosedStatus(): boolean {
return this.state === 'closed'
}
setOnData(callback: (data: string) => void): void {
this.onData = callback
}
setOnClose(callback: (closeCode?: number) => void): void {
this.onCloseCallback = callback
}
setOnEvent(callback: (event: StreamClientEvent) => void): void {
this.onEventCallback = callback
}
close(): void {
if (this.reconnectTimer) {
clearTimeout(this.reconnectTimer)
this.reconnectTimer = null
}
this.clearLivenessTimer()
this.state = 'closing'
this.abortController?.abort()
this.abortController = null
}
}
// ---------------------------------------------------------------------------
// URL Conversion
// ---------------------------------------------------------------------------
/**
* Convert an SSE URL to the HTTP POST endpoint URL.
* The SSE stream URL and POST URL share the same base; the POST endpoint
* is at `/events` (without `/stream`).
*
* From: https://api.example.com/v2/session_ingress/session/<session_id>/events/stream
* To: https://api.example.com/v2/session_ingress/session/<session_id>/events
*/
function convertSSEUrlToPostUrl(sseUrl: URL): string {
  // The POST endpoint is the SSE path minus its trailing '/stream' segment;
  // a path without that suffix is passed through unchanged.
  const suffix = '/stream'
  const basePath = sseUrl.pathname.endsWith(suffix)
    ? sseUrl.pathname.slice(0, -suffix.length)
    : sseUrl.pathname
  // Rebuild from protocol + host (host includes any port); query and hash
  // are intentionally dropped — resume params only apply to the SSE stream.
  return `${sseUrl.protocol}//${sseUrl.host}${basePath}`
}
+275
View File
@@ -0,0 +1,275 @@
import { jsonStringify } from '../../utils/slowOperations.js'
/**
* Serial ordered event uploader with batching, retry, and backpressure.
*
* - enqueue() adds events to a pending buffer
* - At most 1 POST in-flight at a time
* - Drains up to maxBatchSize items per POST
* - New events accumulate while in-flight
* - On failure: exponential backoff (clamped), retries indefinitely
* until success or close() — unless maxConsecutiveFailures is set,
* in which case the failing batch is dropped and drain advances
* - flush() blocks until pending is empty and kicks drain if needed
* - Backpressure: enqueue() blocks when maxQueueSize is reached
*/
/**
* Throw from config.send() to make the uploader wait a server-supplied
* duration before retrying (e.g. 429 with Retry-After). When retryAfterMs
* is set, it overrides exponential backoff for that attempt — clamped to
* [baseDelayMs, maxDelayMs] and jittered so a misbehaving server can
* neither hot-loop nor stall the client, and many sessions sharing a rate
* limit don't all pounce at the same instant. Without retryAfterMs, behaves
* like any other thrown error (exponential backoff).
*/
export class RetryableError extends Error {
  /**
   * @param message      Human-readable failure description.
   * @param retryAfterMs Optional server-supplied wait hint (e.g. from a
   *                     429 Retry-After header); see class-level docs for
   *                     how the uploader clamps and jitters it.
   */
  constructor(
    message: string,
    readonly retryAfterMs?: number,
  ) {
    super(message)
    // Without this, instances report name 'Error' in logs and stack traces,
    // hiding the retryable nature of the failure. instanceof checks are
    // unaffected.
    this.name = 'RetryableError'
  }
}
/**
 * Configuration for SerialBatchEventUploader: batching limits, the send
 * callback, retry/backoff tuning, and the optional drop-after-N-failures
 * escape hatch.
 */
type SerialBatchEventUploaderConfig<T> = {
  /** Max items per POST (1 = no batching) */
  maxBatchSize: number
  /**
   * Max serialized bytes per POST. First item always goes in regardless of
   * size; subsequent items only if cumulative JSON bytes stay under this.
   * Undefined = no byte limit (count-only batching).
   */
  maxBatchBytes?: number
  /** Max pending items before enqueue() blocks */
  maxQueueSize: number
  /** The actual HTTP call — caller controls payload format */
  send: (batch: T[]) => Promise<void>
  /** Base delay for exponential backoff (ms) */
  baseDelayMs: number
  /** Max delay cap (ms) */
  maxDelayMs: number
  /** Random jitter range added to retry delay (ms) */
  jitterMs: number
  /**
   * After this many consecutive send() failures, drop the failing batch
   * and move on to the next pending item with a fresh failure budget.
   * Undefined = retry indefinitely (default).
   */
  maxConsecutiveFailures?: number
  /** Called when a batch is dropped for hitting maxConsecutiveFailures. */
  onBatchDropped?: (batchSize: number, failures: number) => void
}
export class SerialBatchEventUploader<T> {
  /** FIFO buffer of events awaiting upload. */
  private pending: T[] = []
  /** Queue depth captured at close() time (close() clears `pending`). */
  private pendingAtClose = 0
  /** True while the drain loop runs — guarantees at most one POST in flight. */
  private draining = false
  private closed = false
  /** enqueue() callers blocked on a full queue. */
  private backpressureResolvers: Array<() => void> = []
  /** Resolver for the in-progress retry sleep, so close() can wake it. */
  private sleepResolve: (() => void) | null = null
  /** flush() callers waiting for the queue to empty. */
  private flushResolvers: Array<() => void> = []
  private droppedBatches = 0
  private readonly config: SerialBatchEventUploaderConfig<T>

  constructor(config: SerialBatchEventUploaderConfig<T>) {
    this.config = config
  }

  /**
   * Monotonic count of batches dropped via maxConsecutiveFailures. Callers
   * can snapshot before flush() and compare after to detect silent drops
   * (flush() resolves normally even when batches were dropped).
   */
  get droppedBatchCount(): number {
    return this.droppedBatches
  }

  /**
   * Pending queue depth. After close(), returns the count at close time —
   * close() clears the queue but shutdown diagnostics may read this after.
   */
  get pendingCount(): number {
    return this.closed ? this.pendingAtClose : this.pending.length
  }

  /**
   * Add events to the pending buffer. Returns immediately if space is
   * available. Blocks (awaits) if the buffer is full — caller pauses
   * until drain frees space.
   *
   * Bug fix: the backpressure condition now also requires a non-empty queue.
   * Previously, a single enqueue() larger than maxQueueSize could never
   * satisfy `pending + items <= maxQueueSize` even against an EMPTY queue,
   * so the caller deadlocked forever. Now an oversized enqueue is admitted
   * once the queue has fully drained; the queue transiently exceeds
   * maxQueueSize by at most that one call's worth and then drains serially.
   */
  async enqueue(events: T | T[]): Promise<void> {
    if (this.closed) return
    const items = Array.isArray(events) ? events : [events]
    if (items.length === 0) return
    // Backpressure: wait until there's space (or the queue is empty — see
    // the oversized-enqueue note in the method docs).
    while (
      !this.closed &&
      this.pending.length > 0 &&
      this.pending.length + items.length > this.config.maxQueueSize
    ) {
      await new Promise<void>(resolve => {
        this.backpressureResolvers.push(resolve)
      })
    }
    if (this.closed) return
    this.pending.push(...items)
    void this.drain()
  }

  /**
   * Block until all pending events have been sent.
   * Used at turn boundaries and graceful shutdown.
   */
  flush(): Promise<void> {
    if (this.pending.length === 0 && !this.draining) {
      return Promise.resolve()
    }
    void this.drain()
    return new Promise<void>(resolve => {
      this.flushResolvers.push(resolve)
    })
  }

  /**
   * Drop pending events and stop processing.
   * Resolves any blocked enqueue() and flush() callers.
   */
  close(): void {
    if (this.closed) return
    this.closed = true
    this.pendingAtClose = this.pending.length
    this.pending = []
    // Wake a retry sleep immediately so drain can observe `closed` and exit.
    this.sleepResolve?.()
    this.sleepResolve = null
    for (const resolve of this.backpressureResolvers) resolve()
    this.backpressureResolvers = []
    for (const resolve of this.flushResolvers) resolve()
    this.flushResolvers = []
  }

  /**
   * Drain loop. At most one instance runs at a time (guarded by this.draining).
   * Sends batches serially. On failure, backs off and retries indefinitely
   * (unless maxConsecutiveFailures drops the batch and advances).
   */
  private async drain(): Promise<void> {
    if (this.draining || this.closed) return
    this.draining = true
    let failures = 0
    try {
      while (this.pending.length > 0 && !this.closed) {
        const batch = this.takeBatch()
        if (batch.length === 0) continue
        try {
          await this.config.send(batch)
          failures = 0
        } catch (err) {
          failures++
          if (
            this.config.maxConsecutiveFailures !== undefined &&
            failures >= this.config.maxConsecutiveFailures
          ) {
            // Give up on this batch; next batch gets a fresh failure budget.
            this.droppedBatches++
            this.config.onBatchDropped?.(batch.length, failures)
            failures = 0
            this.releaseBackpressure()
            continue
          }
          // Re-queue the failed batch at the front. Use concat (single
          // allocation) instead of unshift(...batch) which shifts every
          // pending item batch.length times. Only hit on failure path.
          this.pending = batch.concat(this.pending)
          const retryAfterMs =
            err instanceof RetryableError ? err.retryAfterMs : undefined
          await this.sleep(this.retryDelay(failures, retryAfterMs))
          continue
        }
        // Release backpressure waiters if space opened up
        this.releaseBackpressure()
      }
    } finally {
      this.draining = false
      // Notify flush waiters if queue is empty
      if (this.pending.length === 0) {
        for (const resolve of this.flushResolvers) resolve()
        this.flushResolvers = []
      }
    }
  }

  /**
   * Pull the next batch from pending. Respects both maxBatchSize and
   * maxBatchBytes. The first item is always taken; subsequent items only
   * if adding them keeps the cumulative JSON size under maxBatchBytes.
   *
   * Un-serializable items (BigInt, circular refs, throwing toJSON) are
   * dropped in place — they can never be sent and leaving them at
   * pending[0] would poison the queue and hang flush() forever.
   */
  private takeBatch(): T[] {
    const { maxBatchSize, maxBatchBytes } = this.config
    if (maxBatchBytes === undefined) {
      return this.pending.splice(0, maxBatchSize)
    }
    let bytes = 0
    let count = 0
    while (count < this.pending.length && count < maxBatchSize) {
      let itemBytes: number
      try {
        itemBytes = Buffer.byteLength(jsonStringify(this.pending[count]))
      } catch {
        this.pending.splice(count, 1)
        continue
      }
      if (count > 0 && bytes + itemBytes > maxBatchBytes) break
      bytes += itemBytes
      count++
    }
    return this.pending.splice(0, count)
  }

  /**
   * Compute the delay before the next retry. A server-supplied retryAfterMs
   * (RetryableError) overrides the exponential curve; both paths are clamped
   * to maxDelayMs and jittered (effective ceiling: maxDelayMs + jitterMs).
   */
  private retryDelay(failures: number, retryAfterMs?: number): number {
    const jitter = Math.random() * this.config.jitterMs
    if (retryAfterMs !== undefined) {
      // Jitter on top of the server's hint prevents thundering herd when
      // many sessions share a rate limit and all receive the same
      // Retry-After. Clamp first, then spread — same shape as the
      // exponential path (effective ceiling is maxDelayMs + jitterMs).
      const clamped = Math.max(
        this.config.baseDelayMs,
        Math.min(retryAfterMs, this.config.maxDelayMs),
      )
      return clamped + jitter
    }
    const exponential = Math.min(
      this.config.baseDelayMs * 2 ** (failures - 1),
      this.config.maxDelayMs,
    )
    return exponential + jitter
  }

  /** Wake all blocked enqueue() callers so they re-check queue space. */
  private releaseBackpressure(): void {
    const resolvers = this.backpressureResolvers
    this.backpressureResolvers = []
    for (const resolve of resolvers) resolve()
  }

  /**
   * Interruptible sleep: close() can resolve it early via sleepResolve.
   * setTimeout args are passed positionally to avoid allocating a closure.
   */
  private sleep(ms: number): Promise<void> {
    return new Promise(resolve => {
      this.sleepResolve = resolve
      setTimeout(
        (self, resolve) => {
          self.sleepResolve = null
          resolve()
        },
        ms,
        this,
        resolve,
      )
    })
  }
}
+800
View File
@@ -0,0 +1,800 @@
import type { StdoutMessage } from 'src/entrypoints/sdk/controlTypes.js'
import type WsWebSocket from 'ws'
import { logEvent } from '../../services/analytics/index.js'
import { CircularBuffer } from '../../utils/CircularBuffer.js'
import { logForDebugging } from '../../utils/debug.js'
import { logForDiagnosticsNoPII } from '../../utils/diagLogs.js'
import { isEnvTruthy } from '../../utils/envUtils.js'
import { getWebSocketTLSOptions } from '../../utils/mtls.js'
import {
getWebSocketProxyAgent,
getWebSocketProxyUrl,
} from '../../utils/proxy.js'
import {
registerSessionActivityCallback,
unregisterSessionActivityCallback,
} from '../../utils/sessionActivity.js'
import { jsonStringify } from '../../utils/slowOperations.js'
import type { Transport } from './Transport.js'
/** Pre-serialized newline-delimited keep_alive message. */
const KEEP_ALIVE_FRAME = '{"type":"keep_alive"}\n'
/** Capacity of the outbound message buffer used for replay on reconnect. */
const DEFAULT_MAX_BUFFER_SIZE = 1000
/** Base delay (ms) for exponential reconnect backoff. */
const DEFAULT_BASE_RECONNECT_DELAY = 1000
/** Cap (ms) on the reconnect backoff delay. */
const DEFAULT_MAX_RECONNECT_DELAY = 30000
/** Time budget for reconnection attempts before giving up (10 minutes). */
const DEFAULT_RECONNECT_GIVE_UP_MS = 600_000
/** Interval (ms) between pings used to detect dead connections. */
const DEFAULT_PING_INTERVAL = 10000
/** Interval (ms) between keep_alive data frames that reset proxy idle timers. */
const DEFAULT_KEEPALIVE_INTERVAL = 300_000 // 5 minutes
/**
 * Threshold for detecting system sleep/wake. If the gap between consecutive
 * reconnection attempts exceeds this, the machine likely slept. We reset
 * the reconnection budget and retry — the server will reject with permanent
 * close codes (4001/1002) if the session was reaped during sleep.
 */
const SLEEP_DETECTION_THRESHOLD_MS = DEFAULT_MAX_RECONNECT_DELAY * 2 // 60s
/**
 * WebSocket close codes that indicate a permanent server-side rejection.
 * The transport transitions to 'closed' immediately without retrying.
 */
const PERMANENT_CLOSE_CODES = new Set([
  1002, // protocol error — server rejected handshake (e.g. session reaped)
  4001, // session expired / not found
  4003, // unauthorized
])
/** Construction-time knobs for WebSocketTransport. */
export type WebSocketTransportOptions = {
  /** When false, the transport does not attempt automatic reconnection on
   * disconnect. Use this when the caller has its own recovery mechanism
   * (e.g. the REPL bridge poll loop). Defaults to true. */
  autoReconnect?: boolean
  /** Gates the tengu_ws_transport_* telemetry events. Set true at the
   * REPL-bridge construction site so only Remote Control sessions (the
   * Cloudflare-idle-timeout population) emit; print-mode workers stay
   * silent. Defaults to false. */
  isBridge?: boolean
}
/** Connection lifecycle states for WebSocketTransport. */
type WebSocketTransportState =
  | 'idle'
  | 'connected'
  | 'reconnecting'
  | 'closing'
  | 'closed'
// Common interface between globalThis.WebSocket and ws.WebSocket
type WebSocketLike = {
  close(): void
  send(data: string): void
  ping?(): void // Bun & ws both support this
}
export class WebSocketTransport implements Transport {
private ws: WebSocketLike | null = null
private lastSentId: string | null = null
protected url: URL
protected state: WebSocketTransportState = 'idle'
protected onData?: (data: string) => void
private onCloseCallback?: (closeCode?: number) => void
private onConnectCallback?: () => void
private headers: Record<string, string>
private sessionId?: string
private autoReconnect: boolean
private isBridge: boolean
// Reconnection state
private reconnectAttempts = 0
private reconnectStartTime: number | null = null
private reconnectTimer: NodeJS.Timeout | null = null
private lastReconnectAttemptTime: number | null = null
// Wall-clock of last WS data-frame activity (inbound message or outbound
// ws.send). Used to compute idle time at close — the signal for diagnosing
// proxy idle-timeout RSTs (e.g. Cloudflare 5-min). Excludes ping/pong
// control frames (proxies don't count those).
private lastActivityTime = 0
// Ping interval for connection health checks
private pingInterval: NodeJS.Timeout | null = null
private pongReceived = true
// Periodic keep_alive data frames to reset proxy idle timers
private keepAliveInterval: NodeJS.Timeout | null = null
// Message buffering for replay on reconnection
private messageBuffer: CircularBuffer<StdoutMessage>
// Track which runtime's WS we're using so we can detach listeners
// with the matching API (removeEventListener vs. off).
private isBunWs = false
// Captured at connect() time for handleOpenEvent timing. Stored as an
// instance field so the onOpen handler can be a stable class-property
// arrow function (removable in doDisconnect) instead of a closure over
// a local variable.
private connectStartTime = 0
private refreshHeaders?: () => Record<string, string>
constructor(
url: URL,
headers: Record<string, string> = {},
sessionId?: string,
refreshHeaders?: () => Record<string, string>,
options?: WebSocketTransportOptions,
) {
this.url = url
this.headers = headers
this.sessionId = sessionId
this.refreshHeaders = refreshHeaders
this.autoReconnect = options?.autoReconnect ?? true
this.isBridge = options?.isBridge ?? false
this.messageBuffer = new CircularBuffer(DEFAULT_MAX_BUFFER_SIZE)
}
public async connect(): Promise<void> {
if (this.state !== 'idle' && this.state !== 'reconnecting') {
logForDebugging(
`WebSocketTransport: Cannot connect, current state is ${this.state}`,
{ level: 'error' },
)
logForDiagnosticsNoPII('error', 'cli_websocket_connect_failed')
return
}
this.state = 'reconnecting'
this.connectStartTime = Date.now()
logForDebugging(`WebSocketTransport: Opening ${this.url.href}`)
logForDiagnosticsNoPII('info', 'cli_websocket_connect_opening')
// Start with provided headers and add runtime headers
const headers = { ...this.headers }
if (this.lastSentId) {
headers['X-Last-Request-Id'] = this.lastSentId
logForDebugging(
`WebSocketTransport: Adding X-Last-Request-Id header: ${this.lastSentId}`,
)
}
if (typeof Bun !== 'undefined') {
// Bun's WebSocket supports headers/proxy options but the DOM typings don't
// eslint-disable-next-line eslint-plugin-n/no-unsupported-features/node-builtins
const ws = new globalThis.WebSocket(this.url.href, {
headers,
proxy: getWebSocketProxyUrl(this.url.href),
tls: getWebSocketTLSOptions() || undefined,
} as unknown as string[])
this.ws = ws
this.isBunWs = true
ws.addEventListener('open', this.onBunOpen)
ws.addEventListener('message', this.onBunMessage)
ws.addEventListener('error', this.onBunError)
// eslint-disable-next-line eslint-plugin-n/no-unsupported-features/node-builtins
ws.addEventListener('close', this.onBunClose)
// 'pong' is Bun-specific — not in DOM typings.
ws.addEventListener('pong', this.onPong)
} else {
const { default: WS } = await import('ws')
const ws = new WS(this.url.href, {
headers,
agent: getWebSocketProxyAgent(this.url.href),
...getWebSocketTLSOptions(),
})
this.ws = ws
this.isBunWs = false
ws.on('open', this.onNodeOpen)
ws.on('message', this.onNodeMessage)
ws.on('error', this.onNodeError)
ws.on('close', this.onNodeClose)
ws.on('pong', this.onPong)
}
}
// --- Bun (native WebSocket) event handlers ---
// Stored as class-property arrow functions so they can be removed in
// doDisconnect(). Without removal, each reconnect orphans the old WS
// object + its 5 closures until GC, which accumulates under network
// instability. Mirrors the pattern in src/utils/mcpWebSocketTransport.ts.
private onBunOpen = () => {
this.handleOpenEvent()
// Bun's WebSocket doesn't expose upgrade response headers,
// so replay all buffered messages. The server deduplicates by UUID.
if (this.lastSentId) {
this.replayBufferedMessages('')
}
}
private onBunMessage = (event: MessageEvent) => {
const message =
typeof event.data === 'string' ? event.data : String(event.data)
this.lastActivityTime = Date.now()
logForDiagnosticsNoPII('info', 'cli_websocket_message_received', {
length: message.length,
})
if (this.onData) {
this.onData(message)
}
}
private onBunError = () => {
logForDebugging('WebSocketTransport: Error', {
level: 'error',
})
logForDiagnosticsNoPII('error', 'cli_websocket_connect_error')
// close event fires after error — let it call handleConnectionError
}
// eslint-disable-next-line eslint-plugin-n/no-unsupported-features/node-builtins
private onBunClose = (event: CloseEvent) => {
const isClean = event.code === 1000 || event.code === 1001
logForDebugging(
`WebSocketTransport: Closed: ${event.code}`,
isClean ? undefined : { level: 'error' },
)
logForDiagnosticsNoPII('error', 'cli_websocket_connect_closed')
this.handleConnectionError(event.code)
}
// --- Node (ws package) event handlers ---
private onNodeOpen = () => {
// Capture ws before handleOpenEvent() invokes onConnectCallback — if the
// callback synchronously closes the transport, this.ws becomes null.
// The old inline-closure code had this safety implicitly via closure capture.
const ws = this.ws
this.handleOpenEvent()
if (!ws) return
// Check for last-id in upgrade response headers (ws package only)
const nws = ws as unknown as WsWebSocket & {
upgradeReq?: { headers?: Record<string, string> }
}
const upgradeResponse = nws.upgradeReq
if (upgradeResponse?.headers?.['x-last-request-id']) {
const serverLastId = upgradeResponse.headers['x-last-request-id']
this.replayBufferedMessages(serverLastId)
}
}
private onNodeMessage = (data: Buffer) => {
const message = data.toString()
this.lastActivityTime = Date.now()
logForDiagnosticsNoPII('info', 'cli_websocket_message_received', {
length: message.length,
})
if (this.onData) {
this.onData(message)
}
}
  /**
   * Node (ws package) error handler. Only logs — recovery is deferred to the
   * close handler, which fires after the error event.
   */
  private onNodeError = (err: Error) => {
    logForDebugging(`WebSocketTransport: Error: ${err.message}`, {
      level: 'error',
    })
    logForDiagnosticsNoPII('error', 'cli_websocket_connect_error')
    // close event fires after error — let it call handleConnectionError
  }
private onNodeClose = (code: number, _reason: Buffer) => {
const isClean = code === 1000 || code === 1001
logForDebugging(
`WebSocketTransport: Closed: ${code}`,
isClean ? undefined : { level: 'error' },
)
logForDiagnosticsNoPII('error', 'cli_websocket_connect_closed')
this.handleConnectionError(code)
}
// --- Shared handlers ---
  /** Shared pong handler — marks the connection live for the ping watchdog. */
  private onPong = () => {
    this.pongReceived = true
  }
  /**
   * Common post-open bookkeeping for both Bun and Node sockets: logs the
   * connect, emits bridge reconnect telemetry, resets backoff state, flips
   * state to 'connected', fires the onConnect callback, then starts the
   * ping watchdog, the keepalive timer, and the session-activity hook.
   */
  private handleOpenEvent(): void {
    const connectDuration = Date.now() - this.connectStartTime
    logForDebugging('WebSocketTransport: Connected')
    logForDiagnosticsNoPII('info', 'cli_websocket_connect_connected', {
      duration_ms: connectDuration,
    })
    // Reconnect success — capture attempt count + downtime before resetting.
    // reconnectStartTime is null on first connect, non-null on reopen.
    if (this.isBridge && this.reconnectStartTime !== null) {
      logEvent('tengu_ws_transport_reconnected', {
        attempts: this.reconnectAttempts,
        downtimeMs: Date.now() - this.reconnectStartTime,
      })
    }
    this.reconnectAttempts = 0
    this.reconnectStartTime = null
    this.lastReconnectAttemptTime = null
    this.lastActivityTime = Date.now()
    this.state = 'connected'
    this.onConnectCallback?.()
    // Start periodic pings to detect dead connections
    this.startPingInterval()
    // Start periodic keep_alive data frames to reset proxy idle timers
    this.startKeepaliveInterval()
    // Register callback for session activity signals
    registerSessionActivityCallback(() => {
      void this.write({ type: 'keep_alive' })
    })
  }
protected sendLine(line: string): boolean {
if (!this.ws || this.state !== 'connected') {
logForDebugging('WebSocketTransport: Not connected')
logForDiagnosticsNoPII('info', 'cli_websocket_send_not_connected')
return false
}
try {
this.ws.send(line)
this.lastActivityTime = Date.now()
return true
} catch (error) {
logForDebugging(`WebSocketTransport: Failed to send: ${error}`, {
level: 'error',
})
logForDiagnosticsNoPII('error', 'cli_websocket_send_error')
// Don't null this.ws here — let doDisconnect() (via handleConnectionError)
// handle cleanup so listeners are removed before the WS is released.
this.handleConnectionError()
return false
}
}
  /**
   * Remove all listeners attached in connect() for the given WebSocket.
   * Without this, each reconnect orphans the old WS object + its closures
   * until GC — these accumulate under network instability. Mirrors the
   * pattern in src/utils/mcpWebSocketTransport.ts.
   *
   * @param ws The socket being released — DOM-style (Bun) or EventEmitter-
   *   style (Node `ws` package), selected by `isBunWs`.
   */
  private removeWsListeners(ws: WebSocketLike): void {
    if (this.isBunWs) {
      const nws = ws as unknown as globalThis.WebSocket
      nws.removeEventListener('open', this.onBunOpen)
      nws.removeEventListener('message', this.onBunMessage)
      nws.removeEventListener('error', this.onBunError)
      // eslint-disable-next-line eslint-plugin-n/no-unsupported-features/node-builtins
      nws.removeEventListener('close', this.onBunClose)
      // 'pong' is Bun-specific — not in DOM typings
      nws.removeEventListener('pong' as 'message', this.onPong)
    } else {
      const nws = ws as unknown as WsWebSocket
      nws.off('open', this.onNodeOpen)
      nws.off('message', this.onNodeMessage)
      nws.off('error', this.onNodeError)
      nws.off('close', this.onNodeClose)
      nws.off('pong', this.onPong)
    }
  }
protected doDisconnect(): void {
// Stop pinging and keepalive when disconnecting
this.stopPingInterval()
this.stopKeepaliveInterval()
// Unregister session activity callback
unregisterSessionActivityCallback()
if (this.ws) {
// Remove listeners BEFORE close() so the old WS + closures can be
// GC'd promptly instead of lingering until the next mark-and-sweep.
this.removeWsListeners(this.ws)
this.ws.close()
this.ws = null
}
}
  /**
   * Central disconnect/recovery path, invoked from close events, failed
   * sends, and ping timeouts. Logs + emits bridge telemetry, tears down the
   * socket, then decides the next state: permanent close code → 'closed';
   * autoReconnect disabled → 'closed'; otherwise schedule a reconnect with
   * exponential backoff + jitter under a total time budget, with system
   * sleep/wake detection that resets the budget.
   *
   * @param closeCode WebSocket close code when triggered by a close event;
   *   undefined when triggered by a send failure or ping timeout.
   */
  private handleConnectionError(closeCode?: number): void {
    logForDebugging(
      `WebSocketTransport: Disconnected from ${this.url.href}` +
        (closeCode != null ? ` (code ${closeCode})` : ''),
    )
    logForDiagnosticsNoPII('info', 'cli_websocket_disconnected')
    if (this.isBridge) {
      // Fire on every close — including intermediate ones during a reconnect
      // storm (those never surface to the onCloseCallback consumer). For the
      // Cloudflare-5min-idle hypothesis: cluster msSinceLastActivity; if the
      // peak sits at ~300s with closeCode 1006, that's the proxy RST.
      logEvent('tengu_ws_transport_closed', {
        closeCode,
        msSinceLastActivity:
          this.lastActivityTime > 0 ? Date.now() - this.lastActivityTime : -1,
        // 'connected' = healthy drop (the Cloudflare case); 'reconnecting' =
        // connect-rejection mid-storm. State isn't mutated until the branches
        // below, so this reads the pre-close value.
        wasConnected: this.state === 'connected',
        reconnectAttempts: this.reconnectAttempts,
      })
    }
    this.doDisconnect()
    if (this.state === 'closing' || this.state === 'closed') return
    // Permanent codes: don't retry — server has definitively ended the session.
    // Exception: 4003 (unauthorized) can be retried when refreshHeaders is
    // available and returns a new token (e.g. after the parent process mints
    // a fresh session ingress token during reconnection).
    let headersRefreshed = false
    if (closeCode === 4003 && this.refreshHeaders) {
      const freshHeaders = this.refreshHeaders()
      if (freshHeaders.Authorization !== this.headers.Authorization) {
        Object.assign(this.headers, freshHeaders)
        headersRefreshed = true
        logForDebugging(
          'WebSocketTransport: 4003 received but headers refreshed, scheduling reconnect',
        )
        logForDiagnosticsNoPII('info', 'cli_websocket_4003_token_refreshed')
      }
    }
    if (
      closeCode != null &&
      PERMANENT_CLOSE_CODES.has(closeCode) &&
      !headersRefreshed
    ) {
      logForDebugging(
        `WebSocketTransport: Permanent close code ${closeCode}, not reconnecting`,
        { level: 'error' },
      )
      logForDiagnosticsNoPII('error', 'cli_websocket_permanent_close', {
        closeCode,
      })
      this.state = 'closed'
      this.onCloseCallback?.(closeCode)
      return
    }
    // When autoReconnect is disabled, go straight to closed state.
    // The caller (e.g. REPL bridge poll loop) handles recovery.
    if (!this.autoReconnect) {
      this.state = 'closed'
      this.onCloseCallback?.(closeCode)
      return
    }
    // Schedule reconnection with exponential backoff and time budget
    const now = Date.now()
    if (!this.reconnectStartTime) {
      this.reconnectStartTime = now
    }
    // Detect system sleep/wake: if the gap since our last reconnection
    // attempt greatly exceeds the max delay, the machine likely slept
    // (e.g. laptop lid closed). Reset the budget and retry from scratch —
    // the server will reject with permanent close codes (4001/1002) if
    // the session was reaped while we were asleep.
    if (
      this.lastReconnectAttemptTime !== null &&
      now - this.lastReconnectAttemptTime > SLEEP_DETECTION_THRESHOLD_MS
    ) {
      logForDebugging(
        `WebSocketTransport: Detected system sleep (${Math.round((now - this.lastReconnectAttemptTime) / 1000)}s gap), resetting reconnection budget`,
      )
      logForDiagnosticsNoPII('info', 'cli_websocket_sleep_detected', {
        gapMs: now - this.lastReconnectAttemptTime,
      })
      this.reconnectStartTime = now
      this.reconnectAttempts = 0
    }
    this.lastReconnectAttemptTime = now
    const elapsed = now - this.reconnectStartTime
    if (elapsed < DEFAULT_RECONNECT_GIVE_UP_MS) {
      // Clear any existing reconnection timer to avoid duplicates
      if (this.reconnectTimer) {
        clearTimeout(this.reconnectTimer)
        this.reconnectTimer = null
      }
      // Refresh headers before reconnecting (e.g. to pick up a new session token).
      // Skip if already refreshed by the 4003 path above.
      if (!headersRefreshed && this.refreshHeaders) {
        const freshHeaders = this.refreshHeaders()
        Object.assign(this.headers, freshHeaders)
        logForDebugging('WebSocketTransport: Refreshed headers for reconnect')
      }
      this.state = 'reconnecting'
      this.reconnectAttempts++
      const baseDelay = Math.min(
        DEFAULT_BASE_RECONNECT_DELAY * Math.pow(2, this.reconnectAttempts - 1),
        DEFAULT_MAX_RECONNECT_DELAY,
      )
      // Add ±25% jitter to avoid thundering herd
      const delay = Math.max(
        0,
        baseDelay + baseDelay * 0.25 * (2 * Math.random() - 1),
      )
      logForDebugging(
        `WebSocketTransport: Reconnecting in ${Math.round(delay)}ms (attempt ${this.reconnectAttempts}, ${Math.round(elapsed / 1000)}s elapsed)`,
      )
      logForDiagnosticsNoPII('error', 'cli_websocket_reconnect_attempt', {
        reconnectAttempts: this.reconnectAttempts,
      })
      if (this.isBridge) {
        logEvent('tengu_ws_transport_reconnecting', {
          attempt: this.reconnectAttempts,
          elapsedMs: elapsed,
          delayMs: Math.round(delay),
        })
      }
      this.reconnectTimer = setTimeout(() => {
        this.reconnectTimer = null
        void this.connect()
      }, delay)
    } else {
      logForDebugging(
        `WebSocketTransport: Reconnection time budget exhausted after ${Math.round(elapsed / 1000)}s for ${this.url.href}`,
        { level: 'error' },
      )
      logForDiagnosticsNoPII('error', 'cli_websocket_reconnect_exhausted', {
        reconnectAttempts: this.reconnectAttempts,
        elapsedMs: elapsed,
      })
      this.state = 'closed'
      // Notify close callback
      if (this.onCloseCallback) {
        this.onCloseCallback(closeCode)
      }
    }
  }
close(): void {
// Clear any pending reconnection timer
if (this.reconnectTimer) {
clearTimeout(this.reconnectTimer)
this.reconnectTimer = null
}
// Clear ping and keepalive intervals
this.stopPingInterval()
this.stopKeepaliveInterval()
// Unregister session activity callback
unregisterSessionActivityCallback()
this.state = 'closing'
this.doDisconnect()
}
  /**
   * Re-send buffered messages after a (re)connect. If the server reported a
   * last-received UUID, everything up to and including it is evicted as
   * confirmed; the rest is re-sent in order.
   *
   * @param lastId UUID of the last message the server confirmed, or '' when
   *   no confirmation is available (replay everything buffered).
   */
  private replayBufferedMessages(lastId: string): void {
    const messages = this.messageBuffer.toArray()
    if (messages.length === 0) return
    // Find where to start replay based on server's last received message
    let startIndex = 0
    if (lastId) {
      const lastConfirmedIndex = messages.findIndex(
        message => 'uuid' in message && message.uuid === lastId,
      )
      if (lastConfirmedIndex >= 0) {
        // Server confirmed messages up to lastConfirmedIndex — evict them
        startIndex = lastConfirmedIndex + 1
        // Rebuild the buffer with only unconfirmed messages
        const remaining = messages.slice(startIndex)
        this.messageBuffer.clear()
        this.messageBuffer.addAll(remaining)
        if (remaining.length === 0) {
          this.lastSentId = null
        }
        logForDebugging(
          `WebSocketTransport: Evicted ${startIndex} confirmed messages, ${remaining.length} remaining`,
        )
        logForDiagnosticsNoPII(
          'info',
          'cli_websocket_evicted_confirmed_messages',
          {
            evicted: startIndex,
            remaining: remaining.length,
          },
        )
      }
    }
    const messagesToReplay = messages.slice(startIndex)
    if (messagesToReplay.length === 0) {
      logForDebugging('WebSocketTransport: No new messages to replay')
      logForDiagnosticsNoPII('info', 'cli_websocket_no_messages_to_replay')
      return
    }
    logForDebugging(
      `WebSocketTransport: Replaying ${messagesToReplay.length} buffered messages`,
    )
    logForDiagnosticsNoPII('info', 'cli_websocket_messages_to_replay', {
      count: messagesToReplay.length,
    })
    for (const message of messagesToReplay) {
      const line = jsonStringify(message) + '\n'
      const success = this.sendLine(line)
      if (!success) {
        // NOTE(review): when sendLine() failed because send() threw, it has
        // already called handleConnectionError(), so this second call can
        // bump reconnectAttempts twice. It IS needed for sendLine's
        // not-connected false path, though — confirm before simplifying.
        this.handleConnectionError()
        break
      }
    }
    // Do NOT clear the buffer after replay — messages remain buffered until
    // the server confirms receipt on the next reconnection. This prevents
    // message loss if the connection drops after replay but before the server
    // processes the messages.
  }
  /** True while the transport is in the 'connected' state. */
  isConnectedStatus(): boolean {
    return this.state === 'connected'
  }
  /** True once the transport has reached the terminal 'closed' state. */
  isClosedStatus(): boolean {
    return this.state === 'closed'
  }
  /** Register the consumer for inbound message payloads. */
  setOnData(callback: (data: string) => void): void {
    this.onData = callback
  }
  /** Register the callback fired each time a connection (re)opens. */
  setOnConnect(callback: () => void): void {
    this.onConnectCallback = callback
  }
  /** Register the callback fired when the transport reaches 'closed'. */
  setOnClose(callback: (closeCode?: number) => void): void {
    this.onCloseCallback = callback
  }
  /** Current connection state, for diagnostics/UI. */
  getStateLabel(): string {
    return this.state
  }
  /**
   * Queue and/or send one message. Messages carrying a string uuid are
   * always added to the replay buffer first; when not connected the method
   * returns after buffering (replay happens on reconnect), otherwise the
   * serialized line is sent immediately.
   */
  async write(message: StdoutMessage): Promise<void> {
    if ('uuid' in message && typeof message.uuid === 'string') {
      this.messageBuffer.add(message)
      this.lastSentId = message.uuid
    }
    const line = jsonStringify(message) + '\n'
    if (this.state !== 'connected') {
      // Message buffered for replay when connected (if it has a UUID)
      return
    }
    const sessionLabel = this.sessionId ? ` session=${this.sessionId}` : ''
    const detailLabel = this.getControlMessageDetailLabel(message)
    logForDebugging(
      `WebSocketTransport: Sending message type=${message.type}${sessionLabel}${detailLabel}`,
    )
    this.sendLine(line)
  }
private getControlMessageDetailLabel(message: StdoutMessage): string {
if (message.type === 'control_request') {
const { request_id, request } = message
const toolName =
request.subtype === 'can_use_tool' ? request.tool_name : ''
return ` subtype=${request.subtype} request_id=${request_id}${toolName ? ` tool=${toolName}` : ''}`
}
if (message.type === 'control_response') {
const { subtype, request_id } = message.response
return ` subtype=${subtype} request_id=${request_id}`
}
return ''
}
  /**
   * Start the ping watchdog. Every DEFAULT_PING_INTERVAL: check the
   * wall-clock gap since the last tick (process-suspension detector), check
   * that the previous ping was answered with a pong, then send the next
   * ping. Either check failing forces recovery via handleConnectionError().
   */
  private startPingInterval(): void {
    // Clear any existing interval
    this.stopPingInterval()
    this.pongReceived = true
    let lastTickTime = Date.now()
    // Send ping periodically to detect dead connections.
    // If the previous ping got no pong, treat the connection as dead.
    this.pingInterval = setInterval(() => {
      if (this.state === 'connected' && this.ws) {
        const now = Date.now()
        const gap = now - lastTickTime
        lastTickTime = now
        // Process-suspension detector. If the wall-clock gap between ticks
        // greatly exceeds the 10s interval, the process was suspended
        // (laptop lid, SIGSTOP, VM pause). setInterval does not queue
        // missed ticks — it coalesces — so on wake this callback fires
        // once with a huge gap. The socket is almost certainly dead:
        // NAT mappings drop in 30s–5min, and the server has been
        // retransmitting into the void. Don't wait for a ping/pong
        // round-trip to confirm (ws.ping() on a dead socket returns
        // immediately with no error — bytes go into the kernel send
        // buffer). Assume dead and reconnect now. A spurious reconnect
        // after a short sleep is cheap — replayBufferedMessages() handles
        // it and the server dedups by UUID.
        if (gap > SLEEP_DETECTION_THRESHOLD_MS) {
          logForDebugging(
            `WebSocketTransport: ${Math.round(gap / 1000)}s tick gap detected — process was suspended, forcing reconnect`,
          )
          logForDiagnosticsNoPII(
            'info',
            'cli_websocket_sleep_detected_on_ping',
            { gapMs: gap },
          )
          this.handleConnectionError()
          return
        }
        if (!this.pongReceived) {
          logForDebugging(
            'WebSocketTransport: No pong received, connection appears dead',
            { level: 'error' },
          )
          logForDiagnosticsNoPII('error', 'cli_websocket_pong_timeout')
          this.handleConnectionError()
          return
        }
        this.pongReceived = false
        try {
          // Optional-call: the Bun WebSocketLike may not expose ping().
          this.ws.ping?.()
        } catch (error) {
          logForDebugging(`WebSocketTransport: Ping failed: ${error}`, {
            level: 'error',
          })
          logForDiagnosticsNoPII('error', 'cli_websocket_ping_failed')
        }
      }
    }, DEFAULT_PING_INTERVAL)
  }
private stopPingInterval(): void {
if (this.pingInterval) {
clearInterval(this.pingInterval)
this.pingInterval = null
}
}
private startKeepaliveInterval(): void {
this.stopKeepaliveInterval()
// In CCR sessions, session activity heartbeats handle keep-alives
if (isEnvTruthy(process.env.CLAUDE_CODE_REMOTE)) {
return
}
this.keepAliveInterval = setInterval(() => {
if (this.state === 'connected' && this.ws) {
try {
this.ws.send(KEEP_ALIVE_FRAME)
this.lastActivityTime = Date.now()
logForDebugging(
'WebSocketTransport: Sent periodic keep_alive data frame',
)
} catch (error) {
logForDebugging(
`WebSocketTransport: Periodic keep_alive failed: ${error}`,
{ level: 'error' },
)
logForDiagnosticsNoPII('error', 'cli_websocket_keepalive_failed')
}
}
}, DEFAULT_KEEPALIVE_INTERVAL)
}
private stopKeepaliveInterval(): void {
if (this.keepAliveInterval) {
clearInterval(this.keepAliveInterval)
this.keepAliveInterval = null
}
}
}
+131
View File
@@ -0,0 +1,131 @@
import { sleep } from '../../utils/sleep.js'
/**
* Coalescing uploader for PUT /worker (session state + metadata).
*
* - 1 in-flight PUT + 1 pending patch
* - New calls coalesce into pending (never grows beyond 1 slot)
* - On success: send pending if exists
* - On failure: exponential backoff (clamped), retries indefinitely
* until success or close(). Absorbs any pending patches before each retry.
* - No backpressure needed — naturally bounded at 2 slots
*
* Coalescing rules:
* - Top-level keys (worker_status, external_metadata) — last value wins
* - Inside external_metadata / internal_metadata — RFC 7396 merge:
* keys are added/overwritten, null values preserved (server deletes)
*/
/** Collaborators and backoff tuning for WorkerStateUploader. */
type WorkerStateUploaderConfig = {
  /** Performs one PUT /worker; resolves true on success, false on failure. */
  send: (body: Record<string, unknown>) => Promise<boolean>
  /** Base delay for exponential backoff (ms) */
  baseDelayMs: number
  /** Max delay cap (ms) */
  maxDelayMs: number
  /** Random jitter range added to retry delay (ms) */
  jitterMs: number
}
export class WorkerStateUploader {
  /** Resolves when the current PUT (plus its retries) settles; null when idle. */
  private inflight: Promise<void> | null = null
  /** Coalesced patch awaiting the next drain; never more than one slot. */
  private pending: Record<string, unknown> | null = null
  private closed = false
  private readonly config: WorkerStateUploaderConfig
  constructor(config: WorkerStateUploaderConfig) {
    this.config = config
  }
  /**
   * Enqueue a patch to PUT /worker. Coalesces with any existing pending
   * patch. Fire-and-forget — callers don't need to await.
   */
  enqueue(patch: Record<string, unknown>): void {
    if (this.closed) return
    this.pending = this.pending ? coalescePatches(this.pending, patch) : patch
    void this.drain()
  }
  /** Stop uploading: drop any pending patch; the retry loop exits on its next check. */
  close(): void {
    this.closed = true
    this.pending = null
  }
  /** Start a PUT for the pending patch unless one is already in flight. */
  private async drain(): Promise<void> {
    if (this.inflight || this.closed) return
    if (!this.pending) return
    const payload = this.pending
    this.pending = null
    this.inflight = this.sendWithRetry(payload).then(() => {
      this.inflight = null
      // A patch may have arrived while we were sending — keep draining.
      if (this.pending && !this.closed) {
        void this.drain()
      }
    })
  }
  /** Retries indefinitely with exponential backoff until success or close(). */
  private async sendWithRetry(payload: Record<string, unknown>): Promise<void> {
    let current = payload
    let failures = 0
    while (!this.closed) {
      // BUGFIX: a rejected send() must count as a failed attempt rather than
      // propagate. If this promise rejected, drain()'s .then() would never
      // clear `inflight`, wedging the uploader permanently (and raising an
      // unhandled rejection).
      let ok = false
      try {
        ok = await this.config.send(current)
      } catch {
        ok = false
      }
      if (ok) return
      failures++
      await sleep(this.retryDelay(failures))
      // Absorb any patches that arrived during the retry
      if (this.pending && !this.closed) {
        current = coalescePatches(current, this.pending)
        this.pending = null
      }
    }
  }
  /** Exponential backoff clamped to maxDelayMs, plus uniform random jitter. */
  private retryDelay(failures: number): number {
    const exponential = Math.min(
      this.config.baseDelayMs * 2 ** (failures - 1),
      this.config.maxDelayMs,
    )
    const jitter = Math.random() * this.config.jitterMs
    return exponential + jitter
  }
}
/**
 * Coalesce two patches for PUT /worker.
 *
 * Top-level keys: overlay replaces base (last value wins).
 * Metadata keys (external_metadata, internal_metadata): RFC 7396 merge one
 * level deep — overlay keys are added/overwritten, and explicit nulls are
 * kept so the server performs the delete.
 */
function coalescePatches(
  base: Record<string, unknown>,
  overlay: Record<string, unknown>,
): Record<string, unknown> {
  const result: Record<string, unknown> = { ...base }
  for (const key of Object.keys(overlay)) {
    const next = overlay[key]
    const prev = result[key]
    const mergeAsMetadata =
      (key === 'external_metadata' || key === 'internal_metadata') &&
      Boolean(prev) &&
      typeof prev === 'object' &&
      next !== null &&
      typeof next === 'object'
    // RFC 7396 merge — overlay keys win, nulls preserved for server
    result[key] = mergeAsMetadata
      ? {
          ...(prev as Record<string, unknown>),
          ...(next as Record<string, unknown>),
        }
      : next
  }
  return result
}
+998
View File
@@ -0,0 +1,998 @@
import { randomUUID } from 'crypto'
import type {
SDKPartialAssistantMessage,
StdoutMessage,
} from 'src/entrypoints/sdk/controlTypes.js'
import { decodeJwtExpiry } from '../../bridge/jwtUtils.js'
import { logForDebugging } from '../../utils/debug.js'
import { logForDiagnosticsNoPII } from '../../utils/diagLogs.js'
import { errorMessage, getErrnoCode } from '../../utils/errors.js'
import { createAxiosInstance } from '../../utils/proxy.js'
import {
registerSessionActivityCallback,
unregisterSessionActivityCallback,
} from '../../utils/sessionActivity.js'
import {
getSessionIngressAuthHeaders,
getSessionIngressAuthToken,
} from '../../utils/sessionIngressAuth.js'
import type {
RequiresActionDetails,
SessionState,
} from '../../utils/sessionState.js'
import { sleep } from '../../utils/sleep.js'
import { getClaudeCodeUserAgent } from '../../utils/userAgent.js'
import {
RetryableError,
SerialBatchEventUploader,
} from './SerialBatchEventUploader.js'
import type { SSETransport, StreamClientEvent } from './SSETransport.js'
import { WorkerStateUploader } from './WorkerStateUploader.js'
/** Default interval between heartbeat events (20s; server TTL is 60s). */
const DEFAULT_HEARTBEAT_INTERVAL_MS = 20_000
/**
* stream_event messages accumulate in a delay buffer for up to this many ms
* before enqueue. Mirrors HybridTransport's batching window. text_delta
* events for the same content block accumulate into a single full-so-far
* snapshot per flush — each emitted event is self-contained so a client
* connecting mid-stream sees complete text, not a fragment.
*/
const STREAM_EVENT_FLUSH_INTERVAL_MS = 100
/**
 * Hoisted axios validateStatus callback to avoid per-request closure
 * allocation. Accepting every status keeps axios from throwing on non-2xx,
 * so HTTP errors surface as response objects instead of exceptions.
 */
function alwaysValidStatus(): boolean {
  return true
}
/** Typed failure categories for initialize() — consumed by the diag classifier. */
export type CCRInitFailReason =
  | 'no_auth_headers'
  | 'missing_epoch'
  | 'worker_register_failed'
/** Thrown by initialize(); carries a typed reason for the diag classifier. */
export class CCRInitError extends Error {
  // reason: which initialization precondition failed (see CCRInitFailReason)
  constructor(readonly reason: CCRInitFailReason) {
    super(`CCRClient init failed: ${reason}`)
  }
}
/**
* Consecutive 401/403 with a VALID-LOOKING token before giving up. An
* expired JWT short-circuits this (exits immediately — deterministic,
* retry is futile). This threshold is for the uncertain case: token's
* exp is in the future but server says 401 (userauth down, KMS hiccup,
* clock skew). 10 × 20s heartbeat ≈ 200s to ride it out.
*/
const MAX_CONSECUTIVE_AUTH_FAILURES = 10
/**
 * Minimal envelope for an uploaded event: a uuid (used for server-side
 * dedupe/idempotency — see accumulateStreamEvents) plus a type tag; all
 * other payload fields pass through untyped.
 */
type EventPayload = {
  uuid: string
  type: string
  [key: string]: unknown
}
/** Queue entry for the client-event uploader. */
type ClientEvent = {
  payload: EventPayload
  // NOTE(review): presumably marks events that may be dropped rather than
  // retried — usage isn't visible in this chunk; confirm against the uploader.
  ephemeral?: boolean
}
/**
 * Structural subset of a stream_event carrying a text_delta. Not a narrowing
 * of SDKPartialAssistantMessage — RawMessageStreamEvent's delta is a union and
 * narrowing through two levels defeats the discriminant.
 */
type CoalescedStreamEvent = {
  type: 'stream_event'
  // Reuses the first text_delta UUID of the flush for this block so
  // server-side idempotency stays stable across retries.
  uuid: string
  session_id: string
  parent_tool_use_id: string | null
  event: {
    type: 'content_block_delta'
    // Content-block index within the assistant message.
    index: number
    // Full accumulated text-so-far, not an incremental fragment.
    delta: { type: 'text_delta'; text: string }
  }
}
/**
* Accumulator state for text_delta coalescing. Keyed by API message ID so
* lifetime is tied to the assistant message — cleared when the complete
* SDKAssistantMessage arrives (writeEvent), which is reliable even when
* abort/error paths skip content_block_stop/message_stop delivery.
*/
export type StreamAccumulatorState = {
/** API message ID (msg_...) → blocks[blockIndex] → chunk array. */
byMessage: Map<string, string[][]>
/**
* {session_id}:{parent_tool_use_id} → active message ID.
* content_block_delta events don't carry the message ID (only
* message_start does), so we track which message is currently streaming
* for each scope. At most one message streams per scope at a time.
*/
scopeToMessage: Map<string, string>
}
/** Build a fresh, empty accumulator — no messages or scopes tracked yet. */
export function createStreamAccumulator(): StreamAccumulatorState {
  const byMessage = new Map<string, string[][]>()
  const scopeToMessage = new Map<string, string>()
  return { byMessage, scopeToMessage }
}
/** Stable map key for a {session_id, parent_tool_use_id} streaming scope. */
function scopeKey(m: {
  session_id: string
  parent_tool_use_id: string | null
}): string {
  const parent = m.parent_tool_use_id ?? ''
  return m.session_id + ':' + parent
}
/**
 * Accumulate text_delta stream_events into full-so-far snapshots per content
 * block. Each flush emits ONE event per touched block containing the FULL
 * accumulated text from the start of the block — a client connecting
 * mid-stream receives a self-contained snapshot, not a fragment.
 *
 * Non-text-delta events pass through unchanged. message_start records the
 * active message ID for the scope; content_block_delta appends chunks;
 * the snapshot event reuses the first text_delta UUID seen for that block in
 * this flush so server-side idempotency remains stable across retries.
 *
 * Cleanup happens in writeEvent when the complete assistant message arrives
 * (reliable), not here on stop events (abort/error paths skip those).
 *
 * @param buffer One flush window's worth of stream_events, in arrival order.
 * @param state  Cross-flush accumulator (mutated in place).
 * @returns Events to enqueue: pass-throughs plus at most one snapshot per
 *   touched content block.
 */
export function accumulateStreamEvents(
  buffer: SDKPartialAssistantMessage[],
  state: StreamAccumulatorState,
): EventPayload[] {
  const out: EventPayload[] = []
  // chunks[] → snapshot already in `out` this flush. Keyed by the chunks
  // array reference (stable per {messageId, index}) so subsequent deltas
  // rewrite the same entry instead of emitting one event per delta.
  const touched = new Map<string[], CoalescedStreamEvent>()
  for (const msg of buffer) {
    switch (msg.event.type) {
      case 'message_start': {
        // New stream for this scope — drop any half-finished predecessor's
        // accumulated chunks before tracking the new message.
        const id = msg.event.message.id
        const prevId = state.scopeToMessage.get(scopeKey(msg))
        if (prevId) state.byMessage.delete(prevId)
        state.scopeToMessage.set(scopeKey(msg), id)
        state.byMessage.set(id, [])
        out.push(msg)
        break
      }
      case 'content_block_delta': {
        if (msg.event.delta.type !== 'text_delta') {
          out.push(msg)
          break
        }
        const messageId = state.scopeToMessage.get(scopeKey(msg))
        const blocks = messageId ? state.byMessage.get(messageId) : undefined
        if (!blocks) {
          // Delta without a preceding message_start (reconnect mid-stream,
          // or message_start was in a prior buffer that got dropped). Pass
          // through raw — can't produce a full-so-far snapshot without the
          // prior chunks anyway.
          out.push(msg)
          break
        }
        const chunks = (blocks[msg.event.index] ??= [])
        chunks.push(msg.event.delta.text)
        const existing = touched.get(chunks)
        if (existing) {
          // Block already has a snapshot in `out` this flush — rewrite its
          // text in place rather than emitting another event.
          existing.event.delta.text = chunks.join('')
          break
        }
        const snapshot: CoalescedStreamEvent = {
          type: 'stream_event',
          uuid: msg.uuid,
          session_id: msg.session_id,
          parent_tool_use_id: msg.parent_tool_use_id,
          event: {
            type: 'content_block_delta',
            index: msg.event.index,
            delta: { type: 'text_delta', text: chunks.join('') },
          },
        }
        touched.set(chunks, snapshot)
        out.push(snapshot)
        break
      }
      default:
        // content_block_start/stop, message_delta/stop, etc. — untouched.
        out.push(msg)
    }
  }
  return out
}
/**
 * Clear accumulator entries for a completed assistant message. Called from
 * writeEvent when the SDKAssistantMessage arrives — the reliable
 * end-of-stream signal that fires even when abort/interrupt/error skip SSE
 * stop events.
 */
export function clearStreamAccumulatorForMessage(
  state: StreamAccumulatorState,
  assistant: {
    session_id: string
    parent_tool_use_id: string | null
    message: { id: string }
  },
): void {
  const messageId = assistant.message.id
  state.byMessage.delete(messageId)
  // Only unbind the scope when it still points at THIS message — a newer
  // message_start may already have claimed it.
  const scope = scopeKey(assistant)
  if (state.scopeToMessage.get(scope) === messageId) {
    state.scopeToMessage.delete(scope)
  }
}
/**
 * Outcome of one HTTP request made via this.request(). On failure,
 * retryAfterMs optionally carries a server-suggested retry delay (fed into
 * RetryableError by the uploaders).
 */
type RequestResult = { ok: true } | { ok: false; retryAfterMs?: number }
/** Event shape POSTed to /worker/internal-events. */
type WorkerEvent = {
  payload: EventPayload
  is_compaction?: boolean
  agent_id?: string
}
/** Internal event as stored/returned by the server. */
export type InternalEvent = {
  event_id: string
  event_type: string
  payload: Record<string, unknown>
  event_metadata?: Record<string, unknown> | null
  is_compaction: boolean
  created_at: string
  agent_id?: string
}
// NOTE(review): presumably the paged list-endpoint response; next_cursor
// present when more pages remain — confirm against the server API.
type ListInternalEventsResponse = {
  data: InternalEvent[]
  next_cursor?: string
}
/** Subset of the worker-state response read back during initialize(). */
type WorkerStateResponse = {
  worker?: {
    external_metadata?: Record<string, unknown>
  }
}
/**
* Manages the worker lifecycle protocol with CCR v2:
* - Epoch management: reads worker_epoch from CLAUDE_CODE_WORKER_EPOCH env var
* - Runtime state reporting: PUT /sessions/{id}/worker
* - Heartbeat: POST /sessions/{id}/worker/heartbeat for liveness detection
*
* All writes go through this.request().
*/
export class CCRClient {
private workerEpoch = 0
private readonly heartbeatIntervalMs: number
private readonly heartbeatJitterFraction: number
private heartbeatTimer: NodeJS.Timeout | null = null
private heartbeatInFlight = false
private closed = false
private consecutiveAuthFailures = 0
private currentState: SessionState | null = null
private readonly sessionBaseUrl: string
private readonly sessionId: string
private readonly http = createAxiosInstance({ keepAlive: true })
// stream_event delay buffer — accumulates content deltas for up to
// STREAM_EVENT_FLUSH_INTERVAL_MS before enqueueing (reduces POST count
// and enables text_delta coalescing). Mirrors HybridTransport's pattern.
private streamEventBuffer: SDKPartialAssistantMessage[] = []
private streamEventTimer: ReturnType<typeof setTimeout> | null = null
// Full-so-far text accumulator. Persists across flushes so each emitted
// text_delta event carries the complete text from the start of the block —
// mid-stream reconnects see a self-contained snapshot. Keyed by API message
// ID; cleared in writeEvent when the complete assistant message arrives.
private streamTextAccumulator = createStreamAccumulator()
private readonly workerState: WorkerStateUploader
private readonly eventUploader: SerialBatchEventUploader<ClientEvent>
private readonly internalEventUploader: SerialBatchEventUploader<WorkerEvent>
private readonly deliveryUploader: SerialBatchEventUploader<{
eventId: string
status: 'received' | 'processing' | 'processed'
}>
/**
* Called when the server returns 409 (a newer worker epoch superseded ours).
* Default: process.exit(1) — correct for spawn-mode children where the
* parent bridge re-spawns. In-process callers (replBridge) MUST override
* this to close gracefully instead; exit would kill the user's REPL.
*/
private readonly onEpochMismatch: () => never
/**
* Auth header source. Defaults to the process-wide session-ingress token
* (CLAUDE_CODE_SESSION_ACCESS_TOKEN env var). Callers managing multiple
* concurrent sessions with distinct JWTs MUST inject this — the env-var
* path is a process global and would stomp across sessions.
*/
private readonly getAuthHeaders: () => Record<string, string>
constructor(
transport: SSETransport,
sessionUrl: URL,
opts?: {
onEpochMismatch?: () => never
heartbeatIntervalMs?: number
heartbeatJitterFraction?: number
/**
* Per-instance auth header source. Omit to read the process-wide
* CLAUDE_CODE_SESSION_ACCESS_TOKEN (single-session callers — REPL,
* daemon). Required for concurrent multi-session callers.
*/
getAuthHeaders?: () => Record<string, string>
},
) {
this.onEpochMismatch =
opts?.onEpochMismatch ??
(() => {
// eslint-disable-next-line custom-rules/no-process-exit
process.exit(1)
})
this.heartbeatIntervalMs =
opts?.heartbeatIntervalMs ?? DEFAULT_HEARTBEAT_INTERVAL_MS
this.heartbeatJitterFraction = opts?.heartbeatJitterFraction ?? 0
this.getAuthHeaders = opts?.getAuthHeaders ?? getSessionIngressAuthHeaders
// Session URL: https://host/v1/code/sessions/{id}
if (sessionUrl.protocol !== 'http:' && sessionUrl.protocol !== 'https:') {
throw new Error(
`CCRClient: Expected http(s) URL, got ${sessionUrl.protocol}`,
)
}
const pathname = sessionUrl.pathname.replace(/\/$/, '')
this.sessionBaseUrl = `${sessionUrl.protocol}//${sessionUrl.host}${pathname}`
// Extract session ID from the URL path (last segment)
this.sessionId = pathname.split('/').pop() || ''
this.workerState = new WorkerStateUploader({
send: body =>
this.request(
'put',
'/worker',
{ worker_epoch: this.workerEpoch, ...body },
'PUT worker',
).then(r => r.ok),
baseDelayMs: 500,
maxDelayMs: 30_000,
jitterMs: 500,
})
this.eventUploader = new SerialBatchEventUploader<ClientEvent>({
maxBatchSize: 100,
maxBatchBytes: 10 * 1024 * 1024,
// flushStreamEventBuffer() enqueues a full 100ms window of accumulated
// stream_events in one call. A burst of mixed delta types that don't
// fold into a single snapshot could exceed the old cap (50) and deadlock
// on the SerialBatchEventUploader backpressure check. Match
// HybridTransport's bound — high enough to be memory-only.
maxQueueSize: 100_000,
send: async batch => {
const result = await this.request(
'post',
'/worker/events',
{ worker_epoch: this.workerEpoch, events: batch },
'client events',
)
if (!result.ok) {
throw new RetryableError(
'client event POST failed',
result.retryAfterMs,
)
}
},
baseDelayMs: 500,
maxDelayMs: 30_000,
jitterMs: 500,
})
this.internalEventUploader = new SerialBatchEventUploader<WorkerEvent>({
maxBatchSize: 100,
maxBatchBytes: 10 * 1024 * 1024,
maxQueueSize: 200,
send: async batch => {
const result = await this.request(
'post',
'/worker/internal-events',
{ worker_epoch: this.workerEpoch, events: batch },
'internal events',
)
if (!result.ok) {
throw new RetryableError(
'internal event POST failed',
result.retryAfterMs,
)
}
},
baseDelayMs: 500,
maxDelayMs: 30_000,
jitterMs: 500,
})
this.deliveryUploader = new SerialBatchEventUploader<{
eventId: string
status: 'received' | 'processing' | 'processed'
}>({
maxBatchSize: 64,
maxQueueSize: 64,
send: async batch => {
const result = await this.request(
'post',
'/worker/events/delivery',
{
worker_epoch: this.workerEpoch,
updates: batch.map(d => ({
event_id: d.eventId,
status: d.status,
})),
},
'delivery batch',
)
if (!result.ok) {
throw new RetryableError('delivery POST failed', result.retryAfterMs)
}
},
baseDelayMs: 500,
maxDelayMs: 30_000,
jitterMs: 500,
})
// Ack each received client_event so CCR can track delivery status.
// Wired here (not in initialize()) so the callback is registered the
// moment new CCRClient() returns — remoteIO must be free to call
// transport.connect() immediately after without racing the first
// SSE catch-up frame against an unwired onEventCallback.
transport.setOnEvent((event: StreamClientEvent) => {
this.reportDelivery(event.event_id, 'received')
})
}
/**
* Initialize the session worker:
* 1. Take worker_epoch from the argument, or fall back to
* CLAUDE_CODE_WORKER_EPOCH (set by env-manager / bridge spawner)
* 2. Report state as 'idle'
* 3. Start heartbeat timer
*
* In-process callers (replBridge) pass the epoch directly — they
* registered the worker themselves and there is no parent process
* setting env vars.
*/
async initialize(epoch?: number): Promise<Record<string, unknown> | null> {
const startMs = Date.now()
if (Object.keys(this.getAuthHeaders()).length === 0) {
throw new CCRInitError('no_auth_headers')
}
if (epoch === undefined) {
const rawEpoch = process.env.CLAUDE_CODE_WORKER_EPOCH
epoch = rawEpoch ? parseInt(rawEpoch, 10) : NaN
}
if (isNaN(epoch)) {
throw new CCRInitError('missing_epoch')
}
this.workerEpoch = epoch
// Concurrent with the init PUT — neither depends on the other.
const restoredPromise = this.getWorkerState()
const result = await this.request(
'put',
'/worker',
{
worker_status: 'idle',
worker_epoch: this.workerEpoch,
// Clear stale pending_action/task_summary left by a prior
// worker crash — the in-session clears don't survive process restart.
external_metadata: {
pending_action: null,
task_summary: null,
},
},
'PUT worker (init)',
)
if (!result.ok) {
// 409 → onEpochMismatch may throw, but request() catches it and returns
// false. Without this check we'd continue to startHeartbeat(), leaking a
// 20s timer against a dead epoch. Throw so connect()'s rejection handler
// fires instead of the success path.
throw new CCRInitError('worker_register_failed')
}
this.currentState = 'idle'
this.startHeartbeat()
// sessionActivity's refcount-gated timer fires while an API call or tool
// is in-flight; without a write the container lease can expire mid-wait.
// v1 wires this in WebSocketTransport per-connection.
registerSessionActivityCallback(() => {
void this.writeEvent({ type: 'keep_alive' })
})
logForDebugging(`CCRClient: initialized, epoch=${this.workerEpoch}`)
logForDiagnosticsNoPII('info', 'cli_worker_lifecycle_initialized', {
epoch: this.workerEpoch,
duration_ms: Date.now() - startMs,
})
// Await the concurrent GET and log state_restored here, after the PUT
// has succeeded — logging inside getWorkerState() raced: if the GET
// resolved before the PUT failed, diagnostics showed both init_failed
// and state_restored for the same session.
const { metadata, durationMs } = await restoredPromise
if (!this.closed) {
logForDiagnosticsNoPII('info', 'cli_worker_state_restored', {
duration_ms: durationMs,
had_state: metadata !== null,
})
}
return metadata
}
// Control_requests are marked processed and not re-delivered on
// restart, so read back what the prior worker wrote.
private async getWorkerState(): Promise<{
metadata: Record<string, unknown> | null
durationMs: number
}> {
const startMs = Date.now()
const authHeaders = this.getAuthHeaders()
if (Object.keys(authHeaders).length === 0) {
return { metadata: null, durationMs: 0 }
}
const data = await this.getWithRetry<WorkerStateResponse>(
`${this.sessionBaseUrl}/worker`,
authHeaders,
'worker_state',
)
return {
metadata: data?.worker?.external_metadata ?? null,
durationMs: Date.now() - startMs,
}
}
/**
* Send an authenticated HTTP request to CCR. Handles auth headers,
* 409 epoch mismatch, and error logging. Returns { ok: true } on 2xx.
* On 429, reads Retry-After (integer seconds) so the uploader can honor
* the server's backoff hint instead of blindly exponentiating.
*/
  private async request(
    method: 'post' | 'put',
    path: string,
    body: unknown,
    label: string,
    { timeout = 10_000 }: { timeout?: number } = {},
  ): Promise<RequestResult> {
    // No auth headers (e.g. token not yet delivered) — fail soft; the
    // uploaders treat { ok: false } as retryable.
    const authHeaders = this.getAuthHeaders()
    if (Object.keys(authHeaders).length === 0) return { ok: false }
    try {
      const response = await this.http[method](
        `${this.sessionBaseUrl}${path}`,
        body,
        {
          headers: {
            ...authHeaders,
            'Content-Type': 'application/json',
            'anthropic-version': '2023-06-01',
            'User-Agent': getClaudeCodeUserAgent(),
          },
          // Accept every status code so non-2xx is handled below instead of
          // surfacing as a thrown HTTP-client error.
          validateStatus: alwaysValidStatus,
          timeout,
        },
      )
      if (response.status >= 200 && response.status < 300) {
        // Any 2xx proves auth works — reset the consecutive-failure streak.
        this.consecutiveAuthFailures = 0
        return { ok: true }
      }
      if (response.status === 409) {
        // Epoch mismatch: a newer worker replaced us. handleEpochMismatch is
        // typed `never`, but if onEpochMismatch throws, the catch below
        // converts it into { ok: false }.
        this.handleEpochMismatch()
      }
      if (response.status === 401 || response.status === 403) {
        // A 401 with an expired JWT is deterministic — no retry will
        // ever succeed. Check the token's own exp before burning
        // wall-clock on the threshold loop.
        const tok = getSessionIngressAuthToken()
        const exp = tok ? decodeJwtExpiry(tok) : null
        if (exp !== null && exp * 1000 < Date.now()) {
          logForDebugging(
            `CCRClient: session_token expired (exp=${new Date(exp * 1000).toISOString()}) — no refresh was delivered, exiting`,
            { level: 'error' },
          )
          logForDiagnosticsNoPII('error', 'cli_worker_token_expired_no_refresh')
          this.onEpochMismatch()
        }
        // Token looks valid but server says 401 — possible server-side
        // blip (userauth down, KMS hiccup). Count toward threshold.
        this.consecutiveAuthFailures++
        if (this.consecutiveAuthFailures >= MAX_CONSECUTIVE_AUTH_FAILURES) {
          logForDebugging(
            `CCRClient: ${this.consecutiveAuthFailures} consecutive auth failures with a valid-looking token — server-side auth unrecoverable, exiting`,
            { level: 'error' },
          )
          logForDiagnosticsNoPII('error', 'cli_worker_auth_failures_exhausted')
          this.onEpochMismatch()
        }
      }
      // Every non-2xx path (including 4xx that fell through above) is logged
      // once here with the caller-supplied label for context.
      logForDebugging(`CCRClient: ${label} returned ${response.status}`, {
        level: 'warn',
      })
      logForDiagnosticsNoPII('warn', 'cli_worker_request_failed', {
        method,
        path,
        status: response.status,
      })
      if (response.status === 429) {
        // Honor the server's Retry-After hint (integer seconds) so the
        // uploader backs off as instructed rather than exponentiating blindly.
        const raw = response.headers?.['retry-after']
        const seconds = typeof raw === 'string' ? parseInt(raw, 10) : NaN
        if (!isNaN(seconds) && seconds >= 0) {
          return { ok: false, retryAfterMs: seconds * 1000 }
        }
      }
      return { ok: false }
    } catch (error) {
      // Network-level failure (or a throw from onEpochMismatch above) —
      // logged and reported as a plain retryable failure.
      logForDebugging(`CCRClient: ${label} failed: ${errorMessage(error)}`, {
        level: 'warn',
      })
      logForDiagnosticsNoPII('warn', 'cli_worker_request_error', {
        method,
        path,
        error_code: getErrnoCode(error),
      })
      return { ok: false }
    }
  }
/** Report worker state to CCR via PUT /sessions/{id}/worker. */
reportState(state: SessionState, details?: RequiresActionDetails): void {
if (state === this.currentState && !details) return
this.currentState = state
this.workerState.enqueue({
worker_status: state,
requires_action_details: details
? {
tool_name: details.tool_name,
action_description: details.action_description,
request_id: details.request_id,
}
: null,
})
}
/** Report external metadata to CCR via PUT /worker. */
reportMetadata(metadata: Record<string, unknown>): void {
this.workerState.enqueue({ external_metadata: metadata })
}
/**
* Handle epoch mismatch (409 Conflict). A newer CC instance has replaced
* this one — exit immediately.
*/
private handleEpochMismatch(): never {
logForDebugging('CCRClient: Epoch mismatch (409), shutting down', {
level: 'error',
})
logForDiagnosticsNoPII('error', 'cli_worker_epoch_mismatch')
this.onEpochMismatch()
}
/** Start periodic heartbeat. */
private startHeartbeat(): void {
this.stopHeartbeat()
const schedule = (): void => {
const jitter =
this.heartbeatIntervalMs *
this.heartbeatJitterFraction *
(2 * Math.random() - 1)
this.heartbeatTimer = setTimeout(tick, this.heartbeatIntervalMs + jitter)
}
const tick = (): void => {
void this.sendHeartbeat()
// stopHeartbeat nulls the timer; check after the fire-and-forget send
// but before rescheduling so close() during sendHeartbeat is honored.
if (this.heartbeatTimer === null) return
schedule()
}
schedule()
}
/** Stop heartbeat timer. */
private stopHeartbeat(): void {
if (this.heartbeatTimer) {
clearTimeout(this.heartbeatTimer)
this.heartbeatTimer = null
}
}
/** Send a heartbeat via POST /sessions/{id}/worker/heartbeat. */
private async sendHeartbeat(): Promise<void> {
if (this.heartbeatInFlight) return
this.heartbeatInFlight = true
try {
const result = await this.request(
'post',
'/worker/heartbeat',
{ session_id: this.sessionId, worker_epoch: this.workerEpoch },
'Heartbeat',
{ timeout: 5_000 },
)
if (result.ok) {
logForDebugging('CCRClient: Heartbeat sent')
}
} finally {
this.heartbeatInFlight = false
}
}
/**
* Write a StdoutMessage as a client event via POST /sessions/{id}/worker/events.
* These events are visible to frontend clients via the SSE stream.
* Injects a UUID if missing to ensure server-side idempotency on retry.
*
* stream_event messages are held in a 100ms delay buffer and accumulated
* (text_deltas for the same content block emit a full-so-far snapshot per
* flush). A non-stream_event write flushes the buffer first so downstream
* ordering is preserved.
*/
async writeEvent(message: StdoutMessage): Promise<void> {
if (message.type === 'stream_event') {
this.streamEventBuffer.push(message)
if (!this.streamEventTimer) {
this.streamEventTimer = setTimeout(
() => void this.flushStreamEventBuffer(),
STREAM_EVENT_FLUSH_INTERVAL_MS,
)
}
return
}
await this.flushStreamEventBuffer()
if (message.type === 'assistant') {
clearStreamAccumulatorForMessage(this.streamTextAccumulator, message)
}
await this.eventUploader.enqueue(this.toClientEvent(message))
}
/** Wrap a StdoutMessage as a ClientEvent, injecting a UUID if missing. */
private toClientEvent(message: StdoutMessage): ClientEvent {
const msg = message as unknown as Record<string, unknown>
return {
payload: {
...msg,
uuid: typeof msg.uuid === 'string' ? msg.uuid : randomUUID(),
} as EventPayload,
}
}
/**
* Drain the stream_event delay buffer: accumulate text_deltas into
* full-so-far snapshots, clear the timer, enqueue the resulting events.
* Called from the timer, from writeEvent on a non-stream message, and from
* flush(). close() drops the buffer — call flush() first if you need
* delivery.
*/
private async flushStreamEventBuffer(): Promise<void> {
if (this.streamEventTimer) {
clearTimeout(this.streamEventTimer)
this.streamEventTimer = null
}
if (this.streamEventBuffer.length === 0) return
const buffered = this.streamEventBuffer
this.streamEventBuffer = []
const payloads = accumulateStreamEvents(
buffered,
this.streamTextAccumulator,
)
await this.eventUploader.enqueue(
payloads.map(payload => ({ payload, ephemeral: true })),
)
}
/**
* Write an internal worker event via POST /sessions/{id}/worker/internal-events.
* These events are NOT visible to frontend clients — they store worker-internal
* state (transcript messages, compaction markers) needed for session resume.
*/
async writeInternalEvent(
eventType: string,
payload: Record<string, unknown>,
{
isCompaction = false,
agentId,
}: {
isCompaction?: boolean
agentId?: string
} = {},
): Promise<void> {
const event: WorkerEvent = {
payload: {
type: eventType,
...payload,
uuid: typeof payload.uuid === 'string' ? payload.uuid : randomUUID(),
} as EventPayload,
...(isCompaction && { is_compaction: true }),
...(agentId && { agent_id: agentId }),
}
await this.internalEventUploader.enqueue(event)
}
/**
* Flush pending internal events. Call between turns and on shutdown
* to ensure transcript entries are persisted.
*/
flushInternalEvents(): Promise<void> {
return this.internalEventUploader.flush()
}
/**
* Flush pending client events (writeEvent queue). Call before close()
* when the caller needs delivery confirmation — close() abandons the
* queue. Resolves once the uploader drains or rejects; returns
* regardless of whether individual POSTs succeeded (check server state
* separately if that matters).
*/
async flush(): Promise<void> {
await this.flushStreamEventBuffer()
return this.eventUploader.flush()
}
/**
* Read foreground agent internal events from
* GET /sessions/{id}/worker/internal-events.
* Returns transcript entries from the last compaction boundary, or null on failure.
* Used for session resume.
*/
async readInternalEvents(): Promise<InternalEvent[] | null> {
return this.paginatedGet('/worker/internal-events', {}, 'internal_events')
}
/**
* Read all subagent internal events from
* GET /sessions/{id}/worker/internal-events?subagents=true.
* Returns a merged stream across all non-foreground agents, each from its
* compaction point. Used for session resume.
*/
async readSubagentInternalEvents(): Promise<InternalEvent[] | null> {
return this.paginatedGet(
'/worker/internal-events',
{ subagents: 'true' },
'subagent_events',
)
}
/**
* Paginated GET with retry. Fetches all pages from a list endpoint,
* retrying each page on failure with exponential backoff + jitter.
*/
private async paginatedGet(
path: string,
params: Record<string, string>,
context: string,
): Promise<InternalEvent[] | null> {
const authHeaders = this.getAuthHeaders()
if (Object.keys(authHeaders).length === 0) return null
const allEvents: InternalEvent[] = []
let cursor: string | undefined
do {
const url = new URL(`${this.sessionBaseUrl}${path}`)
for (const [k, v] of Object.entries(params)) {
url.searchParams.set(k, v)
}
if (cursor) {
url.searchParams.set('cursor', cursor)
}
const page = await this.getWithRetry<ListInternalEventsResponse>(
url.toString(),
authHeaders,
context,
)
if (!page) return null
allEvents.push(...(page.data ?? []))
cursor = page.next_cursor
} while (cursor)
logForDebugging(
`CCRClient: Read ${allEvents.length} internal events from ${path}${params.subagents ? ' (subagents)' : ''}`,
)
return allEvents
}
/**
* Single GET request with retry. Returns the parsed response body
* on success, null if all retries are exhausted.
*/
private async getWithRetry<T>(
url: string,
authHeaders: Record<string, string>,
context: string,
): Promise<T | null> {
for (let attempt = 1; attempt <= 10; attempt++) {
let response
try {
response = await this.http.get<T>(url, {
headers: {
...authHeaders,
'anthropic-version': '2023-06-01',
'User-Agent': getClaudeCodeUserAgent(),
},
validateStatus: alwaysValidStatus,
timeout: 30_000,
})
} catch (error) {
logForDebugging(
`CCRClient: GET ${url} failed (attempt ${attempt}/10): ${errorMessage(error)}`,
{ level: 'warn' },
)
if (attempt < 10) {
const delay =
Math.min(500 * 2 ** (attempt - 1), 30_000) + Math.random() * 500
await sleep(delay)
}
continue
}
if (response.status >= 200 && response.status < 300) {
return response.data
}
if (response.status === 409) {
this.handleEpochMismatch()
}
logForDebugging(
`CCRClient: GET ${url} returned ${response.status} (attempt ${attempt}/10)`,
{ level: 'warn' },
)
if (attempt < 10) {
const delay =
Math.min(500 * 2 ** (attempt - 1), 30_000) + Math.random() * 500
await sleep(delay)
}
}
logForDebugging('CCRClient: GET retries exhausted', { level: 'error' })
logForDiagnosticsNoPII('error', 'cli_worker_get_retries_exhausted', {
context,
})
return null
}
/**
* Report delivery status for a client-to-worker event.
* POST /v1/code/sessions/{id}/worker/events/delivery (batch endpoint)
*/
reportDelivery(
eventId: string,
status: 'received' | 'processing' | 'processed',
): void {
void this.deliveryUploader.enqueue({ eventId, status })
}
/** Get the current epoch (for external use). */
  getWorkerEpoch(): number {
    // Assigned in initialize() from the argument or CLAUDE_CODE_WORKER_EPOCH;
    // echoed in every /worker request body so CCR can detect a stale worker
    // (409 → handleEpochMismatch).
    return this.workerEpoch
  }
/** Internal-event queue depth — shutdown-snapshot backpressure signal. */
  get internalEventsPending(): number {
    // Queue depth of the internal-event uploader; non-zero means
    // writeInternalEvent entries have not yet been POSTed to CCR.
    return this.internalEventUploader.pendingCount
  }
/** Clean up uploaders and timers. */
  close(): void {
    // Mark closed first so in-flight async paths (e.g. initialize()'s
    // state_restored logging) can see the shutdown.
    this.closed = true
    this.stopHeartbeat()
    unregisterSessionActivityCallback()
    // Drop the stream delay buffer without flushing — callers that need
    // delivery must call flush() before close() (see flush()'s contract).
    if (this.streamEventTimer) {
      clearTimeout(this.streamEventTimer)
      this.streamEventTimer = null
    }
    this.streamEventBuffer = []
    this.streamTextAccumulator.byMessage.clear()
    this.streamTextAccumulator.scopeToMessage.clear()
    // Close every uploader; per flush()'s doc, close() abandons their queues.
    this.workerState.close()
    this.eventUploader.close()
    this.internalEventUploader.close()
    this.deliveryUploader.close()
  }
}
+45
View File
@@ -0,0 +1,45 @@
import { URL } from 'url'
import { isEnvTruthy } from '../../utils/envUtils.js'
import { HybridTransport } from './HybridTransport.js'
import { SSETransport } from './SSETransport.js'
import type { Transport } from './Transport.js'
import { WebSocketTransport } from './WebSocketTransport.js'
/**
* Helper function to get the appropriate transport for a URL.
*
* Transport selection priority:
* 1. SSETransport (SSE reads + POST writes) when CLAUDE_CODE_USE_CCR_V2 is set
* 2. HybridTransport (WS reads + POST writes) when CLAUDE_CODE_POST_FOR_SESSION_INGRESS_V2 is set
* 3. WebSocketTransport (WS reads + WS writes) — default
*/
export function getTransportForUrl(
  url: URL,
  headers: Record<string, string> = {},
  sessionId?: string,
  refreshHeaders?: () => Record<string, string>,
): Transport {
  if (isEnvTruthy(process.env.CLAUDE_CODE_USE_CCR_V2)) {
    // v2: SSE for reads, HTTP POST for writes.
    // --sdk-url is the session URL (.../sessions/{id}); map a ws(s) scheme to
    // http(s) and append /worker/events/stream to get the SSE stream URL.
    const sseUrl = new URL(url.href)
    const schemeMap: Record<string, string> = {
      'wss:': 'https:',
      'ws:': 'http:',
    }
    const httpScheme = schemeMap[sseUrl.protocol]
    if (httpScheme) {
      sseUrl.protocol = httpScheme
    }
    const basePath = sseUrl.pathname.replace(/\/$/, '')
    sseUrl.pathname = `${basePath}/worker/events/stream`
    return new SSETransport(sseUrl, headers, sessionId, refreshHeaders)
  }
  // Guard clause instead of trailing else: only websocket schemes reach the
  // WS-based transports.
  if (url.protocol !== 'ws:' && url.protocol !== 'wss:') {
    throw new Error(`Unsupported protocol: ${url.protocol}`)
  }
  // WS reads; writes go over POST when the hybrid ingress flag is set.
  return isEnvTruthy(process.env.CLAUDE_CODE_POST_FOR_SESSION_INGRESS_V2)
    ? new HybridTransport(url, headers, sessionId, refreshHeaders)
    : new WebSocketTransport(url, headers, sessionId, refreshHeaders)
}
+422
View File
@@ -0,0 +1,422 @@
import chalk from 'chalk'
import { logEvent } from 'src/services/analytics/index.js'
import {
getLatestVersion,
type InstallStatus,
installGlobalPackage,
} from 'src/utils/autoUpdater.js'
import { regenerateCompletionCache } from 'src/utils/completionCache.js'
import {
getGlobalConfig,
type InstallMethod,
saveGlobalConfig,
} from 'src/utils/config.js'
import { logForDebugging } from 'src/utils/debug.js'
import { getDoctorDiagnostic } from 'src/utils/doctorDiagnostic.js'
import { gracefulShutdown } from 'src/utils/gracefulShutdown.js'
import {
installOrUpdateClaudePackage,
localInstallationExists,
} from 'src/utils/localInstaller.js'
import {
installLatest as installLatestNative,
removeInstalledSymlink,
} from 'src/utils/nativeInstaller/index.js'
import { getPackageManager } from 'src/utils/nativeInstaller/packageManagers.js'
import { writeToStdout } from 'src/utils/process.js'
import { gte } from 'src/utils/semver.js'
import { getInitialSettings } from 'src/utils/settings/settings.js'
export async function update() {
logEvent('tengu_update_check', {})
writeToStdout(`Current version: ${MACRO.VERSION}\n`)
const channel = getInitialSettings()?.autoUpdatesChannel ?? 'latest'
writeToStdout(`Checking for updates to ${channel} version...\n`)
logForDebugging('update: Starting update check')
// Run diagnostic to detect potential issues
logForDebugging('update: Running diagnostic')
const diagnostic = await getDoctorDiagnostic()
logForDebugging(`update: Installation type: ${diagnostic.installationType}`)
logForDebugging(
`update: Config install method: ${diagnostic.configInstallMethod}`,
)
// Check for multiple installations
if (diagnostic.multipleInstallations.length > 1) {
writeToStdout('\n')
writeToStdout(chalk.yellow('Warning: Multiple installations found') + '\n')
for (const install of diagnostic.multipleInstallations) {
const current =
diagnostic.installationType === install.type
? ' (currently running)'
: ''
writeToStdout(`- ${install.type} at ${install.path}${current}\n`)
}
}
// Display warnings if any exist
if (diagnostic.warnings.length > 0) {
writeToStdout('\n')
for (const warning of diagnostic.warnings) {
logForDebugging(`update: Warning detected: ${warning.issue}`)
// Don't skip PATH warnings - they're always relevant
// The user needs to know that 'which claude' points elsewhere
logForDebugging(`update: Showing warning: ${warning.issue}`)
writeToStdout(chalk.yellow(`Warning: ${warning.issue}\n`))
writeToStdout(chalk.bold(`Fix: ${warning.fix}\n`))
}
}
// Update config if installMethod is not set (but skip for package managers)
const config = getGlobalConfig()
if (
!config.installMethod &&
diagnostic.installationType !== 'package-manager'
) {
writeToStdout('\n')
writeToStdout('Updating configuration to track installation method...\n')
let detectedMethod: 'local' | 'native' | 'global' | 'unknown' = 'unknown'
// Map diagnostic installation type to config install method
switch (diagnostic.installationType) {
case 'npm-local':
detectedMethod = 'local'
break
case 'native':
detectedMethod = 'native'
break
case 'npm-global':
detectedMethod = 'global'
break
default:
detectedMethod = 'unknown'
}
saveGlobalConfig(current => ({
...current,
installMethod: detectedMethod,
}))
writeToStdout(`Installation method set to: ${detectedMethod}\n`)
}
// Check if running from development build
if (diagnostic.installationType === 'development') {
writeToStdout('\n')
writeToStdout(
chalk.yellow('Warning: Cannot update development build') + '\n',
)
await gracefulShutdown(1)
}
// Check if running from a package manager
if (diagnostic.installationType === 'package-manager') {
const packageManager = await getPackageManager()
writeToStdout('\n')
if (packageManager === 'homebrew') {
writeToStdout('Claude is managed by Homebrew.\n')
const latest = await getLatestVersion(channel)
if (latest && !gte(MACRO.VERSION, latest)) {
writeToStdout(`Update available: ${MACRO.VERSION}${latest}\n`)
writeToStdout('\n')
writeToStdout('To update, run:\n')
writeToStdout(chalk.bold(' brew upgrade claude-code') + '\n')
} else {
writeToStdout('Claude is up to date!\n')
}
} else if (packageManager === 'winget') {
writeToStdout('Claude is managed by winget.\n')
const latest = await getLatestVersion(channel)
if (latest && !gte(MACRO.VERSION, latest)) {
writeToStdout(`Update available: ${MACRO.VERSION}${latest}\n`)
writeToStdout('\n')
writeToStdout('To update, run:\n')
writeToStdout(
chalk.bold(' winget upgrade Anthropic.ClaudeCode') + '\n',
)
} else {
writeToStdout('Claude is up to date!\n')
}
} else if (packageManager === 'apk') {
writeToStdout('Claude is managed by apk.\n')
const latest = await getLatestVersion(channel)
if (latest && !gte(MACRO.VERSION, latest)) {
writeToStdout(`Update available: ${MACRO.VERSION}${latest}\n`)
writeToStdout('\n')
writeToStdout('To update, run:\n')
writeToStdout(chalk.bold(' apk upgrade claude-code') + '\n')
} else {
writeToStdout('Claude is up to date!\n')
}
} else {
// pacman, deb, and rpm don't get specific commands because they each have
// multiple frontends (pacman: yay/paru/makepkg, deb: apt/apt-get/aptitude/nala,
// rpm: dnf/yum/zypper)
writeToStdout('Claude is managed by a package manager.\n')
writeToStdout('Please use your package manager to update.\n')
}
await gracefulShutdown(0)
}
// Check for config/reality mismatch (skip for package-manager installs)
if (
config.installMethod &&
diagnostic.configInstallMethod !== 'not set' &&
diagnostic.installationType !== 'package-manager'
) {
const runningType = diagnostic.installationType
const configExpects = diagnostic.configInstallMethod
// Map installation types for comparison
const typeMapping: Record<string, string> = {
'npm-local': 'local',
'npm-global': 'global',
native: 'native',
development: 'development',
unknown: 'unknown',
}
const normalizedRunningType = typeMapping[runningType] || runningType
if (
normalizedRunningType !== configExpects &&
configExpects !== 'unknown'
) {
writeToStdout('\n')
writeToStdout(chalk.yellow('Warning: Configuration mismatch') + '\n')
writeToStdout(`Config expects: ${configExpects} installation\n`)
writeToStdout(`Currently running: ${runningType}\n`)
writeToStdout(
chalk.yellow(
`Updating the ${runningType} installation you are currently using`,
) + '\n',
)
// Update config to match reality
saveGlobalConfig(current => ({
...current,
installMethod: normalizedRunningType as InstallMethod,
}))
writeToStdout(
`Config updated to reflect current installation method: ${normalizedRunningType}\n`,
)
}
}
// Handle native installation updates first
if (diagnostic.installationType === 'native') {
logForDebugging(
'update: Detected native installation, using native updater',
)
try {
const result = await installLatestNative(channel, true)
// Handle lock contention gracefully
if (result.lockFailed) {
const pidInfo = result.lockHolderPid
? ` (PID ${result.lockHolderPid})`
: ''
writeToStdout(
chalk.yellow(
`Another Claude process${pidInfo} is currently running. Please try again in a moment.`,
) + '\n',
)
await gracefulShutdown(0)
}
if (!result.latestVersion) {
process.stderr.write('Failed to check for updates\n')
await gracefulShutdown(1)
}
if (result.latestVersion === MACRO.VERSION) {
writeToStdout(
chalk.green(`Claude Code is up to date (${MACRO.VERSION})`) + '\n',
)
} else {
writeToStdout(
chalk.green(
`Successfully updated from ${MACRO.VERSION} to version ${result.latestVersion}`,
) + '\n',
)
await regenerateCompletionCache()
}
await gracefulShutdown(0)
} catch (error) {
process.stderr.write('Error: Failed to install native update\n')
process.stderr.write(String(error) + '\n')
process.stderr.write('Try running "claude doctor" for diagnostics\n')
await gracefulShutdown(1)
}
}
// Fallback to existing JS/npm-based update logic
// Remove native installer symlink since we're not using native installation
// But only if user hasn't migrated to native installation
if (config.installMethod !== 'native') {
await removeInstalledSymlink()
}
logForDebugging('update: Checking npm registry for latest version')
logForDebugging(`update: Package URL: ${MACRO.PACKAGE_URL}`)
const npmTag = channel === 'stable' ? 'stable' : 'latest'
const npmCommand = `npm view ${MACRO.PACKAGE_URL}@${npmTag} version`
logForDebugging(`update: Running: ${npmCommand}`)
const latestVersion = await getLatestVersion(channel)
logForDebugging(
`update: Latest version from npm: ${latestVersion || 'FAILED'}`,
)
if (!latestVersion) {
logForDebugging('update: Failed to get latest version from npm registry')
process.stderr.write(chalk.red('Failed to check for updates') + '\n')
process.stderr.write('Unable to fetch latest version from npm registry\n')
process.stderr.write('\n')
process.stderr.write('Possible causes:\n')
process.stderr.write(' • Network connectivity issues\n')
process.stderr.write(' • npm registry is unreachable\n')
process.stderr.write(' • Corporate proxy/firewall blocking npm\n')
if (MACRO.PACKAGE_URL && !MACRO.PACKAGE_URL.startsWith('@anthropic')) {
process.stderr.write(
' • Internal/development build not published to npm\n',
)
}
process.stderr.write('\n')
process.stderr.write('Try:\n')
process.stderr.write(' • Check your internet connection\n')
process.stderr.write(' • Run with --debug flag for more details\n')
const packageName =
MACRO.PACKAGE_URL ||
(process.env.USER_TYPE === 'ant'
? '@anthropic-ai/claude-cli'
: '@anthropic-ai/claude-code')
process.stderr.write(
` • Manually check: npm view ${packageName} version\n`,
)
process.stderr.write(' • Check if you need to login: npm whoami\n')
await gracefulShutdown(1)
}
// Check if versions match exactly, including any build metadata (like SHA)
if (latestVersion === MACRO.VERSION) {
writeToStdout(
chalk.green(`Claude Code is up to date (${MACRO.VERSION})`) + '\n',
)
await gracefulShutdown(0)
}
writeToStdout(
`New version available: ${latestVersion} (current: ${MACRO.VERSION})\n`,
)
writeToStdout('Installing update...\n')
// Determine update method based on what's actually running
let useLocalUpdate = false
let updateMethodName = ''
switch (diagnostic.installationType) {
case 'npm-local':
useLocalUpdate = true
updateMethodName = 'local'
break
case 'npm-global':
useLocalUpdate = false
updateMethodName = 'global'
break
case 'unknown': {
// Fallback to detection if we can't determine installation type
const isLocal = await localInstallationExists()
useLocalUpdate = isLocal
updateMethodName = isLocal ? 'local' : 'global'
writeToStdout(
chalk.yellow('Warning: Could not determine installation type') + '\n',
)
writeToStdout(
`Attempting ${updateMethodName} update based on file detection...\n`,
)
break
}
default:
process.stderr.write(
`Error: Cannot update ${diagnostic.installationType} installation\n`,
)
await gracefulShutdown(1)
}
writeToStdout(`Using ${updateMethodName} installation update method...\n`)
logForDebugging(`update: Update method determined: ${updateMethodName}`)
logForDebugging(`update: useLocalUpdate: ${useLocalUpdate}`)
let status: InstallStatus
if (useLocalUpdate) {
logForDebugging(
'update: Calling installOrUpdateClaudePackage() for local update',
)
status = await installOrUpdateClaudePackage(channel)
} else {
logForDebugging('update: Calling installGlobalPackage() for global update')
status = await installGlobalPackage()
}
logForDebugging(`update: Installation status: ${status}`)
switch (status) {
case 'success':
writeToStdout(
chalk.green(
`Successfully updated from ${MACRO.VERSION} to version ${latestVersion}`,
) + '\n',
)
await regenerateCompletionCache()
break
case 'no_permissions':
process.stderr.write(
'Error: Insufficient permissions to install update\n',
)
if (useLocalUpdate) {
process.stderr.write('Try manually updating with:\n')
process.stderr.write(
` cd ~/.claude/local && npm update ${MACRO.PACKAGE_URL}\n`,
)
} else {
process.stderr.write('Try running with sudo or fix npm permissions\n')
process.stderr.write(
'Or consider using native installation with: claude install\n',
)
}
await gracefulShutdown(1)
break
case 'install_failed':
process.stderr.write('Error: Failed to install update\n')
if (useLocalUpdate) {
process.stderr.write('Try manually updating with:\n')
process.stderr.write(
` cd ~/.claude/local && npm update ${MACRO.PACKAGE_URL}\n`,
)
} else {
process.stderr.write(
'Or consider using native installation with: claude install\n',
)
}
await gracefulShutdown(1)
break
case 'in_progress':
process.stderr.write(
'Error: Another instance is currently performing an update\n',
)
process.stderr.write('Please wait and try again later\n')
await gracefulShutdown(1)
break
}
await gracefulShutdown(0)
}