init claude-code

This commit is contained in:
2026-04-01 17:32:37 +02:00
commit 73b208c009
1902 changed files with 513237 additions and 0 deletions
+119
View File
@@ -0,0 +1,119 @@
import axios from 'axios'
import { getOauthConfig } from '../../constants/oauth.js'
import { getOAuthHeaders, prepareApiRequest } from '../../utils/teleport/api.js'
// Kinds of requests a non-admin member can file for their admin to act on.
export type AdminRequestType = 'limit_increase' | 'seat_upgrade'
// Lifecycle states an admin request moves through.
export type AdminRequestStatus = 'pending' | 'approved' | 'dismissed'
// Extra payload carried only by seat-upgrade requests.
export type AdminRequestSeatUpgradeDetails = {
  message?: string | null
  current_seat_tier?: string | null
}
// Creation payload, discriminated on request_type: limit increases carry no
// details, seat upgrades carry AdminRequestSeatUpgradeDetails.
export type AdminRequestCreateParams =
  | {
      request_type: 'limit_increase'
      details: null
    }
  | {
      request_type: 'seat_upgrade'
      details: AdminRequestSeatUpgradeDetails
    }
// A persisted admin request as returned by the API: common fields plus the
// same request_type-discriminated details union used at creation time.
export type AdminRequest = {
  uuid: string
  status: AdminRequestStatus
  requester_uuid?: string | null
  created_at: string
} & (
  | {
      request_type: 'limit_increase'
      details: null
    }
  | {
      request_type: 'seat_upgrade'
      details: AdminRequestSeatUpgradeDetails
    }
)
/**
 * Create an admin request (limit increase or seat upgrade).
 *
 * For Team/Enterprise users who don't have billing/admin permissions,
 * this creates a request that their admin can act on.
 *
 * If a pending request of the same type already exists for this user,
 * returns the existing request instead of creating a new one.
 */
export async function createAdminRequest(
  params: AdminRequestCreateParams,
): Promise<AdminRequest> {
  const { accessToken, orgUUID } = await prepareApiRequest()
  const requestHeaders = {
    ...getOAuthHeaders(accessToken),
    'x-organization-uuid': orgUUID,
  }
  const endpoint = `${getOauthConfig().BASE_API_URL}/api/oauth/organizations/${orgUUID}/admin_requests`
  const { data } = await axios.post<AdminRequest>(endpoint, params, {
    headers: requestHeaders,
  })
  return data
}
/**
 * List the current user's admin requests of a given type, filtered by status.
 *
 * @param requestType - The admin request type to look up.
 * @param statuses - Statuses to include; each is sent as a repeated
 *   `statuses` query parameter.
 * @returns The matching requests, or null when the API returns no body.
 */
export async function getMyAdminRequests(
  requestType: AdminRequestType,
  statuses: AdminRequestStatus[],
): Promise<AdminRequest[] | null> {
  const { accessToken, orgUUID } = await prepareApiRequest()
  const headers = {
    ...getOAuthHeaders(accessToken),
    'x-organization-uuid': orgUUID,
  }
  // Build the query with URLSearchParams so values are safely encoded;
  // repeated `statuses` keys are the API's expected multi-value format.
  const query = new URLSearchParams({ request_type: requestType })
  for (const status of statuses) {
    query.append('statuses', status)
  }
  const url = `${getOauthConfig().BASE_API_URL}/api/oauth/organizations/${orgUUID}/admin_requests/me?${query.toString()}`
  const response = await axios.get<AdminRequest[] | null>(url, {
    headers,
  })
  return response.data
}
// Eligibility verdict for one admin request type in the current org.
type AdminRequestEligibilityResponse = {
  request_type: AdminRequestType
  is_allowed: boolean
}
/**
 * Check if a specific admin request type is allowed for this org.
 */
export async function checkAdminRequestEligibility(
  requestType: AdminRequestType,
): Promise<AdminRequestEligibilityResponse | null> {
  const { accessToken, orgUUID } = await prepareApiRequest()
  const requestHeaders = {
    ...getOAuthHeaders(accessToken),
    'x-organization-uuid': orgUUID,
  }
  const base = getOauthConfig().BASE_API_URL
  const endpoint = `${base}/api/oauth/organizations/${orgUUID}/admin_requests/eligibility?request_type=${requestType}`
  const { data } = await axios.get<AdminRequestEligibilityResponse>(endpoint, {
    headers: requestHeaders,
  })
  return data
}
+141
View File
@@ -0,0 +1,141 @@
import axios from 'axios'
import isEqual from 'lodash-es/isEqual.js'
import {
getAnthropicApiKey,
getClaudeAIOAuthTokens,
hasProfileScope,
} from 'src/utils/auth.js'
import { z } from 'zod'
import { getOauthConfig, OAUTH_BETA_HEADER } from '../../constants/oauth.js'
import { getGlobalConfig, saveGlobalConfig } from '../../utils/config.js'
import { logForDebugging } from '../../utils/debug.js'
import { withOAuth401Retry } from '../../utils/http.js'
import { lazySchema } from '../../utils/lazySchema.js'
import { logError } from '../../utils/log.js'
import { getAPIProvider } from '../../utils/model/providers.js'
import { isEssentialTrafficOnly } from '../../utils/privacyLevel.js'
import { getClaudeCodeUserAgent } from '../../utils/userAgent.js'
// Expected shape of /api/claude_cli/bootstrap responses. Wrapped in
// lazySchema so the zod object is only built on first use.
const bootstrapResponseSchema = lazySchema(() =>
  z.object({
    // Opaque server-provided blob cached client-side; may be absent.
    client_data: z.record(z.unknown()).nullish(),
    // Extra model choices, transformed into the {value, label, description}
    // option shape consumed downstream.
    additional_model_options: z
      .array(
        z
          .object({
            model: z.string(),
            name: z.string(),
            description: z.string(),
          })
          .transform(({ model, name, description }) => ({
            value: model,
            label: name,
            description,
          })),
      )
      .nullish(),
  }),
)
type BootstrapResponse = z.infer<ReturnType<typeof bootstrapResponseSchema>>
/**
 * Fetch /api/claude_cli/bootstrap and validate the response.
 *
 * Returns null without throwing when the fetch is skipped (nonessential
 * traffic disabled, non-first-party provider, no usable credentials) or
 * when the response fails schema validation. Network errors are logged
 * with their status/code and rethrown to the caller.
 */
async function fetchBootstrapAPI(): Promise<BootstrapResponse | null> {
  if (isEssentialTrafficOnly()) {
    logForDebugging('[Bootstrap] Skipped: Nonessential traffic disabled')
    return null
  }
  if (getAPIProvider() !== 'firstParty') {
    logForDebugging('[Bootstrap] Skipped: 3P provider')
    return null
  }
  // OAuth preferred (requires user:profile scope — service-key OAuth tokens
  // lack it and would 403). Fall back to API key auth for console users.
  const apiKey = getAnthropicApiKey()
  const hasUsableOAuth =
    getClaudeAIOAuthTokens()?.accessToken && hasProfileScope()
  if (!hasUsableOAuth && !apiKey) {
    logForDebugging('[Bootstrap] Skipped: no usable OAuth or API key')
    return null
  }
  const endpoint = `${getOauthConfig().BASE_API_URL}/api/claude_cli/bootstrap`
  // withOAuth401Retry handles the refresh-and-retry. API key users fail
  // through on 401 (no refresh mechanism — no OAuth token to pass).
  try {
    return await withOAuth401Retry(async () => {
      // Re-read OAuth each call so the retry picks up the refreshed token.
      const token = getClaudeAIOAuthTokens()?.accessToken
      let authHeaders: Record<string, string>
      if (token && hasProfileScope()) {
        authHeaders = {
          Authorization: `Bearer ${token}`,
          'anthropic-beta': OAUTH_BETA_HEADER,
        }
      } else if (apiKey) {
        authHeaders = { 'x-api-key': apiKey }
      } else {
        logForDebugging('[Bootstrap] No auth available on retry, aborting')
        return null
      }
      logForDebugging('[Bootstrap] Fetching')
      const response = await axios.get<unknown>(endpoint, {
        headers: {
          'Content-Type': 'application/json',
          'User-Agent': getClaudeCodeUserAgent(),
          ...authHeaders,
        },
        timeout: 5000,
      })
      // Schema validation failure is treated as "no data", not an error.
      const parsed = bootstrapResponseSchema().safeParse(response.data)
      if (!parsed.success) {
        logForDebugging(
          `[Bootstrap] Response failed validation: ${parsed.error.message}`,
        )
        return null
      }
      logForDebugging('[Bootstrap] Fetch ok')
      return parsed.data
    })
  } catch (error) {
    logForDebugging(
      `[Bootstrap] Fetch failed: ${axios.isAxiosError(error) ? (error.response?.status ?? error.code) : 'unknown'}`,
    )
    throw error
  }
}
/**
 * Fetch bootstrap data from the API and persist to disk cache.
 * Errors are logged and swallowed — bootstrap is best-effort.
 */
export async function fetchBootstrapData(): Promise<void> {
  try {
    const result = await fetchBootstrapAPI()
    if (!result) return
    const clientData = result.client_data ?? null
    const additionalModelOptions = result.additional_model_options ?? []
    // Only persist if data actually changed — avoids a config write on every startup.
    const existing = getGlobalConfig()
    const unchanged =
      isEqual(existing.clientDataCache, clientData) &&
      isEqual(existing.additionalModelOptionsCache, additionalModelOptions)
    if (unchanged) {
      logForDebugging('[Bootstrap] Cache unchanged, skipping write')
      return
    }
    logForDebugging('[Bootstrap] Cache updated, persisting to disk')
    saveGlobalConfig(current => ({
      ...current,
      clientDataCache: clientData,
      additionalModelOptionsCache: additionalModelOptions,
    }))
  } catch (error) {
    logError(error)
  }
}
File diff suppressed because it is too large Load Diff
+389
View File
@@ -0,0 +1,389 @@
import Anthropic, { type ClientOptions } from '@anthropic-ai/sdk'
import { randomUUID } from 'crypto'
import type { GoogleAuth } from 'google-auth-library'
import {
checkAndRefreshOAuthTokenIfNeeded,
getAnthropicApiKey,
getApiKeyFromApiKeyHelper,
getClaudeAIOAuthTokens,
isClaudeAISubscriber,
refreshAndGetAwsCredentials,
refreshGcpCredentialsIfNeeded,
} from 'src/utils/auth.js'
import { getUserAgent } from 'src/utils/http.js'
import { getSmallFastModel } from 'src/utils/model/model.js'
import {
getAPIProvider,
isFirstPartyAnthropicBaseUrl,
} from 'src/utils/model/providers.js'
import { getProxyFetchOptions } from 'src/utils/proxy.js'
import {
getIsNonInteractiveSession,
getSessionId,
} from '../../bootstrap/state.js'
import { getOauthConfig } from '../../constants/oauth.js'
import { isDebugToStdErr, logForDebugging } from '../../utils/debug.js'
import {
getAWSRegion,
getVertexRegionForModel,
isEnvTruthy,
} from '../../utils/envUtils.js'
/**
* Environment variables for different client types:
*
* Direct API:
* - ANTHROPIC_API_KEY: Required for direct API access
*
* AWS Bedrock:
* - AWS credentials configured via aws-sdk defaults
* - AWS_REGION or AWS_DEFAULT_REGION: Sets the AWS region for all models (default: us-east-1)
* - ANTHROPIC_SMALL_FAST_MODEL_AWS_REGION: Optional. Override AWS region specifically for the small fast model (Haiku)
*
* Foundry (Azure):
* - ANTHROPIC_FOUNDRY_RESOURCE: Your Azure resource name (e.g., 'my-resource')
* For the full endpoint: https://{resource}.services.ai.azure.com/anthropic/v1/messages
* - ANTHROPIC_FOUNDRY_BASE_URL: Optional. Alternative to resource - provide full base URL directly
* (e.g., 'https://my-resource.services.ai.azure.com')
*
* Authentication (one of the following):
* - ANTHROPIC_FOUNDRY_API_KEY: Your Microsoft Foundry API key (if using API key auth)
* - Azure AD authentication: If no API key is provided, uses DefaultAzureCredential
* which supports multiple auth methods (environment variables, managed identity,
* Azure CLI, etc.). See: https://docs.microsoft.com/en-us/javascript/api/@azure/identity
*
* Vertex AI:
* - Model-specific region variables (highest priority):
* - VERTEX_REGION_CLAUDE_3_5_HAIKU: Region for Claude 3.5 Haiku model
* - VERTEX_REGION_CLAUDE_HAIKU_4_5: Region for Claude Haiku 4.5 model
* - VERTEX_REGION_CLAUDE_3_5_SONNET: Region for Claude 3.5 Sonnet model
* - VERTEX_REGION_CLAUDE_3_7_SONNET: Region for Claude 3.7 Sonnet model
* - CLOUD_ML_REGION: Optional. The default GCP region to use for all models
* If specific model region not specified above
* - ANTHROPIC_VERTEX_PROJECT_ID: Required. Your GCP project ID
* - Standard GCP credentials configured via google-auth-library
*
* Priority for determining region:
* 1. Hardcoded model-specific environment variables
* 2. Global CLOUD_ML_REGION variable
* 3. Default region from config
* 4. Fallback region (us-east5)
*/
/**
 * Build an Anthropic SDK logger that routes every level to stderr via
 * console.error, each line tagged with its level.
 */
function createStderrLogger(): ClientOptions['logger'] {
  const emit =
    (level: string) =>
    (msg: unknown, ...args: unknown[]) =>
      // biome-ignore lint/suspicious/noConsole:: intentional console output -- SDK logger must use console
      console.error(`[Anthropic SDK ${level}]`, msg, ...args)
  return {
    error: emit('ERROR'),
    warn: emit('WARN'),
    info: emit('INFO'),
    debug: emit('DEBUG'),
  }
}
/**
 * Construct an Anthropic SDK client for the configured provider.
 *
 * Provider selection (checked in order via env flags):
 * - CLAUDE_CODE_USE_BEDROCK → AnthropicBedrock
 * - CLAUDE_CODE_USE_FOUNDRY → AnthropicFoundry
 * - CLAUDE_CODE_USE_VERTEX  → AnthropicVertex
 * - otherwise the first-party Anthropic client (OAuth token for
 *   Claude.ai subscribers, API key for console users).
 *
 * @param apiKey - Explicit API key; falls back to getAnthropicApiKey().
 * @param maxRetries - Retry count passed through to the SDK.
 * @param model - Used only to pick per-model regions (Bedrock/Vertex).
 * @param fetchOverride - Custom fetch implementation (wrapped by buildFetch).
 * @param source - Caller tag included in request debug logs.
 */
export async function getAnthropicClient({
  apiKey,
  maxRetries,
  model,
  fetchOverride,
  source,
}: {
  apiKey?: string
  maxRetries: number
  model?: string
  fetchOverride?: ClientOptions['fetch']
  source?: string
}): Promise<Anthropic> {
  const containerId = process.env.CLAUDE_CODE_CONTAINER_ID
  const remoteSessionId = process.env.CLAUDE_CODE_REMOTE_SESSION_ID
  const clientApp = process.env.CLAUDE_AGENT_SDK_CLIENT_APP
  const customHeaders = getCustomHeaders()
  const defaultHeaders: { [key: string]: string } = {
    'x-app': 'cli',
    'User-Agent': getUserAgent(),
    'X-Claude-Code-Session-Id': getSessionId(),
    ...customHeaders,
    ...(containerId ? { 'x-claude-remote-container-id': containerId } : {}),
    ...(remoteSessionId
      ? { 'x-claude-remote-session-id': remoteSessionId }
      : {}),
    // SDK consumers can identify their app/library for backend analytics
    ...(clientApp ? { 'x-client-app': clientApp } : {}),
  }
  // Log API client configuration for HFI debugging
  logForDebugging(
    `[API:request] Creating client, ANTHROPIC_CUSTOM_HEADERS present: ${!!process.env.ANTHROPIC_CUSTOM_HEADERS}, has Authorization header: ${!!customHeaders['Authorization']}`,
  )
  // Add additional protection header if enabled via env var
  const additionalProtectionEnabled = isEnvTruthy(
    process.env.CLAUDE_CODE_ADDITIONAL_PROTECTION,
  )
  if (additionalProtectionEnabled) {
    defaultHeaders['x-anthropic-additional-protection'] = 'true'
  }
  logForDebugging('[API:auth] OAuth token check starting')
  await checkAndRefreshOAuthTokenIfNeeded()
  logForDebugging('[API:auth] OAuth token check complete')
  // Non-subscribers authenticate via Authorization header (env token or
  // apiKeyHelper) instead of the SDK's OAuth authToken path.
  if (!isClaudeAISubscriber()) {
    await configureApiKeyHeaders(defaultHeaders, getIsNonInteractiveSession())
  }
  const resolvedFetch = buildFetch(fetchOverride, source)
  // Options shared by every provider-specific client constructed below.
  const ARGS = {
    defaultHeaders,
    maxRetries,
    timeout: parseInt(process.env.API_TIMEOUT_MS || String(600 * 1000), 10),
    dangerouslyAllowBrowser: true,
    fetchOptions: getProxyFetchOptions({
      forAnthropicAPI: true,
    }) as ClientOptions['fetchOptions'],
    ...(resolvedFetch && {
      fetch: resolvedFetch,
    }),
  }
  if (isEnvTruthy(process.env.CLAUDE_CODE_USE_BEDROCK)) {
    const { AnthropicBedrock } = await import('@anthropic-ai/bedrock-sdk')
    // Use region override for small fast model if specified
    const awsRegion =
      model === getSmallFastModel() &&
      process.env.ANTHROPIC_SMALL_FAST_MODEL_AWS_REGION
        ? process.env.ANTHROPIC_SMALL_FAST_MODEL_AWS_REGION
        : getAWSRegion()
    const bedrockArgs: ConstructorParameters<typeof AnthropicBedrock>[0] = {
      ...ARGS,
      awsRegion,
      ...(isEnvTruthy(process.env.CLAUDE_CODE_SKIP_BEDROCK_AUTH) && {
        skipAuth: true,
      }),
      ...(isDebugToStdErr() && { logger: createStderrLogger() }),
    }
    // Add API key authentication if available
    if (process.env.AWS_BEARER_TOKEN_BEDROCK) {
      bedrockArgs.skipAuth = true
      // Add the Bearer token for Bedrock API key authentication
      bedrockArgs.defaultHeaders = {
        ...bedrockArgs.defaultHeaders,
        Authorization: `Bearer ${process.env.AWS_BEARER_TOKEN_BEDROCK}`,
      }
    } else if (!isEnvTruthy(process.env.CLAUDE_CODE_SKIP_BEDROCK_AUTH)) {
      // Refresh auth and get credentials with cache clearing
      const cachedCredentials = await refreshAndGetAwsCredentials()
      if (cachedCredentials) {
        bedrockArgs.awsAccessKey = cachedCredentials.accessKeyId
        bedrockArgs.awsSecretKey = cachedCredentials.secretAccessKey
        bedrockArgs.awsSessionToken = cachedCredentials.sessionToken
      }
    }
    // we have always been lying about the return type - this doesn't support batching or models
    return new AnthropicBedrock(bedrockArgs) as unknown as Anthropic
  }
  if (isEnvTruthy(process.env.CLAUDE_CODE_USE_FOUNDRY)) {
    const { AnthropicFoundry } = await import('@anthropic-ai/foundry-sdk')
    // Determine Azure AD token provider based on configuration
    // SDK reads ANTHROPIC_FOUNDRY_API_KEY by default
    let azureADTokenProvider: (() => Promise<string>) | undefined
    if (!process.env.ANTHROPIC_FOUNDRY_API_KEY) {
      if (isEnvTruthy(process.env.CLAUDE_CODE_SKIP_FOUNDRY_AUTH)) {
        // Mock token provider for testing/proxy scenarios (similar to Vertex mock GoogleAuth)
        azureADTokenProvider = () => Promise.resolve('')
      } else {
        // Use real Azure AD authentication with DefaultAzureCredential
        const {
          DefaultAzureCredential: AzureCredential,
          getBearerTokenProvider,
        } = await import('@azure/identity')
        azureADTokenProvider = getBearerTokenProvider(
          new AzureCredential(),
          'https://cognitiveservices.azure.com/.default',
        )
      }
    }
    const foundryArgs: ConstructorParameters<typeof AnthropicFoundry>[0] = {
      ...ARGS,
      ...(azureADTokenProvider && { azureADTokenProvider }),
      ...(isDebugToStdErr() && { logger: createStderrLogger() }),
    }
    // we have always been lying about the return type - this doesn't support batching or models
    return new AnthropicFoundry(foundryArgs) as unknown as Anthropic
  }
  if (isEnvTruthy(process.env.CLAUDE_CODE_USE_VERTEX)) {
    // Refresh GCP credentials if gcpAuthRefresh is configured and credentials are expired
    // This is similar to how we handle AWS credential refresh for Bedrock
    if (!isEnvTruthy(process.env.CLAUDE_CODE_SKIP_VERTEX_AUTH)) {
      await refreshGcpCredentialsIfNeeded()
    }
    const [{ AnthropicVertex }, { GoogleAuth }] = await Promise.all([
      import('@anthropic-ai/vertex-sdk'),
      import('google-auth-library'),
    ])
    // TODO: Cache either GoogleAuth instance or AuthClient to improve performance
    // Currently we create a new GoogleAuth instance for every getAnthropicClient() call
    // This could cause repeated authentication flows and metadata server checks
    // However, caching needs careful handling of:
    // - Credential refresh/expiration
    // - Environment variable changes (GOOGLE_APPLICATION_CREDENTIALS, project vars)
    // - Cross-request auth state management
    // See: https://github.com/googleapis/google-auth-library-nodejs/issues/390 for caching challenges
    // Prevent metadata server timeout by providing projectId as fallback
    // google-auth-library checks project ID in this order:
    // 1. Environment variables (GCLOUD_PROJECT, GOOGLE_CLOUD_PROJECT, etc.)
    // 2. Credential files (service account JSON, ADC file)
    // 3. gcloud config
    // 4. GCE metadata server (causes 12s timeout outside GCP)
    //
    // We only set projectId if user hasn't configured other discovery methods
    // to avoid interfering with their existing auth setup
    // Check project environment variables in same order as google-auth-library
    // See: https://github.com/googleapis/google-auth-library-nodejs/blob/main/src/auth/googleauth.ts
    const hasProjectEnvVar =
      process.env['GCLOUD_PROJECT'] ||
      process.env['GOOGLE_CLOUD_PROJECT'] ||
      process.env['gcloud_project'] ||
      process.env['google_cloud_project']
    // Check for credential file paths (service account or ADC)
    // Note: We're checking both standard and lowercase variants to be safe,
    // though we should verify what google-auth-library actually checks
    const hasKeyFile =
      process.env['GOOGLE_APPLICATION_CREDENTIALS'] ||
      process.env['google_application_credentials']
    const googleAuth = isEnvTruthy(process.env.CLAUDE_CODE_SKIP_VERTEX_AUTH)
      ? ({
          // Mock GoogleAuth for testing/proxy scenarios
          getClient: () => ({
            getRequestHeaders: () => ({}),
          }),
        } as unknown as GoogleAuth)
      : new GoogleAuth({
          scopes: ['https://www.googleapis.com/auth/cloud-platform'],
          // Only use ANTHROPIC_VERTEX_PROJECT_ID as last resort fallback
          // This prevents the 12-second metadata server timeout when:
          // - No project env vars are set AND
          // - No credential keyfile is specified AND
          // - ADC file exists but lacks project_id field
          //
          // Risk: If auth project != API target project, this could cause billing/audit issues
          // Mitigation: Users can set GOOGLE_CLOUD_PROJECT to override
          ...(hasProjectEnvVar || hasKeyFile
            ? {}
            : {
                projectId: process.env.ANTHROPIC_VERTEX_PROJECT_ID,
              }),
        })
    const vertexArgs: ConstructorParameters<typeof AnthropicVertex>[0] = {
      ...ARGS,
      region: getVertexRegionForModel(model),
      googleAuth,
      ...(isDebugToStdErr() && { logger: createStderrLogger() }),
    }
    // we have always been lying about the return type - this doesn't support batching or models
    return new AnthropicVertex(vertexArgs) as unknown as Anthropic
  }
  // Determine authentication method based on available tokens
  const clientConfig: ConstructorParameters<typeof Anthropic>[0] = {
    apiKey: isClaudeAISubscriber() ? null : apiKey || getAnthropicApiKey(),
    authToken: isClaudeAISubscriber()
      ? getClaudeAIOAuthTokens()?.accessToken
      : undefined,
    // Set baseURL from OAuth config when using staging OAuth
    ...(process.env.USER_TYPE === 'ant' &&
    isEnvTruthy(process.env.USE_STAGING_OAUTH)
      ? { baseURL: getOauthConfig().BASE_API_URL }
      : {}),
    ...ARGS,
    ...(isDebugToStdErr() && { logger: createStderrLogger() }),
  }
  return new Anthropic(clientConfig)
}
/**
 * Populate the Authorization header from ANTHROPIC_AUTH_TOKEN or, failing
 * that, from the user-configured apiKeyHelper. Mutates `headers` in place;
 * leaves it untouched when neither source yields a token.
 */
async function configureApiKeyHeaders(
  headers: Record<string, string>,
  isNonInteractiveSession: boolean,
): Promise<void> {
  const envToken = process.env.ANTHROPIC_AUTH_TOKEN
  if (envToken) {
    headers['Authorization'] = `Bearer ${envToken}`
    return
  }
  // Only consult the (potentially slow) helper when the env var is unset.
  const helperToken = await getApiKeyFromApiKeyHelper(isNonInteractiveSession)
  if (helperToken) {
    headers['Authorization'] = `Bearer ${helperToken}`
  }
}
/**
 * Parse ANTHROPIC_CUSTOM_HEADERS (curl-style "Name: Value" pairs, one per
 * line) into a header map. Lines without a colon or with an empty name are
 * skipped; names and values are trimmed.
 */
function getCustomHeaders(): Record<string, string> {
  const parsed: Record<string, string> = {}
  const raw = process.env.ANTHROPIC_CUSTOM_HEADERS
  if (!raw) return parsed
  // Support multiple headers separated by newlines.
  for (const line of raw.split(/\n|\r\n/)) {
    if (!line.trim()) continue
    // Split on the first ':' only, then trim — avoids regex backtracking
    // on malformed long header lines.
    const sep = line.indexOf(':')
    if (sep === -1) continue
    const name = line.slice(0, sep).trim()
    if (!name) continue
    parsed[name] = line.slice(sep + 1).trim()
  }
  return parsed
}
export const CLIENT_REQUEST_ID_HEADER = 'x-client-request-id'
/**
 * Wrap the fetch implementation so first-party API requests carry a
 * client-generated request ID and every request emits a debug log line.
 */
function buildFetch(
  fetchOverride: ClientOptions['fetch'],
  source: string | undefined,
): ClientOptions['fetch'] {
  // eslint-disable-next-line eslint-plugin-n/no-unsupported-features/node-builtins
  const baseFetch = fetchOverride ?? globalThis.fetch
  // Only send to the first-party API — Bedrock/Vertex/Foundry don't log it
  // and unknown headers risk rejection by strict proxies (inc-4029 class).
  const shouldInjectId =
    getAPIProvider() === 'firstParty' && isFirstPartyAnthropicBaseUrl()
  return (input, init) => {
    // eslint-disable-next-line eslint-plugin-n/no-unsupported-features/node-builtins
    const mergedHeaders = new Headers(init?.headers)
    // Generate a client-side request ID so timeouts (which return no server
    // request ID) can still be correlated with server logs by the API team.
    // Callers that want to track the ID themselves can pre-set the header.
    if (shouldInjectId && !mergedHeaders.has(CLIENT_REQUEST_ID_HEADER)) {
      mergedHeaders.set(CLIENT_REQUEST_ID_HEADER, randomUUID())
    }
    try {
      // eslint-disable-next-line eslint-plugin-n/no-unsupported-features/node-builtins
      const requestUrl = input instanceof Request ? input.url : String(input)
      const requestId = mergedHeaders.get(CLIENT_REQUEST_ID_HEADER)
      const idSuffix = requestId
        ? ` ${CLIENT_REQUEST_ID_HEADER}=${requestId}`
        : ''
      logForDebugging(
        `[API REQUEST] ${new URL(requestUrl).pathname}${idSuffix} source=${source ?? 'unknown'}`,
      )
    } catch {
      // never let logging crash the fetch
    }
    return baseFetch(input, { ...init, headers: mergedHeaders })
  }
}
+226
View File
@@ -0,0 +1,226 @@
import type { ClientOptions } from '@anthropic-ai/sdk'
import { createHash } from 'crypto'
import { promises as fs } from 'fs'
import { dirname, join } from 'path'
import { getSessionId } from 'src/bootstrap/state.js'
import { getClaudeConfigHomeDir } from '../../utils/envUtils.js'
import { jsonParse, jsonStringify } from '../../utils/slowOperations.js'
/** Hex-encoded SHA-256 digest of `str`. */
function hashString(str: string): string {
  const hasher = createHash('sha256')
  hasher.update(str)
  return hasher.digest('hex')
}
// Cache last few API requests for ant users (e.g., for /issue command)
const MAX_CACHED_REQUESTS = 5
// Most-recent-last ring of captured requests; trimmed to MAX_CACHED_REQUESTS.
const cachedApiRequests: Array<{ timestamp: string; request: unknown }> = []
// Per-session bookkeeping used by dumpRequest to avoid re-writing data.
type DumpState = {
  initialized: boolean
  // How many messages of the conversation have already been dumped.
  messageCountSeen: number
  lastInitDataHash: string
  // Cheap proxy for change detection — skips the expensive stringify+hash
  // when model/tools/system are structurally identical to the last call.
  lastInitFingerprint: string
}
// Track state per session to avoid duplicating data
const dumpState = new Map<string, DumpState>()
/** Snapshot copy of the recently cached API requests. */
export function getLastApiRequests(): Array<{
  timestamp: string
  request: unknown
}> {
  return cachedApiRequests.slice()
}
/** Drop every cached API request (mutates the shared cache in place). */
export function clearApiRequestCache(): void {
  cachedApiRequests.splice(0, cachedApiRequests.length)
}
/** Forget per-session dump bookkeeping for one agent/session. */
export function clearDumpState(agentIdOrSessionId: string): void {
  dumpState.delete(agentIdOrSessionId)
}
/** Forget dump bookkeeping for every tracked agent/session. */
export function clearAllDumpState(): void {
  dumpState.clear()
}
/**
 * Record an outgoing API request for later inspection. No-op unless
 * USER_TYPE is 'ant'; only the newest MAX_CACHED_REQUESTS entries are kept.
 */
export function addApiRequestToCache(requestData: unknown): void {
  if (process.env.USER_TYPE !== 'ant') return
  // Evict the oldest entries before pushing so the cap is never exceeded.
  while (cachedApiRequests.length >= MAX_CACHED_REQUESTS) {
    cachedApiRequests.shift()
  }
  cachedApiRequests.push({
    timestamp: new Date().toISOString(),
    request: requestData,
  })
}
/**
 * Path of the JSONL dump file for a session (defaults to the current one).
 */
export function getDumpPromptsPath(agentIdOrSessionId?: string): string {
  const id = agentIdOrSessionId ?? getSessionId()
  return join(getClaudeConfigHomeDir(), 'dump-prompts', `${id}.jsonl`)
}
/**
 * Fire-and-forget append of JSONL lines; creates the parent directory on
 * demand and swallows all I/O errors (best-effort debug tooling).
 */
function appendToFile(filePath: string, entries: string[]): void {
  if (!entries.length) return
  void (async () => {
    try {
      await fs.mkdir(dirname(filePath), { recursive: true })
      await fs.appendFile(filePath, `${entries.join('\n')}\n`)
    } catch {
      // best effort — never surface dump I/O failures
    }
  })()
}
/**
 * Cheap structural fingerprint of a request's model, tool names, and total
 * system-prompt length — used to skip the expensive stringify+hash when
 * nothing has changed between calls.
 */
function initFingerprint(req: Record<string, unknown>): string {
  const tools = req.tools as Array<{ name?: string }> | undefined
  const system = req.system as unknown[] | string | undefined
  let systemLength = 0
  if (typeof system === 'string') {
    systemLength = system.length
  } else if (Array.isArray(system)) {
    for (const block of system) {
      systemLength += (block as { text?: string }).text?.length ?? 0
    }
  }
  const toolNames = (tools ?? []).map(t => t.name ?? '').join(',')
  return `${req.model}|${toolNames}|${systemLength}`
}
/**
 * Parse one outgoing request body and append JSONL entries to the dump file
 * (writes only when USER_TYPE is 'ant'): an `init` entry on the first
 * request, a `system_update` entry when the non-message payload changes,
 * and one `message` entry per new user message since the last call.
 * Parse failures are silently ignored.
 *
 * @param body - Raw JSON request body string.
 * @param ts - ISO timestamp captured when the request was issued.
 * @param state - Per-session bookkeeping, mutated in place.
 * @param filePath - JSONL dump file path for this session.
 */
function dumpRequest(
  body: string,
  ts: string,
  state: DumpState,
  filePath: string,
): void {
  try {
    const req = jsonParse(body) as Record<string, unknown>
    addApiRequestToCache(req)
    if (process.env.USER_TYPE !== 'ant') return
    const entries: string[] = []
    const messages = (req.messages ?? []) as Array<{ role?: string }>
    // Write init data (system, tools, metadata) on first request,
    // and a system_update entry whenever it changes.
    // Cheap fingerprint first: system+tools don't change between turns,
    // so skip the 300ms stringify when the shape is unchanged.
    const fingerprint = initFingerprint(req)
    if (!state.initialized || fingerprint !== state.lastInitFingerprint) {
      const { messages: _, ...initData } = req
      const initDataStr = jsonStringify(initData)
      const initDataHash = hashString(initDataStr)
      state.lastInitFingerprint = fingerprint
      if (!state.initialized) {
        state.initialized = true
        state.lastInitDataHash = initDataHash
        // Reuse initDataStr rather than re-serializing initData inside a wrapper.
        // timestamp from toISOString() contains no chars needing JSON escaping.
        entries.push(
          `{"type":"init","timestamp":"${ts}","data":${initDataStr}}`,
        )
      } else if (initDataHash !== state.lastInitDataHash) {
        state.lastInitDataHash = initDataHash
        entries.push(
          `{"type":"system_update","timestamp":"${ts}","data":${initDataStr}}`,
        )
      }
    }
    // Write only new user messages (assistant messages captured in response)
    for (const msg of messages.slice(state.messageCountSeen)) {
      if (msg.role === 'user') {
        entries.push(
          jsonStringify({ type: 'message', timestamp: ts, data: msg }),
        )
      }
    }
    state.messageCountSeen = messages.length
    appendToFile(filePath, entries)
  } catch {
    // Ignore parsing errors
  }
}
/**
 * Build a fetch wrapper that mirrors API traffic for one agent/session into
 * a JSONL dump file (debug tooling; writes only when USER_TYPE is 'ant').
 *
 * POST request bodies are parsed off the critical path via setImmediate;
 * successful responses are cloned and appended asynchronously. SSE bodies
 * are decoded and split into their individual `data:` JSON chunks first.
 */
export function createDumpPromptsFetch(
  agentIdOrSessionId: string,
): ClientOptions['fetch'] {
  const filePath = getDumpPromptsPath(agentIdOrSessionId)
  return async (input: RequestInfo | URL, init?: RequestInit) => {
    const state = dumpState.get(agentIdOrSessionId) ?? {
      initialized: false,
      messageCountSeen: 0,
      lastInitDataHash: '',
      lastInitFingerprint: '',
    }
    dumpState.set(agentIdOrSessionId, state)
    let timestamp: string | undefined
    if (init?.method === 'POST' && init.body) {
      timestamp = new Date().toISOString()
      // Parsing + stringifying the request (system prompt + tool schemas = MBs)
      // takes hundreds of ms. Defer so it doesn't block the actual API call —
      // this is debug tooling for /issue, not on the critical path.
      setImmediate(dumpRequest, init.body as string, timestamp, state, filePath)
    }
    // eslint-disable-next-line eslint-plugin-n/no-unsupported-features/node-builtins
    const response = await globalThis.fetch(input, init)
    // Save response async
    if (timestamp && response.ok && process.env.USER_TYPE === 'ant') {
      // Clone so the caller can still consume the original body stream.
      const cloned = response.clone()
      void (async () => {
        try {
          const isStreaming = cloned.headers
            .get('content-type')
            ?.includes('text/event-stream')
          let data: unknown
          if (isStreaming && cloned.body) {
            // Parse SSE stream into chunks
            const reader = cloned.body.getReader()
            const decoder = new TextDecoder()
            let buffer = ''
            try {
              while (true) {
                const { done, value } = await reader.read()
                if (done) break
                buffer += decoder.decode(value, { stream: true })
              }
            } finally {
              reader.releaseLock()
            }
            const chunks: unknown[] = []
            // SSE events are separated by blank lines; payload lines are
            // prefixed with "data: ".
            for (const event of buffer.split('\n\n')) {
              for (const line of event.split('\n')) {
                if (line.startsWith('data: ') && line !== 'data: [DONE]') {
                  try {
                    chunks.push(jsonParse(line.slice(6)))
                  } catch {
                    // Ignore parse errors
                  }
                }
              }
            }
            data = { stream: true, chunks }
          } else {
            data = await cloned.json()
          }
          await fs.appendFile(
            filePath,
            jsonStringify({ type: 'response', timestamp, data }) + '\n',
          )
        } catch {
          // Best effort
        }
      })()
    }
    return response
  }
}
+22
View File
@@ -0,0 +1,22 @@
import type { NonNullableUsage } from '../../entrypoints/sdk/sdkUtilityTypes.js'
/**
 * Zero-initialized usage object. Extracted from logging.ts so that
 * bridge/replBridge.ts can import it without transitively pulling in
 * api/errors.ts → utils/messages.ts → BashTool.tsx → the world.
 *
 * NOTE: Readonly<T> is shallow — nested objects (server_tool_use,
 * cache_creation, iterations) are still mutable; treat the whole value
 * as immutable and spread-copy before accumulating into it.
 */
export const EMPTY_USAGE: Readonly<NonNullableUsage> = {
  input_tokens: 0,
  cache_creation_input_tokens: 0,
  cache_read_input_tokens: 0,
  output_tokens: 0,
  server_tool_use: { web_search_requests: 0, web_fetch_requests: 0 },
  service_tier: 'standard',
  cache_creation: {
    ephemeral_1h_input_tokens: 0,
    ephemeral_5m_input_tokens: 0,
  },
  inference_geo: '',
  iterations: [],
  speed: 'standard',
}
+260
View File
@@ -0,0 +1,260 @@
import type { APIError } from '@anthropic-ai/sdk'
// SSL/TLS error codes from OpenSSL (used by both Node.js and Bun)
// See: https://www.openssl.org/docs/man3.1/man3/X509_STORE_CTX_get_error.html
const SSL_ERROR_CODES = new Set([
  // Certificate verification errors
  'UNABLE_TO_VERIFY_LEAF_SIGNATURE',
  'UNABLE_TO_GET_ISSUER_CERT',
  'UNABLE_TO_GET_ISSUER_CERT_LOCALLY',
  'CERT_SIGNATURE_FAILURE',
  'CERT_NOT_YET_VALID',
  'CERT_HAS_EXPIRED',
  'CERT_REVOKED',
  'CERT_REJECTED',
  'CERT_UNTRUSTED',
  // Self-signed certificate errors
  'DEPTH_ZERO_SELF_SIGNED_CERT',
  'SELF_SIGNED_CERT_IN_CHAIN',
  // Chain errors
  'CERT_CHAIN_TOO_LONG',
  'PATH_LENGTH_EXCEEDED',
  // Hostname/altname errors
  'ERR_TLS_CERT_ALTNAME_INVALID',
  'HOSTNAME_MISMATCH',
  // TLS handshake errors
  'ERR_TLS_HANDSHAKE_TIMEOUT',
  'ERR_SSL_WRONG_VERSION_NUMBER',
  'ERR_SSL_DECRYPTION_FAILED_OR_BAD_RECORD_MAC',
])
// Root-cause info extracted from an error's `cause` chain.
export type ConnectionErrorDetails = {
  code: string
  message: string
  isSSLError: boolean
}
/**
 * Extracts connection error details from the error cause chain.
 * The Anthropic SDK wraps underlying errors in the `cause` property.
 * This function walks the cause chain to find the root error code/message.
 */
export function extractConnectionErrorDetails(
  error: unknown,
): ConnectionErrorDetails | null {
  if (!error || typeof error !== 'object') {
    return null
  }
  const maxDepth = 5 // Prevent infinite loops
  let current: unknown = error
  // Walk the cause chain to find the root error with a code
  for (let depth = 0; current && depth < maxDepth; depth++) {
    if (!(current instanceof Error)) break
    if ('code' in current && typeof current.code === 'string') {
      return {
        code: current.code,
        message: current.message,
        isSSLError: SSL_ERROR_CODES.has(current.code),
      }
    }
    // Move to the next cause in the chain (guarding self-referential causes)
    if (!('cause' in current) || current.cause === current) break
    current = current.cause
  }
  return null
}
/**
 * Returns an actionable hint for SSL/TLS errors, intended for contexts outside
 * the main API client (OAuth token exchange, preflight connectivity checks)
 * where `formatAPIError` doesn't apply.
 *
 * Motivation: enterprise users behind TLS-intercepting proxies (Zscaler et al.)
 * see OAuth complete in-browser but the CLI's token exchange silently fails
 * with a raw SSL code. Surfacing the likely fix saves a support round-trip.
 */
export function getSSLErrorHint(error: unknown): string | null {
  const details = extractConnectionErrorDetails(error)
  if (details?.isSSLError) {
    return `SSL certificate error (${details.code}). If you are behind a corporate proxy or TLS-intercepting firewall, set NODE_EXTRA_CA_CERTS to your CA bundle path, or ask IT to allowlist *.anthropic.com. Run /doctor for details.`
  }
  return null
}
/**
 * Strips HTML content (e.g., CloudFlare error pages) from a message string,
 * returning the page's <title> text or empty string if HTML is detected.
 * Returns the original message unchanged if no HTML is found.
 */
function sanitizeMessageHTML(message: string): string {
  const looksLikeHTML =
    message.includes('<!DOCTYPE html') || message.includes('<html')
  if (!looksLikeHTML) {
    return message
  }
  const title = /<title>([^<]+)<\/title>/.exec(message)?.[1]
  return title ? title.trim() : ''
}
/**
 * Return a user-friendly version of an API error's message: HTML bodies
 * (e.g. CloudFlare error pages) are reduced to their title, plain messages
 * pass through unchanged.
 */
export function sanitizeAPIError(apiError: APIError): string {
  const { message } = apiError
  // Message is sometimes undefined on these errors.
  // TODO: figure out why
  return message ? sanitizeMessageHTML(message) : ''
}
/**
 * Shapes of deserialized API errors from session JSONL.
 *
 * After JSON round-tripping, the SDK's APIError loses its `.message`
 * property. The actual message lives at different nesting levels depending
 * on the provider:
 *
 * - Bedrock/proxy: `{ error: { message: "..." } }`
 * - Standard Anthropic API: `{ error: { error: { message: "..." } } }`
 *   (the outer `.error` is the response body, the inner `.error` is the API error)
 *
 * See also: `getErrorMessage` in `logging.ts` which handles the same shapes.
 */
type NestedAPIError = {
  error?: {
    message?: string
    error?: { message?: string }
  }
}
/**
 * Type guard: true when `value` is an object whose `error` property is
 * itself a non-null object (the deserialized API error envelope).
 */
function hasNestedError(value: unknown): value is NestedAPIError {
  if (typeof value !== 'object' || value === null) {
    return false
  }
  if (!('error' in value)) {
    return false
  }
  const inner = (value as { error: unknown }).error
  return typeof inner === 'object' && inner !== null
}
/**
 * Extract a human-readable message from a deserialized API error that lacks
 * a top-level `.message`.
 *
 * Tries the deeper (more specific) shape first:
 * 1. `error.error.error.message` — standard Anthropic API shape
 * 2. `error.error.message` — Bedrock shape
 *
 * @returns The first non-empty sanitized message found, else null.
 */
function extractNestedErrorMessage(error: APIError): string | null {
  if (!hasNestedError(error)) {
    return null
  }
  // Assign through the narrowed alias so TypeScript sees the nested shape
  // instead of the SDK's `Object | undefined`.
  const narrowed: NestedAPIError = error
  const outer = narrowed.error
  // Candidate messages, deepest first for specificity.
  const candidates = [outer?.error?.message, outer?.message]
  for (const candidate of candidates) {
    if (typeof candidate === 'string' && candidate.length > 0) {
      const clean = sanitizeMessageHTML(candidate)
      if (clean.length > 0) {
        return clean
      }
    }
  }
  return null
}
/**
 * Render an APIError as a single user-facing line.
 *
 * Precedence: connection-level failures from the cause chain (timeout,
 * SSL), then the SDK's generic "Connection error.", then fallbacks for
 * deserialized errors missing `.message`, then HTML sanitization of the
 * raw message.
 */
export function formatAPIError(error: APIError): string {
  const connectionDetails = extractConnectionErrorDetails(error)
  if (connectionDetails) {
    const { code, isSSLError } = connectionDetails
    if (code === 'ETIMEDOUT') {
      return 'Request timed out. Check your internet connection and proxy settings'
    }
    if (isSSLError) {
      // Several codes share a message; build the table once per call.
      const verifyFailed =
        'Unable to connect to API: SSL certificate verification failed. Check your proxy or corporate SSL certificates'
      const selfSigned =
        'Unable to connect to API: Self-signed certificate detected. Check your proxy or corporate SSL certificates'
      const hostnameMismatch =
        'Unable to connect to API: SSL certificate hostname mismatch'
      const sslMessages: Record<string, string> = {
        UNABLE_TO_VERIFY_LEAF_SIGNATURE: verifyFailed,
        UNABLE_TO_GET_ISSUER_CERT: verifyFailed,
        UNABLE_TO_GET_ISSUER_CERT_LOCALLY: verifyFailed,
        CERT_HAS_EXPIRED:
          'Unable to connect to API: SSL certificate has expired',
        CERT_REVOKED:
          'Unable to connect to API: SSL certificate has been revoked',
        DEPTH_ZERO_SELF_SIGNED_CERT: selfSigned,
        SELF_SIGNED_CERT_IN_CHAIN: selfSigned,
        ERR_TLS_CERT_ALTNAME_INVALID: hostnameMismatch,
        HOSTNAME_MISMATCH: hostnameMismatch,
        CERT_NOT_YET_VALID:
          'Unable to connect to API: SSL certificate is not yet valid',
      }
      return (
        sslMessages[code] ?? `Unable to connect to API: SSL error (${code})`
      )
    }
  }
  if (error.message === 'Connection error.') {
    // Include a non-SSL error code when we have one, for debugging.
    return connectionDetails?.code
      ? `Unable to connect to API (${connectionDetails.code})`
      : 'Unable to connect to API. Check your internet connection'
  }
  // Deserialized-from-JSONL errors (e.g. --resume) may lack `.message`.
  // Return a safe fallback so callers that access `.length` never crash.
  if (!error.message) {
    const nested = extractNestedErrorMessage(error)
    return nested ?? `API error (status ${error.status ?? 'unknown'})`
  }
  const sanitized = sanitizeAPIError(error)
  // Prefer the sanitized form only when sanitization actually changed it.
  return sanitized.length > 0 && sanitized !== error.message
    ? sanitized
    : error.message
}
File diff suppressed because it is too large Load Diff
+748
View File
@@ -0,0 +1,748 @@
/**
* Files API client for managing files
*
* This module provides functionality to download and upload files to Anthropic Public Files API.
* Used by the Claude Code agent to download file attachments at session startup.
*
* API Reference: https://docs.anthropic.com/en/api/files-content
*/
import axios from 'axios'
import { randomUUID } from 'crypto'
import * as fs from 'fs/promises'
import * as path from 'path'
import { count } from '../../utils/array.js'
import { getCwd } from '../../utils/cwd.js'
import { logForDebugging } from '../../utils/debug.js'
import { errorMessage } from '../../utils/errors.js'
import { logError } from '../../utils/log.js'
import { sleep } from '../../utils/sleep.js'
import {
type AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
logEvent,
} from '../analytics/index.js'
// Files API is currently in beta. oauth-2025-04-20 enables Bearer OAuth
// on public-api routes (auth.py: "oauth_auth" not in beta_versions → 404).
const FILES_API_BETA_HEADER = 'files-api-2025-04-14,oauth-2025-04-20'
const ANTHROPIC_VERSION = '2023-06-01'
// API base URL - uses ANTHROPIC_BASE_URL set by env-manager for the appropriate environment
// Falls back to public API for standalone usage
function getDefaultApiBaseUrl(): string {
  const fromEnv =
    process.env.ANTHROPIC_BASE_URL || process.env.CLAUDE_CODE_API_BASE_URL
  // `||` (not `??`) on purpose: an empty-string env var also falls through.
  return fromEnv || 'https://api.anthropic.com'
}
// Error-level debug log, namespaced with the [files-api] prefix.
function logDebugError(message: string): void {
  logForDebugging('[files-api] ' + message, { level: 'error' })
}
// Info-level debug log, namespaced with the [files-api] prefix.
function logDebug(message: string): void {
  logForDebugging('[files-api] ' + message)
}
/**
 * File specification parsed from CLI args
 * Format: --file=<file_id>:<relative_path>
 */
export type File = {
  // Server-assigned ID, e.g. "file_011CNha8iCJcU1wXNR6q4V8w"
  fileId: string
  // Destination path relative to the session's uploads directory
  relativePath: string
}
/**
 * Configuration for the files API client
 */
export type FilesApiConfig = {
  /** OAuth token for authentication (from session JWT) */
  oauthToken: string
  /** Base URL for the API (default: https://api.anthropic.com) */
  baseUrl?: string
  /** Session ID for creating session-specific directories */
  sessionId: string
}
/**
 * Result of a file download operation
 */
export type DownloadResult = {
  fileId: string
  // Full local path written to; empty string when the path was invalid
  path: string
  success: boolean
  // Populated only on failure
  error?: string
  // Populated only on success
  bytesWritten?: number
}
// Max attempts per network operation (downloads, uploads, list calls)
const MAX_RETRIES = 3
// First retry delay; doubles on each subsequent attempt (exponential backoff)
const BASE_DELAY_MS = 500
const MAX_FILE_SIZE_BYTES = 500 * 1024 * 1024 // 500MB
/**
 * Result type for retry operations - signals whether to continue retrying
 */
type RetryResult<T> = { done: true; value: T } | { done: false; error?: string }
/**
 * Executes an operation with exponential backoff retry logic.
 *
 * @param operation - Operation name for logging
 * @param attemptFn - Runs one attempt (1-based); returns `{done: true}` with
 *   the value on success, `{done: false}` (with optional error text) to retry
 * @returns The successful result value
 * @throws Error when all MAX_RETRIES attempts fail; non-retriable errors
 *   thrown by `attemptFn` propagate immediately
 */
async function retryWithBackoff<T>(
  operation: string,
  attemptFn: (attempt: number) => Promise<RetryResult<T>>,
): Promise<T> {
  let lastError = ''
  let attempt = 0
  while (++attempt <= MAX_RETRIES) {
    const outcome = await attemptFn(attempt)
    if (outcome.done) {
      return outcome.value
    }
    lastError = outcome.error || `${operation} failed`
    logDebug(
      `${operation} attempt ${attempt}/${MAX_RETRIES} failed: ${lastError}`,
    )
    if (attempt === MAX_RETRIES) {
      break
    }
    // Exponential backoff: BASE, 2*BASE, 4*BASE, ...
    const delayMs = BASE_DELAY_MS * 2 ** (attempt - 1)
    logDebug(`Retrying ${operation} in ${delayMs}ms...`)
    await sleep(delayMs)
  }
  throw new Error(`${lastError} after ${MAX_RETRIES} attempts`)
}
/**
 * Downloads a single file from the Anthropic Public Files API
 *
 * Retries transient failures (5xx, network errors) with exponential backoff;
 * 404/401/403 abort immediately by throwing out of the retry loop.
 *
 * @param fileId - The file ID (e.g., "file_011CNha8iCJcU1wXNR6q4V8w")
 * @param config - Files API configuration
 * @returns The file content as a Buffer
 * @throws Error on non-retriable statuses (404/401/403), non-axios errors,
 *   or after all retries are exhausted
 */
export async function downloadFile(
  fileId: string,
  config: FilesApiConfig,
): Promise<Buffer> {
  const baseUrl = config.baseUrl || getDefaultApiBaseUrl()
  const url = `${baseUrl}/v1/files/${fileId}/content`
  const headers = {
    Authorization: `Bearer ${config.oauthToken}`,
    'anthropic-version': ANTHROPIC_VERSION,
    'anthropic-beta': FILES_API_BETA_HEADER,
  }
  logDebug(`Downloading file ${fileId} from ${url}`)
  return retryWithBackoff(`Download file ${fileId}`, async () => {
    try {
      const response = await axios.get(url, {
        headers,
        responseType: 'arraybuffer',
        timeout: 60000, // 60 second timeout for large files
        // 5xx responses throw (axios error) and become retriable below;
        // everything under 500 is handled explicitly here.
        validateStatus: status => status < 500,
      })
      if (response.status === 200) {
        logDebug(`Downloaded file ${fileId} (${response.data.length} bytes)`)
        return { done: true, value: Buffer.from(response.data) }
      }
      // Non-retriable errors - throw immediately
      if (response.status === 404) {
        throw new Error(`File not found: ${fileId}`)
      }
      if (response.status === 401) {
        throw new Error('Authentication failed: invalid or missing API key')
      }
      if (response.status === 403) {
        throw new Error(`Access denied to file: ${fileId}`)
      }
      // Any other <500 status: let retryWithBackoff try again.
      return { done: false, error: `status ${response.status}` }
    } catch (error) {
      // Only axios (network/5xx) errors are retriable; rethrow the rest.
      if (!axios.isAxiosError(error)) {
        throw error
      }
      return { done: false, error: error.message }
    }
  })
}
/**
 * Builds the full download path {basePath}/{sessionId}/uploads/{relativePath},
 * normalizing the relative path and stripping redundant uploads prefixes.
 *
 * @returns The absolute destination path, or null when the relative path
 *   would traverse above the workspace.
 */
export function buildDownloadPath(
  basePath: string,
  sessionId: string,
  relativePath: string,
): string | null {
  const normalized = path.normalize(relativePath)
  // Reject traversal above the workspace root.
  if (normalized.startsWith('..')) {
    logDebugError(
      `Invalid file path: ${relativePath}. Path must not traverse above workspace`,
    )
    return null
  }
  const uploadsBase = path.join(basePath, sessionId, 'uploads')
  // Callers sometimes pass paths that already contain the uploads prefix;
  // strip it so we don't nest uploads/uploads/.
  const redundantPrefixes = [
    uploadsBase + path.sep,
    path.sep + 'uploads' + path.sep,
  ]
  let cleanPath = normalized
  for (const prefix of redundantPrefixes) {
    if (normalized.startsWith(prefix)) {
      cleanPath = normalized.slice(prefix.length)
      break
    }
  }
  return path.join(uploadsBase, cleanPath)
}
/**
 * Downloads a file and saves it to the session-specific workspace directory.
 *
 * Never throws: failures (invalid path, download error, write error) are
 * reported via the returned DownloadResult.
 *
 * @param attachment - The file attachment to download
 * @param config - Files API configuration
 * @returns Download result with success/failure status
 */
export async function downloadAndSaveFile(
  attachment: File,
  config: FilesApiConfig,
): Promise<DownloadResult> {
  const { fileId, relativePath } = attachment
  const fullPath = buildDownloadPath(getCwd(), config.sessionId, relativePath)
  if (fullPath === null) {
    return {
      fileId,
      path: '',
      success: false,
      error: `Invalid file path: ${relativePath}`,
    }
  }
  try {
    const content = await downloadFile(fileId, config)
    // Create parent directories as needed, then write the payload.
    await fs.mkdir(path.dirname(fullPath), { recursive: true })
    await fs.writeFile(fullPath, content)
    logDebug(`Saved file ${fileId} to ${fullPath} (${content.length} bytes)`)
    return {
      fileId,
      path: fullPath,
      success: true,
      bytesWritten: content.length,
    }
  } catch (error) {
    logDebugError(`Failed to download file ${fileId}: ${errorMessage(error)}`)
    if (error instanceof Error) {
      logError(error)
    }
    return {
      fileId,
      path: fullPath,
      success: false,
      error: errorMessage(error),
    }
  }
}
// Default concurrency limit for parallel downloads
const DEFAULT_CONCURRENCY = 5
/**
 * Execute an async function over items with bounded concurrency.
 *
 * @param items - Items to process
 * @param fn - Async function applied to each item (receives the item index)
 * @param concurrency - Maximum concurrent operations
 * @returns Results in the same order as input items
 */
async function parallelWithLimit<T, R>(
  items: T[],
  fn: (item: T, index: number) => Promise<R>,
  concurrency: number,
): Promise<R[]> {
  const results: R[] = new Array(items.length)
  let nextIndex = 0
  // Each worker repeatedly claims the next unprocessed index until none remain.
  const runWorker = async (): Promise<void> => {
    for (;;) {
      const i = nextIndex++
      if (i >= items.length) {
        return
      }
      const item = items[i]
      // Sparse-array holes read as undefined and are skipped.
      if (item === undefined) {
        continue
      }
      results[i] = await fn(item, i)
    }
  }
  const workerCount = Math.min(concurrency, items.length)
  await Promise.all(Array.from({ length: workerCount }, () => runWorker()))
  return results
}
/**
 * Downloads all file attachments for a session in parallel.
 *
 * @param files - List of file attachments to download
 * @param config - Files API configuration
 * @param concurrency - Maximum concurrent downloads (default: 5)
 * @returns Array of download results in the same order as input
 */
export async function downloadSessionFiles(
  files: File[],
  config: FilesApiConfig,
  concurrency: number = DEFAULT_CONCURRENCY,
): Promise<DownloadResult[]> {
  if (!files.length) {
    return []
  }
  logDebug(
    `Downloading ${files.length} file(s) for session ${config.sessionId}`,
  )
  const startedAt = Date.now()
  const results = await parallelWithLimit(
    files,
    f => downloadAndSaveFile(f, config),
    concurrency,
  )
  const elapsedMs = Date.now() - startedAt
  const successCount = count(results, r => r.success)
  logDebug(
    `Downloaded ${successCount}/${files.length} file(s) in ${elapsedMs}ms`,
  )
  return results
}
// ============================================================================
// Upload Functions (BYOC mode)
// ============================================================================
/**
 * Result of a file upload operation
 *
 * Discriminated on `success`: the success arm carries the server-assigned
 * file ID and byte size, the failure arm carries an error description.
 */
export type UploadResult =
  | {
      path: string
      fileId: string
      size: number
      success: true
    }
  | {
      path: string
      error: string
      success: false
    }
/**
 * Upload a single file to the Files API (BYOC mode)
 *
 * Size validation is performed after reading the file to avoid TOCTOU race
 * conditions where the file size could change between initial check and upload.
 *
 * Never throws for expected failure modes: read errors, oversize files,
 * auth/permission/size rejections, cancellation, and exhausted retries all
 * come back as a failed UploadResult.
 *
 * @param filePath - Absolute path to the file to upload
 * @param relativePath - Relative path for the file; its basename becomes the
 *   multipart filename sent to the API
 * @param config - Files API configuration
 * @param opts - Optional abort signal to cancel the in-flight request
 * @returns Upload result with success/failure status
 */
export async function uploadFile(
  filePath: string,
  relativePath: string,
  config: FilesApiConfig,
  opts?: { signal?: AbortSignal },
): Promise<UploadResult> {
  const baseUrl = config.baseUrl || getDefaultApiBaseUrl()
  const url = `${baseUrl}/v1/files`
  const headers = {
    Authorization: `Bearer ${config.oauthToken}`,
    'anthropic-version': ANTHROPIC_VERSION,
    'anthropic-beta': FILES_API_BETA_HEADER,
  }
  logDebug(`Uploading file ${filePath} as ${relativePath}`)
  // Read file content first (outside retry loop since it's not a network operation)
  let content: Buffer
  try {
    content = await fs.readFile(filePath)
  } catch (error) {
    logEvent('tengu_file_upload_failed', {
      error_type:
        'file_read' as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
    })
    return {
      path: relativePath,
      error: errorMessage(error),
      success: false,
    }
  }
  const fileSize = content.length
  if (fileSize > MAX_FILE_SIZE_BYTES) {
    logEvent('tengu_file_upload_failed', {
      error_type:
        'file_too_large' as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
    })
    return {
      path: relativePath,
      error: `File exceeds maximum size of ${MAX_FILE_SIZE_BYTES} bytes (actual: ${fileSize})`,
      success: false,
    }
  }
  // Use crypto.randomUUID for boundary to avoid collisions when uploads start same millisecond
  const boundary = `----FormBoundary${randomUUID()}`
  // BUG FIX: the Content-Disposition header previously contained the literal
  // text `$(unknown)` and this computed basename was never used. Escape
  // quotes and CRLF so a hostile path component cannot break out of the
  // header (multipart injection).
  const filename = path
    .basename(relativePath)
    .replace(/"/g, '%22')
    .replace(/[\r\n]/g, '')
  // Build the multipart body
  const bodyParts: Buffer[] = []
  // File part
  bodyParts.push(
    Buffer.from(
      `--${boundary}\r\n` +
        `Content-Disposition: form-data; name="file"; filename="${filename}"\r\n` +
        `Content-Type: application/octet-stream\r\n\r\n`,
    ),
  )
  bodyParts.push(content)
  bodyParts.push(Buffer.from('\r\n'))
  // Purpose part
  bodyParts.push(
    Buffer.from(
      `--${boundary}\r\n` +
        `Content-Disposition: form-data; name="purpose"\r\n\r\n` +
        `user_data\r\n`,
    ),
  )
  // End boundary
  bodyParts.push(Buffer.from(`--${boundary}--\r\n`))
  const body = Buffer.concat(bodyParts)
  try {
    return await retryWithBackoff(`Upload file ${relativePath}`, async () => {
      try {
        const response = await axios.post(url, body, {
          headers: {
            ...headers,
            'Content-Type': `multipart/form-data; boundary=${boundary}`,
            'Content-Length': body.length.toString(),
          },
          timeout: 120000, // 2 minute timeout for uploads
          signal: opts?.signal,
          // 5xx responses throw and become retriable; <500 handled below.
          validateStatus: status => status < 500,
        })
        if (response.status === 200 || response.status === 201) {
          const fileId = response.data?.id
          if (!fileId) {
            return {
              done: false,
              error: 'Upload succeeded but no file ID returned',
            }
          }
          logDebug(`Uploaded file ${filePath} -> ${fileId} (${fileSize} bytes)`)
          return {
            done: true,
            value: {
              path: relativePath,
              fileId,
              size: fileSize,
              success: true as const,
            },
          }
        }
        // Non-retriable errors - throw to exit retry loop
        if (response.status === 401) {
          logEvent('tengu_file_upload_failed', {
            error_type:
              'auth' as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
          })
          throw new UploadNonRetriableError(
            'Authentication failed: invalid or missing API key',
          )
        }
        if (response.status === 403) {
          logEvent('tengu_file_upload_failed', {
            error_type:
              'forbidden' as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
          })
          throw new UploadNonRetriableError('Access denied for upload')
        }
        if (response.status === 413) {
          logEvent('tengu_file_upload_failed', {
            error_type:
              'size' as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
          })
          throw new UploadNonRetriableError('File too large for upload')
        }
        return { done: false, error: `status ${response.status}` }
      } catch (error) {
        // Non-retriable errors propagate up
        if (error instanceof UploadNonRetriableError) {
          throw error
        }
        if (axios.isCancel(error)) {
          throw new UploadNonRetriableError('Upload canceled')
        }
        // Network errors are retriable
        if (axios.isAxiosError(error)) {
          return { done: false, error: error.message }
        }
        throw error
      }
    })
  } catch (error) {
    if (error instanceof UploadNonRetriableError) {
      return {
        path: relativePath,
        error: error.message,
        success: false,
      }
    }
    logEvent('tengu_file_upload_failed', {
      error_type:
        'network' as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
    })
    return {
      path: relativePath,
      error: errorMessage(error),
      success: false,
    }
  }
}
/**
 * Error class for non-retriable upload failures (auth, forbidden, 413,
 * cancellation). Thrown inside the retry callback to abort retryWithBackoff
 * immediately; caught by uploadFile and converted to a failed UploadResult.
 */
class UploadNonRetriableError extends Error {
  constructor(message: string) {
    super(message)
    this.name = 'UploadNonRetriableError'
  }
}
/**
 * Upload multiple files in parallel with concurrency limit (BYOC mode).
 *
 * @param files - Array of files to upload (path and relativePath)
 * @param config - Files API configuration
 * @param concurrency - Maximum concurrent uploads (default: 5)
 * @returns Array of upload results in the same order as input
 */
export async function uploadSessionFiles(
  files: Array<{ path: string; relativePath: string }>,
  config: FilesApiConfig,
  concurrency: number = DEFAULT_CONCURRENCY,
): Promise<UploadResult[]> {
  if (!files.length) {
    return []
  }
  logDebug(`Uploading ${files.length} file(s) for session ${config.sessionId}`)
  const startedAt = Date.now()
  const results = await parallelWithLimit(
    files,
    f => uploadFile(f.path, f.relativePath, config),
    concurrency,
  )
  const elapsedMs = Date.now() - startedAt
  const successCount = count(results, r => r.success)
  logDebug(`Uploaded ${successCount}/${files.length} file(s) in ${elapsedMs}ms`)
  return results
}
// ============================================================================
// List Files Functions (1P/Cloud mode)
// ============================================================================
/**
 * File metadata returned from listFilesCreatedAfter
 */
export type FileMetadata = {
  filename: string
  // Server-assigned ID (API field: id)
  fileId: string
  // Size in bytes (API field: size_bytes)
  size: number
}
/**
 * List files created after a given timestamp (1P/Cloud mode).
 * Uses the public GET /v1/files endpoint with after_created_at query param.
 * Handles pagination via after_id cursor when has_more is true.
 *
 * @param afterCreatedAt - ISO 8601 timestamp to filter files created after
 * @param config - Files API configuration
 * @returns Array of file metadata for files created after the timestamp
 * @throws Error on 401/403 or when a page request exhausts its retries
 */
export async function listFilesCreatedAfter(
  afterCreatedAt: string,
  config: FilesApiConfig,
): Promise<FileMetadata[]> {
  const baseUrl = config.baseUrl || getDefaultApiBaseUrl()
  const headers = {
    Authorization: `Bearer ${config.oauthToken}`,
    'anthropic-version': ANTHROPIC_VERSION,
    'anthropic-beta': FILES_API_BETA_HEADER,
  }
  logDebug(`Listing files created after ${afterCreatedAt}`)
  const allFiles: FileMetadata[] = []
  let afterId: string | undefined
  // Paginate through results
  while (true) {
    const params: Record<string, string> = {
      after_created_at: afterCreatedAt,
    }
    if (afterId) {
      params.after_id = afterId
    }
    // Each page request gets its own retry budget.
    const page = await retryWithBackoff(
      `List files after ${afterCreatedAt}`,
      async () => {
        try {
          const response = await axios.get(`${baseUrl}/v1/files`, {
            headers,
            params,
            timeout: 60000,
            // 5xx responses throw and become retriable below.
            validateStatus: status => status < 500,
          })
          if (response.status === 200) {
            return { done: true, value: response.data }
          }
          // Non-retriable auth failures abort the whole listing.
          if (response.status === 401) {
            logEvent('tengu_file_list_failed', {
              error_type:
                'auth' as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
            })
            throw new Error('Authentication failed: invalid or missing API key')
          }
          if (response.status === 403) {
            logEvent('tengu_file_list_failed', {
              error_type:
                'forbidden' as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
            })
            throw new Error('Access denied to list files')
          }
          return { done: false, error: `status ${response.status}` }
        } catch (error) {
          if (!axios.isAxiosError(error)) {
            throw error
          }
          logEvent('tengu_file_list_failed', {
            error_type:
              'network' as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
          })
          return { done: false, error: error.message }
        }
      },
    )
    // NOTE(review): `page` is untyped here; assumes the response shape
    // { data: [{ filename, id, size_bytes }], has_more } — confirm against
    // the Files API reference.
    const files = page.data || []
    for (const f of files) {
      allFiles.push({
        filename: f.filename,
        fileId: f.id,
        size: f.size_bytes,
      })
    }
    if (!page.has_more) {
      break
    }
    // Use the last file's ID as cursor for next page
    const lastFile = files.at(-1)
    if (!lastFile?.id) {
      break
    }
    afterId = lastFile.id
  }
  logDebug(`Listed ${allFiles.length} files created after ${afterCreatedAt}`)
  return allFiles
}
// ============================================================================
// Parse Functions
// ============================================================================
/**
 * Parse file attachment specs from CLI arguments.
 * Format: <file_id>:<relative_path>. Specs without a colon are silently
 * skipped; specs with an empty ID or path are skipped with a debug log.
 *
 * @param fileSpecs - Array of file spec strings
 * @returns Parsed file attachments
 */
export function parseFileSpecs(fileSpecs: string[]): File[] {
  // Sandbox-gateway may pass multiple specs as a single space-separated string
  const expandedSpecs = fileSpecs.flatMap(s => s.split(' ').filter(Boolean))
  const files: File[] = []
  for (const spec of expandedSpecs) {
    const separator = spec.indexOf(':')
    if (separator < 0) {
      continue
    }
    const fileId = spec.slice(0, separator)
    const relativePath = spec.slice(separator + 1)
    if (fileId && relativePath) {
      files.push({ fileId, relativePath })
    } else {
      logDebugError(
        `Invalid file spec: ${spec}. Both file_id and path are required`,
      )
    }
  }
  return files
}
+60
View File
@@ -0,0 +1,60 @@
import axios from 'axios'
import { getOauthConfig } from '../../constants/oauth.js'
import { getGlobalConfig, saveGlobalConfig } from '../../utils/config.js'
import { getAuthHeaders } from '../../utils/http.js'
import { logError } from '../../utils/log.js'
import { getClaudeCodeUserAgent } from '../../utils/userAgent.js'
/**
* Fetch the user's first Claude Code token date and store in config.
* This is called after successful login to cache when they started using Claude Code.
*/
export async function fetchAndStoreClaudeCodeFirstTokenDate(): Promise<void> {
try {
const config = getGlobalConfig()
if (config.claudeCodeFirstTokenDate !== undefined) {
return
}
const authHeaders = getAuthHeaders()
if (authHeaders.error) {
logError(new Error(`Failed to get auth headers: ${authHeaders.error}`))
return
}
const oauthConfig = getOauthConfig()
const url = `${oauthConfig.BASE_API_URL}/api/organization/claude_code_first_token_date`
const response = await axios.get(url, {
headers: {
...authHeaders.headers,
'User-Agent': getClaudeCodeUserAgent(),
},
timeout: 10000,
})
const firstTokenDate = response.data?.first_token_date ?? null
// Validate the date if it's not null
if (firstTokenDate !== null) {
const dateTime = new Date(firstTokenDate).getTime()
if (isNaN(dateTime)) {
logError(
new Error(
`Received invalid first_token_date from API: ${firstTokenDate}`,
),
)
// Don't save invalid dates
return
}
}
saveGlobalConfig(current => ({
...current,
claudeCodeFirstTokenDate: firstTokenDate,
}))
} catch (error) {
logError(error)
}
}
+357
View File
@@ -0,0 +1,357 @@
import axios from 'axios'
import memoize from 'lodash-es/memoize.js'
import {
type AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
logEvent,
} from 'src/services/analytics/index.js'
import { getOauthAccountInfo, isConsumerSubscriber } from 'src/utils/auth.js'
import { logForDebugging } from 'src/utils/debug.js'
import { gracefulShutdown } from 'src/utils/gracefulShutdown.js'
import { isEssentialTrafficOnly } from 'src/utils/privacyLevel.js'
import { writeToStderr } from 'src/utils/process.js'
import { getOauthConfig } from '../../constants/oauth.js'
import { getGlobalConfig, saveGlobalConfig } from '../../utils/config.js'
import {
getAuthHeaders,
getUserAgent,
withOAuth401Retry,
} from '../../utils/http.js'
import { logError } from '../../utils/log.js'
import { getClaudeCodeUserAgent } from '../../utils/userAgent.js'
// Cache expiration: 24 hours
const GROVE_CACHE_EXPIRATION_MS = 24 * 60 * 60 * 1000
// Per-account Grove settings from /api/oauth/account/settings.
export type AccountSettings = {
  // null = the user has not made a choice yet
  grove_enabled: boolean | null
  // Timestamp of the last notice view, or null if never viewed
  grove_notice_viewed_at: string | null
}
// Grove rollout configuration from /api/claude_code_grove.
export type GroveConfig = {
  grove_enabled: boolean
  domain_excluded: boolean
  notice_is_grace_period: boolean
  notice_reminder_frequency: number | null
}
/**
 * Result type that distinguishes between API failure and success.
 * - success: true means API call succeeded (data may still contain null fields)
 * - success: false means API call failed after retry
 */
export type ApiResult<T> = { success: true; data: T } | { success: false }
/**
 * Get the current Grove settings for the user account.
 * Returns ApiResult to distinguish between API failure and success.
 * Uses existing OAuth 401 retry, then returns failure if that doesn't help.
 *
 * Memoized for the session to avoid redundant per-render requests.
 * Cache is invalidated in updateGroveSettings() so post-toggle reads are fresh.
 */
export const getGroveSettings = memoize(
  async (): Promise<ApiResult<AccountSettings>> => {
    // Grove is a notification feature; during an outage, skipping it is correct.
    if (isEssentialTrafficOnly()) {
      return { success: false }
    }
    try {
      // Auth headers are re-read inside the callback so the 401 retry can
      // pick up a refreshed token.
      const response = await withOAuth401Retry(() => {
        const authHeaders = getAuthHeaders()
        if (authHeaders.error) {
          throw new Error(`Failed to get auth headers: ${authHeaders.error}`)
        }
        return axios.get<AccountSettings>(
          `${getOauthConfig().BASE_API_URL}/api/oauth/account/settings`,
          {
            headers: {
              ...authHeaders.headers,
              'User-Agent': getClaudeCodeUserAgent(),
            },
          },
        )
      })
      return { success: true, data: response.data }
    } catch (err) {
      logError(err)
      // Don't cache failures — transient network issues would lock the user
      // out of privacy settings for the entire session (deadlock: dialog needs
      // success to render the toggle, toggle calls updateGroveSettings which
      // is the only other place the cache is cleared).
      getGroveSettings.cache.clear?.()
      return { success: false }
    }
  },
)
/**
* Mark that the Grove notice has been viewed by the user
*/
export async function markGroveNoticeViewed(): Promise<void> {
try {
await withOAuth401Retry(() => {
const authHeaders = getAuthHeaders()
if (authHeaders.error) {
throw new Error(`Failed to get auth headers: ${authHeaders.error}`)
}
return axios.post(
`${getOauthConfig().BASE_API_URL}/api/oauth/account/grove_notice_viewed`,
{},
{
headers: {
...authHeaders.headers,
'User-Agent': getClaudeCodeUserAgent(),
},
},
)
})
// This mutates grove_notice_viewed_at server-side — Grove.tsx:87 reads it
// to decide whether to show the dialog. Without invalidation a same-session
// remount would read stale viewed_at:null and re-show the dialog.
getGroveSettings.cache.clear?.()
} catch (err) {
logError(err)
}
}
/**
* Update Grove settings for the user account
*/
export async function updateGroveSettings(
groveEnabled: boolean,
): Promise<void> {
try {
await withOAuth401Retry(() => {
const authHeaders = getAuthHeaders()
if (authHeaders.error) {
throw new Error(`Failed to get auth headers: ${authHeaders.error}`)
}
return axios.patch(
`${getOauthConfig().BASE_API_URL}/api/oauth/account/settings`,
{
grove_enabled: groveEnabled,
},
{
headers: {
...authHeaders.headers,
'User-Agent': getClaudeCodeUserAgent(),
},
},
)
})
// Invalidate memoized settings so the post-toggle confirmation
// read in privacy-settings.tsx picks up the new value.
getGroveSettings.cache.clear?.()
} catch (err) {
logError(err)
}
}
/**
* Check if user is qualified for Grove (non-blocking, cache-first).
*
* This function never blocks on network - it returns cached data immediately
* and fetches in the background if needed. On cold start (no cache), it returns
* false and the Grove dialog won't show until the next session.
*/
export async function isQualifiedForGrove(): Promise<boolean> {
if (!isConsumerSubscriber()) {
return false
}
const accountId = getOauthAccountInfo()?.accountUuid
if (!accountId) {
return false
}
const globalConfig = getGlobalConfig()
const cachedEntry = globalConfig.groveConfigCache?.[accountId]
const now = Date.now()
// No cache - trigger background fetch and return false (non-blocking)
// The Grove dialog won't show this session, but will next time if eligible
if (!cachedEntry) {
logForDebugging(
'Grove: No cache, fetching config in background (dialog skipped this session)',
)
void fetchAndStoreGroveConfig(accountId)
return false
}
// Cache exists but is stale - return cached value and refresh in background
if (now - cachedEntry.timestamp > GROVE_CACHE_EXPIRATION_MS) {
logForDebugging(
'Grove: Cache stale, returning cached data and refreshing in background',
)
void fetchAndStoreGroveConfig(accountId)
return cachedEntry.grove_enabled
}
// Cache is fresh - return it immediately
logForDebugging('Grove: Using fresh cached config')
return cachedEntry.grove_enabled
}
/**
 * Fetch Grove config from the API and persist it in the per-account cache.
 * Skips the write when the cached entry is already fresh and unchanged.
 * Best-effort: failures are only logged at debug level.
 */
async function fetchAndStoreGroveConfig(accountId: string): Promise<void> {
  try {
    const result = await getGroveNoticeConfig()
    if (!result.success) {
      return
    }
    const groveEnabled = result.data.grove_enabled
    const existing = getGlobalConfig().groveConfigCache?.[accountId]
    // Avoid a redundant config write when nothing changed and the entry
    // has not expired yet.
    const existingIsFresh =
      existing !== undefined &&
      existing.grove_enabled === groveEnabled &&
      Date.now() - existing.timestamp <= GROVE_CACHE_EXPIRATION_MS
    if (existingIsFresh) {
      return
    }
    saveGlobalConfig(current => ({
      ...current,
      groveConfigCache: {
        ...current.groveConfigCache,
        [accountId]: {
          grove_enabled: groveEnabled,
          timestamp: Date.now(),
        },
      },
    }))
  } catch (err) {
    logForDebugging(`Grove: Failed to fetch and store config: ${err}`)
  }
}
/**
 * Get Grove Statsig configuration from the API.
 * Returns ApiResult to distinguish between API failure and success.
 * Uses existing OAuth 401 retry, then returns failure if that doesn't help.
 *
 * Memoized for the session. Like getGroveSettings, failures are NOT cached:
 * a transient network error must not pin `{success: false}` for the rest of
 * the session.
 */
export const getGroveNoticeConfig = memoize(
  async (): Promise<ApiResult<GroveConfig>> => {
    // Grove is a notification feature; during an outage, skipping it is correct.
    if (isEssentialTrafficOnly()) {
      return { success: false }
    }
    try {
      const response = await withOAuth401Retry(() => {
        const authHeaders = getAuthHeaders()
        if (authHeaders.error) {
          throw new Error(`Failed to get auth headers: ${authHeaders.error}`)
        }
        return axios.get<GroveConfig>(
          `${getOauthConfig().BASE_API_URL}/api/claude_code_grove`,
          {
            headers: {
              ...authHeaders.headers,
              // NOTE(review): the other Grove endpoints send
              // getClaudeCodeUserAgent(); confirm the plain getUserAgent()
              // here is intentional.
              'User-Agent': getUserAgent(),
            },
            timeout: 3000, // Short timeout - if slow, skip Grove dialog
          },
        )
      })
      // Map the API response to the GroveConfig type
      const {
        grove_enabled,
        domain_excluded,
        notice_is_grace_period,
        notice_reminder_frequency,
      } = response.data
      return {
        success: true,
        data: {
          grove_enabled,
          domain_excluded: domain_excluded ?? false,
          notice_is_grace_period: notice_is_grace_period ?? true,
          notice_reminder_frequency,
        },
      }
    } catch (err) {
      logForDebugging(`Failed to fetch Grove notice config: ${err}`)
      // Consistency fix: mirror getGroveSettings and drop the memoized entry
      // so a transient failure is retried on the next call instead of being
      // cached as {success: false} for the whole session.
      getGroveNoticeConfig.cache.clear?.()
      return { success: false }
    }
  },
)
/**
* Determines whether the Grove dialog should be shown.
* Returns false if either API call failed (after retry) - we hide the dialog on API failure.
*/
/**
 * Determines whether the Grove dialog should be shown.
 *
 * Returns false if either API call failed (after retry) — the dialog is
 * hidden on API failure. Otherwise the decision cascades: a user who has
 * already chosen never sees it; `showIfAlreadyViewed` or an expired grace
 * period force it; during the grace period it reappears only on the
 * configured reminder cadence (or once, if never viewed).
 */
export function calculateShouldShowGrove(
  settingsResult: ApiResult<AccountSettings>,
  configResult: ApiResult<GroveConfig>,
  showIfAlreadyViewed: boolean,
): boolean {
  // Hide dialog on API failure (after retry)
  if (!settingsResult.success) return false
  if (!configResult.success) return false

  const settings = settingsResult.data
  const config = configResult.data

  // The user already made a Grove choice — nothing left to ask.
  if (settings.grove_enabled !== null) return false
  if (showIfAlreadyViewed) return true
  // Grace period over — the notice is mandatory.
  if (!config.notice_is_grace_period) return true

  // Check if we need to remind the user to accept the terms and choose
  // whether to help improve Claude.
  const frequency = config.notice_reminder_frequency
  const viewedAt = settings.grove_notice_viewed_at
  if (frequency !== null && viewedAt) {
    const elapsedDays = Math.floor(
      (Date.now() - new Date(viewedAt).getTime()) / (1000 * 60 * 60 * 24),
    )
    return elapsedDays >= frequency
  }
  // Show if never viewed before
  return viewedAt === null || viewedAt === undefined
}
/**
 * Grove handling for non-interactive runs: if the user still owes a Grove
 * choice, print a notice to stderr. During the grace period this is
 * informational (the run continues and the view is recorded); after it,
 * the process exits with status 1 and directs the user to run `claude`.
 */
export async function checkGroveForNonInteractive(): Promise<void> {
  const [settingsResult, configResult] = await Promise.all([
    getGroveSettings(),
    getGroveNoticeConfig(),
  ])
  // Check if user hasn't made a choice yet (returns false on API failure)
  if (!calculateShouldShowGrove(settingsResult, configResult, false)) {
    return
  }
  // Only reachable when both API calls succeeded.
  const config = configResult.success ? configResult.data : null
  logEvent('tengu_grove_print_viewed', {
    dismissable:
      config?.notice_is_grace_period as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
  })
  if (config === null || config.notice_is_grace_period) {
    // Grace period is still active - show informational message and continue
    writeToStderr(
      '\nAn update to our Consumer Terms and Privacy Policy will take effect on October 8, 2025. Run `claude` to review the updated terms.\n\n',
    )
    await markGroveNoticeViewed()
    return
  }
  // Grace period has ended - show error message and exit
  writeToStderr(
    '\n[ACTION REQUIRED] An update to our Consumer Terms and Privacy Policy has taken effect on October 8, 2025. You must run `claude` to review the updated terms.\n\n',
  )
  await gracefulShutdown(1)
}
+788
View File
@@ -0,0 +1,788 @@
import { feature } from 'bun:bundle'
import { APIError } from '@anthropic-ai/sdk'
import type {
BetaStopReason,
BetaUsage as Usage,
} from '@anthropic-ai/sdk/resources/beta/messages/messages.mjs'
import {
addToTotalDurationState,
consumePostCompaction,
getIsNonInteractiveSession,
getLastApiCompletionTimestamp,
getTeleportedSessionInfo,
markFirstTeleportMessageLogged,
setLastApiCompletionTimestamp,
} from 'src/bootstrap/state.js'
import type { QueryChainTracking } from 'src/Tool.js'
import { isConnectorTextBlock } from 'src/types/connectorText.js'
import type { AssistantMessage } from 'src/types/message.js'
import { logForDebugging } from 'src/utils/debug.js'
import type { EffortLevel } from 'src/utils/effort.js'
import { logError } from 'src/utils/log.js'
import { getAPIProviderForStatsig } from 'src/utils/model/providers.js'
import type { PermissionMode } from 'src/utils/permissions/PermissionMode.js'
import { jsonStringify } from 'src/utils/slowOperations.js'
import { logOTelEvent } from 'src/utils/telemetry/events.js'
import {
endLLMRequestSpan,
isBetaTracingEnabled,
type Span,
} from 'src/utils/telemetry/sessionTracing.js'
import type { NonNullableUsage } from '../../entrypoints/sdk/sdkUtilityTypes.js'
import { consumeInvokingRequestId } from '../../utils/agentContext.js'
import {
type AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
logEvent,
} from '../analytics/index.js'
import { sanitizeToolNameForAnalytics } from '../analytics/metadata.js'
import { EMPTY_USAGE } from './emptyUsage.js'
import { classifyAPIError } from './errors.js'
import { extractConnectionErrorDetails } from './errorUtils.js'
export type { NonNullableUsage }
export { EMPTY_USAGE }
/** Strategy used for global prompt caching; surfaced in `tengu_api_success` analytics. */
export type GlobalCacheStrategy = 'tool_based' | 'system_prompt' | 'none'
/**
 * Extract a human-readable message from any thrown value.
 * Prefers the nested message in an Anthropic APIError body, then
 * Error.message, then plain string coercion.
 */
function getErrorMessage(error: unknown): string {
  if (error instanceof APIError) {
    const body = error.error as { error?: { message?: string } } | undefined
    const nested = body?.error?.message
    if (nested) {
      return nested
    }
  }
  if (error instanceof Error) {
    return error.message
  }
  return String(error)
}
/** AI gateways this module can recognize for analytics attribution. */
type KnownGateway =
  | 'litellm'
  | 'helicone'
  | 'portkey'
  | 'cloudflare-ai-gateway'
  | 'kong'
  | 'braintrust'
  | 'databricks'
// Gateway fingerprints for detecting AI gateways from response headers.
// Each entry lists lowercase header-name prefixes that only that gateway emits.
const GATEWAY_FINGERPRINTS: Partial<
  Record<KnownGateway, { prefixes: string[] }>
> = {
  // https://docs.litellm.ai/docs/proxy/response_headers
  litellm: {
    prefixes: ['x-litellm-'],
  },
  // https://docs.helicone.ai/helicone-headers/header-directory
  helicone: {
    prefixes: ['helicone-'],
  },
  // https://portkey.ai/docs/api-reference/response-schema
  portkey: {
    prefixes: ['x-portkey-'],
  },
  // https://developers.cloudflare.com/ai-gateway/evaluations/add-human-feedback-api/
  'cloudflare-ai-gateway': {
    prefixes: ['cf-aig-'],
  },
  // https://developer.konghq.com/ai-gateway/ — X-Kong-Upstream-Latency, X-Kong-Proxy-Latency
  kong: {
    prefixes: ['x-kong-'],
  },
  // https://www.braintrust.dev/docs/guides/proxy — x-bt-used-endpoint, x-bt-cached
  braintrust: {
    prefixes: ['x-bt-'],
  },
}
// Gateways that use provider-owned domains (not self-hosted), so the
// ANTHROPIC_BASE_URL hostname is a reliable signal even without a
// distinctive response header.
const GATEWAY_HOST_SUFFIXES: Partial<Record<KnownGateway, string[]>> = {
  // https://docs.databricks.com/aws/en/ai-gateway/
  databricks: [
    '.cloud.databricks.com',
    '.azuredatabricks.net',
    '.gcp.databricks.com',
  ],
}
/**
 * Best-effort detection of a known AI gateway sitting between the CLI and
 * the API. Checks response-header prefixes first (strongest signal), then
 * falls back to the base-URL hostname for gateways on provider-owned
 * domains. Returns undefined when nothing matches.
 */
function detectGateway({
  headers,
  baseUrl,
}: {
  headers?: globalThis.Headers
  baseUrl?: string
}): KnownGateway | undefined {
  if (headers) {
    // Header names are already lowercase from the Headers API
    const names: string[] = []
    headers.forEach((_, key) => names.push(key))
    const headerMatch = Object.entries(GATEWAY_FINGERPRINTS).find(
      ([, { prefixes }]) => names.some(h => prefixes.some(p => h.startsWith(p))),
    )
    if (headerMatch) {
      return headerMatch[0] as KnownGateway
    }
  }
  if (baseUrl) {
    try {
      const host = new URL(baseUrl).hostname.toLowerCase()
      const hostMatch = Object.entries(GATEWAY_HOST_SUFFIXES).find(
        ([, suffixes]) => suffixes.some(s => host.endsWith(s)),
      )
      if (hostMatch) {
        return hostMatch[0] as KnownGateway
      }
    } catch {
      // malformed URL — ignore
    }
  }
  return undefined
}
/**
 * Analytics metadata for Anthropic-related environment overrides.
 * Each key appears only when the corresponding env var is set, so logged
 * events omit unset overrides rather than carrying undefined values.
 */
function getAnthropicEnvMetadata() {
  const {
    ANTHROPIC_BASE_URL: baseUrl,
    ANTHROPIC_MODEL: envModel,
    ANTHROPIC_SMALL_FAST_MODEL: envSmallFastModel,
  } = process.env
  return {
    ...(baseUrl
      ? {
          baseUrl:
            baseUrl as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
        }
      : {}),
    ...(envModel
      ? {
          envModel:
            envModel as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
        }
      : {}),
    ...(envSmallFastModel
      ? {
          envSmallFastModel:
            envSmallFastModel as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
        }
      : {}),
  }
}
/**
 * Minutes elapsed since this binary was built, or undefined when the
 * build-time macro is absent or not a parseable date.
 */
function getBuildAgeMinutes(): number | undefined {
  if (!MACRO.BUILD_TIME) {
    return undefined
  }
  const builtAt = new Date(MACRO.BUILD_TIME).getTime()
  if (Number.isNaN(builtAt)) {
    return undefined
  }
  return Math.floor((Date.now() - builtAt) / 60000)
}
/**
 * Log a `tengu_api_query` analytics event just before an API request is sent.
 *
 * Captures model, message count, sampling temperature, enabled beta flags,
 * permission mode, query provenance (source + chain tracking), thinking and
 * effort settings, fast-mode flag, the previous request ID, and any
 * ANTHROPIC_* env overrides. Optional fields use conditional spreads so
 * unset values are omitted from the event rather than logged as undefined.
 */
export function logAPIQuery({
  model,
  messagesLength,
  temperature,
  betas,
  permissionMode,
  querySource,
  queryTracking,
  thinkingType,
  effortValue,
  fastMode,
  previousRequestId,
}: {
  model: string
  messagesLength: number
  temperature: number
  betas?: string[]
  permissionMode?: PermissionMode
  querySource: string
  queryTracking?: QueryChainTracking
  thinkingType?: 'adaptive' | 'enabled' | 'disabled'
  effortValue?: EffortLevel | null
  fastMode?: boolean
  previousRequestId?: string | null
}): void {
  logEvent('tengu_api_query', {
    model: model as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
    messagesLength,
    temperature: temperature,
    provider: getAPIProviderForStatsig(),
    buildAgeMins: getBuildAgeMinutes(),
    // Betas collapsed into one comma-separated metadata string
    ...(betas?.length
      ? {
          betas: betas.join(
            ',',
          ) as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
        }
      : {}),
    permissionMode:
      permissionMode as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
    querySource:
      querySource as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
    ...(queryTracking
      ? {
          queryChainId:
            queryTracking.chainId as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
          queryDepth: queryTracking.depth,
        }
      : {}),
    thinkingType:
      thinkingType as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
    effortValue:
      effortValue as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
    fastMode,
    ...(previousRequestId
      ? {
          previousRequestId:
            previousRequestId as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
        }
      : {}),
    ...getAnthropicEnvMetadata(),
  })
}
/**
 * Record a failed API request across every telemetry channel:
 * - debug log (connection-error details, client request ID for server lookup)
 * - error log (`logError`)
 * - `tengu_api_error` analytics event with model, classified error type,
 *   HTTP status, timing, retry attempt, gateway detection, and query context
 * - OTLP `api_error` event
 * - closes the LLM tracing span as unsuccessful
 * - for teleported sessions, logs the first-message error once
 */
export function logAPIError({
  error,
  model,
  messageCount,
  messageTokens,
  durationMs,
  durationMsIncludingRetries,
  attempt,
  requestId,
  clientRequestId,
  didFallBackToNonStreaming,
  promptCategory,
  headers,
  queryTracking,
  querySource,
  llmSpan,
  fastMode,
  previousRequestId,
}: {
  error: unknown
  model: string
  messageCount: number
  messageTokens?: number
  durationMs: number
  durationMsIncludingRetries: number
  attempt: number
  requestId?: string | null
  /** Client-generated ID sent as x-client-request-id header (survives timeouts) */
  clientRequestId?: string
  didFallBackToNonStreaming?: boolean
  promptCategory?: string
  headers?: globalThis.Headers
  queryTracking?: QueryChainTracking
  querySource?: string
  /** The span from startLLMRequestSpan - pass this to correctly match responses to requests */
  llmSpan?: Span
  fastMode?: boolean
  previousRequestId?: string | null
}): void {
  // Prefer headers attached to the APIError itself over the streaming headers
  const gateway = detectGateway({
    headers:
      error instanceof APIError && error.headers ? error.headers : headers,
    baseUrl: process.env.ANTHROPIC_BASE_URL,
  })
  const errStr = getErrorMessage(error)
  const status = error instanceof APIError ? String(error.status) : undefined
  const errorType = classifyAPIError(error)
  // Log detailed connection error info to debug logs (visible via --debug)
  const connectionDetails = extractConnectionErrorDetails(error)
  if (connectionDetails) {
    const sslLabel = connectionDetails.isSSLError ? ' (SSL error)' : ''
    logForDebugging(
      `Connection error details: code=${connectionDetails.code}${sslLabel}, message=${connectionDetails.message}`,
      { level: 'error' },
    )
  }
  const invocation = consumeInvokingRequestId()
  if (clientRequestId) {
    logForDebugging(
      `API error x-client-request-id=${clientRequestId} (give this to the API team for server-log lookup)`,
      { level: 'error' },
    )
  }
  logError(error as Error)
  logEvent('tengu_api_error', {
    model: model as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
    error: errStr as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
    status:
      status as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
    errorType:
      errorType as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
    messageCount,
    messageTokens,
    durationMs,
    durationMsIncludingRetries,
    attempt,
    provider: getAPIProviderForStatsig(),
    requestId:
      (requestId as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS) ||
      undefined,
    ...(invocation
      ? {
          invokingRequestId:
            invocation.invokingRequestId as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
          invocationKind:
            invocation.invocationKind as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
        }
      : {}),
    clientRequestId:
      (clientRequestId as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS) ||
      undefined,
    didFallBackToNonStreaming,
    ...(promptCategory
      ? {
          promptCategory:
            promptCategory as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
        }
      : {}),
    ...(gateway
      ? {
          gateway:
            gateway as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
        }
      : {}),
    ...(queryTracking
      ? {
          queryChainId:
            queryTracking.chainId as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
          queryDepth: queryTracking.depth,
        }
      : {}),
    ...(querySource
      ? {
          querySource:
            querySource as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
        }
      : {}),
    fastMode,
    ...(previousRequestId
      ? {
          previousRequestId:
            previousRequestId as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
        }
      : {}),
    ...getAnthropicEnvMetadata(),
  })
  // Log API error event for OTLP
  // NOTE(review): String(status) yields the literal "undefined" when status
  // is absent (non-APIError failures) — confirm that's intended for OTLP.
  void logOTelEvent('api_error', {
    model: model,
    error: errStr,
    status_code: String(status),
    duration_ms: String(durationMs),
    attempt: String(attempt),
    speed: fastMode ? 'fast' : 'normal',
  })
  // Pass the span to correctly match responses to requests when beta tracing is enabled
  endLLMRequestSpan(llmSpan, {
    success: false,
    statusCode: status ? parseInt(status) : undefined,
    error: errStr,
    attempt,
  })
  // Log first error for teleported sessions (reliability tracking)
  const teleportInfo = getTeleportedSessionInfo()
  if (teleportInfo?.isTeleported && !teleportInfo.hasLoggedFirstMessage) {
    logEvent('tengu_teleport_first_message_error', {
      session_id:
        teleportInfo.sessionId as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
      error_type:
        errorType as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
    })
    markFirstTeleportMessageLogged()
  }
}
/**
 * Emit the `tengu_api_success` analytics event for one completed API call.
 *
 * Bundles model info, token usage (input/output/cached/uncached), timing
 * (duration, TTFT, time since the previous call), retry attempt, cost,
 * session flags (non-interactive, --print, TTY, post-compaction), query
 * provenance, content-length breakdowns, gateway detection, and env
 * overrides. Also advances the last-API-completion timestamp used to
 * compute `timeSinceLastApiCallMs` on the next call. Optional fields are
 * conditionally spread so absent values are omitted from the event.
 */
function logAPISuccess({
  model,
  preNormalizedModel,
  messageCount,
  messageTokens,
  usage,
  durationMs,
  durationMsIncludingRetries,
  attempt,
  ttftMs,
  requestId,
  stopReason,
  costUSD,
  didFallBackToNonStreaming,
  querySource,
  gateway,
  queryTracking,
  permissionMode,
  globalCacheStrategy,
  textContentLength,
  thinkingContentLength,
  toolUseContentLengths,
  connectorTextBlockCount,
  fastMode,
  previousRequestId,
  betas,
}: {
  model: string
  preNormalizedModel: string
  messageCount: number
  messageTokens: number
  usage: Usage
  durationMs: number
  durationMsIncludingRetries: number
  attempt: number
  ttftMs: number | null
  requestId: string | null
  stopReason: BetaStopReason | null
  costUSD: number
  didFallBackToNonStreaming: boolean
  querySource: string
  gateway?: KnownGateway
  queryTracking?: QueryChainTracking
  permissionMode?: PermissionMode
  globalCacheStrategy?: GlobalCacheStrategy
  textContentLength?: number
  thinkingContentLength?: number
  toolUseContentLengths?: Record<string, number>
  connectorTextBlockCount?: number
  fastMode?: boolean
  previousRequestId?: string | null
  betas?: string[]
}): void {
  const isNonInteractiveSession = getIsNonInteractiveSession()
  const isPostCompaction = consumePostCompaction()
  const hasPrintFlag =
    process.argv.includes('-p') || process.argv.includes('--print')
  const now = Date.now()
  const lastCompletion = getLastApiCompletionTimestamp()
  // undefined on the first API call of the session
  const timeSinceLastApiCallMs =
    lastCompletion !== null ? now - lastCompletion : undefined
  const invocation = consumeInvokingRequestId()
  logEvent('tengu_api_success', {
    model: model as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
    // Only logged when normalization actually changed the model name
    ...(preNormalizedModel !== model
      ? {
          preNormalizedModel:
            preNormalizedModel as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
        }
      : {}),
    ...(betas?.length
      ? {
          betas: betas.join(
            ',',
          ) as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
        }
      : {}),
    messageCount,
    messageTokens,
    inputTokens: usage.input_tokens,
    outputTokens: usage.output_tokens,
    cachedInputTokens: usage.cache_read_input_tokens ?? 0,
    uncachedInputTokens: usage.cache_creation_input_tokens ?? 0,
    durationMs: durationMs,
    durationMsIncludingRetries: durationMsIncludingRetries,
    attempt: attempt,
    ttftMs: ttftMs ?? undefined,
    buildAgeMins: getBuildAgeMinutes(),
    provider: getAPIProviderForStatsig(),
    requestId:
      (requestId as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS) ??
      undefined,
    ...(invocation
      ? {
          invokingRequestId:
            invocation.invokingRequestId as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
          invocationKind:
            invocation.invocationKind as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
        }
      : {}),
    stop_reason:
      (stopReason as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS) ??
      undefined,
    costUSD,
    didFallBackToNonStreaming,
    isNonInteractiveSession,
    print: hasPrintFlag,
    isTTY: process.stdout.isTTY ?? false,
    querySource:
      querySource as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
    ...(gateway
      ? {
          gateway:
            gateway as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
        }
      : {}),
    ...(queryTracking
      ? {
          queryChainId:
            queryTracking.chainId as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
          queryDepth: queryTracking.depth,
        }
      : {}),
    permissionMode:
      permissionMode as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
    ...(globalCacheStrategy
      ? {
          globalCacheStrategy:
            globalCacheStrategy as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
        }
      : {}),
    ...(textContentLength !== undefined
      ? ({
          textContentLength,
        } as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS)
      : {}),
    ...(thinkingContentLength !== undefined
      ? ({
          thinkingContentLength,
        } as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS)
      : {}),
    ...(toolUseContentLengths !== undefined
      ? ({
          toolUseContentLengths: jsonStringify(
            toolUseContentLengths,
          ) as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
        } as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS)
      : {}),
    ...(connectorTextBlockCount !== undefined
      ? ({
          connectorTextBlockCount,
        } as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS)
      : {}),
    fastMode,
    // Log cache_deleted_input_tokens for cache editing analysis. Casts needed
    // because the field is intentionally not on NonNullableUsage (excluded from
    // external builds). Set by updateUsage() when cache editing is active.
    ...(feature('CACHED_MICROCOMPACT') &&
    ((usage as unknown as { cache_deleted_input_tokens?: number })
      .cache_deleted_input_tokens ?? 0) > 0
      ? {
          cacheDeletedInputTokens: (
            usage as unknown as { cache_deleted_input_tokens: number }
          ).cache_deleted_input_tokens,
        }
      : {}),
    ...(previousRequestId
      ? {
          previousRequestId:
            previousRequestId as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
        }
      : {}),
    ...(isPostCompaction ? { isPostCompaction } : {}),
    ...getAnthropicEnvMetadata(),
    timeSinceLastApiCallMs,
  })
  setLastApiCompletionTimestamp(now)
}
/**
 * Post-process a successful API response and fan it out to telemetry:
 * computes durations (with/without retries) and accumulates them into
 * session totals, extracts per-block content lengths from the new
 * assistant messages, detects any AI gateway, emits the analytics event
 * (via logAPISuccess), an OTLP `api_request` event, closes the LLM tracing
 * span (including model/thinking output when beta tracing is enabled),
 * and logs the first successful message for teleported sessions.
 */
export function logAPISuccessAndDuration({
  model,
  preNormalizedModel,
  start,
  startIncludingRetries,
  ttftMs,
  usage,
  attempt,
  messageCount,
  messageTokens,
  requestId,
  stopReason,
  didFallBackToNonStreaming,
  querySource,
  headers,
  costUSD,
  queryTracking,
  permissionMode,
  newMessages,
  llmSpan,
  globalCacheStrategy,
  requestSetupMs,
  attemptStartTimes,
  fastMode,
  previousRequestId,
  betas,
}: {
  model: string
  preNormalizedModel: string
  start: number
  startIncludingRetries: number
  ttftMs: number | null
  usage: NonNullableUsage
  attempt: number
  messageCount: number
  messageTokens: number
  requestId: string | null
  stopReason: BetaStopReason | null
  didFallBackToNonStreaming: boolean
  querySource: string
  headers?: globalThis.Headers
  costUSD: number
  queryTracking?: QueryChainTracking
  permissionMode?: PermissionMode
  /** Assistant messages from the response - used to extract model_output and thinking_output
   * when beta tracing is enabled */
  newMessages?: AssistantMessage[]
  /** The span from startLLMRequestSpan - pass this to correctly match responses to requests */
  llmSpan?: Span
  /** Strategy used for global prompt caching: 'tool_based', 'system_prompt', or 'none' */
  globalCacheStrategy?: GlobalCacheStrategy
  /** Time spent in pre-request setup before the successful attempt */
  requestSetupMs?: number
  /** Timestamps (Date.now()) of each attempt start — used for retry sub-spans in Perfetto */
  attemptStartTimes?: number[]
  fastMode?: boolean
  /** Request ID from the previous API call in this session */
  previousRequestId?: string | null
  betas?: string[]
}): void {
  const gateway = detectGateway({
    headers,
    baseUrl: process.env.ANTHROPIC_BASE_URL,
  })
  // Tally content-block sizes from the response for analytics. Thinking,
  // tool-use, and connector counters are reported as undefined when zero.
  let textContentLength: number | undefined
  let thinkingContentLength: number | undefined
  let toolUseContentLengths: Record<string, number> | undefined
  let connectorTextBlockCount: number | undefined
  if (newMessages) {
    let textLen = 0
    let thinkingLen = 0
    let hasToolUse = false
    const toolLengths: Record<string, number> = {}
    let connectorCount = 0
    for (const msg of newMessages) {
      for (const block of msg.message.content) {
        if (block.type === 'text') {
          textLen += block.text.length
        } else if (feature('CONNECTOR_TEXT') && isConnectorTextBlock(block)) {
          connectorCount++
        } else if (block.type === 'thinking') {
          thinkingLen += block.thinking.length
        } else if (
          block.type === 'tool_use' ||
          block.type === 'server_tool_use' ||
          block.type === 'mcp_tool_use'
        ) {
          // Tool input size bucketed per sanitized tool name
          const inputLen = jsonStringify(block.input).length
          const sanitizedName = sanitizeToolNameForAnalytics(block.name)
          toolLengths[sanitizedName] =
            (toolLengths[sanitizedName] ?? 0) + inputLen
          hasToolUse = true
        }
      }
    }
    textContentLength = textLen
    thinkingContentLength = thinkingLen > 0 ? thinkingLen : undefined
    toolUseContentLengths = hasToolUse ? toolLengths : undefined
    connectorTextBlockCount = connectorCount > 0 ? connectorCount : undefined
  }
  const durationMs = Date.now() - start
  const durationMsIncludingRetries = Date.now() - startIncludingRetries
  addToTotalDurationState(durationMsIncludingRetries, durationMs)
  logAPISuccess({
    model,
    preNormalizedModel,
    messageCount,
    messageTokens,
    usage,
    durationMs,
    durationMsIncludingRetries,
    attempt,
    ttftMs,
    requestId,
    stopReason,
    costUSD,
    didFallBackToNonStreaming,
    querySource,
    gateway,
    queryTracking,
    permissionMode,
    globalCacheStrategy,
    textContentLength,
    thinkingContentLength,
    toolUseContentLengths,
    connectorTextBlockCount,
    fastMode,
    previousRequestId,
    betas,
  })
  // Log API request event for OTLP
  void logOTelEvent('api_request', {
    model,
    input_tokens: String(usage.input_tokens),
    output_tokens: String(usage.output_tokens),
    cache_read_tokens: String(usage.cache_read_input_tokens),
    cache_creation_tokens: String(usage.cache_creation_input_tokens),
    cost_usd: String(costUSD),
    duration_ms: String(durationMs),
    speed: fastMode ? 'fast' : 'normal',
  })
  // Extract model output, thinking output, and tool call flag when beta tracing is enabled
  let modelOutput: string | undefined
  let thinkingOutput: string | undefined
  let hasToolCall: boolean | undefined
  if (isBetaTracingEnabled() && newMessages) {
    // Model output - visible to all users
    modelOutput =
      newMessages
        .flatMap(m =>
          m.message.content
            .filter(c => c.type === 'text')
            .map(c => (c as { type: 'text'; text: string }).text),
        )
        .join('\n') || undefined
    // Thinking output - Ant-only (build-time gated)
    if (process.env.USER_TYPE === 'ant') {
      thinkingOutput =
        newMessages
          .flatMap(m =>
            m.message.content
              .filter(c => c.type === 'thinking')
              .map(c => (c as { type: 'thinking'; thinking: string }).thinking),
          )
          .join('\n') || undefined
    }
    // Check if any tool_use blocks were in the output
    hasToolCall = newMessages.some(m =>
      m.message.content.some(c => c.type === 'tool_use'),
    )
  }
  // Pass the span to correctly match responses to requests when beta tracing is enabled
  endLLMRequestSpan(llmSpan, {
    success: true,
    inputTokens: usage.input_tokens,
    outputTokens: usage.output_tokens,
    cacheReadTokens: usage.cache_read_input_tokens,
    cacheCreationTokens: usage.cache_creation_input_tokens,
    attempt,
    modelOutput,
    thinkingOutput,
    hasToolCall,
    ttftMs: ttftMs ?? undefined,
    requestSetupMs,
    attemptStartTimes,
  })
  // Log first successful message for teleported sessions (reliability tracking)
  const teleportInfo = getTeleportedSessionInfo()
  if (teleportInfo?.isTeleported && !teleportInfo.hasLoggedFirstMessage) {
    logEvent('tengu_teleport_first_message_success', {
      session_id:
        teleportInfo.sessionId as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
    })
    markFirstTeleportMessageLogged()
  }
}
+159
View File
@@ -0,0 +1,159 @@
import axios from 'axios'
import { hasProfileScope, isClaudeAISubscriber } from '../../utils/auth.js'
import { getGlobalConfig, saveGlobalConfig } from '../../utils/config.js'
import { logForDebugging } from '../../utils/debug.js'
import { errorMessage } from '../../utils/errors.js'
import { getAuthHeaders, withOAuth401Retry } from '../../utils/http.js'
import { logError } from '../../utils/log.js'
import { memoizeWithTTLAsync } from '../../utils/memoize.js'
import { isEssentialTrafficOnly } from '../../utils/privacyLevel.js'
import { getClaudeCodeUserAgent } from '../../utils/userAgent.js'
/** Response body of the organizations/metrics_enabled endpoint. */
type MetricsEnabledResponse = {
  metrics_logging_enabled: boolean
}
/** Result surfaced to callers: the enabled flag plus whether the lookup errored. */
type MetricsStatus = {
  enabled: boolean
  hasError: boolean
}
// In-memory TTL — dedupes calls within a single process
const CACHE_TTL_MS = 60 * 60 * 1000
// Disk TTL — org settings rarely change. When disk cache is fresher than this,
// we skip the network entirely (no background refresh). This is what collapses
// N `claude -p` invocations into ~1 API call/day.
const DISK_CACHE_TTL_MS = 24 * 60 * 60 * 1000
/**
* Internal function to call the API and check if metrics are enabled
* This is wrapped by memoizeWithTTLAsync to add caching behavior
*/
/**
 * Call the metrics_enabled endpoint directly — no caching at this layer.
 * Wrapped by memoizeWithTTLAsync to add in-process caching behavior.
 * @throws when auth headers are unavailable or the HTTP request fails.
 */
async function _fetchMetricsEnabled(): Promise<MetricsEnabledResponse> {
  const auth = getAuthHeaders()
  if (auth.error) {
    throw new Error(`Auth error: ${auth.error}`)
  }
  const endpoint = `https://api.anthropic.com/api/claude_code/organizations/metrics_enabled`
  const response = await axios.get<MetricsEnabledResponse>(endpoint, {
    headers: {
      'Content-Type': 'application/json',
      'User-Agent': getClaudeCodeUserAgent(),
      ...auth.headers,
    },
    timeout: 5000,
  })
  return response.data
}
/**
 * Check with the API whether metrics logging is enabled for this org.
 * Never throws: failures are logged and reported as { enabled: false,
 * hasError: true } so callers can distinguish "off" from "unknown".
 */
async function _checkMetricsEnabledAPI(): Promise<MetricsStatus> {
  // Incident kill switch: skip the network call when nonessential traffic is
  // disabled. Returning enabled:false sheds load at the consumer
  // (bigqueryExporter skips export). Matches the non-subscriber shape.
  if (isEssentialTrafficOnly()) {
    return { enabled: false, hasError: false }
  }
  try {
    const data = await withOAuth401Retry(_fetchMetricsEnabled, {
      also403Revoked: true,
    })
    logForDebugging(
      `Metrics opt-out API response: enabled=${data.metrics_logging_enabled}`,
    )
    return { enabled: data.metrics_logging_enabled, hasError: false }
  } catch (error) {
    logForDebugging(
      `Failed to check metrics opt-out status: ${errorMessage(error)}`,
    )
    logError(error)
    return { enabled: false, hasError: true }
  }
}
// In-process memoized check (1h TTL) — dedupes repeat calls within one CLI process.
const memoizedCheckMetrics = memoizeWithTTLAsync(
  _checkMetricsEnabledAPI,
  CACHE_TTL_MS,
)
/**
* Fetch (in-memory memoized) and persist to disk on change.
* Errors are not persisted — a transient failure should not overwrite a
* known-good disk value.
*/
/**
 * Fetch (in-memory memoized) and persist to disk on change.
 * Errors are not persisted — a transient failure should not overwrite a
 * known-good disk value.
 */
async function refreshMetricsStatus(): Promise<MetricsStatus> {
  const status = await memoizedCheckMetrics()
  if (status.hasError) {
    return status
  }
  const diskEntry = getGlobalConfig().metricsStatusCache
  const sameValue = diskEntry !== undefined && diskEntry.enabled === status.enabled
  // Skip write when unchanged AND timestamp still fresh — avoids config churn
  // when concurrent callers race past a stale disk entry and all try to write.
  if (sameValue && Date.now() - diskEntry.timestamp < DISK_CACHE_TTL_MS) {
    return status
  }
  saveGlobalConfig(current => ({
    ...current,
    metricsStatusCache: {
      enabled: status.enabled,
      timestamp: Date.now(),
    },
  }))
  return status
}
/**
* Check if metrics are enabled for the current organization.
*
* Two-tier cache:
* - Disk (24h TTL): survives process restarts. Fresh disk cache → zero network.
* - In-memory (1h TTL): dedupes the background refresh within a process.
*
* The caller (bigqueryExporter) tolerates stale reads — a missed export or
* an extra one during the 24h window is acceptable.
*/
/**
 * Check if metrics are enabled for the current organization.
 *
 * Two-tier cache:
 * - Disk (24h TTL): survives process restarts. Fresh disk cache → zero network.
 * - In-memory (1h TTL): dedupes the background refresh within a process.
 *
 * The caller (bigqueryExporter) tolerates stale reads — a missed export or
 * an extra one during the 24h window is acceptable.
 */
export async function checkMetricsEnabled(): Promise<MetricsStatus> {
  // Service key OAuth sessions lack user:profile scope → would 403.
  // API key users (non-subscribers) fall through and use x-api-key auth.
  // This check runs before the disk read so we never persist auth-state-derived
  // answers — only real API responses go to disk. Otherwise a service-key
  // session would poison the cache for a later full-OAuth session.
  if (isClaudeAISubscriber() && !hasProfileScope()) {
    return { enabled: false, hasError: false }
  }
  const diskEntry = getGlobalConfig().metricsStatusCache
  if (!diskEntry) {
    // First-ever run on this machine: block on the network to populate disk.
    return refreshMetricsStatus()
  }
  if (Date.now() - diskEntry.timestamp > DISK_CACHE_TTL_MS) {
    // Stale: serve the cached value but refresh in the background.
    // saveGlobalConfig's fallback path (config.ts:731) can throw if both
    // locked and fallback writes fail — catch here so fire-and-forget
    // doesn't become an unhandled rejection.
    void refreshMetricsStatus().catch(logError)
  }
  return { enabled: diskEntry.enabled, hasError: false }
}
// Export for testing purposes only
/** Reset the in-memory TTL cache so tests can force a fresh API call. */
export const _clearMetricsEnabledCacheForTesting = (): void => {
  memoizedCheckMetrics.cache.clear()
}
+137
View File
@@ -0,0 +1,137 @@
import axios from 'axios'
import { getOauthConfig } from '../../constants/oauth.js'
import { getOauthAccountInfo } from '../../utils/auth.js'
import { getGlobalConfig, saveGlobalConfig } from '../../utils/config.js'
import { logError } from '../../utils/log.js'
import { isEssentialTrafficOnly } from '../../utils/privacyLevel.js'
import { getOAuthHeaders, prepareApiRequest } from '../../utils/teleport/api.js'
/** Backend-resolved overage credit grant state for the current user/org. */
export type OverageCreditGrantInfo = {
  available: boolean
  eligible: boolean
  granted: boolean
  // null when no tier-specific amount applies
  amount_minor_units: number | null
  currency: string | null
}
/** Per-org cache entry: grant info plus the write time used for TTL checks. */
type CachedGrantEntry = {
  info: OverageCreditGrantInfo
  timestamp: number
}
const CACHE_TTL_MS = 60 * 60 * 1000 // 1 hour
/**
* Fetch the current user's overage credit grant eligibility from the backend.
* The backend resolves tier-specific amounts and role-based claim permission,
* so the CLI just reads the response without replicating that logic.
*/
/**
 * Fetch the current user's overage credit grant eligibility from the backend.
 * The backend resolves tier-specific amounts and role-based claim permission,
 * so the CLI just reads the response without replicating that logic.
 * Returns null on any failure (logged, never thrown).
 */
async function fetchOverageCreditGrant(): Promise<OverageCreditGrantInfo | null> {
  try {
    const { accessToken, orgUUID } = await prepareApiRequest()
    const url = `${getOauthConfig().BASE_API_URL}/api/oauth/organizations/${orgUUID}/overage_credit_grant`
    const { data } = await axios.get<OverageCreditGrantInfo>(url, {
      headers: getOAuthHeaders(accessToken),
    })
    return data
  } catch (err) {
    logError(err)
    return null
  }
}
/**
* Get cached grant info. Returns null if no cache or cache is stale.
* Callers should render nothing (not block) when this returns null —
* refreshOverageCreditGrantCache fires lazily to populate it.
*/
/**
 * Get cached grant info. Returns null if no cache or cache is stale.
 * Callers should render nothing (not block) when this returns null —
 * refreshOverageCreditGrantCache fires lazily to populate it.
 */
export function getCachedOverageCreditGrant(): OverageCreditGrantInfo | null {
  const orgId = getOauthAccountInfo()?.organizationUuid
  if (!orgId) {
    return null
  }
  const entry = getGlobalConfig().overageCreditGrantCache?.[orgId]
  // Missing or expired entries both read as "no cache".
  if (!entry || Date.now() - entry.timestamp > CACHE_TTL_MS) {
    return null
  }
  return entry.info
}
/**
* Drop the current org's cached entry so the next read refetches.
* Leaves other orgs' entries intact.
*/
/**
 * Drop the current org's cached entry so the next read refetches.
 * Leaves other orgs' entries intact; no-op when nothing is cached.
 */
export function invalidateOverageCreditGrantCache(): void {
  const orgId = getOauthAccountInfo()?.organizationUuid
  if (!orgId) {
    return
  }
  const cache = getGlobalConfig().overageCreditGrantCache
  if (!cache || !(orgId in cache)) {
    return
  }
  saveGlobalConfig(prev => {
    // Rebuild the map without this org's entry.
    const { [orgId]: _removed, ...remaining } =
      prev.overageCreditGrantCache ?? {}
    return { ...prev, overageCreditGrantCache: remaining }
  })
}
/**
 * Fetch and cache grant info. Fire-and-forget; call when an upsell surface
 * is about to render and the cache is empty.
 *
 * No-ops when nonessential traffic is disabled, when the user has no org,
 * or when the fetch fails (fetchOverageCreditGrant logs and returns null).
 */
export async function refreshOverageCreditGrantCache(): Promise<void> {
  if (isEssentialTrafficOnly()) return
  const orgId = getOauthAccountInfo()?.organizationUuid
  if (!orgId) return
  const info = await fetchOverageCreditGrant()
  if (!info) return
  // Skip rewriting info if grant data is unchanged — avoids config write
  // amplification (inc-4552 pattern). Still refresh the timestamp so the
  // TTL-based staleness check in getCachedOverageCreditGrant doesn't keep
  // re-triggering API calls on every component mount.
  saveGlobalConfig(prev => {
    // Derive from prev (lock-fresh) rather than a pre-lock getGlobalConfig()
    // read — saveConfigWithLock re-reads config from disk under the file lock,
    // so another CLI instance may have written between any outer read and lock
    // acquire.
    const prevCached = prev.overageCreditGrantCache?.[orgId]
    const existing = prevCached?.info
    // Field-by-field compare; dataUnchanged is falsy when no prior entry exists.
    const dataUnchanged =
      existing &&
      existing.available === info.available &&
      existing.eligible === info.eligible &&
      existing.granted === info.granted &&
      existing.amount_minor_units === info.amount_minor_units &&
      existing.currency === info.currency
    // When data is unchanged and timestamp is still fresh, skip the write entirely
    if (
      dataUnchanged &&
      prevCached &&
      Date.now() - prevCached.timestamp <= CACHE_TTL_MS
    ) {
      return prev
    }
    // Reuse the existing info object when unchanged (stable reference),
    // but always stamp the current time to extend freshness.
    const entry: CachedGrantEntry = {
      info: dataUnchanged ? existing : info,
      timestamp: Date.now(),
    }
    return {
      ...prev,
      overageCreditGrantCache: {
        ...prev.overageCreditGrantCache,
        [orgId]: entry,
      },
    }
  })
}
/**
 * Format the grant amount for display. Returns null if amount isn't available
 * (not eligible, or currency we don't know how to format).
 */
export function formatGrantAmount(info: OverageCreditGrantInfo): string | null {
  const { amount_minor_units: minorUnits, currency } = info
  if (minorUnits == null || !currency) return null
  // For now only USD; backend may expand later
  if (currency.toUpperCase() !== 'USD') return null
  const dollars = minorUnits / 100
  if (Number.isInteger(dollars)) return `$${dollars}`
  return `$${dollars.toFixed(2)}`
}
// Public alias for the private cache entry shape — presumably consumed by the
// config typing that persists overageCreditGrantCache; confirm at import sites.
export type { CachedGrantEntry as OverageCreditGrantCacheEntry }
+727
View File
@@ -0,0 +1,727 @@
import type { BetaToolUnion } from '@anthropic-ai/sdk/resources/beta/messages/messages.mjs'
import type { TextBlockParam } from '@anthropic-ai/sdk/resources/index.mjs'
import { createPatch } from 'diff'
import { mkdir, writeFile } from 'fs/promises'
import { join } from 'path'
import type { AgentId } from 'src/types/ids.js'
import type { Message } from 'src/types/message.js'
import { logForDebugging } from 'src/utils/debug.js'
import { djb2Hash } from 'src/utils/hash.js'
import { logError } from 'src/utils/log.js'
import { getClaudeTempDir } from 'src/utils/permissions/filesystem.js'
import { jsonStringify } from 'src/utils/slowOperations.js'
import type { QuerySource } from '../../constants/querySource.js'
import {
type AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
logEvent,
} from '../analytics/index.js'
/** Build a temp-file path like `<claude-tmp>/cache-break-ab3z.diff` with a
 * random 4-char suffix to avoid clobbering earlier diffs. */
function getCacheBreakDiffPath(): string {
  const alphabet = 'abcdefghijklmnopqrstuvwxyz0123456789'
  const suffix = Array.from(
    { length: 4 },
    () => alphabet[Math.floor(Math.random() * alphabet.length)],
  ).join('')
  return join(getClaudeTempDir(), `cache-break-${suffix}.diff`)
}
/** Last-observed request snapshot for one tracking key. Phase 1
 * (recordPromptState) diffs the incoming snapshot against this and updates it;
 * phase 2 (checkResponseForCacheBreak) reads pendingChanges/prevCacheReadTokens
 * to explain an observed cache-token drop. */
type PreviousState = {
  // Hash of system blocks with cache_control stripped.
  systemHash: number
  // Hash of tool schemas with cache_control stripped.
  toolsHash: number
  /** Hash of system blocks WITH cache_control intact. Catches scope/TTL flips
   * (global↔org, 1h↔5m) that stripCacheControl erases from systemHash. */
  cacheControlHash: number
  toolNames: string[]
  /** Per-tool schema hash. Diffed to name which tool's description changed
   * when toolSchemasChanged but added=removed=0 (77% of tool breaks per
   * BQ 2026-03-22). AgentTool/SkillTool embed dynamic agent/command lists. */
  perToolHashes: Record<string, number>
  // Total characters across system blocks; used for delta reporting only.
  systemCharCount: number
  model: string
  fastMode: boolean
  /** 'tool_based' | 'system_prompt' | 'none' — flips when MCP tools are
   * discovered/removed. */
  globalCacheStrategy: string
  /** Sorted beta header list. Diffed to show which headers were added/removed. */
  betas: string[]
  /** AFK_MODE_BETA_HEADER presence — should NOT break cache anymore
   * (sticky-on latched in claude.ts). Tracked to verify the fix. */
  autoModeActive: boolean
  /** Overage state flip — should NOT break cache anymore (eligibility is
   * latched session-stable in should1hCacheTTL). Tracked to verify the fix. */
  isUsingOverage: boolean
  /** Cache-editing beta header presence — should NOT break cache anymore
   * (sticky-on latched in claude.ts). Tracked to verify the fix. */
  cachedMCEnabled: boolean
  /** Resolved effort (env → options → model default). Goes into output_config
   * or anthropic_internal.effort_override. */
  effortValue: string
  /** Hash of getExtraBodyParams() — catches CLAUDE_CODE_EXTRA_BODY and
   * anthropic_internal changes. */
  extraBodyHash: number
  // Number of recorded calls for this key (first record = 1).
  callCount: number
  // Set by phase 1 when any tracked field changed; consumed by phase 2.
  pendingChanges: PendingChanges | null
  // Cache-read tokens from the previous response; null until first response
  // (and reset by notifyCompaction).
  prevCacheReadTokens: number | null
  /** Set when cached microcompact sends cache_edits deletions. Cache reads
   * will legitimately drop — this is expected, not a break. */
  cacheDeletionsPending: boolean
  // Lazy serializer of the full prompt state, used to write a debug diff.
  buildDiffableContent: () => string
}
/** Delta computed by phase 1 between the previous and current snapshots.
 * Held on PreviousState until phase 2 either attributes a cache break to it
 * or discards it (no break observed). */
type PendingChanges = {
  systemPromptChanged: boolean
  toolSchemasChanged: boolean
  modelChanged: boolean
  fastModeChanged: boolean
  cacheControlChanged: boolean
  globalCacheStrategyChanged: boolean
  betasChanged: boolean
  autoModeChanged: boolean
  overageChanged: boolean
  cachedMCChanged: boolean
  effortChanged: boolean
  extraBodyChanged: boolean
  addedToolCount: number
  removedToolCount: number
  // Character-count delta of the system prompt (new minus previous).
  systemCharDelta: number
  addedTools: string[]
  removedTools: string[]
  // Tools present in both snapshots whose per-tool hash differs.
  changedToolSchemas: string[]
  previousModel: string
  newModel: string
  prevGlobalCacheStrategy: string
  newGlobalCacheStrategy: string
  addedBetas: string[]
  removedBetas: string[]
  prevEffortValue: string
  newEffortValue: string
  // Serializer for the PREVIOUS prompt state, captured before the baseline
  // was overwritten — paired with the current one to produce a debug diff.
  buildPrevDiffableContent: () => string
}
// Tracking state per key (see getTrackingKey): main thread, SDK, or agentId.
const previousStateBySource = new Map<string, PreviousState>()
// Cap the number of tracked sources to prevent unbounded memory growth.
// Each entry stores a ~300KB+ diffableContent string (serialized system prompt
// + tool schemas). Without a cap, spawning many subagents (each with a unique
// agentId key) causes the map to grow indefinitely.
const MAX_TRACKED_SOURCES = 10
// Query-source prefixes that participate in cache break detection; everything
// else (short-lived forked agents) is ignored — see getTrackingKey.
const TRACKED_SOURCE_PREFIXES = [
  'repl_main_thread',
  'sdk',
  'agent:custom',
  'agent:default',
  'agent:builtin',
]
// Minimum absolute token drop required to trigger a cache break warning.
// Small drops (e.g., a few thousand tokens) can happen due to normal variation
// and aren't worth alerting on.
const MIN_CACHE_MISS_TOKENS = 2_000
// Anthropic's server-side prompt cache TTL thresholds to test.
// Cache breaks after these durations are likely due to TTL expiration
// rather than client-side changes.
const CACHE_TTL_5MIN_MS = 5 * 60 * 1000
export const CACHE_TTL_1HOUR_MS = 60 * 60 * 1000
// Models to exclude from cache break detection (e.g., haiku has different caching behavior)
function isExcludedModel(model: string): boolean {
  return /haiku/.test(model)
}
/**
 * Returns the tracking key for a querySource, or null if untracked.
 * Compact shares the same server-side cache as repl_main_thread
 * (same cacheSafeParams), so they share tracking state.
 *
 * For subagents with a tracked querySource, uses the unique agentId to
 * isolate tracking state. This prevents false positive cache break
 * notifications when multiple instances of the same agent type run
 * concurrently.
 *
 * Untracked sources (speculation, session_memory, prompt_suggestion, etc.)
 * are short-lived forked agents where cache break detection provides no
 * value — they run 1-3 turns with a fresh agentId each time, so there's
 * nothing meaningful to compare against. Their cache metrics are still
 * logged via tengu_api_success for analytics.
 */
function getTrackingKey(
  querySource: QuerySource,
  agentId?: AgentId,
): string | null {
  if (querySource === 'compact') return 'repl_main_thread'
  const tracked = TRACKED_SOURCE_PREFIXES.some(prefix =>
    querySource.startsWith(prefix),
  )
  if (!tracked) return null
  return agentId || querySource
}
/** Return a copy of `items` with any `cache_control` property removed.
 * Items without the property are passed through by reference. */
function stripCacheControl(
  items: ReadonlyArray<Record<string, unknown>>,
): unknown[] {
  const result: unknown[] = []
  for (const item of items) {
    if ('cache_control' in item) {
      const copy = { ...item }
      delete copy.cache_control
      result.push(copy)
    } else {
      result.push(item)
    }
  }
  return result
}
/** Hash arbitrary data via its JSON serialization. Uses Bun's native hash
 * when available, otherwise a pure-JS djb2 fallback. */
function computeHash(data: unknown): number {
  const serialized = jsonStringify(data)
  if (typeof Bun === 'undefined') {
    // Fallback for non-Bun runtimes (e.g. Node.js via npm global install)
    return djb2Hash(serialized)
  }
  const hashed = Bun.hash(serialized)
  // Bun.hash can return bigint for large inputs; convert to number safely
  if (typeof hashed === 'bigint') {
    return Number(hashed & 0xffffffffn)
  }
  return hashed
}
/** MCP tool names are user-controlled (server config) and may leak filepaths.
 * Collapse them to 'mcp'; built-in names are a fixed vocabulary. */
function sanitizeToolName(name: string): string {
  if (name.startsWith('mcp__')) return 'mcp'
  return name
}
/** Map each stripped tool schema to its hash, keyed by the parallel `names`
 * array (falling back to `__idx_N` when a name is missing). */
function computePerToolHashes(
  strippedTools: ReadonlyArray<unknown>,
  names: string[],
): Record<string, number> {
  const result: Record<string, number> = {}
  strippedTools.forEach((tool, idx) => {
    const key = names[idx] ?? `__idx_${idx}`
    result[key] = computeHash(tool)
  })
  return result
}
/** Total character count across all system prompt text blocks. */
function getSystemCharCount(system: TextBlockParam[]): number {
  return system.reduce((sum, block) => sum + block.text.length, 0)
}
/** Serialize the full prompt state (model + system text + sorted tool
 * descriptions/schemas) into a single string suitable for text diffing. */
function buildDiffableContent(
  system: TextBlockParam[],
  tools: BetaToolUnion[],
  model: string,
): string {
  const systemText = system.map(b => b.text).join('\n\n')
  const toolEntries: string[] = []
  for (const tool of tools) {
    if (!('name' in tool)) {
      toolEntries.push('unknown')
      continue
    }
    const desc = 'description' in tool ? tool.description : ''
    const schema = 'input_schema' in tool ? jsonStringify(tool.input_schema) : ''
    toolEntries.push(`${tool.name}\n description: ${desc}\n input_schema: ${schema}`)
  }
  const toolDetails = toolEntries.sort().join('\n\n')
  return `Model: ${model}\n\n=== System Prompt ===\n\n${systemText}\n\n=== Tools (${tools.length}) ===\n\n${toolDetails}\n`
}
/** Extended tracking snapshot — everything that could affect the server-side
 * cache key that we can observe from the client. All fields are optional so
 * the call site can add incrementally; undefined fields compare as stable. */
export type PromptStateSnapshot = {
  // System prompt blocks exactly as sent to the API (cache_control intact).
  system: TextBlockParam[]
  // Tool definitions exactly as sent to the API.
  toolSchemas: BetaToolUnion[]
  querySource: QuerySource
  model: string
  // Unique per-subagent id; keys tracking state for tracked agent sources.
  agentId?: AgentId
  fastMode?: boolean
  globalCacheStrategy?: string
  // Beta header names; order-insensitive (sorted before comparison).
  betas?: readonly string[]
  autoModeActive?: boolean
  isUsingOverage?: boolean
  cachedMCEnabled?: boolean
  // Stringified before comparison, so number vs string is irrelevant.
  effortValue?: string | number
  // Hashed opaquely; any shape accepted.
  extraBodyParams?: unknown
}
/**
 * Phase 1 (pre-call): Record the current prompt/tool state and detect what changed.
 * Does NOT fire events — just stores pending changes for phase 2 to use.
 *
 * Never throws: any internal failure is logged and swallowed so tracking can
 * never break the API call path it instruments.
 */
export function recordPromptState(snapshot: PromptStateSnapshot): void {
  try {
    const {
      system,
      toolSchemas,
      querySource,
      model,
      agentId,
      fastMode,
      globalCacheStrategy = '',
      betas = [],
      autoModeActive = false,
      isUsingOverage = false,
      cachedMCEnabled = false,
      effortValue,
      extraBodyParams,
    } = snapshot
    const key = getTrackingKey(querySource, agentId)
    if (!key) return
    const strippedSystem = stripCacheControl(
      system as unknown as ReadonlyArray<Record<string, unknown>>,
    )
    const strippedTools = stripCacheControl(
      toolSchemas as unknown as ReadonlyArray<Record<string, unknown>>,
    )
    const systemHash = computeHash(strippedSystem)
    const toolsHash = computeHash(strippedTools)
    // Hash the full system array INCLUDING cache_control — this catches
    // scope flips (global↔org/none) and TTL flips (1h↔5m) that the stripped
    // hash can't see because the text content is identical.
    const cacheControlHash = computeHash(
      system.map(b => ('cache_control' in b ? b.cache_control : null)),
    )
    const toolNames = toolSchemas.map(t => ('name' in t ? t.name : 'unknown'))
    // Only compute per-tool hashes when the aggregate changed — common case
    // (tools unchanged) skips N extra jsonStringify calls.
    const computeToolHashes = () =>
      computePerToolHashes(strippedTools, toolNames)
    const systemCharCount = getSystemCharCount(system)
    // Serialization is deferred: only materialized if a break is later
    // observed and a debug diff needs to be written.
    const lazyDiffableContent = () =>
      buildDiffableContent(system, toolSchemas, model)
    const isFastMode = fastMode ?? false
    const sortedBetas = [...betas].sort()
    const effortStr = effortValue === undefined ? '' : String(effortValue)
    const extraBodyHash =
      extraBodyParams === undefined ? 0 : computeHash(extraBodyParams)
    const prev = previousStateBySource.get(key)
    if (!prev) {
      // First snapshot for this key: seed the baseline, nothing to diff yet.
      // Evict oldest entries if map is at capacity
      while (previousStateBySource.size >= MAX_TRACKED_SOURCES) {
        const oldest = previousStateBySource.keys().next().value
        if (oldest !== undefined) previousStateBySource.delete(oldest)
      }
      previousStateBySource.set(key, {
        systemHash,
        toolsHash,
        cacheControlHash,
        toolNames,
        systemCharCount,
        model,
        fastMode: isFastMode,
        globalCacheStrategy,
        betas: sortedBetas,
        autoModeActive,
        isUsingOverage,
        cachedMCEnabled,
        effortValue: effortStr,
        extraBodyHash,
        callCount: 1,
        pendingChanges: null,
        prevCacheReadTokens: null,
        cacheDeletionsPending: false,
        buildDiffableContent: lazyDiffableContent,
        perToolHashes: computeToolHashes(),
      })
      return
    }
    prev.callCount++
    const systemPromptChanged = systemHash !== prev.systemHash
    const toolSchemasChanged = toolsHash !== prev.toolsHash
    const modelChanged = model !== prev.model
    const fastModeChanged = isFastMode !== prev.fastMode
    const cacheControlChanged = cacheControlHash !== prev.cacheControlHash
    const globalCacheStrategyChanged =
      globalCacheStrategy !== prev.globalCacheStrategy
    const betasChanged =
      sortedBetas.length !== prev.betas.length ||
      sortedBetas.some((b, i) => b !== prev.betas[i])
    const autoModeChanged = autoModeActive !== prev.autoModeActive
    const overageChanged = isUsingOverage !== prev.isUsingOverage
    const cachedMCChanged = cachedMCEnabled !== prev.cachedMCEnabled
    const effortChanged = effortStr !== prev.effortValue
    const extraBodyChanged = extraBodyHash !== prev.extraBodyHash
    if (
      systemPromptChanged ||
      toolSchemasChanged ||
      modelChanged ||
      fastModeChanged ||
      cacheControlChanged ||
      globalCacheStrategyChanged ||
      betasChanged ||
      autoModeChanged ||
      overageChanged ||
      cachedMCChanged ||
      effortChanged ||
      extraBodyChanged
    ) {
      const prevToolSet = new Set(prev.toolNames)
      const newToolSet = new Set(toolNames)
      const prevBetaSet = new Set(prev.betas)
      const newBetaSet = new Set(sortedBetas)
      const addedTools = toolNames.filter(n => !prevToolSet.has(n))
      const removedTools = prev.toolNames.filter(n => !newToolSet.has(n))
      const changedToolSchemas: string[] = []
      if (toolSchemasChanged) {
        // Name tools present in both snapshots whose schema hash differs.
        const newHashes = computeToolHashes()
        for (const name of toolNames) {
          if (!prevToolSet.has(name)) continue
          if (newHashes[name] !== prev.perToolHashes[name]) {
            changedToolSchemas.push(name)
          }
        }
        prev.perToolHashes = newHashes
      }
      // NOTE: any pendingChanges from an earlier call that phase 2 never
      // consumed are overwritten here — only the latest delta is kept.
      prev.pendingChanges = {
        systemPromptChanged,
        toolSchemasChanged,
        modelChanged,
        fastModeChanged,
        cacheControlChanged,
        globalCacheStrategyChanged,
        betasChanged,
        autoModeChanged,
        overageChanged,
        cachedMCChanged,
        effortChanged,
        extraBodyChanged,
        addedToolCount: addedTools.length,
        removedToolCount: removedTools.length,
        addedTools,
        removedTools,
        changedToolSchemas,
        systemCharDelta: systemCharCount - prev.systemCharCount,
        previousModel: prev.model,
        newModel: model,
        prevGlobalCacheStrategy: prev.globalCacheStrategy,
        newGlobalCacheStrategy: globalCacheStrategy,
        addedBetas: sortedBetas.filter(b => !prevBetaSet.has(b)),
        removedBetas: prev.betas.filter(b => !newBetaSet.has(b)),
        prevEffortValue: prev.effortValue,
        newEffortValue: effortStr,
        buildPrevDiffableContent: prev.buildDiffableContent,
      }
    } else {
      prev.pendingChanges = null
    }
    // Advance the baseline to the current snapshot unconditionally — the next
    // call always diffs against the latest observed state.
    prev.systemHash = systemHash
    prev.toolsHash = toolsHash
    prev.cacheControlHash = cacheControlHash
    prev.toolNames = toolNames
    prev.systemCharCount = systemCharCount
    prev.model = model
    prev.fastMode = isFastMode
    prev.globalCacheStrategy = globalCacheStrategy
    prev.betas = sortedBetas
    prev.autoModeActive = autoModeActive
    prev.isUsingOverage = isUsingOverage
    prev.cachedMCEnabled = cachedMCEnabled
    prev.effortValue = effortStr
    prev.extraBodyHash = extraBodyHash
    prev.buildDiffableContent = lazyDiffableContent
  } catch (e: unknown) {
    logError(e)
  }
}
/**
 * Phase 2 (post-call): Check the API response's cache tokens to determine
 * if a cache break actually occurred. If it did, use the pending changes
 * from phase 1 to explain why.
 *
 * @param cacheReadTokens cache-read token count reported by the API response
 * @param cacheCreationTokens cache-creation token count from the same response
 * @param messages conversation so far; used to estimate the gap since the
 *   previous assistant turn for TTL-expiry attribution
 * @param requestId opaque server-generated request id, forwarded to analytics
 *
 * Never throws: failures are logged and swallowed.
 */
export async function checkResponseForCacheBreak(
  querySource: QuerySource,
  cacheReadTokens: number,
  cacheCreationTokens: number,
  messages: Message[],
  agentId?: AgentId,
  requestId?: string | null,
): Promise<void> {
  try {
    const key = getTrackingKey(querySource, agentId)
    if (!key) return
    const state = previousStateBySource.get(key)
    if (!state) return
    // Skip excluded models (e.g., haiku has different caching behavior)
    if (isExcludedModel(state.model)) return
    // Roll the baseline forward immediately so early returns below still
    // leave the next call comparing against this response.
    const prevCacheRead = state.prevCacheReadTokens
    state.prevCacheReadTokens = cacheReadTokens
    // Calculate time since last call for TTL detection by finding the most recent
    // assistant message timestamp in the messages array (before the current response)
    const lastAssistantMessage = messages.findLast(m => m.type === 'assistant')
    const timeSinceLastAssistantMsg = lastAssistantMessage
      ? Date.now() - new Date(lastAssistantMessage.timestamp).getTime()
      : null
    // Skip the first call — no previous value to compare against
    if (prevCacheRead === null) return
    const changes = state.pendingChanges
    // Cache deletions via cached microcompact intentionally reduce the cached
    // prefix. The drop in cache read tokens is expected — reset the baseline
    // so we don't false-positive on the next call.
    if (state.cacheDeletionsPending) {
      state.cacheDeletionsPending = false
      logForDebugging(
        `[PROMPT CACHE] cache deletion applied, cache read: ${prevCacheRead}${cacheReadTokens} (expected drop)`,
      )
      // Don't flag as a break — the remaining state is still valid
      state.pendingChanges = null
      return
    }
    // Detect a cache break: cache read dropped >5% from previous AND
    // the absolute drop exceeds the minimum threshold.
    const tokenDrop = prevCacheRead - cacheReadTokens
    if (
      cacheReadTokens >= prevCacheRead * 0.95 ||
      tokenDrop < MIN_CACHE_MISS_TOKENS
    ) {
      state.pendingChanges = null
      return
    }
    // Build explanation from pending changes (if any)
    const parts: string[] = []
    if (changes) {
      if (changes.modelChanged) {
        parts.push(
          `model changed (${changes.previousModel}${changes.newModel})`,
        )
      }
      if (changes.systemPromptChanged) {
        const charDelta = changes.systemCharDelta
        const charInfo =
          charDelta === 0
            ? ''
            : charDelta > 0
              ? ` (+${charDelta} chars)`
              : ` (${charDelta} chars)`
        parts.push(`system prompt changed${charInfo}`)
      }
      if (changes.toolSchemasChanged) {
        const toolDiff =
          changes.addedToolCount > 0 || changes.removedToolCount > 0
            ? ` (+${changes.addedToolCount}/-${changes.removedToolCount} tools)`
            : ' (tool prompt/schema changed, same tool set)'
        parts.push(`tools changed${toolDiff}`)
      }
      if (changes.fastModeChanged) {
        parts.push('fast mode toggled')
      }
      if (changes.globalCacheStrategyChanged) {
        parts.push(
          `global cache strategy changed (${changes.prevGlobalCacheStrategy || 'none'}${changes.newGlobalCacheStrategy || 'none'})`,
        )
      }
      if (
        changes.cacheControlChanged &&
        !changes.globalCacheStrategyChanged &&
        !changes.systemPromptChanged
      ) {
        // Only report as standalone cause if nothing else explains it —
        // otherwise the scope/TTL flip is a consequence, not the root cause.
        parts.push('cache_control changed (scope or TTL)')
      }
      if (changes.betasChanged) {
        const added = changes.addedBetas.length
          ? `+${changes.addedBetas.join(',')}`
          : ''
        const removed = changes.removedBetas.length
          ? `-${changes.removedBetas.join(',')}`
          : ''
        const diff = [added, removed].filter(Boolean).join(' ')
        parts.push(`betas changed${diff ? ` (${diff})` : ''}`)
      }
      if (changes.autoModeChanged) {
        parts.push('auto mode toggled')
      }
      if (changes.overageChanged) {
        parts.push('overage state changed (TTL latched, no flip)')
      }
      if (changes.cachedMCChanged) {
        parts.push('cached microcompact toggled')
      }
      if (changes.effortChanged) {
        parts.push(
          `effort changed (${changes.prevEffortValue || 'default'}${changes.newEffortValue || 'default'})`,
        )
      }
      if (changes.extraBodyChanged) {
        parts.push('extra body params changed')
      }
    }
    // Check if time gap suggests TTL expiration
    const lastAssistantMsgOver5minAgo =
      timeSinceLastAssistantMsg !== null &&
      timeSinceLastAssistantMsg > CACHE_TTL_5MIN_MS
    const lastAssistantMsgOver1hAgo =
      timeSinceLastAssistantMsg !== null &&
      timeSinceLastAssistantMsg > CACHE_TTL_1HOUR_MS
    // Post PR #19823 BQ analysis (bq-queries/prompt-caching/cache_break_pr19823_analysis.sql):
    // when all client-side flags are false and the gap is under TTL, ~90% of breaks
    // are server-side routing/eviction or billed/inference disagreement. Label
    // accordingly instead of implying a CC bug hunt.
    let reason: string
    if (parts.length > 0) {
      reason = parts.join(', ')
    } else if (lastAssistantMsgOver1hAgo) {
      reason = 'possible 1h TTL expiry (prompt unchanged)'
    } else if (lastAssistantMsgOver5minAgo) {
      reason = 'possible 5min TTL expiry (prompt unchanged)'
    } else if (timeSinceLastAssistantMsg !== null) {
      reason = 'likely server-side (prompt unchanged, <5min gap)'
    } else {
      reason = 'unknown cause'
    }
    logEvent('tengu_prompt_cache_break', {
      systemPromptChanged: changes?.systemPromptChanged ?? false,
      toolSchemasChanged: changes?.toolSchemasChanged ?? false,
      modelChanged: changes?.modelChanged ?? false,
      fastModeChanged: changes?.fastModeChanged ?? false,
      cacheControlChanged: changes?.cacheControlChanged ?? false,
      globalCacheStrategyChanged: changes?.globalCacheStrategyChanged ?? false,
      betasChanged: changes?.betasChanged ?? false,
      autoModeChanged: changes?.autoModeChanged ?? false,
      overageChanged: changes?.overageChanged ?? false,
      cachedMCChanged: changes?.cachedMCChanged ?? false,
      effortChanged: changes?.effortChanged ?? false,
      extraBodyChanged: changes?.extraBodyChanged ?? false,
      addedToolCount: changes?.addedToolCount ?? 0,
      removedToolCount: changes?.removedToolCount ?? 0,
      systemCharDelta: changes?.systemCharDelta ?? 0,
      // Tool names are sanitized: built-in names are a fixed vocabulary,
      // MCP tools collapse to 'mcp' (user-configured, could leak paths).
      addedTools: (changes?.addedTools ?? [])
        .map(sanitizeToolName)
        .join(
          ',',
        ) as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
      removedTools: (changes?.removedTools ?? [])
        .map(sanitizeToolName)
        .join(
          ',',
        ) as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
      changedToolSchemas: (changes?.changedToolSchemas ?? [])
        .map(sanitizeToolName)
        .join(
          ',',
        ) as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
      // Beta header names and cache strategy are fixed enum-like values,
      // not code or filepaths. requestId is an opaque server-generated ID.
      addedBetas: (changes?.addedBetas ?? []).join(
        ',',
      ) as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
      removedBetas: (changes?.removedBetas ?? []).join(
        ',',
      ) as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
      prevGlobalCacheStrategy: (changes?.prevGlobalCacheStrategy ??
        '') as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
      newGlobalCacheStrategy: (changes?.newGlobalCacheStrategy ??
        '') as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
      callNumber: state.callCount,
      prevCacheReadTokens: prevCacheRead,
      cacheReadTokens,
      cacheCreationTokens,
      timeSinceLastAssistantMsg: timeSinceLastAssistantMsg ?? -1,
      lastAssistantMsgOver5minAgo,
      lastAssistantMsgOver1hAgo,
      requestId: (requestId ??
        '') as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
    })
    // Write diff file for ant debugging via --debug. The path is included in
    // the summary log so ants can find it (DevBar UI removed — event data
    // flows reliably to BQ for analytics).
    let diffPath: string | undefined
    if (changes?.buildPrevDiffableContent) {
      diffPath = await writeCacheBreakDiff(
        changes.buildPrevDiffableContent(),
        state.buildDiffableContent(),
      )
    }
    const diffSuffix = diffPath ? `, diff: ${diffPath}` : ''
    const summary = `[PROMPT CACHE BREAK] ${reason} [source=${querySource}, call #${state.callCount}, cache read: ${prevCacheRead}${cacheReadTokens}, creation: ${cacheCreationTokens}${diffSuffix}]`
    logForDebugging(summary, { level: 'warn' })
    state.pendingChanges = null
  } catch (e: unknown) {
    logError(e)
  }
}
/**
* Call when cached microcompact sends cache_edits deletions.
* The next API response will have lower cache read tokens — that's
* expected, not a cache break.
*/
export function notifyCacheDeletion(
querySource: QuerySource,
agentId?: AgentId,
): void {
const key = getTrackingKey(querySource, agentId)
const state = key ? previousStateBySource.get(key) : undefined
if (state) {
state.cacheDeletionsPending = true
}
}
/**
* Call after compaction to reset the cache read baseline.
* Compaction legitimately reduces message count, so cache read tokens
* will naturally drop on the next call — that's not a break.
*/
export function notifyCompaction(
querySource: QuerySource,
agentId?: AgentId,
): void {
const key = getTrackingKey(querySource, agentId)
const state = key ? previousStateBySource.get(key) : undefined
if (state) {
state.prevCacheReadTokens = null
}
}
/** Drop tracking state for a finished subagent so the capped map doesn't
 * retain its (potentially large) serialized prompt snapshot. */
export function cleanupAgentTracking(agentId: AgentId): void {
  previousStateBySource.delete(agentId)
}
/** Clear ALL per-source tracking state (every key, every agent). */
export function resetPromptCacheBreakDetection(): void {
  previousStateBySource.clear()
}
/** Write a unified diff of the previous vs current prompt state to a temp
 * file for debugging. Returns the file path, or undefined on any failure. */
async function writeCacheBreakDiff(
  prevContent: string,
  newContent: string,
): Promise<string | undefined> {
  try {
    const targetPath = getCacheBreakDiffPath()
    await mkdir(getClaudeTempDir(), { recursive: true })
    const patch = createPatch(
      'prompt-state',
      prevContent,
      newContent,
      'before',
      'after',
    )
    await writeFile(targetPath, patch)
    return targetPath
  } catch {
    // Diff output is best-effort debug aid only — swallow errors.
    return undefined
  }
}
+281
View File
@@ -0,0 +1,281 @@
import axios from 'axios'
import { getOauthConfig } from '../../constants/oauth.js'
import {
getOauthAccountInfo,
getSubscriptionType,
isClaudeAISubscriber,
} from '../../utils/auth.js'
import { getGlobalConfig, saveGlobalConfig } from '../../utils/config.js'
import { logForDebugging } from '../../utils/debug.js'
import { logError } from '../../utils/log.js'
import { isEssentialTrafficOnly } from '../../utils/privacyLevel.js'
import { getOAuthHeaders, prepareApiRequest } from '../../utils/teleport/api.js'
import type {
ReferralCampaign,
ReferralEligibilityResponse,
ReferralRedemptionsResponse,
ReferrerRewardInfo,
} from '../oauth/types.js'
// Cache expiration time: 24 hours (eligibility changes only on subscription/experiment changes)
const CACHE_EXPIRATION_MS = 24 * 60 * 60 * 1000
// Track in-flight fetch to prevent duplicate API calls — concurrent callers
// all await this same shared promise; cleared in the fetch's finally block.
let fetchInProgress: Promise<ReferralEligibilityResponse | null> | null = null
/**
 * Fetch referral eligibility for the current org from the OAuth API.
 * Throws on auth/network/timeout failures — callers handle errors.
 */
export async function fetchReferralEligibility(
  campaign: ReferralCampaign = 'claude_code_guest_pass',
): Promise<ReferralEligibilityResponse> {
  const { accessToken, orgUUID } = await prepareApiRequest()
  const endpoint = `${getOauthConfig().BASE_API_URL}/api/oauth/organizations/${orgUUID}/referral/eligibility`
  const { data } = await axios.get(endpoint, {
    headers: {
      ...getOAuthHeaders(accessToken),
      'x-organization-uuid': orgUUID,
    },
    params: { campaign },
    timeout: 5000, // 5 second timeout for background fetch
  })
  return data
}
/**
 * Fetch the current org's referral redemptions from the OAuth API.
 * Throws on auth/network/timeout failures — callers handle errors.
 */
export async function fetchReferralRedemptions(
  campaign: string = 'claude_code_guest_pass',
): Promise<ReferralRedemptionsResponse> {
  const { accessToken, orgUUID } = await prepareApiRequest()
  const endpoint = `${getOauthConfig().BASE_API_URL}/api/oauth/organizations/${orgUUID}/referral/redemptions`
  const { data } = await axios.get<ReferralRedemptionsResponse>(endpoint, {
    headers: {
      ...getOAuthHeaders(accessToken),
      'x-organization-uuid': orgUUID,
    },
    params: { campaign },
    timeout: 10000, // 10 second timeout
  })
  return data
}
/**
 * Prechecks for if user can access guest passes feature
 * (must have an org, be a claude.ai subscriber, and be on the max tier).
 */
function shouldCheckForPasses(): boolean {
  const hasOrg = Boolean(getOauthAccountInfo()?.organizationUuid)
  return hasOrg && isClaudeAISubscriber() && getSubscriptionType() === 'max'
}
/**
 * Check cached passes eligibility from GlobalConfig
 * Returns current cached state and cache status
 */
export function checkCachedPassesEligibility(): {
  eligible: boolean
  needsRefresh: boolean
  hasCache: boolean
} {
  if (!shouldCheckForPasses()) {
    return { eligible: false, needsRefresh: false, hasCache: false }
  }
  const orgId = getOauthAccountInfo()?.organizationUuid
  if (!orgId) {
    return { eligible: false, needsRefresh: false, hasCache: false }
  }
  const entry = getGlobalConfig().passesEligibilityCache?.[orgId]
  if (!entry) {
    // No cached entry, needs fetch
    return { eligible: false, needsRefresh: true, hasCache: false }
  }
  const isExpired = Date.now() - entry.timestamp > CACHE_EXPIRATION_MS
  return {
    eligible: entry.eligible,
    needsRefresh: isExpired,
    hasCache: true,
  }
}
// Display symbols for currencies we know how to format; anything else falls
// back to the ISO code plus a space (see formatCreditAmount).
const CURRENCY_SYMBOLS: Record<string, string> = {
  USD: '$',
  EUR: '€',
  GBP: '£',
  BRL: 'R$',
  CAD: 'CA$',
  AUD: 'A$',
  NZD: 'NZ$',
  SGD: 'S$',
}
/** Format a referrer reward's credit amount for display, e.g. `$5` or
 * `€2.50`; unknown currencies render as `XYZ 5`. */
export function formatCreditAmount(reward: ReferrerRewardInfo): string {
  const symbol = CURRENCY_SYMBOLS[reward.currency] ?? `${reward.currency} `
  const amount = reward.amount_minor_units / 100
  if (Number.isInteger(amount)) return symbol + amount.toString()
  return symbol + amount.toFixed(2)
}
/**
 * Get cached referrer reward info from eligibility cache
 * Returns the reward info if the user is in a v1 campaign, null otherwise
 */
export function getCachedReferrerReward(): ReferrerRewardInfo | null {
  const orgId = getOauthAccountInfo()?.organizationUuid
  if (!orgId) return null
  const entry = getGlobalConfig().passesEligibilityCache?.[orgId]
  return entry?.referrer_reward ?? null
}
/**
 * Get the cached remaining passes count from eligibility cache
 * Returns the number of remaining passes, or null if not available
 */
export function getCachedRemainingPasses(): number | null {
  const orgId = getOauthAccountInfo()?.organizationUuid
  if (!orgId) return null
  const entry = getGlobalConfig().passesEligibilityCache?.[orgId]
  return entry?.remaining_passes ?? null
}
/**
 * Fetch passes eligibility and store in GlobalConfig
 * Returns the fetched response or null on error
 *
 * Concurrent callers share a single in-flight promise (module-level
 * fetchInProgress) so only one API request is ever outstanding.
 */
export async function fetchAndStorePassesEligibility(): Promise<ReferralEligibilityResponse | null> {
  // Return existing promise if fetch is already in progress
  if (fetchInProgress) {
    logForDebugging('Passes: Reusing in-flight eligibility fetch')
    return fetchInProgress
  }
  const orgId = getOauthAccountInfo()?.organizationUuid
  if (!orgId) {
    return null
  }
  // Store the promise to share with concurrent calls
  fetchInProgress = (async () => {
    try {
      const response = await fetchReferralEligibility()
      const cacheEntry = {
        ...response,
        timestamp: Date.now(),
      }
      saveGlobalConfig(current => ({
        ...current,
        passesEligibilityCache: {
          ...current.passesEligibilityCache,
          [orgId]: cacheEntry,
        },
      }))
      logForDebugging(
        `Passes eligibility cached for org ${orgId}: ${response.eligible}`,
      )
      return response
    } catch (error: unknown) {
      logForDebugging('Failed to fetch and cache passes eligibility')
      // Pass the unknown error straight through instead of asserting `as
      // Error` — non-Error throws would make that assertion a lie, and the
      // sibling modules already use `catch (e: unknown) { logError(e) }`.
      logError(error)
      return null
    } finally {
      // Clear the promise when done
      fetchInProgress = null
    }
  })()
  return fetchInProgress
}
/**
 * Get cached passes eligibility data or fetch if needed
 * Main entry point for all eligibility checks
 *
 * This function never blocks on network - it returns cached data immediately
 * and fetches in the background if needed. On cold start (no cache), it returns
 * null and the passes command won't be available until the next session.
 */
export async function getCachedOrFetchPassesEligibility(): Promise<ReferralEligibilityResponse | null> {
  if (!shouldCheckForPasses()) return null
  const orgId = getOauthAccountInfo()?.organizationUuid
  if (!orgId) return null
  const cachedEntry = getGlobalConfig().passesEligibilityCache?.[orgId]
  // No cache - trigger background fetch and return null (non-blocking)
  // The passes command won't be available this session, but will be next time
  if (!cachedEntry) {
    logForDebugging(
      'Passes: No cache, fetching eligibility in background (command unavailable this session)',
    )
    void fetchAndStorePassesEligibility()
    return null
  }
  const { timestamp: cachedAt, ...response } = cachedEntry
  if (Date.now() - cachedAt > CACHE_EXPIRATION_MS) {
    // Cache exists but is stale - return stale cache and trigger background refresh
    logForDebugging(
      'Passes: Cache stale, returning cached data and refreshing in background',
    )
    void fetchAndStorePassesEligibility() // Background refresh
  } else {
    // Cache is fresh - return it immediately
    logForDebugging('Passes: Using fresh cached eligibility data')
  }
  return response as ReferralEligibilityResponse
}
/**
 * Prefetch passes eligibility on startup.
 * No-op when the user has opted out of nonessential network traffic.
 */
export async function prefetchPassesEligibility(): Promise<void> {
  if (isEssentialTrafficOnly()) return
  // Fire-and-forget: warm the cache without blocking startup.
  void getCachedOrFetchPassesEligibility()
}
+514
View File
@@ -0,0 +1,514 @@
import axios, { type AxiosError } from 'axios'
import type { UUID } from 'crypto'
import { getOauthConfig } from '../../constants/oauth.js'
import type { Entry, TranscriptMessage } from '../../types/logs.js'
import { logForDebugging } from '../../utils/debug.js'
import { logForDiagnosticsNoPII } from '../../utils/diagLogs.js'
import { isEnvTruthy } from '../../utils/envUtils.js'
import { logError } from '../../utils/log.js'
import { sequential } from '../../utils/sequential.js'
import { getSessionIngressAuthToken } from '../../utils/sessionIngressAuth.js'
import { sleep } from '../../utils/sleep.js'
import { jsonStringify } from '../../utils/slowOperations.js'
import { getOAuthHeaders } from '../../utils/teleport/api.js'
/** Error envelope returned by the session-ingress service on failure responses. */
interface SessionIngressError {
  error?: {
    message?: string
    type?: string
  }
}
// Module-level state
// Last successfully-persisted entry UUID per session — sent as the
// `Last-Uuid` header for optimistic concurrency control on appends.
const lastUuidMap: Map<string, UUID> = new Map()
// Retry policy for transient persistence failures (exponential backoff).
const MAX_RETRIES = 10
const BASE_DELAY_MS = 500
// Per-session sequential wrappers to prevent concurrent log writes
const sequentialAppendBySession: Map<
  string,
  (
    entry: TranscriptMessage,
    url: string,
    headers: Record<string, string>,
  ) => Promise<boolean>
> = new Map()
/**
 * Gets or creates the per-session sequential wrapper.
 * Guarantees that log appends for a given session run one at a time,
 * preserving the Last-Uuid append chain.
 */
function getOrCreateSequentialAppend(sessionId: string) {
  const existing = sequentialAppendBySession.get(sessionId)
  if (existing) {
    return existing
  }
  const created = sequential(
    async (
      entry: TranscriptMessage,
      url: string,
      headers: Record<string, string>,
    ) => await appendSessionLogImpl(sessionId, entry, url, headers),
  )
  sequentialAppendBySession.set(sessionId, created)
  return created
}
/**
 * Internal implementation of appendSessionLog with retry logic.
 *
 * Retries on transient errors (network, 5xx, 429). On 409, adopts the server's
 * last UUID and retries (handles stale state from killed process's in-flight
 * requests). Fails immediately on 401.
 *
 * @param sessionId Session whose append chain is being extended.
 * @param entry Entry to persist; on success its uuid becomes the chain head.
 * @param url Fully-qualified ingress endpoint for this session.
 * @param headers Base auth/content headers; `Last-Uuid` is added per attempt.
 * @returns true once the server acknowledges the entry, false on give-up.
 */
async function appendSessionLogImpl(
  sessionId: string,
  entry: TranscriptMessage,
  url: string,
  headers: Record<string, string>,
): Promise<boolean> {
  for (let attempt = 1; attempt <= MAX_RETRIES; attempt++) {
    try {
      // Optimistic concurrency: tell the server which entry we believe is
      // the current head of the append chain.
      const lastUuid = lastUuidMap.get(sessionId)
      const requestHeaders = { ...headers }
      if (lastUuid) {
        requestHeaders['Last-Uuid'] = lastUuid
      }
      // 5xx responses throw (handled in catch below); 4xx are inspected here.
      const response = await axios.put(url, entry, {
        headers: requestHeaders,
        validateStatus: status => status < 500,
      })
      if (response.status === 200 || response.status === 201) {
        lastUuidMap.set(sessionId, entry.uuid)
        logForDebugging(
          `Successfully persisted session log entry for session ${sessionId}`,
        )
        return true
      }
      if (response.status === 409) {
        // Check if our entry was actually stored (server returned 409 but entry exists)
        // This handles the scenario where entry was stored but client received an error
        // response, causing lastUuidMap to be stale
        const serverLastUuid = response.headers['x-last-uuid']
        if (serverLastUuid === entry.uuid) {
          // Our entry IS the last entry on server - it was stored successfully previously
          lastUuidMap.set(sessionId, entry.uuid)
          logForDebugging(
            `Session entry ${entry.uuid} already present on server, recovering from stale state`,
          )
          logForDiagnosticsNoPII('info', 'session_persist_recovered_from_409')
          return true
        }
        // Another writer (e.g. in-flight request from a killed process)
        // advanced the server's chain. Try to adopt the server's last UUID
        // from the response header, or re-fetch the session to discover it.
        if (serverLastUuid) {
          lastUuidMap.set(sessionId, serverLastUuid as UUID)
          logForDebugging(
            `Session 409: adopting server lastUuid=${serverLastUuid} from header, retrying entry ${entry.uuid}`,
          )
        } else {
          // Server didn't return x-last-uuid (e.g. v1 endpoint). Re-fetch
          // the session to discover the current head of the append chain.
          const logs = await fetchSessionLogsFromUrl(sessionId, url, headers)
          const adoptedUuid = findLastUuid(logs)
          if (adoptedUuid) {
            lastUuidMap.set(sessionId, adoptedUuid)
            // `logs!` is safe: adoptedUuid is only defined when logs is non-null.
            logForDebugging(
              `Session 409: re-fetched ${logs!.length} entries, adopting lastUuid=${adoptedUuid}, retrying entry ${entry.uuid}`,
            )
          } else {
            // Can't determine server state — give up
            const errorData = response.data as SessionIngressError
            const errorMessage =
              errorData.error?.message || 'Concurrent modification detected'
            logError(
              new Error(
                `Session persistence conflict: UUID mismatch for session ${sessionId}, entry ${entry.uuid}. ${errorMessage}`,
              ),
            )
            logForDiagnosticsNoPII(
              'error',
              'session_persist_fail_concurrent_modification',
            )
            return false
          }
        }
        logForDiagnosticsNoPII('info', 'session_persist_409_adopt_server_uuid')
        continue // retry with updated lastUuid
      }
      if (response.status === 401) {
        logForDebugging('Session token expired or invalid')
        logForDiagnosticsNoPII('error', 'session_persist_fail_bad_token')
        return false // Non-retryable
      }
      // Other 4xx (429, etc.) - retryable
      logForDebugging(
        `Failed to persist session log: ${response.status} ${response.statusText}`,
      )
      logForDiagnosticsNoPII('error', 'session_persist_fail_status', {
        status: response.status,
        attempt,
      })
    } catch (error) {
      // Network errors, 5xx - retryable
      const axiosError = error as AxiosError<SessionIngressError>
      logError(new Error(`Error persisting session log: ${axiosError.message}`))
      logForDiagnosticsNoPII('error', 'session_persist_fail_status', {
        status: axiosError.status,
        attempt,
      })
    }
    if (attempt === MAX_RETRIES) {
      logForDebugging(`Remote persistence failed after ${MAX_RETRIES} attempts`)
      logForDiagnosticsNoPII(
        'error',
        'session_persist_error_retries_exhausted',
        { attempt },
      )
      return false
    }
    // Exponential backoff capped at 8s between attempts.
    const delayMs = Math.min(BASE_DELAY_MS * Math.pow(2, attempt - 1), 8000)
    logForDebugging(
      `Remote persistence attempt ${attempt}/${MAX_RETRIES} failed, retrying in ${delayMs}ms…`,
    )
    await sleep(delayMs)
  }
  return false
}
/**
 * Append a log entry to the session using a JWT token.
 * Uses optimistic concurrency control (Last-Uuid header) and serializes
 * appends per session to avoid racing writes.
 */
export async function appendSessionLog(
  sessionId: string,
  entry: TranscriptMessage,
  url: string,
): Promise<boolean> {
  const sessionToken = getSessionIngressAuthToken()
  if (!sessionToken) {
    logForDebugging('No session token available for session persistence')
    logForDiagnosticsNoPII('error', 'session_persist_fail_jwt_no_token')
    return false
  }
  const headers: Record<string, string> = {
    'Content-Type': 'application/json',
    Authorization: `Bearer ${sessionToken}`,
  }
  // Route through the per-session queue so appends run one at a time.
  return getOrCreateSequentialAppend(sessionId)(entry, url, headers)
}
/**
 * Get all session logs for hydration.
 * Also re-syncs the in-memory Last-Uuid chain head from the fetched logs.
 */
export async function getSessionLogs(
  sessionId: string,
  url: string,
): Promise<Entry[] | null> {
  const sessionToken = getSessionIngressAuthToken()
  if (!sessionToken) {
    logForDebugging('No session token available for fetching session logs')
    logForDiagnosticsNoPII('error', 'session_get_fail_no_token')
    return null
  }
  const logs = await fetchSessionLogsFromUrl(sessionId, url, {
    Authorization: `Bearer ${sessionToken}`,
  })
  if (logs && logs.length > 0) {
    // Adopt the final entry's UUID as the head of the append chain.
    const lastEntry = logs.at(-1)
    if (lastEntry && 'uuid' in lastEntry && lastEntry.uuid) {
      lastUuidMap.set(sessionId, lastEntry.uuid)
    }
  }
  return logs
}
/**
 * Get all session logs for hydration via OAuth.
 * Used when teleporting sessions from the Sessions API.
 */
export async function getSessionLogsViaOAuth(
  sessionId: string,
  accessToken: string,
  orgUUID: string,
): Promise<Entry[] | null> {
  const url = `${getOauthConfig().BASE_API_URL}/v1/session_ingress/session/${sessionId}`
  logForDebugging(`[session-ingress] Fetching session logs from: ${url}`)
  return fetchSessionLogsFromUrl(sessionId, url, {
    ...getOAuthHeaders(accessToken),
    'x-organization-uuid': orgUUID,
  })
}
/**
 * Response shape from GET /v1/code/sessions/{id}/teleport-events.
 * WorkerEvent.payload IS the Entry (TranscriptMessage struct) — the CLI
 * writes it via AddWorkerEvent, the server stores it opaque, we read it
 * back here.
 */
type TeleportEventsResponse = {
  data: Array<{
    event_id: string
    event_type: string
    is_compaction: boolean
    // null for events the server couldn't surface (see getTeleportEvents for
    // the skip rationale); otherwise the transcript entry itself.
    payload: Entry | null
    created_at: string
  }>
  // Unset when there are no more pages — this IS the end-of-stream
  // signal (no separate has_more field).
  next_cursor?: string
}
/**
 * Get worker events (transcript) via the CCR v2 Sessions API. Replaces
 * getSessionLogsViaOAuth once session-ingress is retired.
 *
 * The server dispatches per-session: Spanner for v2-native sessions,
 * threadstore for pre-backfill session_* IDs. The cursor is opaque to us —
 * echo it back until next_cursor is unset.
 *
 * Paginated (500/page default, server max 1000). session-ingress's one-shot
 * 50k is gone; we loop.
 *
 * @returns all entries on success (possibly truncated at the page cap),
 *   null when the session is not found or a request fails.
 * @throws Error with a user-facing re-login message on 401.
 */
export async function getTeleportEvents(
  sessionId: string,
  accessToken: string,
  orgUUID: string,
): Promise<Entry[] | null> {
  const baseUrl = `${getOauthConfig().BASE_API_URL}/v1/code/sessions/${sessionId}/teleport-events`
  const headers = {
    ...getOAuthHeaders(accessToken),
    'x-organization-uuid': orgUUID,
  }
  logForDebugging(`[teleport] Fetching events from: ${baseUrl}`)
  const all: Entry[] = []
  let cursor: string | undefined
  let pages = 0
  // Infinite-loop guard: 1000/page × 100 pages = 100k events. Larger than
  // session-ingress's 50k one-shot. If we hit this, something's wrong
  // (server not advancing cursor) — bail rather than hang.
  const maxPages = 100
  while (pages < maxPages) {
    const params: Record<string, string | number> = { limit: 1000 }
    if (cursor !== undefined) {
      params.cursor = cursor
    }
    let response
    try {
      response = await axios.get<TeleportEventsResponse>(baseUrl, {
        headers,
        params,
        timeout: 20000,
        validateStatus: status => status < 500,
      })
    } catch (e) {
      // Network/5xx: no partial results are returned — caller falls back.
      const err = e as AxiosError
      logError(new Error(`Teleport events fetch failed: ${err.message}`))
      logForDiagnosticsNoPII('error', 'teleport_events_fetch_fail')
      return null
    }
    if (response.status === 404) {
      // 404 on page 0 is ambiguous during the migration window:
      // (a) Session genuinely not found (not in Spanner AND not in
      //     threadstore) — nothing to fetch.
      // (b) Route-level 404: endpoint not deployed yet, or session is
      //     a threadstore session not yet backfilled into Spanner.
      // We can't tell them apart from the response alone. Returning null
      // lets the caller fall back to session-ingress, which will correctly
      // return empty for case (a) and data for case (b). Once the backfill
      // is complete and session-ingress is gone, the fallback also returns
      // null → same "Failed to fetch session logs" error as today.
      //
      // 404 mid-pagination (pages > 0) means session was deleted between
      // pages — return what we have.
      logForDebugging(
        `[teleport] Session ${sessionId} not found (page ${pages})`,
      )
      logForDiagnosticsNoPII('warn', 'teleport_events_not_found')
      return pages === 0 ? null : all
    }
    if (response.status === 401) {
      logForDiagnosticsNoPII('error', 'teleport_events_bad_token')
      throw new Error(
        'Your session has expired. Please run /login to sign in again.',
      )
    }
    if (response.status !== 200) {
      logError(
        new Error(
          `Teleport events returned ${response.status}: ${jsonStringify(response.data)}`,
        ),
      )
      logForDiagnosticsNoPII('error', 'teleport_events_bad_status')
      return null
    }
    const { data, next_cursor } = response.data
    if (!Array.isArray(data)) {
      logError(
        new Error(
          `Teleport events invalid response shape: ${jsonStringify(response.data)}`,
        ),
      )
      logForDiagnosticsNoPII('error', 'teleport_events_invalid_shape')
      return null
    }
    // payload IS the Entry. null payload happens for threadstore non-generic
    // events (server skips them) or encryption failures — skip here too.
    for (const ev of data) {
      if (ev.payload !== null) {
        all.push(ev.payload)
      }
    }
    pages++
    // == null covers both `null` and `undefined` — the proto omits the
    // field at end-of-stream, but some serializers emit `null`. Strict
    // `=== undefined` would loop forever on `null` (cursor=null in query
    // params stringifies to "null", which the server rejects or echoes).
    if (next_cursor == null) {
      break
    }
    cursor = next_cursor
  }
  if (pages >= maxPages) {
    // Don't fail — return what we have. Better to teleport with a
    // truncated transcript than not at all.
    logError(
      new Error(`Teleport events hit page cap (${maxPages}) for ${sessionId}`),
    )
    logForDiagnosticsNoPII('warn', 'teleport_events_page_cap')
  }
  logForDebugging(
    `[teleport] Fetched ${all.length} events over ${pages} page(s) for ${sessionId}`,
  )
  return all
}
/**
 * Shared implementation for fetching session logs from a URL.
 *
 * @returns entries on 200, [] on 404 (no logs yet is a normal state), or
 *   null on malformed payloads, other statuses, or transport errors.
 * @throws Error with a user-facing re-login message on 401.
 *
 * BUGFIX: status handling now happens OUTSIDE the try/catch. Previously the
 * 401 "session has expired" Error was thrown inside the same try whose catch
 * logged it and returned null, so the re-login prompt was silently swallowed
 * and callers could never surface it. getTeleportEvents already throws its
 * 401 outside the request try — this brings the two into agreement.
 */
async function fetchSessionLogsFromUrl(
  sessionId: string,
  url: string,
  headers: Record<string, string>,
): Promise<Entry[] | null> {
  let response
  try {
    response = await axios.get(url, {
      headers,
      timeout: 20000,
      validateStatus: status => status < 500,
      // Optionally ask the server for only post-compaction entries.
      params: isEnvTruthy(process.env.CLAUDE_AFTER_LAST_COMPACT)
        ? { after_last_compact: true }
        : undefined,
    })
  } catch (error) {
    // Network errors / 5xx: log and signal "unavailable" with null.
    const axiosError = error as AxiosError<SessionIngressError>
    logError(new Error(`Error fetching session logs: ${axiosError.message}`))
    logForDiagnosticsNoPII('error', 'session_get_fail_status', {
      status: axiosError.status,
    })
    return null
  }
  if (response.status === 200) {
    const data = response.data
    // Validate the response structure
    if (!data || typeof data !== 'object' || !Array.isArray(data.loglines)) {
      logError(
        new Error(
          `Invalid session logs response format: ${jsonStringify(data)}`,
        ),
      )
      logForDiagnosticsNoPII('error', 'session_get_fail_invalid_response')
      return null
    }
    const logs = data.loglines as Entry[]
    logForDebugging(
      `Fetched ${logs.length} session logs for session ${sessionId}`,
    )
    return logs
  }
  if (response.status === 404) {
    // No logs yet is a normal state, not an error.
    logForDebugging(`No existing logs for session ${sessionId}`)
    logForDiagnosticsNoPII('warn', 'session_get_no_logs_for_session')
    return []
  }
  if (response.status === 401) {
    logForDebugging('Auth token expired or invalid')
    logForDiagnosticsNoPII('error', 'session_get_fail_bad_token')
    // Propagates to callers now that it is outside the try/catch.
    throw new Error(
      'Your session has expired. Please run /login to sign in again.',
    )
  }
  logForDebugging(
    `Failed to fetch session logs: ${response.status} ${response.statusText}`,
  )
  logForDiagnosticsNoPII('error', 'session_get_fail_status', {
    status: response.status,
  })
  return null
}
/**
 * Walk backward through entries to find the last one carrying a uuid.
 * Some entry types (SummaryMessage, TagMessage) don't have one.
 */
function findLastUuid(logs: Entry[] | null): UUID | undefined {
  if (!logs) {
    return undefined
  }
  for (let i = logs.length - 1; i >= 0; i--) {
    const candidate = logs[i]
    if (candidate && 'uuid' in candidate && candidate.uuid) {
      return candidate.uuid as UUID
    }
  }
  return undefined
}
/**
 * Clear cached state for one session: its sequential append wrapper and
 * its Last-Uuid chain head.
 */
export function clearSession(sessionId: string): void {
  sequentialAppendBySession.delete(sessionId)
  lastUuidMap.delete(sessionId)
}
/**
 * Clear all cached session state (all sessions).
 * Use this on /clear to free sub-agent session entries.
 */
export function clearAllSessions(): void {
  sequentialAppendBySession.clear()
  lastUuidMap.clear()
}
+38
View File
@@ -0,0 +1,38 @@
import axios from 'axios'
import { getOauthConfig } from '../../constants/oauth.js'
import { isClaudeAISubscriber } from '../../utils/auth.js'
import { logForDebugging } from '../../utils/debug.js'
import { getOAuthHeaders, prepareApiRequest } from '../../utils/teleport/api.js'
/** Quota snapshot returned by GET /v1/ultrareview/quota. */
export type UltrareviewQuotaResponse = {
  reviews_used: number
  reviews_limit: number
  reviews_remaining: number
  // Presumably true when further reviews draw on overage — confirm with API docs.
  is_overage: boolean
}
/**
 * Peek the ultrareview quota for display and nudge decisions. Consume
 * happens server-side at session creation. Resolves to null when the user
 * is not a subscriber or the endpoint errors.
 */
export async function fetchUltrareviewQuota(): Promise<UltrareviewQuotaResponse | null> {
  if (!isClaudeAISubscriber()) {
    return null
  }
  try {
    const { accessToken, orgUUID } = await prepareApiRequest()
    const url = `${getOauthConfig().BASE_API_URL}/v1/ultrareview/quota`
    const headers = {
      ...getOAuthHeaders(accessToken),
      'x-organization-uuid': orgUUID,
    }
    const { data } = await axios.get<UltrareviewQuotaResponse>(url, {
      headers,
      timeout: 5000,
    })
    return data
  } catch (error) {
    // Quota display is best-effort — never propagate failures.
    logForDebugging(`fetchUltrareviewQuota failed: ${error}`)
    return null
  }
}
+63
View File
@@ -0,0 +1,63 @@
import axios from 'axios'
import { getOauthConfig } from '../../constants/oauth.js'
import {
getClaudeAIOAuthTokens,
hasProfileScope,
isClaudeAISubscriber,
} from '../../utils/auth.js'
import { getAuthHeaders } from '../../utils/http.js'
import { getClaudeCodeUserAgent } from '../../utils/userAgent.js'
import { isOAuthTokenExpired } from '../oauth/client.js'
/** A single rate-limit window's usage snapshot. */
export type RateLimit = {
  utilization: number | null // a percentage from 0 to 100
  resets_at: string | null // ISO 8601 timestamp
}
/** Extra-usage (overage) state for the account. */
export type ExtraUsage = {
  is_enabled: boolean
  monthly_limit: number | null
  used_credits: number | null
  utilization: number | null
}
/** Shape of GET /api/oauth/usage; all windows are optional in the response. */
export type Utilization = {
  five_hour?: RateLimit | null
  seven_day?: RateLimit | null
  seven_day_oauth_apps?: RateLimit | null
  seven_day_opus?: RateLimit | null
  seven_day_sonnet?: RateLimit | null
  extra_usage?: ExtraUsage | null
}
/**
 * Fetch current rate-limit utilization from the OAuth usage endpoint.
 *
 * Returns {} when the user lacks a subscription or profile scope, and null
 * when the OAuth token is already expired (skipping the call avoids a
 * guaranteed 401). Throws when auth headers cannot be built or on HTTP error.
 */
export async function fetchUtilization(): Promise<Utilization | null> {
  if (!isClaudeAISubscriber() || !hasProfileScope()) {
    return {}
  }
  // Skip API call if OAuth token is expired to avoid 401 errors
  const tokens = getClaudeAIOAuthTokens()
  if (tokens && isOAuthTokenExpired(tokens.expiresAt)) {
    return null
  }
  const authResult = getAuthHeaders()
  if (authResult.error) {
    throw new Error(`Auth error: ${authResult.error}`)
  }
  const response = await axios.get<Utilization>(
    `${getOauthConfig().BASE_API_URL}/api/oauth/usage`,
    {
      headers: {
        'Content-Type': 'application/json',
        'User-Agent': getClaudeCodeUserAgent(),
        ...authResult.headers,
      },
      timeout: 5000, // 5 second timeout
    },
  )
  return response.data
}
+822
View File
@@ -0,0 +1,822 @@
import { feature } from 'bun:bundle'
import type Anthropic from '@anthropic-ai/sdk'
import {
APIConnectionError,
APIError,
APIUserAbortError,
} from '@anthropic-ai/sdk'
import type { QuerySource } from 'src/constants/querySource.js'
import type { SystemAPIErrorMessage } from 'src/types/message.js'
import { isAwsCredentialsProviderError } from 'src/utils/aws.js'
import { logForDebugging } from 'src/utils/debug.js'
import { logError } from 'src/utils/log.js'
import { createSystemAPIErrorMessage } from 'src/utils/messages.js'
import { getAPIProviderForStatsig } from 'src/utils/model/providers.js'
import {
clearApiKeyHelperCache,
clearAwsCredentialsCache,
clearGcpCredentialsCache,
getClaudeAIOAuthTokens,
handleOAuth401Error,
isClaudeAISubscriber,
isEnterpriseSubscriber,
} from '../../utils/auth.js'
import { isEnvTruthy } from '../../utils/envUtils.js'
import { errorMessage } from '../../utils/errors.js'
import {
type CooldownReason,
handleFastModeOverageRejection,
handleFastModeRejectedByAPI,
isFastModeCooldown,
isFastModeEnabled,
triggerFastModeCooldown,
} from '../../utils/fastMode.js'
import { isNonCustomOpusModel } from '../../utils/model/model.js'
import { disableKeepAlive } from '../../utils/proxy.js'
import { sleep } from '../../utils/sleep.js'
import type { ThinkingConfig } from '../../utils/thinking.js'
import { getFeatureValue_CACHED_MAY_BE_STALE } from '../analytics/growthbook.js'
import {
type AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
logEvent,
} from '../analytics/index.js'
import {
checkMockRateLimitError,
isMockRateLimitError,
} from '../rateLimitMocking.js'
import { REPEATED_529_ERROR_MESSAGE } from './errors.js'
import { extractConnectionErrorDetails } from './errorUtils.js'
// Factory for the error thrown when an abort signal cancels a retry sleep.
const abortError = () => new APIUserAbortError()
// Attempt budget when RetryOptions.maxRetries is not provided.
const DEFAULT_MAX_RETRIES = 10
// Lower bound for max_tokens after a context-overflow adjustment.
const FLOOR_OUTPUT_TOKENS = 3000
// Consecutive 529s tolerated before model fallback / hard failure.
const MAX_529_RETRIES = 3
// Base delay (ms) for retry backoff.
export const BASE_DELAY_MS = 500
// Foreground query sources where the user IS blocking on the result — these
// retry on 529. Everything else (summaries, titles, suggestions, classifiers)
// bails immediately: during a capacity cascade each retry is 3-10× gateway
// amplification, and the user never sees those fail anyway. New sources
// default to no-retry — add here only if the user is waiting on the result.
const FOREGROUND_529_RETRY_SOURCES = new Set<QuerySource>([
  'repl_main_thread',
  'repl_main_thread:outputStyle:custom',
  'repl_main_thread:outputStyle:Explanatory',
  'repl_main_thread:outputStyle:Learning',
  'sdk',
  'agent:custom',
  'agent:default',
  'agent:builtin',
  'compact',
  'hook_agent',
  'hook_prompt',
  'verification_agent',
  'side_question',
  // Security classifiers — must complete for auto-mode correctness.
  // yoloClassifier.ts uses 'auto_mode' (not 'yolo_classifier' — that's
  // type-only). bash_classifier is ant-only; feature-gate so the string
  // tree-shakes out of external builds (excluded-strings.txt).
  'auto_mode',
  ...(feature('BASH_CLASSIFIER') ? (['bash_classifier'] as const) : []),
])
/** Whether a 529 for this query source is worth retrying (user is waiting). */
function shouldRetry529(querySource: QuerySource | undefined): boolean {
  // undefined → retry (conservative for untagged call paths)
  if (querySource === undefined) {
    return true
  }
  return FOREGROUND_529_RETRY_SOURCES.has(querySource)
}
// CLAUDE_CODE_UNATTENDED_RETRY: for unattended sessions (ant-only). Retries 429/529
// indefinitely with higher backoff and periodic keep-alive yields so the host
// environment does not mark the session idle mid-wait.
// TODO(ANT-344): the keep-alive via SystemAPIErrorMessage yields is a stopgap
// until there's a dedicated keep-alive channel.
const PERSISTENT_MAX_BACKOFF_MS = 5 * 60 * 1000 // 5 min exponential-backoff ceiling
const PERSISTENT_RESET_CAP_MS = 6 * 60 * 60 * 1000 // 6 hr hard cap on any single wait
const HEARTBEAT_INTERVAL_MS = 30_000 // cadence for the keep-alive yields noted above
/** Whether ant-only unattended persistent retry is compiled in AND enabled via env. */
function isPersistentRetryEnabled(): boolean {
  if (!feature('UNATTENDED_RETRY')) {
    return false
  }
  return isEnvTruthy(process.env.CLAUDE_CODE_UNATTENDED_RETRY)
}
/** True for capacity-related failures worth waiting out: 529 overloaded or 429 rate limit. */
function isTransientCapacityError(error: unknown): boolean {
  if (is529Error(error)) {
    return true
  }
  return error instanceof APIError && error.status === 429
}
/**
 * Detects a dead keep-alive socket (ECONNRESET/EPIPE) so the caller can
 * disable connection pooling before reconnecting.
 */
function isStaleConnectionError(error: unknown): boolean {
  if (!(error instanceof APIConnectionError)) {
    return false
  }
  const code = extractConnectionErrorDetails(error)?.code
  return code === 'ECONNRESET' || code === 'EPIPE'
}
/** Mutable per-request state threaded through retry attempts. */
export interface RetryContext {
  /** Set after a max-tokens context-overflow 400 to shrink the next attempt. */
  maxTokensOverride?: number
  model: string
  /** Thinking budget config; bounds the minimum viable max_tokens on overflow. */
  thinkingConfig: ThinkingConfig
  /** Present only when fast mode is enabled; cleared mid-loop on fallback/cooldown. */
  fastMode?: boolean
}
/** Options for the withRetry generator. */
interface RetryOptions {
  maxRetries?: number
  model: string
  /** Model to switch to after repeated 529s (signalled via FallbackTriggeredError). */
  fallbackModel?: string
  thinkingConfig: ThinkingConfig
  fastMode?: boolean
  signal?: AbortSignal
  /** Tags the caller; non-foreground sources skip 529 retries entirely. */
  querySource?: QuerySource
  /**
   * Pre-seed the consecutive 529 counter. Used when this retry loop is a
   * non-streaming fallback after a streaming 529 — the streaming 529 should
   * count toward MAX_529_RETRIES so total 529s-before-fallback is consistent
   * regardless of which request mode hit the overload.
   */
  initialConsecutive529Errors?: number
}
/**
 * Wraps an error the retry loop will not (or can no longer) retry, carrying
 * the RetryContext in effect so callers can report model/thinking state.
 */
export class CannotRetryError extends Error {
  constructor(
    public readonly originalError: unknown,
    public readonly retryContext: RetryContext,
  ) {
    const message = errorMessage(originalError)
    super(message)
    // NOTE(review): name is 'RetryError' while the class is CannotRetryError.
    // Possibly intentional (telemetry/back-compat with an older class name),
    // but confirm — the sibling FallbackTriggeredError uses its own class name.
    this.name = 'RetryError'
    // Preserve the original stack trace if available
    if (originalError instanceof Error && originalError.stack) {
      this.stack = originalError.stack
    }
  }
}
/**
 * Signals that repeated 529s exhausted the budget for the primary model and
 * the caller should restart the request against `fallbackModel`.
 */
export class FallbackTriggeredError extends Error {
  public readonly originalModel: string
  public readonly fallbackModel: string

  constructor(originalModel: string, fallbackModel: string) {
    super(`Model fallback triggered: ${originalModel} -> ${fallbackModel}`)
    this.name = 'FallbackTriggeredError'
    this.originalModel = originalModel
    this.fallbackModel = fallbackModel
  }
}
export async function* withRetry<T>(
getClient: () => Promise<Anthropic>,
operation: (
client: Anthropic,
attempt: number,
context: RetryContext,
) => Promise<T>,
options: RetryOptions,
): AsyncGenerator<SystemAPIErrorMessage, T> {
const maxRetries = getMaxRetries(options)
const retryContext: RetryContext = {
model: options.model,
thinkingConfig: options.thinkingConfig,
...(isFastModeEnabled() && { fastMode: options.fastMode }),
}
let client: Anthropic | null = null
let consecutive529Errors = options.initialConsecutive529Errors ?? 0
let lastError: unknown
let persistentAttempt = 0
for (let attempt = 1; attempt <= maxRetries + 1; attempt++) {
if (options.signal?.aborted) {
throw new APIUserAbortError()
}
// Capture whether fast mode is active before this attempt
// (fallback may change the state mid-loop)
const wasFastModeActive = isFastModeEnabled()
? retryContext.fastMode && !isFastModeCooldown()
: false
try {
// Check for mock rate limits (used by /mock-limits command for Ant employees)
if (process.env.USER_TYPE === 'ant') {
const mockError = checkMockRateLimitError(
retryContext.model,
wasFastModeActive,
)
if (mockError) {
throw mockError
}
}
// Get a fresh client instance on first attempt or after authentication errors
// - 401 for first-party API authentication failures
// - 403 "OAuth token has been revoked" (another process refreshed the token)
// - Bedrock-specific auth errors (403 or CredentialsProviderError)
// - Vertex-specific auth errors (credential refresh failures, 401)
// - ECONNRESET/EPIPE: stale keep-alive socket; disable pooling and reconnect
const isStaleConnection = isStaleConnectionError(lastError)
if (
isStaleConnection &&
getFeatureValue_CACHED_MAY_BE_STALE(
'tengu_disable_keepalive_on_econnreset',
false,
)
) {
logForDebugging(
'Stale connection (ECONNRESET/EPIPE) — disabling keep-alive for retry',
)
disableKeepAlive()
}
if (
client === null ||
(lastError instanceof APIError && lastError.status === 401) ||
isOAuthTokenRevokedError(lastError) ||
isBedrockAuthError(lastError) ||
isVertexAuthError(lastError) ||
isStaleConnection
) {
// On 401 "token expired" or 403 "token revoked", force a token refresh
if (
(lastError instanceof APIError && lastError.status === 401) ||
isOAuthTokenRevokedError(lastError)
) {
const failedAccessToken = getClaudeAIOAuthTokens()?.accessToken
if (failedAccessToken) {
await handleOAuth401Error(failedAccessToken)
}
}
client = await getClient()
}
return await operation(client, attempt, retryContext)
} catch (error) {
lastError = error
logForDebugging(
`API error (attempt ${attempt}/${maxRetries + 1}): ${error instanceof APIError ? `${error.status} ${error.message}` : errorMessage(error)}`,
{ level: 'error' },
)
// Fast mode fallback: on 429/529, either wait and retry (short delays)
// or fall back to standard speed (long delays) to avoid cache thrashing.
// Skip in persistent mode: the short-retry path below loops with fast
// mode still active, so its `continue` never reaches the attempt clamp
// and the for-loop terminates. Persistent sessions want the chunked
// keep-alive path instead of fast-mode cache-preservation anyway.
if (
wasFastModeActive &&
!isPersistentRetryEnabled() &&
error instanceof APIError &&
(error.status === 429 || is529Error(error))
) {
// If the 429 is specifically because extra usage (overage) is not
// available, permanently disable fast mode with a specific message.
const overageReason = error.headers?.get(
'anthropic-ratelimit-unified-overage-disabled-reason',
)
if (overageReason !== null && overageReason !== undefined) {
handleFastModeOverageRejection(overageReason)
retryContext.fastMode = false
continue
}
const retryAfterMs = getRetryAfterMs(error)
if (retryAfterMs !== null && retryAfterMs < SHORT_RETRY_THRESHOLD_MS) {
// Short retry-after: wait and retry with fast mode still active
// to preserve prompt cache (same model name on retry).
await sleep(retryAfterMs, options.signal, { abortError })
continue
}
// Long or unknown retry-after: enter cooldown (switches to standard
// speed model), with a minimum floor to avoid flip-flopping.
const cooldownMs = Math.max(
retryAfterMs ?? DEFAULT_FAST_MODE_FALLBACK_HOLD_MS,
MIN_COOLDOWN_MS,
)
const cooldownReason: CooldownReason = is529Error(error)
? 'overloaded'
: 'rate_limit'
triggerFastModeCooldown(Date.now() + cooldownMs, cooldownReason)
if (isFastModeEnabled()) {
retryContext.fastMode = false
}
continue
}
// Fast mode fallback: if the API rejects the fast mode parameter
// (e.g., org doesn't have fast mode enabled), permanently disable fast
// mode and retry at standard speed.
if (wasFastModeActive && isFastModeNotEnabledError(error)) {
handleFastModeRejectedByAPI()
retryContext.fastMode = false
continue
}
// Non-foreground sources bail immediately on 529 — no retry amplification
// during capacity cascades. User never sees these fail.
if (is529Error(error) && !shouldRetry529(options.querySource)) {
logEvent('tengu_api_529_background_dropped', {
query_source:
options.querySource as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
})
throw new CannotRetryError(error, retryContext)
}
// Track consecutive 529 errors
if (
is529Error(error) &&
// If FALLBACK_FOR_ALL_PRIMARY_MODELS is not set, fall through only if the primary model is a non-custom Opus model.
// TODO: Revisit if the isNonCustomOpusModel check should still exist, or if isNonCustomOpusModel is a stale artifact of when Claude Code was hardcoded on Opus.
(process.env.FALLBACK_FOR_ALL_PRIMARY_MODELS ||
(!isClaudeAISubscriber() && isNonCustomOpusModel(options.model)))
) {
consecutive529Errors++
if (consecutive529Errors >= MAX_529_RETRIES) {
// Check if fallback model is specified
if (options.fallbackModel) {
logEvent('tengu_api_opus_fallback_triggered', {
original_model:
options.model as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
fallback_model:
options.fallbackModel as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
provider: getAPIProviderForStatsig(),
})
// Throw special error to indicate fallback was triggered
throw new FallbackTriggeredError(
options.model,
options.fallbackModel,
)
}
if (
process.env.USER_TYPE === 'external' &&
!process.env.IS_SANDBOX &&
!isPersistentRetryEnabled()
) {
logEvent('tengu_api_custom_529_overloaded_error', {})
throw new CannotRetryError(
new Error(REPEATED_529_ERROR_MESSAGE),
retryContext,
)
}
}
}
// Only retry if the error indicates we should
const persistent =
isPersistentRetryEnabled() && isTransientCapacityError(error)
if (attempt > maxRetries && !persistent) {
throw new CannotRetryError(error, retryContext)
}
// AWS/GCP errors aren't always APIError, but can be retried
const handledCloudAuthError =
handleAwsCredentialError(error) || handleGcpCredentialError(error)
if (
!handledCloudAuthError &&
(!(error instanceof APIError) || !shouldRetry(error))
) {
throw new CannotRetryError(error, retryContext)
}
// Handle max tokens context overflow errors by adjusting max_tokens for the next attempt
// NOTE: With extended-context-window beta, this 400 error should not occur.
// The API now returns 'model_context_window_exceeded' stop_reason instead.
// Keeping for backward compatibility.
if (error instanceof APIError) {
const overflowData = parseMaxTokensContextOverflowError(error)
if (overflowData) {
const { inputTokens, contextLimit } = overflowData
const safetyBuffer = 1000
const availableContext = Math.max(
0,
contextLimit - inputTokens - safetyBuffer,
)
if (availableContext < FLOOR_OUTPUT_TOKENS) {
logError(
new Error(
`availableContext ${availableContext} is less than FLOOR_OUTPUT_TOKENS ${FLOOR_OUTPUT_TOKENS}`,
),
)
throw error
}
// Ensure we have enough tokens for thinking + at least 1 output token
const minRequired =
(retryContext.thinkingConfig.type === 'enabled'
? retryContext.thinkingConfig.budgetTokens
: 0) + 1
const adjustedMaxTokens = Math.max(
FLOOR_OUTPUT_TOKENS,
availableContext,
minRequired,
)
retryContext.maxTokensOverride = adjustedMaxTokens
logEvent('tengu_max_tokens_context_overflow_adjustment', {
inputTokens,
contextLimit,
adjustedMaxTokens,
attempt,
})
continue
}
}
// For other errors, proceed with normal retry logic
// Get retry-after header if available
const retryAfter = getRetryAfter(error)
let delayMs: number
if (persistent && error instanceof APIError && error.status === 429) {
persistentAttempt++
// Window-based limits (e.g. 5hr Max/Pro) include a reset timestamp.
// Wait until reset rather than polling every 5 min uselessly.
const resetDelay = getRateLimitResetDelayMs(error)
delayMs =
resetDelay ??
Math.min(
getRetryDelay(
persistentAttempt,
retryAfter,
PERSISTENT_MAX_BACKOFF_MS,
),
PERSISTENT_RESET_CAP_MS,
)
} else if (persistent) {
persistentAttempt++
// Retry-After is a server directive and bypasses maxDelayMs inside
// getRetryDelay (intentional — honoring it is correct). Cap at the
// 6hr reset-cap here so a pathological header can't wait unbounded.
delayMs = Math.min(
getRetryDelay(
persistentAttempt,
retryAfter,
PERSISTENT_MAX_BACKOFF_MS,
),
PERSISTENT_RESET_CAP_MS,
)
} else {
delayMs = getRetryDelay(attempt, retryAfter)
}
// In persistent mode the for-loop `attempt` is clamped at maxRetries+1;
// use persistentAttempt for telemetry/yields so they show the true count.
const reportedAttempt = persistent ? persistentAttempt : attempt
logEvent('tengu_api_retry', {
attempt: reportedAttempt,
delayMs: delayMs,
error: (error as APIError)
.message as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
status: (error as APIError).status,
provider: getAPIProviderForStatsig(),
})
if (persistent) {
if (delayMs > 60_000) {
logEvent('tengu_api_persistent_retry_wait', {
status: (error as APIError).status,
delayMs,
attempt: reportedAttempt,
provider: getAPIProviderForStatsig(),
})
}
// Chunk long sleeps so the host sees periodic stdout activity and
// does not mark the session idle. Each yield surfaces as
// {type:'system', subtype:'api_retry'} on stdout via QueryEngine.
let remaining = delayMs
while (remaining > 0) {
if (options.signal?.aborted) throw new APIUserAbortError()
if (error instanceof APIError) {
yield createSystemAPIErrorMessage(
error,
remaining,
reportedAttempt,
maxRetries,
)
}
const chunk = Math.min(remaining, HEARTBEAT_INTERVAL_MS)
await sleep(chunk, options.signal, { abortError })
remaining -= chunk
}
// Clamp so the for-loop never terminates. Backoff uses the separate
// persistentAttempt counter which keeps growing to the 5-min cap.
if (attempt >= maxRetries) attempt = maxRetries
} else {
if (error instanceof APIError) {
yield createSystemAPIErrorMessage(error, delayMs, attempt, maxRetries)
}
await sleep(delayMs, options.signal, { abortError })
}
}
}
throw new CannotRetryError(lastError, retryContext)
}
/**
 * Extract the Retry-After header value from an error, if present.
 *
 * Handles both error shapes seen in practice: a plain-object `headers`
 * record, and a fetch-style `Headers` instance (Anthropic SDK errors).
 * Returns the raw header string, or null when absent.
 */
function getRetryAfter(error: unknown): string | null {
  const headers = (error as { headers?: unknown }).headers
  // Plain-object headers (some SDK / cloud provider error shapes).
  const fromRecord = (headers as Record<string, string> | undefined)?.[
    'retry-after'
  ]
  if (fromRecord) return fromRecord
  // Fetch-style Headers instance.
  // eslint-disable-next-line eslint-plugin-n/no-unsupported-features/node-builtins
  const fromHeaders = (headers as Headers | undefined)?.get?.('retry-after')
  return fromHeaders ?? null
}
export function getRetryDelay(
attempt: number,
retryAfterHeader?: string | null,
maxDelayMs = 32000,
): number {
if (retryAfterHeader) {
const seconds = parseInt(retryAfterHeader, 10)
if (!isNaN(seconds)) {
return seconds * 1000
}
}
const baseDelay = Math.min(
BASE_DELAY_MS * Math.pow(2, attempt - 1),
maxDelayMs,
)
const jitter = Math.random() * 0.25 * baseDelay
return baseDelay + jitter
}
/**
 * Parse the 400 "input length and `max_tokens` exceed context limit" error.
 *
 * Returns the token counts embedded in the message so the caller can shrink
 * max_tokens and retry, or undefined when the error is not this shape.
 */
export function parseMaxTokensContextOverflowError(error: APIError):
  | {
      inputTokens: number
      maxTokens: number
      contextLimit: number
    }
  | undefined {
  // Only a 400 with a message can be this error.
  if (error.status !== 400 || !error.message) {
    return undefined
  }
  if (
    !error.message.includes(
      'input length and `max_tokens` exceed context limit',
    )
  ) {
    return undefined
  }
  // Example format: "input length and `max_tokens` exceed context limit: 188059 + 20000 > 200000"
  const parsed = error.message.match(
    /input length and `max_tokens` exceed context limit: (\d+) \+ (\d+) > (\d+)/,
  )
  if (!parsed || parsed.length !== 4) {
    return undefined
  }
  const [, rawInput, rawMax, rawLimit] = parsed
  if (!rawInput || !rawMax || !rawLimit) {
    logError(
      new Error(
        'Unable to parse max_tokens from max_tokens exceed context limit error message',
      ),
    )
    return undefined
  }
  const inputTokens = Number.parseInt(rawInput, 10)
  const maxTokens = Number.parseInt(rawMax, 10)
  const contextLimit = Number.parseInt(rawLimit, 10)
  if ([inputTokens, maxTokens, contextLimit].some(Number.isNaN)) {
    return undefined
  }
  return { inputTokens, maxTokens, contextLimit }
}
// TODO: Replace with a response header check once the API adds a dedicated
// header for fast-mode rejection (e.g., x-fast-mode-rejected). Matching on
// the message text is fragile and breaks if the API wording changes.
function isFastModeNotEnabledError(error: unknown): boolean {
  if (!(error instanceof APIError) || error.status !== 400) {
    return false
  }
  return error.message?.includes('Fast mode is not enabled') ?? false
}
/**
 * Detect a 529 (overloaded) API error.
 *
 * The SDK sometimes drops the 529 status code during streaming, so we also
 * sniff the serialized error body for the overloaded_error marker.
 */
export function is529Error(error: unknown): boolean {
  if (!(error instanceof APIError)) {
    return false
  }
  if (error.status === 529) {
    return true
  }
  return error.message?.includes('"type":"overloaded_error"') ?? false
}
// A 403 whose message says the OAuth token was revoked; gets the same
// refresh-and-retry treatment as a 401 in shouldRetry.
function isOAuthTokenRevokedError(error: unknown): boolean {
  if (!(error instanceof APIError) || error.status !== 403) {
    return false
  }
  return error.message?.includes('OAuth token has been revoked') ?? false
}
// Bedrock-only: detect expired/invalid AWS credentials.
function isBedrockAuthError(error: unknown): boolean {
  if (!isEnvTruthy(process.env.CLAUDE_CODE_USE_BEDROCK)) {
    return false
  }
  // AWS libs reject without an API call if .aws holds a past Expiration value;
  // otherwise, API calls that receive expired tokens give a generic 403
  // "The security token included in the request is invalid".
  if (isAwsCredentialsProviderError(error)) {
    return true
  }
  return error instanceof APIError && error.status === 403
}
/**
 * Clear AWS auth caches if appropriate.
 * @returns true if action was taken.
 */
function handleAwsCredentialError(error: unknown): boolean {
  if (!isBedrockAuthError(error)) {
    return false
  }
  clearAwsCredentialsCache()
  return true
}
// google-auth-library throws plain Error (no typed name like AWS's
// CredentialsProviderError). Match common SDK-level credential-failure messages.
function isGoogleAuthLibraryCredentialError(error: unknown): boolean {
  if (!(error instanceof Error)) return false
  const message = error.message
  const knownFragments = [
    'Could not load the default credentials',
    'Could not refresh access token',
    'invalid_grant',
  ]
  return knownFragments.some(fragment => message.includes(fragment))
}
// Vertex-only: detect expired/invalid GCP credentials.
function isVertexAuthError(error: unknown): boolean {
  if (!isEnvTruthy(process.env.CLAUDE_CODE_USE_VERTEX)) {
    return false
  }
  // SDK-level: google-auth-library fails in prepareOptions() before the HTTP call.
  if (isGoogleAuthLibraryCredentialError(error)) {
    return true
  }
  // Server-side: Vertex returns 401 for expired/invalid tokens.
  return error instanceof APIError && error.status === 401
}
/**
 * Clear GCP auth caches if appropriate.
 * @returns true if action was taken.
 */
function handleGcpCredentialError(error: unknown): boolean {
  if (!isVertexAuthError(error)) {
    return false
  }
  clearGcpCredentialsCache()
  return true
}
/**
 * Decide whether an APIError is retryable.
 *
 * Guard order is load-bearing: mock errors are never retried; persistent-mode
 * and CCR overrides come before the server's x-should-retry directive;
 * message sniffing (streaming 529s, context-overflow 400s) comes next; status
 * code heuristics run last.
 *
 * Side effect: clears the API-key-helper cache on 401 so the next attempt
 * re-resolves credentials.
 */
function shouldRetry(error: APIError): boolean {
  // Never retry mock errors - they're from /mock-limits command for testing
  if (isMockRateLimitError(error)) {
    return false
  }
  // Persistent mode: 429/529 always retryable, bypass subscriber gates and
  // x-should-retry header.
  if (isPersistentRetryEnabled() && isTransientCapacityError(error)) {
    return true
  }
  // CCR mode: auth is via infrastructure-provided JWTs, so a 401/403 is a
  // transient blip (auth service flap, network hiccup) rather than bad
  // credentials. Bypass x-should-retry:false — the server assumes we'd retry
  // the same bad key, but our key is fine.
  if (
    isEnvTruthy(process.env.CLAUDE_CODE_REMOTE) &&
    (error.status === 401 || error.status === 403)
  ) {
    return true
  }
  // Check for overloaded errors first by examining the message content
  // The SDK sometimes fails to properly pass the 529 status code during streaming,
  // so we need to check the error message directly
  if (error.message?.includes('"type":"overloaded_error"')) {
    return true
  }
  // Check for max tokens context overflow errors that we can handle
  if (parseMaxTokensContextOverflowError(error)) {
    return true
  }
  // Note this is not a standard header.
  const shouldRetryHeader = error.headers?.get('x-should-retry')
  // If the server explicitly says whether or not to retry, obey.
  // For Max and Pro users, should-retry is true, but in several hours, so we shouldn't.
  // Enterprise users can retry because they typically use PAYG instead of rate limits.
  if (
    shouldRetryHeader === 'true' &&
    (!isClaudeAISubscriber() || isEnterpriseSubscriber())
  ) {
    return true
  }
  // Ants can ignore x-should-retry: false for 5xx server errors only.
  // For other status codes (401, 403, 400, 429, etc.), respect the header.
  if (shouldRetryHeader === 'false') {
    const is5xxError = error.status !== undefined && error.status >= 500
    if (!(process.env.USER_TYPE === 'ant' && is5xxError)) {
      return false
    }
  }
  // Connection-level SDK errors are retried unconditionally.
  if (error instanceof APIConnectionError) {
    return true
  }
  if (!error.status) return false
  // Retry on request timeouts.
  if (error.status === 408) return true
  // Retry on lock timeouts.
  if (error.status === 409) return true
  // Retry on rate limits, but not for ClaudeAI Subscription users
  // Enterprise users can retry because they typically use PAYG instead of rate limits
  if (error.status === 429) {
    return !isClaudeAISubscriber() || isEnterpriseSubscriber()
  }
  // Clear API key cache on 401 and allow retry.
  // OAuth token handling is done in the main retry loop via handleOAuth401Error.
  if (error.status === 401) {
    clearApiKeyHelperCache()
    return true
  }
  // Retry on 403 "token revoked" (same refresh logic as 401, see above)
  if (isOAuthTokenRevokedError(error)) {
    return true
  }
  // Retry internal errors.
  if (error.status && error.status >= 500) return true
  return false
}
/**
 * Maximum retry count for API requests.
 *
 * Honors the CLAUDE_CODE_MAX_RETRIES env override when it parses to a
 * number; otherwise falls back to DEFAULT_MAX_RETRIES.
 *
 * Fix: previously a non-numeric override returned NaN, and the retry loop's
 * `attempt > maxRetries` comparison is always false against NaN — retries
 * would never be bounded.
 */
export function getDefaultMaxRetries(): number {
  const override = process.env.CLAUDE_CODE_MAX_RETRIES
  if (override) {
    const parsed = parseInt(override, 10)
    // Ignore values that don't parse (e.g. "abc") instead of returning NaN.
    if (!Number.isNaN(parsed)) {
      return parsed
    }
  }
  return DEFAULT_MAX_RETRIES
}
// Per-call override wins; otherwise use the env/default retry count.
function getMaxRetries(options: RetryOptions): number {
  const { maxRetries } = options
  return maxRetries ?? getDefaultMaxRetries()
}
// NOTE(review): the consumers of these thresholds are outside this chunk —
// names suggest fast-mode fallback/cooldown tuning; confirm against usage.
const DEFAULT_FAST_MODE_FALLBACK_HOLD_MS = 30 * 60 * 1000 // 30 minutes
const SHORT_RETRY_THRESHOLD_MS = 20 * 1000 // 20 seconds
const MIN_COOLDOWN_MS = 10 * 60 * 1000 // 10 minutes
function getRetryAfterMs(error: APIError): number | null {
const retryAfter = getRetryAfter(error)
if (retryAfter) {
const seconds = parseInt(retryAfter, 10)
if (!isNaN(seconds)) {
return seconds * 1000
}
}
return null
}
// Delay until the unified rate-limit window resets, from the
// anthropic-ratelimit-unified-reset header (unix seconds). Returns null when
// the header is absent, unparseable, or already in the past; otherwise the
// wait in ms, capped at PERSISTENT_RESET_CAP_MS.
function getRateLimitResetDelayMs(error: APIError): number | null {
  const rawReset = error.headers?.get?.('anthropic-ratelimit-unified-reset')
  if (!rawReset) {
    return null
  }
  const resetEpochSec = Number(rawReset)
  if (!Number.isFinite(resetEpochSec)) {
    return null
  }
  const msUntilReset = resetEpochSec * 1000 - Date.now()
  return msUntilReset > 0 ? Math.min(msUntilReset, PERSISTENT_RESET_CAP_MS) : null
}