init claude-code
This commit is contained in:
@@ -0,0 +1,119 @@
|
||||
import axios from 'axios'
|
||||
import { getOauthConfig } from '../../constants/oauth.js'
|
||||
import { getOAuthHeaders, prepareApiRequest } from '../../utils/teleport/api.js'
|
||||
|
||||
/** Kinds of admin request this module can create or query. */
export type AdminRequestType = 'limit_increase' | 'seat_upgrade'
|
||||
|
||||
/** Status values an admin request can carry. */
export type AdminRequestStatus = 'pending' | 'approved' | 'dismissed'
|
||||
|
||||
/**
 * Payload carried in `details` by a `seat_upgrade` admin request.
 * Each field may be omitted or explicitly null.
 */
export type AdminRequestSeatUpgradeDetails = {
  message?: string | null
  current_seat_tier?: string | null
}
|
||||
|
||||
/**
 * Request body accepted by `createAdminRequest`, discriminated on
 * `request_type`: a limit increase carries no details, a seat upgrade
 * carries `AdminRequestSeatUpgradeDetails`.
 */
export type AdminRequestCreateParams =
  | {
      request_type: 'limit_increase'
      details: null
    }
  | {
      request_type: 'seat_upgrade'
      details: AdminRequestSeatUpgradeDetails
    }
|
||||
|
||||
/**
 * An admin request as typed for API responses: common fields intersected
 * with a `request_type`-discriminated `details` payload.
 */
export type AdminRequest = {
  uuid: string
  status: AdminRequestStatus
  requester_uuid?: string | null
  created_at: string
} & (
  | {
      request_type: 'limit_increase'
      details: null
    }
  | {
      request_type: 'seat_upgrade'
      details: AdminRequestSeatUpgradeDetails
    }
)
|
||||
|
||||
/**
 * Create an admin request (limit increase or seat upgrade).
 *
 * For Team/Enterprise users who don't have billing/admin permissions,
 * this creates a request that their admin can act on.
 *
 * If a pending request of the same type already exists for this user,
 * returns the existing request instead of creating a new one.
 *
 * @param params Request type plus its type-specific details; sent as the POST body.
 * @returns The response body, typed as the admin request.
 */
export async function createAdminRequest(
  params: AdminRequestCreateParams,
): Promise<AdminRequest> {
  const { accessToken, orgUUID } = await prepareApiRequest()

  // The org UUID is sent both as a header and as part of the path.
  const requestConfig = {
    headers: {
      ...getOAuthHeaders(accessToken),
      'x-organization-uuid': orgUUID,
    },
  }
  const endpoint = `${getOauthConfig().BASE_API_URL}/api/oauth/organizations/${orgUUID}/admin_requests`

  const { data } = await axios.post<AdminRequest>(
    endpoint,
    params,
    requestConfig,
  )
  return data
}
|
||||
|
||||
/**
 * List the current user's admin requests of one type, filtered by status.
 *
 * @param requestType Which kind of request to look up.
 * @param statuses Statuses to include; each one is sent as a repeated
 *   `statuses` query parameter.
 * @returns The response body: an array of matching requests, or null.
 */
export async function getMyAdminRequests(
  requestType: AdminRequestType,
  statuses: AdminRequestStatus[],
): Promise<AdminRequest[] | null> {
  const { accessToken, orgUUID } = await prepareApiRequest()

  const headers = {
    ...getOAuthHeaders(accessToken),
    'x-organization-uuid': orgUUID,
  }

  // URLSearchParams percent-encodes values, so the query stays well-formed
  // even if a request type or status ever contains reserved characters.
  const query = new URLSearchParams({ request_type: requestType })
  for (const status of statuses) {
    query.append('statuses', status)
  }

  const url = `${getOauthConfig().BASE_API_URL}/api/oauth/organizations/${orgUUID}/admin_requests/me?${query.toString()}`

  const response = await axios.get<AdminRequest[] | null>(url, {
    headers,
  })

  return response.data
}
|
||||
|
||||
/** Response body type used for the admin-request eligibility endpoint. */
type AdminRequestEligibilityResponse = {
  request_type: AdminRequestType
  is_allowed: boolean
}
|
||||
|
||||
/**
 * Check if a specific admin request type is allowed for this org.
 *
 * @param requestType The request kind to check; sent as the `request_type` query parameter.
 * @returns The eligibility response body.
 */
export async function checkAdminRequestEligibility(
  requestType: AdminRequestType,
): Promise<AdminRequestEligibilityResponse | null> {
  const { accessToken, orgUUID } = await prepareApiRequest()

  const requestConfig = {
    headers: {
      ...getOAuthHeaders(accessToken),
      'x-organization-uuid': orgUUID,
    },
  }
  const endpoint = `${getOauthConfig().BASE_API_URL}/api/oauth/organizations/${orgUUID}/admin_requests/eligibility?request_type=${requestType}`

  const { data } = await axios.get<AdminRequestEligibilityResponse>(
    endpoint,
    requestConfig,
  )
  return data
}
|
||||
@@ -0,0 +1,141 @@
|
||||
import axios from 'axios'
|
||||
import isEqual from 'lodash-es/isEqual.js'
|
||||
import {
|
||||
getAnthropicApiKey,
|
||||
getClaudeAIOAuthTokens,
|
||||
hasProfileScope,
|
||||
} from 'src/utils/auth.js'
|
||||
import { z } from 'zod'
|
||||
import { getOauthConfig, OAUTH_BETA_HEADER } from '../../constants/oauth.js'
|
||||
import { getGlobalConfig, saveGlobalConfig } from '../../utils/config.js'
|
||||
import { logForDebugging } from '../../utils/debug.js'
|
||||
import { withOAuth401Retry } from '../../utils/http.js'
|
||||
import { lazySchema } from '../../utils/lazySchema.js'
|
||||
import { logError } from '../../utils/log.js'
|
||||
import { getAPIProvider } from '../../utils/model/providers.js'
|
||||
import { isEssentialTrafficOnly } from '../../utils/privacyLevel.js'
|
||||
import { getClaudeCodeUserAgent } from '../../utils/userAgent.js'
|
||||
|
||||
/**
 * Lazily-built zod schema for the bootstrap endpoint response.
 * Each model option is renamed from the wire shape
 * `{ model, name, description }` to `{ value, label, description }`.
 */
const bootstrapResponseSchema = lazySchema(() => {
  const modelOptionSchema = z
    .object({
      model: z.string(),
      name: z.string(),
      description: z.string(),
    })
    .transform(({ model, name, description }) => ({
      value: model,
      label: name,
      description,
    }))

  return z.object({
    client_data: z.record(z.unknown()).nullish(),
    additional_model_options: z.array(modelOptionSchema).nullish(),
  })
})
|
||||
|
||||
/** Parsed (post-transform) output type of `bootstrapResponseSchema`. */
type BootstrapResponse = z.infer<ReturnType<typeof bootstrapResponseSchema>>
|
||||
|
||||
/**
 * Fetch and validate the bootstrap payload from the API.
 *
 * Returns null without a network call when nonessential traffic is
 * disabled, when the provider is not first-party, or when neither a usable
 * OAuth token nor an API key is available. Also returns null when the
 * response fails schema validation. Request errors are logged and rethrown.
 */
async function fetchBootstrapAPI(): Promise<BootstrapResponse | null> {
  if (isEssentialTrafficOnly()) {
    logForDebugging('[Bootstrap] Skipped: Nonessential traffic disabled')
    return null
  }

  if (getAPIProvider() !== 'firstParty') {
    logForDebugging('[Bootstrap] Skipped: 3P provider')
    return null
  }

  // OAuth preferred (requires user:profile scope — service-key OAuth tokens
  // lack it and would 403). Fall back to API key auth for console users.
  const apiKey = getAnthropicApiKey()
  const oauthUsable =
    getClaudeAIOAuthTokens()?.accessToken && hasProfileScope()
  if (!(oauthUsable || apiKey)) {
    logForDebugging('[Bootstrap] Skipped: no usable OAuth or API key')
    return null
  }

  const endpoint = `${getOauthConfig().BASE_API_URL}/api/claude_cli/bootstrap`

  // One request attempt; withOAuth401Retry may invoke it a second time.
  const attempt = async (): Promise<BootstrapResponse | null> => {
    // Re-read OAuth each call so the retry picks up the refreshed token.
    const token = getClaudeAIOAuthTokens()?.accessToken
    let authHeaders: Record<string, string>
    if (token && hasProfileScope()) {
      authHeaders = {
        Authorization: `Bearer ${token}`,
        'anthropic-beta': OAUTH_BETA_HEADER,
      }
    } else if (apiKey) {
      authHeaders = { 'x-api-key': apiKey }
    } else {
      logForDebugging('[Bootstrap] No auth available on retry, aborting')
      return null
    }

    logForDebugging('[Bootstrap] Fetching')
    const response = await axios.get<unknown>(endpoint, {
      headers: {
        'Content-Type': 'application/json',
        'User-Agent': getClaudeCodeUserAgent(),
        ...authHeaders,
      },
      timeout: 5000,
    })

    const parsed = bootstrapResponseSchema().safeParse(response.data)
    if (parsed.success) {
      logForDebugging('[Bootstrap] Fetch ok')
      return parsed.data
    }
    logForDebugging(
      `[Bootstrap] Response failed validation: ${parsed.error.message}`,
    )
    return null
  }

  // withOAuth401Retry handles the refresh-and-retry. API key users fail
  // through on 401 (no refresh mechanism — no OAuth token to pass).
  try {
    return await withOAuth401Retry(attempt)
  } catch (error) {
    logForDebugging(
      `[Bootstrap] Fetch failed: ${axios.isAxiosError(error) ? (error.response?.status ?? error.code) : 'unknown'}`,
    )
    throw error
  }
}
|
||||
|
||||
/**
 * Fetch bootstrap data from the API and persist to disk cache.
 *
 * Does nothing when the fetch yields no response. Errors are logged via
 * `logError` and never propagate to the caller.
 */
export async function fetchBootstrapData(): Promise<void> {
  try {
    const bootstrap = await fetchBootstrapAPI()
    if (!bootstrap) {
      return
    }

    const nextClientData = bootstrap.client_data ?? null
    const nextModelOptions = bootstrap.additional_model_options ?? []

    // Only persist if data actually changed — avoids a config write on every startup.
    const cached = getGlobalConfig()
    const cacheIsCurrent =
      isEqual(cached.clientDataCache, nextClientData) &&
      isEqual(cached.additionalModelOptionsCache, nextModelOptions)
    if (cacheIsCurrent) {
      logForDebugging('[Bootstrap] Cache unchanged, skipping write')
      return
    }

    logForDebugging('[Bootstrap] Cache updated, persisting to disk')
    saveGlobalConfig(current => {
      return {
        ...current,
        clientDataCache: nextClientData,
        additionalModelOptionsCache: nextModelOptions,
      }
    })
  } catch (error) {
    logError(error)
  }
}
|
||||
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,389 @@
|
||||
import Anthropic, { type ClientOptions } from '@anthropic-ai/sdk'
|
||||
import { randomUUID } from 'crypto'
|
||||
import type { GoogleAuth } from 'google-auth-library'
|
||||
import {
|
||||
checkAndRefreshOAuthTokenIfNeeded,
|
||||
getAnthropicApiKey,
|
||||
getApiKeyFromApiKeyHelper,
|
||||
getClaudeAIOAuthTokens,
|
||||
isClaudeAISubscriber,
|
||||
refreshAndGetAwsCredentials,
|
||||
refreshGcpCredentialsIfNeeded,
|
||||
} from 'src/utils/auth.js'
|
||||
import { getUserAgent } from 'src/utils/http.js'
|
||||
import { getSmallFastModel } from 'src/utils/model/model.js'
|
||||
import {
|
||||
getAPIProvider,
|
||||
isFirstPartyAnthropicBaseUrl,
|
||||
} from 'src/utils/model/providers.js'
|
||||
import { getProxyFetchOptions } from 'src/utils/proxy.js'
|
||||
import {
|
||||
getIsNonInteractiveSession,
|
||||
getSessionId,
|
||||
} from '../../bootstrap/state.js'
|
||||
import { getOauthConfig } from '../../constants/oauth.js'
|
||||
import { isDebugToStdErr, logForDebugging } from '../../utils/debug.js'
|
||||
import {
|
||||
getAWSRegion,
|
||||
getVertexRegionForModel,
|
||||
isEnvTruthy,
|
||||
} from '../../utils/envUtils.js'
|
||||
|
||||
/**
|
||||
* Environment variables for different client types:
|
||||
*
|
||||
* Direct API:
|
||||
* - ANTHROPIC_API_KEY: Required for direct API access
|
||||
*
|
||||
* AWS Bedrock:
|
||||
* - AWS credentials configured via aws-sdk defaults
|
||||
* - AWS_REGION or AWS_DEFAULT_REGION: Sets the AWS region for all models (default: us-east-1)
|
||||
* - ANTHROPIC_SMALL_FAST_MODEL_AWS_REGION: Optional. Override AWS region specifically for the small fast model (Haiku)
|
||||
*
|
||||
* Foundry (Azure):
|
||||
* - ANTHROPIC_FOUNDRY_RESOURCE: Your Azure resource name (e.g., 'my-resource')
|
||||
* For the full endpoint: https://{resource}.services.ai.azure.com/anthropic/v1/messages
|
||||
* - ANTHROPIC_FOUNDRY_BASE_URL: Optional. Alternative to resource - provide full base URL directly
|
||||
* (e.g., 'https://my-resource.services.ai.azure.com')
|
||||
*
|
||||
* Authentication (one of the following):
|
||||
* - ANTHROPIC_FOUNDRY_API_KEY: Your Microsoft Foundry API key (if using API key auth)
|
||||
* - Azure AD authentication: If no API key is provided, uses DefaultAzureCredential
|
||||
* which supports multiple auth methods (environment variables, managed identity,
|
||||
* Azure CLI, etc.). See: https://docs.microsoft.com/en-us/javascript/api/@azure/identity
|
||||
*
|
||||
* Vertex AI:
|
||||
* - Model-specific region variables (highest priority):
|
||||
* - VERTEX_REGION_CLAUDE_3_5_HAIKU: Region for Claude 3.5 Haiku model
|
||||
* - VERTEX_REGION_CLAUDE_HAIKU_4_5: Region for Claude Haiku 4.5 model
|
||||
* - VERTEX_REGION_CLAUDE_3_5_SONNET: Region for Claude 3.5 Sonnet model
|
||||
* - VERTEX_REGION_CLAUDE_3_7_SONNET: Region for Claude 3.7 Sonnet model
|
||||
* - CLOUD_ML_REGION: Optional. The default GCP region to use for all models
|
||||
* If specific model region not specified above
|
||||
* - ANTHROPIC_VERTEX_PROJECT_ID: Required. Your GCP project ID
|
||||
* - Standard GCP credentials configured via google-auth-library
|
||||
*
|
||||
* Priority for determining region:
|
||||
* 1. Hardcoded model-specific environment variables
|
||||
* 2. Global CLOUD_ML_REGION variable
|
||||
* 3. Default region from config
|
||||
* 4. Fallback region (us-east5)
|
||||
*/
|
||||
|
||||
/**
 * Build an SDK logger whose four levels all write through `console.error`,
 * each prefixed with a level tag.
 */
function createStderrLogger(): ClientOptions['logger'] {
  // One factory for all levels: the tag is the only thing that differs.
  const emit =
    (tag: string) =>
    (msg: unknown, ...args: unknown[]): void =>
      // biome-ignore lint/suspicious/noConsole:: intentional console output -- SDK logger must use console
      console.error(tag, msg, ...args)

  return {
    error: emit('[Anthropic SDK ERROR]'),
    warn: emit('[Anthropic SDK WARN]'),
    info: emit('[Anthropic SDK INFO]'),
    debug: emit('[Anthropic SDK DEBUG]'),
  }
}
|
||||
|
||||
/**
 * Build an API client for the provider selected by environment variables.
 *
 * Provider selection, first match wins: CLAUDE_CODE_USE_BEDROCK,
 * CLAUDE_CODE_USE_FOUNDRY, CLAUDE_CODE_USE_VERTEX, otherwise the first-party
 * Anthropic client. Third-party clients are cast to `Anthropic` for callers.
 *
 * The request timeout comes from API_TIMEOUT_MS (milliseconds) and defaults
 * to 600000; a value that does not parse as an integer falls back to the
 * default instead of passing NaN to the SDK.
 *
 * @param apiKey Explicit API key for the first-party client; falls back to
 *   `getAnthropicApiKey()` when omitted. Not used for Claude.ai subscribers.
 * @param maxRetries Forwarded to the SDK client.
 * @param model Used to pick the Bedrock region override and the Vertex region.
 * @param fetchOverride Optional fetch implementation wrapped by `buildFetch`.
 * @param source Label included in the per-request debug log line.
 */
export async function getAnthropicClient({
  apiKey,
  maxRetries,
  model,
  fetchOverride,
  source,
}: {
  apiKey?: string
  maxRetries: number
  model?: string
  fetchOverride?: ClientOptions['fetch']
  source?: string
}): Promise<Anthropic> {
  const containerId = process.env.CLAUDE_CODE_CONTAINER_ID
  const remoteSessionId = process.env.CLAUDE_CODE_REMOTE_SESSION_ID
  const clientApp = process.env.CLAUDE_AGENT_SDK_CLIENT_APP
  const customHeaders = getCustomHeaders()
  const defaultHeaders: { [key: string]: string } = {
    'x-app': 'cli',
    'User-Agent': getUserAgent(),
    'X-Claude-Code-Session-Id': getSessionId(),
    ...customHeaders,
    ...(containerId ? { 'x-claude-remote-container-id': containerId } : {}),
    ...(remoteSessionId
      ? { 'x-claude-remote-session-id': remoteSessionId }
      : {}),
    // SDK consumers can identify their app/library for backend analytics
    ...(clientApp ? { 'x-client-app': clientApp } : {}),
  }

  // Log API client configuration for HFI debugging
  logForDebugging(
    `[API:request] Creating client, ANTHROPIC_CUSTOM_HEADERS present: ${!!process.env.ANTHROPIC_CUSTOM_HEADERS}, has Authorization header: ${!!customHeaders['Authorization']}`,
  )

  // Add additional protection header if enabled via env var
  const additionalProtectionEnabled = isEnvTruthy(
    process.env.CLAUDE_CODE_ADDITIONAL_PROTECTION,
  )
  if (additionalProtectionEnabled) {
    defaultHeaders['x-anthropic-additional-protection'] = 'true'
  }

  logForDebugging('[API:auth] OAuth token check starting')
  await checkAndRefreshOAuthTokenIfNeeded()
  logForDebugging('[API:auth] OAuth token check complete')

  if (!isClaudeAISubscriber()) {
    await configureApiKeyHeaders(defaultHeaders, getIsNonInteractiveSession())
  }

  const resolvedFetch = buildFetch(fetchOverride, source)

  // parseInt yields NaN for a non-numeric API_TIMEOUT_MS; fall back to the
  // default rather than handing NaN to the SDK.
  const defaultTimeoutMs = 600 * 1000
  const parsedTimeoutMs = parseInt(
    process.env.API_TIMEOUT_MS || String(defaultTimeoutMs),
    10,
  )
  const timeoutMs = Number.isNaN(parsedTimeoutMs)
    ? defaultTimeoutMs
    : parsedTimeoutMs

  // Options shared by every provider's client constructor.
  const ARGS = {
    defaultHeaders,
    maxRetries,
    timeout: timeoutMs,
    dangerouslyAllowBrowser: true,
    fetchOptions: getProxyFetchOptions({
      forAnthropicAPI: true,
    }) as ClientOptions['fetchOptions'],
    ...(resolvedFetch && {
      fetch: resolvedFetch,
    }),
  }

  if (isEnvTruthy(process.env.CLAUDE_CODE_USE_BEDROCK)) {
    const { AnthropicBedrock } = await import('@anthropic-ai/bedrock-sdk')
    // Use region override for small fast model if specified
    const awsRegion =
      model === getSmallFastModel() &&
      process.env.ANTHROPIC_SMALL_FAST_MODEL_AWS_REGION
        ? process.env.ANTHROPIC_SMALL_FAST_MODEL_AWS_REGION
        : getAWSRegion()

    const bedrockArgs: ConstructorParameters<typeof AnthropicBedrock>[0] = {
      ...ARGS,
      awsRegion,
      ...(isEnvTruthy(process.env.CLAUDE_CODE_SKIP_BEDROCK_AUTH) && {
        skipAuth: true,
      }),
      ...(isDebugToStdErr() && { logger: createStderrLogger() }),
    }

    // Add API key authentication if available
    if (process.env.AWS_BEARER_TOKEN_BEDROCK) {
      bedrockArgs.skipAuth = true
      // Add the Bearer token for Bedrock API key authentication
      bedrockArgs.defaultHeaders = {
        ...bedrockArgs.defaultHeaders,
        Authorization: `Bearer ${process.env.AWS_BEARER_TOKEN_BEDROCK}`,
      }
    } else if (!isEnvTruthy(process.env.CLAUDE_CODE_SKIP_BEDROCK_AUTH)) {
      // Refresh auth and get credentials with cache clearing
      const cachedCredentials = await refreshAndGetAwsCredentials()
      if (cachedCredentials) {
        bedrockArgs.awsAccessKey = cachedCredentials.accessKeyId
        bedrockArgs.awsSecretKey = cachedCredentials.secretAccessKey
        bedrockArgs.awsSessionToken = cachedCredentials.sessionToken
      }
    }
    // we have always been lying about the return type - this doesn't support batching or models
    return new AnthropicBedrock(bedrockArgs) as unknown as Anthropic
  }

  if (isEnvTruthy(process.env.CLAUDE_CODE_USE_FOUNDRY)) {
    const { AnthropicFoundry } = await import('@anthropic-ai/foundry-sdk')
    // Determine Azure AD token provider based on configuration
    // SDK reads ANTHROPIC_FOUNDRY_API_KEY by default
    let azureADTokenProvider: (() => Promise<string>) | undefined
    if (!process.env.ANTHROPIC_FOUNDRY_API_KEY) {
      if (isEnvTruthy(process.env.CLAUDE_CODE_SKIP_FOUNDRY_AUTH)) {
        // Mock token provider for testing/proxy scenarios (similar to Vertex mock GoogleAuth)
        azureADTokenProvider = () => Promise.resolve('')
      } else {
        // Use real Azure AD authentication with DefaultAzureCredential
        const {
          DefaultAzureCredential: AzureCredential,
          getBearerTokenProvider,
        } = await import('@azure/identity')
        azureADTokenProvider = getBearerTokenProvider(
          new AzureCredential(),
          'https://cognitiveservices.azure.com/.default',
        )
      }
    }

    const foundryArgs: ConstructorParameters<typeof AnthropicFoundry>[0] = {
      ...ARGS,
      ...(azureADTokenProvider && { azureADTokenProvider }),
      ...(isDebugToStdErr() && { logger: createStderrLogger() }),
    }
    // we have always been lying about the return type - this doesn't support batching or models
    return new AnthropicFoundry(foundryArgs) as unknown as Anthropic
  }

  if (isEnvTruthy(process.env.CLAUDE_CODE_USE_VERTEX)) {
    // Refresh GCP credentials if gcpAuthRefresh is configured and credentials are expired
    // This is similar to how we handle AWS credential refresh for Bedrock
    if (!isEnvTruthy(process.env.CLAUDE_CODE_SKIP_VERTEX_AUTH)) {
      await refreshGcpCredentialsIfNeeded()
    }

    const [{ AnthropicVertex }, { GoogleAuth }] = await Promise.all([
      import('@anthropic-ai/vertex-sdk'),
      import('google-auth-library'),
    ])
    // TODO: Cache either GoogleAuth instance or AuthClient to improve performance
    // Currently we create a new GoogleAuth instance for every getAnthropicClient() call
    // This could cause repeated authentication flows and metadata server checks
    // However, caching needs careful handling of:
    // - Credential refresh/expiration
    // - Environment variable changes (GOOGLE_APPLICATION_CREDENTIALS, project vars)
    // - Cross-request auth state management
    // See: https://github.com/googleapis/google-auth-library-nodejs/issues/390 for caching challenges

    // Prevent metadata server timeout by providing projectId as fallback
    // google-auth-library checks project ID in this order:
    // 1. Environment variables (GCLOUD_PROJECT, GOOGLE_CLOUD_PROJECT, etc.)
    // 2. Credential files (service account JSON, ADC file)
    // 3. gcloud config
    // 4. GCE metadata server (causes 12s timeout outside GCP)
    //
    // We only set projectId if user hasn't configured other discovery methods
    // to avoid interfering with their existing auth setup

    // Check project environment variables in same order as google-auth-library
    // See: https://github.com/googleapis/google-auth-library-nodejs/blob/main/src/auth/googleauth.ts
    const hasProjectEnvVar =
      process.env['GCLOUD_PROJECT'] ||
      process.env['GOOGLE_CLOUD_PROJECT'] ||
      process.env['gcloud_project'] ||
      process.env['google_cloud_project']

    // Check for credential file paths (service account or ADC)
    // Note: We're checking both standard and lowercase variants to be safe,
    // though we should verify what google-auth-library actually checks
    const hasKeyFile =
      process.env['GOOGLE_APPLICATION_CREDENTIALS'] ||
      process.env['google_application_credentials']

    const googleAuth = isEnvTruthy(process.env.CLAUDE_CODE_SKIP_VERTEX_AUTH)
      ? ({
          // Mock GoogleAuth for testing/proxy scenarios
          getClient: () => ({
            getRequestHeaders: () => ({}),
          }),
        } as unknown as GoogleAuth)
      : new GoogleAuth({
          scopes: ['https://www.googleapis.com/auth/cloud-platform'],
          // Only use ANTHROPIC_VERTEX_PROJECT_ID as last resort fallback
          // This prevents the 12-second metadata server timeout when:
          // - No project env vars are set AND
          // - No credential keyfile is specified AND
          // - ADC file exists but lacks project_id field
          //
          // Risk: If auth project != API target project, this could cause billing/audit issues
          // Mitigation: Users can set GOOGLE_CLOUD_PROJECT to override
          ...(hasProjectEnvVar || hasKeyFile
            ? {}
            : {
                projectId: process.env.ANTHROPIC_VERTEX_PROJECT_ID,
              }),
        })

    const vertexArgs: ConstructorParameters<typeof AnthropicVertex>[0] = {
      ...ARGS,
      region: getVertexRegionForModel(model),
      googleAuth,
      ...(isDebugToStdErr() && { logger: createStderrLogger() }),
    }
    // we have always been lying about the return type - this doesn't support batching or models
    return new AnthropicVertex(vertexArgs) as unknown as Anthropic
  }

  // Determine authentication method based on available tokens
  const clientConfig: ConstructorParameters<typeof Anthropic>[0] = {
    apiKey: isClaudeAISubscriber() ? null : apiKey || getAnthropicApiKey(),
    authToken: isClaudeAISubscriber()
      ? getClaudeAIOAuthTokens()?.accessToken
      : undefined,
    // Set baseURL from OAuth config when using staging OAuth
    ...(process.env.USER_TYPE === 'ant' &&
    isEnvTruthy(process.env.USE_STAGING_OAUTH)
      ? { baseURL: getOauthConfig().BASE_API_URL }
      : {}),
    ...ARGS,
    ...(isDebugToStdErr() && { logger: createStderrLogger() }),
  }

  return new Anthropic(clientConfig)
}
|
||||
|
||||
/**
 * Add a bearer `Authorization` header when a token is available.
 *
 * ANTHROPIC_AUTH_TOKEN takes precedence; the API key helper is only
 * consulted when that variable is unset or empty. `headers` is mutated in
 * place and left untouched when no token is found.
 *
 * @param headers Header map to mutate.
 * @param isNonInteractiveSession Forwarded to the API key helper.
 */
async function configureApiKeyHeaders(
  headers: Record<string, string>,
  isNonInteractiveSession: boolean,
): Promise<void> {
  const envToken = process.env.ANTHROPIC_AUTH_TOKEN
  const bearer = envToken
    ? envToken
    : await getApiKeyFromApiKeyHelper(isNonInteractiveSession)
  if (!bearer) {
    return
  }
  headers['Authorization'] = `Bearer ${bearer}`
}
|
||||
|
||||
/**
 * Parse ANTHROPIC_CUSTOM_HEADERS into a header map.
 *
 * The variable holds one "Name: Value" header per line (curl style).
 * Blank lines, lines without a colon, and lines with an empty name are
 * skipped. When a name repeats, the last value wins.
 *
 * @returns The parsed headers; empty when the variable is unset or empty.
 */
function getCustomHeaders(): Record<string, string> {
  const parsed: Record<string, string> = {}
  const raw = process.env.ANTHROPIC_CUSTOM_HEADERS
  if (!raw) {
    return parsed
  }

  // Split by newlines to support multiple headers
  for (const line of raw.split(/\n|\r\n/)) {
    if (!line.trim()) continue

    // Split on the first `:` then trim — avoids regex backtracking on
    // malformed long header lines. Later colons stay in the value.
    const separatorAt = line.indexOf(':')
    if (separatorAt === -1) continue

    const name = line.slice(0, separatorAt).trim()
    if (!name) continue

    parsed[name] = line.slice(separatorAt + 1).trim()
  }

  return parsed
}
|
||||
|
||||
/** Name of the header that carries the client-generated request ID. */
export const CLIENT_REQUEST_ID_HEADER = 'x-client-request-id'
|
||||
|
||||
/**
 * Wrap a fetch implementation to tag and log outgoing API requests.
 *
 * The wrapper adds a client-generated request ID header (first-party API
 * only, and only when the caller has not already set one), writes one debug
 * log line per request, and delegates to `fetchOverride` or the global fetch.
 *
 * @param fetchOverride Fetch to delegate to; defaults to `globalThis.fetch`.
 * @param source Label for the debug log line; logged as 'unknown' when absent.
 * @returns The wrapping fetch function.
 */
function buildFetch(
  fetchOverride: ClientOptions['fetch'],
  source: string | undefined,
): ClientOptions['fetch'] {
  // eslint-disable-next-line eslint-plugin-n/no-unsupported-features/node-builtins
  const inner = fetchOverride ?? globalThis.fetch
  // Only send to the first-party API — Bedrock/Vertex/Foundry don't log it
  // and unknown headers risk rejection by strict proxies (inc-4029 class).
  const injectClientRequestId =
    getAPIProvider() === 'firstParty' && isFirstPartyAnthropicBaseUrl()
  return (input, init) => {
    // Passing `headers` in init replaces a Request's own headers, so when the
    // caller supplied a Request without init.headers, start from the
    // Request's headers instead of an empty set.
    // eslint-disable-next-line eslint-plugin-n/no-unsupported-features/node-builtins
    const headers = new Headers(
      init?.headers ??
        // eslint-disable-next-line eslint-plugin-n/no-unsupported-features/node-builtins
        (input instanceof Request ? input.headers : undefined),
    )
    // Generate a client-side request ID so timeouts (which return no server
    // request ID) can still be correlated with server logs by the API team.
    // Callers that want to track the ID themselves can pre-set the header.
    if (injectClientRequestId && !headers.has(CLIENT_REQUEST_ID_HEADER)) {
      headers.set(CLIENT_REQUEST_ID_HEADER, randomUUID())
    }
    try {
      // eslint-disable-next-line eslint-plugin-n/no-unsupported-features/node-builtins
      const url = input instanceof Request ? input.url : String(input)
      const id = headers.get(CLIENT_REQUEST_ID_HEADER)
      logForDebugging(
        `[API REQUEST] ${new URL(url).pathname}${id ? ` ${CLIENT_REQUEST_ID_HEADER}=${id}` : ''} source=${source ?? 'unknown'}`,
      )
    } catch {
      // never let logging crash the fetch
    }
    return inner(input, { ...init, headers })
  }
}
|
||||
@@ -0,0 +1,226 @@
|
||||
import type { ClientOptions } from '@anthropic-ai/sdk'
|
||||
import { createHash } from 'crypto'
|
||||
import { promises as fs } from 'fs'
|
||||
import { dirname, join } from 'path'
|
||||
import { getSessionId } from 'src/bootstrap/state.js'
|
||||
import { getClaudeConfigHomeDir } from '../../utils/envUtils.js'
|
||||
import { jsonParse, jsonStringify } from '../../utils/slowOperations.js'
|
||||
|
||||
/** Return the SHA-256 digest of `str` as a lowercase hex string. */
function hashString(str: string): string {
  const hasher = createHash('sha256')
  hasher.update(str)
  return hasher.digest('hex')
}
|
||||
|
||||
// Cache last few API requests for ant users (e.g., for /issue command)

/** Upper bound on how many requests the in-memory cache retains. */
const MAX_CACHED_REQUESTS = 5

/** In-memory request cache; new entries are appended at the end. */
const cachedApiRequests: { timestamp: string; request: unknown }[] = []
|
||||
|
||||
/** Bookkeeping that lets repeated dumps for one ID skip data already written. */
type DumpState = {
  /** True once the `init` entry has been written. */
  initialized: boolean
  /** Number of request messages already processed. */
  messageCountSeen: number
  /** Hash of the last serialized init data (the request minus `messages`). */
  lastInitDataHash: string
  // Cheap proxy for change detection — skips the expensive stringify+hash
  // when model/tools/system are structurally identical to the last call.
  lastInitFingerprint: string
}
|
||||
|
||||
// Track state per session to avoid duplicating data.
// Keyed by agent ID or session ID.
const dumpState: Map<string, DumpState> = new Map()
|
||||
|
||||
/**
 * Return a shallow copy of the cached API requests, in insertion order.
 * Mutating the returned array does not affect the cache.
 */
export function getLastApiRequests(): Array<{
  timestamp: string
  request: unknown
}> {
  return cachedApiRequests.slice()
}
|
||||
|
||||
/** Empty the in-memory API request cache in place. */
export function clearApiRequestCache(): void {
  cachedApiRequests.splice(0, cachedApiRequests.length)
}
|
||||
|
||||
/**
 * Forget the dump bookkeeping stored for one agent or session ID.
 * A no-op when nothing is stored for that ID.
 */
export function clearDumpState(agentIdOrSessionId: string): void {
  dumpState.delete(agentIdOrSessionId)
}
|
||||
|
||||
/** Forget the dump bookkeeping for every agent and session ID. */
export function clearAllDumpState(): void {
  dumpState.clear()
}
|
||||
|
||||
/**
 * Record a request in the in-memory cache, stamped with the current time.
 *
 * Only active when USER_TYPE is 'ant'. Once the cache exceeds
 * MAX_CACHED_REQUESTS, the oldest entry is dropped.
 *
 * @param requestData The request payload to remember; stored as-is.
 */
export function addApiRequestToCache(requestData: unknown): void {
  if (process.env.USER_TYPE !== 'ant') {
    return
  }

  const entry = {
    timestamp: new Date().toISOString(),
    request: requestData,
  }
  cachedApiRequests.push(entry)

  const overflowed = cachedApiRequests.length > MAX_CACHED_REQUESTS
  if (overflowed) {
    cachedApiRequests.shift()
  }
}
|
||||
|
||||
/**
 * Path of the JSONL dump file for an agent or session:
 * `<config home>/dump-prompts/<id>.jsonl`.
 *
 * @param agentIdOrSessionId ID to name the file after; defaults to the
 *   current session ID.
 */
export function getDumpPromptsPath(agentIdOrSessionId?: string): string {
  const configHome = getClaudeConfigHomeDir()
  const fileStem = agentIdOrSessionId ?? getSessionId()
  return join(configHome, 'dump-prompts', `${fileStem}.jsonl`)
}
|
||||
|
||||
/**
 * Append entries to a file, one per line, creating parent directories first.
 *
 * Fire-and-forget: returns immediately, and write failures are deliberately
 * ignored. Does nothing when `entries` is empty.
 */
function appendToFile(filePath: string, entries: string[]): void {
  if (entries.length === 0) return

  void (async () => {
    try {
      await fs.mkdir(dirname(filePath), { recursive: true })
      await fs.appendFile(filePath, entries.join('\n') + '\n')
    } catch {
      // Best-effort write; failures are intentionally swallowed.
    }
  })()
}
|
||||
|
||||
/**
 * Build a cheap fingerprint of a request's init data, formatted as
 * `model|comma-joined tool names|system prompt length`.
 *
 * For an array `system`, the length is the sum of each block's `text`
 * length; blocks without `text` count as 0. Any other `system` value
 * counts as 0. Tools without a name contribute an empty string.
 */
function initFingerprint(req: Record<string, unknown>): string {
  const system = req.system as unknown[] | string | undefined
  let systemLength = 0
  if (typeof system === 'string') {
    systemLength = system.length
  } else if (Array.isArray(system)) {
    for (const block of system) {
      systemLength += (block as { text?: string }).text?.length ?? 0
    }
  }

  const tools = req.tools as Array<{ name?: string }> | undefined
  const toolNames =
    tools == null ? '' : tools.map(tool => tool.name ?? '').join(',')

  return `${req.model}|${toolNames}|${systemLength}`
}
|
||||
|
||||
/**
 * Parse a request body and append what is new to the dump file.
 *
 * The parsed request is always offered to the in-memory cache. The file
 * output is only produced when USER_TYPE is 'ant': an `init` entry on the
 * first request, a `system_update` entry when the non-message data changes,
 * and a `message` entry for each user message not seen before. Any error,
 * including an unparseable body, is swallowed.
 *
 * @param body Raw JSON request body.
 * @param ts Timestamp written into every entry.
 * @param state Per-ID bookkeeping; mutated in place.
 * @param filePath Destination JSONL file.
 */
function dumpRequest(
  body: string,
  ts: string,
  state: DumpState,
  filePath: string,
): void {
  try {
    const req = jsonParse(body) as Record<string, unknown>
    addApiRequestToCache(req)

    if (process.env.USER_TYPE !== 'ant') {
      return
    }

    const entries: string[] = []
    const messages = (req.messages ?? []) as Array<{ role?: string }>

    // Write init data (system, tools, metadata) on first request,
    // and a system_update entry whenever it changes.
    // Cheap fingerprint first: system+tools don't change between turns,
    // so skip the 300ms stringify when the shape is unchanged.
    const fingerprint = initFingerprint(req)
    const needsInitCheck =
      !state.initialized || fingerprint !== state.lastInitFingerprint
    if (needsInitCheck) {
      const { messages: _messages, ...initData } = req
      const initDataStr = jsonStringify(initData)
      const initDataHash = hashString(initDataStr)
      state.lastInitFingerprint = fingerprint

      const isFirstDump = !state.initialized
      if (isFirstDump || initDataHash !== state.lastInitDataHash) {
        state.initialized = true
        state.lastInitDataHash = initDataHash
        // Reuse initDataStr rather than re-serializing initData inside a wrapper.
        // timestamp from toISOString() contains no chars needing JSON escaping.
        entries.push(
          isFirstDump
            ? `{"type":"init","timestamp":"${ts}","data":${initDataStr}}`
            : `{"type":"system_update","timestamp":"${ts}","data":${initDataStr}}`,
        )
      }
    }

    // Write only new user messages (assistant messages captured in response)
    const unseenMessages = messages.slice(state.messageCountSeen)
    for (const msg of unseenMessages) {
      if (msg.role !== 'user') continue
      entries.push(jsonStringify({ type: 'message', timestamp: ts, data: msg }))
    }
    state.messageCountSeen = messages.length

    appendToFile(filePath, entries)
  } catch {
    // Ignore parsing errors
  }
}
|
||||
|
||||
/**
 * Creates a `fetch` replacement that forwards every call to
 * `globalThis.fetch` while recording prompt traffic for `agentIdOrSessionId`.
 *
 * - POST requests that carry a body are handed to `dumpRequest` through
 *   `setImmediate`, so the dump work never delays the real API call.
 * - Successful responses to those requests are appended to the dump file as
 *   `response` entries, but only when `USER_TYPE` is `ant`. SSE bodies are
 *   stored as the list of parsed `data:` payloads; any other body is stored
 *   as parsed JSON.
 *
 * Response capture is best effort: it reads a clone of the response, runs in
 * the background, and swallows its own failures.
 *
 * @param agentIdOrSessionId - Key for the dump file path and the shared
 *   per-key dump state.
 * @returns A fetch-compatible function.
 */
export function createDumpPromptsFetch(
  agentIdOrSessionId: string,
): ClientOptions['fetch'] {
  const filePath = getDumpPromptsPath(agentIdOrSessionId)

  // Reads an SSE body to the end and parses every `data:` line as JSON.
  const collectSseChunks = async (
    body: NonNullable<Response['body']>,
  ): Promise<unknown[]> => {
    const reader = body.getReader()
    const decoder = new TextDecoder()
    let text = ''
    try {
      for (
        let part = await reader.read();
        !part.done;
        part = await reader.read()
      ) {
        text += decoder.decode(part.value, { stream: true })
      }
    } finally {
      reader.releaseLock()
    }

    const parsed: unknown[] = []
    const lines = text.split('\n\n').flatMap(event => event.split('\n'))
    for (const line of lines) {
      if (!line.startsWith('data: ') || line === 'data: [DONE]') {
        continue
      }
      try {
        parsed.push(jsonParse(line.slice(6)))
      } catch {
        // Ignore parse errors
      }
    }
    return parsed
  }

  // Appends one `response` entry for the cloned response; never rejects.
  const recordResponse = async (
    cloned: Response,
    recordedAt: string,
  ): Promise<void> => {
    try {
      const isStreaming = cloned.headers
        .get('content-type')
        ?.includes('text/event-stream')

      let data: unknown
      if (isStreaming && cloned.body) {
        data = { stream: true, chunks: await collectSseChunks(cloned.body) }
      } else {
        data = await cloned.json()
      }

      const entry = jsonStringify({
        type: 'response',
        timestamp: recordedAt,
        data,
      })
      await fs.appendFile(filePath, entry + '\n')
    } catch {
      // Best effort
    }
  }

  return async (input: RequestInfo | URL, init?: RequestInit) => {
    // Look up (or lazily create) the state shared by all calls for this key.
    let state = dumpState.get(agentIdOrSessionId)
    if (!state) {
      state = {
        initialized: false,
        messageCountSeen: 0,
        lastInitDataHash: '',
        lastInitFingerprint: '',
      }
    }
    dumpState.set(agentIdOrSessionId, state)

    // Only set for requests we dump; doubles as the "record the response" flag.
    let timestamp: string | undefined

    if (init?.method === 'POST' && init.body) {
      timestamp = new Date().toISOString()
      // Parsing + stringifying the request (system prompt + tool schemas = MBs)
      // takes hundreds of ms. Defer so it doesn't block the actual API call —
      // this is debug tooling for /issue, not on the critical path.
      setImmediate(dumpRequest, init.body as string, timestamp, state, filePath)
    }

    // eslint-disable-next-line eslint-plugin-n/no-unsupported-features/node-builtins
    const response = await globalThis.fetch(input, init)

    // Save the response in the background from a clone, leaving the original
    // body untouched for the caller.
    if (timestamp && response.ok && process.env.USER_TYPE === 'ant') {
      void recordResponse(response.clone(), timestamp)
    }

    return response
  }
}
|
||||
@@ -0,0 +1,22 @@
|
||||
import type { NonNullableUsage } from '../../entrypoints/sdk/sdkUtilityTypes.js'
|
||||
|
||||
/**
 * Zero-initialized usage object. Extracted from logging.ts so that
 * bridge/replBridge.ts can import it without transitively pulling in
 * api/errors.ts → utils/messages.ts → BashTool.tsx → the world.
 *
 * Every numeric counter is 0, `iterations` is empty, `inference_geo` is the
 * empty string, and `service_tier` / `speed` are both `'standard'`.
 *
 * NOTE(review): `Readonly<…>` is shallow, so the nested `server_tool_use`,
 * `cache_creation` and `iterations` values are only protected if
 * `NonNullableUsage` itself marks them readonly — confirm before sharing this
 * object with code that mutates usage in place.
 */
export const EMPTY_USAGE: Readonly<NonNullableUsage> = {
  input_tokens: 0,
  cache_creation_input_tokens: 0,
  cache_read_input_tokens: 0,
  output_tokens: 0,
  server_tool_use: { web_search_requests: 0, web_fetch_requests: 0 },
  service_tier: 'standard',
  cache_creation: {
    ephemeral_1h_input_tokens: 0,
    ephemeral_5m_input_tokens: 0,
  },
  inference_geo: '',
  iterations: [],
  speed: 'standard',
}
|
||||
@@ -0,0 +1,260 @@
|
||||
import type { APIError } from '@anthropic-ai/sdk'
|
||||
|
||||
// SSL/TLS error codes from OpenSSL (used by both Node.js and Bun)
// See: https://www.openssl.org/docs/man3.1/man3/X509_STORE_CTX_get_error.html
//
// An error whose `code` is in this set is reported with `isSSLError: true` by
// extractConnectionErrorDetails. Codes listed here without a dedicated case in
// formatAPIError fall through to its generic "SSL error (<code>)" message.
const SSL_ERROR_CODES = new Set([
  // Certificate verification errors
  'UNABLE_TO_VERIFY_LEAF_SIGNATURE',
  'UNABLE_TO_GET_ISSUER_CERT',
  'UNABLE_TO_GET_ISSUER_CERT_LOCALLY',
  'CERT_SIGNATURE_FAILURE',
  'CERT_NOT_YET_VALID',
  'CERT_HAS_EXPIRED',
  'CERT_REVOKED',
  'CERT_REJECTED',
  'CERT_UNTRUSTED',
  // Self-signed certificate errors
  'DEPTH_ZERO_SELF_SIGNED_CERT',
  'SELF_SIGNED_CERT_IN_CHAIN',
  // Chain errors
  'CERT_CHAIN_TOO_LONG',
  'PATH_LENGTH_EXCEEDED',
  // Hostname/altname errors
  'ERR_TLS_CERT_ALTNAME_INVALID',
  'HOSTNAME_MISMATCH',
  // TLS handshake errors
  'ERR_TLS_HANDSHAKE_TIMEOUT',
  'ERR_SSL_WRONG_VERSION_NUMBER',
  'ERR_SSL_DECRYPTION_FAILED_OR_BAD_RECORD_MAC',
])
|
||||
|
||||
/**
 * Low-level connection failure found in an error's `cause` chain, as returned
 * by extractConnectionErrorDetails.
 */
export type ConnectionErrorDetails = {
  /** String `code` of the first error in the chain that has one (e.g. 'ETIMEDOUT'). */
  code: string
  /** `message` of that same error, not of the outer wrapper. */
  message: string
  /** True when `code` is one of the known SSL/TLS codes in SSL_ERROR_CODES. */
  isSSLError: boolean
}
|
||||
|
||||
/**
 * Finds the first error in a `cause` chain that carries a string `code`.
 *
 * The Anthropic SDK wraps the underlying network/TLS failure in `cause`, so
 * the useful code usually sits one or more levels below the error the caller
 * holds. The walk starts at `error` itself and follows `cause` links.
 *
 * The walk stops (returning `null`) when it meets a value that is not an
 * `Error` instance, an error without a `cause`, an error that is its own
 * cause, or after following 5 links.
 *
 * @param error - Any thrown value.
 * @returns Code, message and SSL flag of the first coded error, or `null` if
 *   none is found.
 */
export function extractConnectionErrorDetails(
  error: unknown,
): ConnectionErrorDetails | null {
  if (typeof error !== 'object' || error === null) {
    return null
  }

  // Bound on followed `cause` links; guards against cyclic chains.
  const maxHops = 5
  let node: unknown = error

  for (let hops = 0; node && hops < maxHops; hops++) {
    if (!(node instanceof Error)) {
      break
    }

    if ('code' in node && typeof node.code === 'string') {
      return {
        code: node.code,
        message: node.message,
        isSSLError: SSL_ERROR_CODES.has(node.code),
      }
    }

    // No usable code here: descend, unless there is nowhere (new) to go.
    if (!('cause' in node) || node.cause === node) {
      break
    }
    node = node.cause
  }

  return null
}
|
||||
|
||||
/**
 * Builds an actionable hint for SSL/TLS failures raised outside the main API
 * client (OAuth token exchange, preflight connectivity checks), where
 * `formatAPIError` is not used.
 *
 * Why: enterprise users behind TLS-intercepting proxies (Zscaler et al.) see
 * OAuth finish in the browser while the CLI's token exchange fails with a raw
 * SSL code. Pointing at the likely fix saves a support round-trip.
 *
 * @param error - Any thrown value; its `cause` chain is inspected.
 * @returns The hint text, or `null` when the failure is not a known SSL/TLS
 *   error.
 */
export function getSSLErrorHint(error: unknown): string | null {
  const details = extractConnectionErrorDetails(error)
  const isSSLFailure = details !== null && details.isSSLError
  return isSSLFailure
    ? `SSL certificate error (${details.code}). If you are behind a corporate proxy or TLS-intercepting firewall, set NODE_EXTRA_CA_CERTS to your CA bundle path, or ask IT to allowlist *.anthropic.com. Run /doctor for details.`
    : null
}
|
||||
|
||||
/**
 * Replaces an HTML error page (e.g. a CloudFlare error document) with its
 * `<title>` text.
 *
 * A message counts as HTML when it contains `<!DOCTYPE html` or `<html`.
 * Matching is case-insensitive and the title tag may carry attributes, so
 * pages written as `<HTML>` / `<TITLE>` or `<title id="…">` are handled too.
 *
 * @param message - Raw error message.
 * @returns The trimmed title when HTML with a non-empty title is detected,
 *   an empty string when HTML is detected without a usable title, and the
 *   original message unchanged when no HTML is found.
 */
function sanitizeMessageHTML(message: string): string {
  if (!/<!doctype html|<html/i.test(message)) {
    return message
  }
  const title = /<title\b[^>]*>([^<]+)<\/title>/i.exec(message)?.[1]
  return title?.trim() ?? ''
}
|
||||
|
||||
/**
 * Returns a display-safe version of an API error's message: HTML error pages
 * (e.g. CloudFlare) are collapsed to their title via `sanitizeMessageHTML`,
 * anything else is passed through.
 *
 * @param apiError - Error from the Anthropic SDK.
 * @returns The sanitized message, or an empty string when the error has no
 *   message.
 */
export function sanitizeAPIError(apiError: APIError): string {
  const { message } = apiError
  // Sometimes message is undefined despite the SDK typing.
  // TODO: figure out why
  return message ? sanitizeMessageHTML(message) : ''
}
|
||||
|
||||
/**
 * Shapes of deserialized API errors from session JSONL.
 *
 * After JSON round-tripping, the SDK's APIError loses its `.message` property.
 * The actual message lives at different nesting levels depending on the provider:
 *
 * - Bedrock/proxy: `{ error: { message: "..." } }`
 * - Standard Anthropic API: `{ error: { error: { message: "..." } } }`
 *   (the outer `.error` is the response body, the inner `.error` is the API error)
 *
 * Every level is optional, so readers must null-check each step; see
 * `extractNestedErrorMessage` for the lookup order.
 *
 * See also: `getErrorMessage` in `logging.ts` which handles the same shapes.
 */
type NestedAPIError = {
  error?: {
    // Bedrock/proxy shape: message directly on the first `.error`.
    message?: string
    // Standard Anthropic API shape: message one level deeper.
    error?: { message?: string }
  }
}
|
||||
|
||||
/**
 * Type guard: true when `value` is an object whose `error` property is itself
 * a non-null object (arrays included, since `typeof [] === 'object'`).
 *
 * Only the presence and object-ness of `.error` is checked; the nested
 * `message` fields stay optional and must still be validated by the caller.
 */
function hasNestedError(value: unknown): value is NestedAPIError {
  if (typeof value !== 'object' || value === null) {
    return false
  }
  if (!('error' in value)) {
    return false
  }
  const inner = value.error
  return typeof inner === 'object' && inner !== null
}
|
||||
|
||||
/**
 * Recovers a human-readable message from a deserialized API error that has no
 * top-level `.message`.
 *
 * Candidates are tried from most to least specific:
 *   1. `error.error.error.message` — standard Anthropic API shape
 *   2. `error.error.message` — Bedrock shape
 *
 * A candidate is accepted only if it is a non-empty string that is still
 * non-empty after HTML sanitizing.
 *
 * @param error - Possibly-deserialized API error.
 * @returns The first acceptable message, or `null` if there is none.
 */
function extractNestedErrorMessage(error: APIError): string | null {
  if (!hasNestedError(error)) {
    return null
  }

  // Read `.error` through the narrowed type so TypeScript sees the nested
  // shape rather than the SDK's `Object | undefined`.
  const { error: nested }: NestedAPIError = error

  const candidates: unknown[] = [nested?.error?.message, nested?.message]
  for (const candidate of candidates) {
    if (typeof candidate !== 'string' || candidate.length === 0) {
      continue
    }
    const cleaned = sanitizeMessageHTML(candidate)
    if (cleaned.length > 0) {
      return cleaned
    }
  }

  return null
}
|
||||
|
||||
// User-facing text for SSL/TLS codes that have a dedicated explanation.
// SSL codes missing from this map get a generic message that embeds the code.
const SSL_ERROR_MESSAGES: ReadonlyMap<string, string> = new Map([
  [
    'UNABLE_TO_VERIFY_LEAF_SIGNATURE',
    'Unable to connect to API: SSL certificate verification failed. Check your proxy or corporate SSL certificates',
  ],
  [
    'UNABLE_TO_GET_ISSUER_CERT',
    'Unable to connect to API: SSL certificate verification failed. Check your proxy or corporate SSL certificates',
  ],
  [
    'UNABLE_TO_GET_ISSUER_CERT_LOCALLY',
    'Unable to connect to API: SSL certificate verification failed. Check your proxy or corporate SSL certificates',
  ],
  ['CERT_HAS_EXPIRED', 'Unable to connect to API: SSL certificate has expired'],
  ['CERT_REVOKED', 'Unable to connect to API: SSL certificate has been revoked'],
  [
    'DEPTH_ZERO_SELF_SIGNED_CERT',
    'Unable to connect to API: Self-signed certificate detected. Check your proxy or corporate SSL certificates',
  ],
  [
    'SELF_SIGNED_CERT_IN_CHAIN',
    'Unable to connect to API: Self-signed certificate detected. Check your proxy or corporate SSL certificates',
  ],
  [
    'ERR_TLS_CERT_ALTNAME_INVALID',
    'Unable to connect to API: SSL certificate hostname mismatch',
  ],
  [
    'HOSTNAME_MISMATCH',
    'Unable to connect to API: SSL certificate hostname mismatch',
  ],
  [
    'CERT_NOT_YET_VALID',
    'Unable to connect to API: SSL certificate is not yet valid',
  ],
])

/**
 * Turns an API error into a single user-facing line.
 *
 * Checks run in this order; the first match wins:
 *   1. A timeout (`ETIMEDOUT`) anywhere in the cause chain.
 *   2. A known SSL/TLS code in the cause chain.
 *   3. The SDK's generic "Connection error." message, with the underlying
 *      code appended when one was found.
 *   4. No message at all (e.g. an error deserialized from JSONL on --resume):
 *      a nested message if present, otherwise a status-based fallback.
 *   5. The error's own message, with HTML error pages reduced to their title.
 *
 * @param error - Error from the Anthropic SDK, live or deserialized.
 * @returns A string that is never undefined, so callers may read `.length`.
 */
export function formatAPIError(error: APIError): string {
  const connectionDetails = extractConnectionErrorDetails(error)

  if (connectionDetails?.code === 'ETIMEDOUT') {
    return 'Request timed out. Check your internet connection and proxy settings'
  }

  if (connectionDetails?.isSSLError) {
    const { code } = connectionDetails
    return (
      SSL_ERROR_MESSAGES.get(code) ??
      `Unable to connect to API: SSL error (${code})`
    )
  }

  const { message } = error

  if (message === 'Connection error.') {
    // A non-SSL code is still worth showing for debugging.
    return connectionDetails?.code
      ? `Unable to connect to API (${connectionDetails.code})`
      : 'Unable to connect to API. Check your internet connection'
  }

  // Guard: an error deserialized from JSONL (e.g. --resume) may be a plain
  // object without `.message`. Fall back to something printable rather than
  // returning undefined.
  if (!message) {
    return (
      extractNestedErrorMessage(error) ??
      `API error (status ${error.status ?? 'unknown'})`
    )
  }

  // Prefer the sanitized text only when sanitizing changed something and
  // left something to show.
  const sanitized = sanitizeAPIError(error)
  const htmlWasStripped = sanitized !== message && sanitized.length > 0
  return htmlWasStripped ? sanitized : message
}
|
||||
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,748 @@
|
||||
/**
|
||||
* Files API client for managing files
|
||||
*
|
||||
* This module provides functionality to download and upload files to Anthropic Public Files API.
|
||||
* Used by the Claude Code agent to download file attachments at session startup.
|
||||
*
|
||||
* API Reference: https://docs.anthropic.com/en/api/files-content
|
||||
*/
|
||||
|
||||
import axios from 'axios'
|
||||
import { randomUUID } from 'crypto'
|
||||
import * as fs from 'fs/promises'
|
||||
import * as path from 'path'
|
||||
import { count } from '../../utils/array.js'
|
||||
import { getCwd } from '../../utils/cwd.js'
|
||||
import { logForDebugging } from '../../utils/debug.js'
|
||||
import { errorMessage } from '../../utils/errors.js'
|
||||
import { logError } from '../../utils/log.js'
|
||||
import { sleep } from '../../utils/sleep.js'
|
||||
import {
|
||||
type AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
|
||||
logEvent,
|
||||
} from '../analytics/index.js'
|
||||
|
||||
// Files API is currently in beta. oauth-2025-04-20 enables Bearer OAuth
// on public-api routes (auth.py: "oauth_auth" not in beta_versions → 404).
// Sent as the `anthropic-beta` header on every Files API request in this module.
const FILES_API_BETA_HEADER = 'files-api-2025-04-14,oauth-2025-04-20'
// Sent as the `anthropic-version` header on every Files API request in this module.
const ANTHROPIC_VERSION = '2023-06-01'
|
||||
|
||||
/**
 * Resolves the API base URL used when `FilesApiConfig.baseUrl` is not given.
 *
 * Precedence: `ANTHROPIC_BASE_URL` (set by env-manager for the appropriate
 * environment), then `CLAUDE_CODE_API_BASE_URL`, then the public API for
 * standalone usage. An unset or empty variable is skipped.
 */
function getDefaultApiBaseUrl(): string {
  const { ANTHROPIC_BASE_URL, CLAUDE_CODE_API_BASE_URL } = process.env
  if (ANTHROPIC_BASE_URL) {
    return ANTHROPIC_BASE_URL
  }
  if (CLAUDE_CODE_API_BASE_URL) {
    return CLAUDE_CODE_API_BASE_URL
  }
  return 'https://api.anthropic.com'
}
|
||||
|
||||
/** Writes an error-level debug line tagged with the `[files-api]` prefix. */
function logDebugError(message: string): void {
  const line = `[files-api] ${message}`
  logForDebugging(line, { level: 'error' })
}
|
||||
|
||||
/** Writes a debug line tagged with the `[files-api]` prefix at the default level. */
function logDebug(message: string): void {
  const line = `[files-api] ${message}`
  logForDebugging(line)
}
|
||||
|
||||
/**
 * File specification parsed from CLI args
 * Format: --file=<file_id>:<relative_path>
 */
export type File = {
  /** Files API identifier; passed to `downloadFile` to fetch the content. */
  fileId: string
  /**
   * Where to place the file below the session's uploads directory; validated
   * and resolved by `buildDownloadPath`.
   */
  relativePath: string
}
|
||||
|
||||
/**
 * Configuration for the files API client
 */
export type FilesApiConfig = {
  /** OAuth token for authentication (from session JWT); sent as a Bearer token */
  oauthToken: string
  /**
   * Base URL for the API (default: https://api.anthropic.com).
   * When omitted or empty, `getDefaultApiBaseUrl()` decides, which also
   * honours the ANTHROPIC_BASE_URL / CLAUDE_CODE_API_BASE_URL env vars.
   */
  baseUrl?: string
  /** Session ID for creating session-specific directories */
  sessionId: string
}
|
||||
|
||||
/**
 * Result of a file download operation
 */
export type DownloadResult = {
  /** ID of the file that was requested. */
  fileId: string
  /**
   * Destination path built by `buildDownloadPath`. Empty string when the
   * requested relative path was rejected as invalid.
   */
  path: string
  /** True only when the file was downloaded and written to `path`. */
  success: boolean
  /** Failure description; set when `success` is false. */
  error?: string
  /** Size of the written content in bytes; set when `success` is true. */
  bytesWritten?: number
}
|
||||
|
||||
// Total number of attempts made by retryWithBackoff (first try included).
const MAX_RETRIES = 3
// Delay before the second attempt; retryWithBackoff doubles it for each later attempt.
const BASE_DELAY_MS = 500
// Upload size cap enforced by uploadFile after the file has been read.
const MAX_FILE_SIZE_BYTES = 500 * 1024 * 1024 // 500MB

/**
 * Result type for retry operations - signals whether to continue retrying
 *
 * - `{ done: true, value }`: stop and return `value`.
 * - `{ done: false, error? }`: the attempt failed in a retriable way; `error`
 *   is used for the debug log and for the final thrown message.
 *
 * Failures that must not be retried are signalled by throwing from the
 * attempt function instead.
 */
type RetryResult<T> = { done: true; value: T } | { done: false; error?: string }
|
||||
|
||||
/**
 * Runs `attemptFn` up to `MAX_RETRIES` times, sleeping between failed attempts
 * with an exponentially growing delay (`BASE_DELAY_MS`, then double, …).
 *
 * An attempt reports a retriable failure by returning `{ done: false }`.
 * Anything it throws is not caught here and aborts the retry loop at once.
 *
 * @param operation - Operation name used in log lines and the default error
 * @param attemptFn - Called with the 1-based attempt number
 * @returns The value of the first attempt that reports `done: true`
 * @throws Error carrying the last failure text once all attempts are used up
 */
async function retryWithBackoff<T>(
  operation: string,
  attemptFn: (attempt: number) => Promise<RetryResult<T>>,
): Promise<T> {
  let lastError = ''
  let attempt = 0

  while (attempt < MAX_RETRIES) {
    attempt += 1

    const outcome = await attemptFn(attempt)
    if (outcome.done) {
      return outcome.value
    }

    lastError = outcome.error || `${operation} failed`
    logDebug(
      `${operation} attempt ${attempt}/${MAX_RETRIES} failed: ${lastError}`,
    )

    // No point sleeping after the final attempt.
    const hasAttemptsLeft = attempt < MAX_RETRIES
    if (hasAttemptsLeft) {
      const delayMs = BASE_DELAY_MS * 2 ** (attempt - 1)
      logDebug(`Retrying ${operation} in ${delayMs}ms...`)
      await sleep(delayMs)
    }
  }

  throw new Error(`${lastError} after ${MAX_RETRIES} attempts`)
}
|
||||
|
||||
/**
 * Fetches the content of one file from the Anthropic Public Files API.
 *
 * Responses are handled as follows:
 * - 200: the body is returned.
 * - 404 / 401 / 403: an Error is thrown immediately (not retried).
 * - Any other status below 500: reported as a retriable failure.
 * - 5xx, timeouts and other axios errors: retried.
 *
 * @param fileId - The file ID (e.g., "file_011CNha8iCJcU1wXNR6q4V8w")
 * @param config - Files API configuration
 * @returns The file content as a Buffer
 * @throws Error on a non-retriable status or once all retries are exhausted
 */
export async function downloadFile(
  fileId: string,
  config: FilesApiConfig,
): Promise<Buffer> {
  const url = `${config.baseUrl || getDefaultApiBaseUrl()}/v1/files/${fileId}/content`
  const headers = {
    Authorization: `Bearer ${config.oauthToken}`,
    'anthropic-version': ANTHROPIC_VERSION,
    'anthropic-beta': FILES_API_BETA_HEADER,
  }

  logDebug(`Downloading file ${fileId} from ${url}`)

  const attemptDownload = async (): Promise<RetryResult<Buffer>> => {
    try {
      const response = await axios.get(url, {
        headers,
        responseType: 'arraybuffer',
        timeout: 60000, // 60 second timeout for large files
        // Let 4xx through so the status can be inspected below; 5xx still throws.
        validateStatus: status => status < 500,
      })

      switch (response.status) {
        case 200:
          logDebug(`Downloaded file ${fileId} (${response.data.length} bytes)`)
          return { done: true, value: Buffer.from(response.data) }
        // Non-retriable errors - throw immediately
        case 404:
          throw new Error(`File not found: ${fileId}`)
        case 401:
          throw new Error('Authentication failed: invalid or missing API key')
        case 403:
          throw new Error(`Access denied to file: ${fileId}`)
        default:
          return { done: false, error: `status ${response.status}` }
      }
    } catch (error) {
      // Only transport-level (axios) failures are retried; the errors thrown
      // above pass straight through to the caller.
      if (axios.isAxiosError(error)) {
        return { done: false, error: error.message }
      }
      throw error
    }
  }

  return retryWithBackoff(`Download file ${fileId}`, attemptDownload)
}
|
||||
|
||||
/**
 * Normalizes a relative path, strips redundant prefixes, and builds the full
 * download path under {basePath}/{session_id}/uploads/.
 *
 * Redundant prefixes are the uploads directory itself and a leading
 * `/uploads/`; when `relativePath` starts with one of them, only the
 * remainder is appended.
 *
 * A path is rejected only when its normalized form begins with a real `..`
 * segment. File or directory names that merely start with two dots
 * (`..hidden`, `...notes.txt`) are accepted. As a second line of defence the
 * final path is verified to stay inside the uploads directory.
 *
 * @param basePath - Workspace root the session directory lives in
 * @param sessionId - Session whose uploads directory is targeted
 * @param relativePath - Caller-supplied destination path
 * @returns The full destination path, or null if the path is invalid
 *   (e.g., path traversal); the rejection is logged at error level.
 */
export function buildDownloadPath(
  basePath: string,
  sessionId: string,
  relativePath: string,
): string | null {
  const rejectPath = (): null => {
    logDebugError(
      `Invalid file path: ${relativePath}. Path must not traverse above workspace`,
    )
    return null
  }
  // True for `..` itself or a path whose first segment is `..`.
  const startsWithParentSegment = (candidate: string): boolean =>
    candidate === '..' || candidate.startsWith('..' + path.sep)

  // After normalize(), any remaining `..` segments sit at the very front.
  const normalized = path.normalize(relativePath)
  if (startsWithParentSegment(normalized)) {
    return rejectPath()
  }

  const uploadsBase = path.join(basePath, sessionId, 'uploads')
  const redundantPrefixes = [
    uploadsBase + path.sep,
    path.sep + 'uploads' + path.sep,
  ]
  const matchedPrefix = redundantPrefixes.find(p => normalized.startsWith(p))
  const cleanPath = matchedPrefix
    ? normalized.slice(matchedPrefix.length)
    : normalized

  const fullPath = path.join(uploadsBase, cleanPath)

  // Defence in depth: the result must not resolve outside the uploads dir.
  const fromUploads = path.relative(uploadsBase, fullPath)
  if (startsWithParentSegment(fromUploads) || path.isAbsolute(fromUploads)) {
    return rejectPath()
  }
  return fullPath
}
|
||||
|
||||
/**
 * Downloads one attachment and writes it into the session's uploads directory
 * below the current working directory.
 *
 * Never throws for download or filesystem failures: they are logged and
 * reported through the returned result.
 *
 * @param attachment - The file attachment to download
 * @param config - Files API configuration
 * @returns Download result with success/failure status. `path` is empty when
 *   the attachment's relative path is rejected as invalid.
 */
export async function downloadAndSaveFile(
  attachment: File,
  config: FilesApiConfig,
): Promise<DownloadResult> {
  const { fileId, relativePath } = attachment
  const destination = buildDownloadPath(
    getCwd(),
    config.sessionId,
    relativePath,
  )

  if (!destination) {
    return {
      fileId,
      path: '',
      success: false,
      error: `Invalid file path: ${relativePath}`,
    }
  }

  try {
    const content = await downloadFile(fileId, config)

    // The destination may sit in nested directories that do not exist yet.
    await fs.mkdir(path.dirname(destination), { recursive: true })
    await fs.writeFile(destination, content)

    const bytesWritten = content.length
    logDebug(`Saved file ${fileId} to ${destination} (${bytesWritten} bytes)`)

    return { fileId, path: destination, success: true, bytesWritten }
  } catch (error) {
    const reason = errorMessage(error)
    logDebugError(`Failed to download file ${fileId}: ${reason}`)
    if (error instanceof Error) {
      logError(error)
    }

    return { fileId, path: destination, success: false, error: reason }
  }
}
|
||||
|
||||
// Default concurrency limit for parallel downloads and uploads: the default
// `concurrency` argument of downloadSessionFiles and uploadSessionFiles.
const DEFAULT_CONCURRENCY = 5
|
||||
|
||||
/**
 * Applies an async function to every item with at most `concurrency` calls in
 * flight at once.
 *
 * A fixed number of workers pull the next unclaimed index from a shared
 * counter. Claiming an index is a synchronous step between awaits, so no two
 * workers can claim the same one.
 *
 * Every item is processed, including `undefined` entries. A `concurrency`
 * that is zero, negative or NaN is treated as 1, so the result array is
 * always fully populated.
 *
 * @param items - Items to process
 * @param fn - Async function to apply to each item
 * @param concurrency - Maximum concurrent operations
 * @returns Results in the same order as input items
 * @throws Whatever `fn` rejects with; the first rejection rejects the
 *   returned promise.
 */
async function parallelWithLimit<T, R>(
  items: T[],
  fn: (item: T, index: number) => Promise<R>,
  concurrency: number,
): Promise<R[]> {
  const results: R[] = new Array(items.length)
  let nextIndex = 0

  async function worker(): Promise<void> {
    while (nextIndex < items.length) {
      const index = nextIndex++
      // `index` is in bounds by the loop condition, so the element is a T
      // even when T itself admits undefined.
      results[index] = await fn(items[index] as T, index)
    }
  }

  // At least one worker, at most one per item. The comparison is false for
  // NaN, which therefore falls back to 1 as well.
  const requested = Math.ceil(concurrency)
  const workerCount = Math.min(items.length, requested >= 1 ? requested : 1)

  await Promise.all(Array.from({ length: workerCount }, () => worker()))
  return results
}
|
||||
|
||||
/**
 * Downloads every file attachment of a session, several at a time.
 *
 * Individual failures do not abort the batch; each file gets its own entry in
 * the result, and a summary line with the success count and elapsed time is
 * written to the debug log.
 *
 * @param files - List of file attachments to download
 * @param config - Files API configuration
 * @param concurrency - Maximum concurrent downloads (default: 5)
 * @returns Array of download results in the same order as input
 */
export async function downloadSessionFiles(
  files: File[],
  config: FilesApiConfig,
  concurrency: number = DEFAULT_CONCURRENCY,
): Promise<DownloadResult[]> {
  if (files.length === 0) {
    return []
  }

  logDebug(
    `Downloading ${files.length} file(s) for session ${config.sessionId}`,
  )

  const downloadOne = (file: File): Promise<DownloadResult> =>
    downloadAndSaveFile(file, config)

  const startedAt = Date.now()
  const results = await parallelWithLimit(files, downloadOne, concurrency)
  const elapsedMs = Date.now() - startedAt

  const successCount = count(results, result => result.success)
  logDebug(
    `Downloaded ${successCount}/${files.length} file(s) in ${elapsedMs}ms`,
  )

  return results
}
|
||||
|
||||
// ============================================================================
|
||||
// Upload Functions (BYOC mode)
|
||||
// ============================================================================
|
||||
|
||||
/**
 * Result of a file upload operation
 *
 * Discriminated on `success`: check it before reading `fileId`/`size`
 * (success only) or `error` (failure only).
 */
export type UploadResult =
  | {
      /** The `relativePath` the upload was requested under. */
      path: string
      /** ID the Files API returned for the uploaded file. */
      fileId: string
      /** Number of bytes read from disk and sent. */
      size: number
      success: true
    }
  | {
      /** The `relativePath` the upload was requested under. */
      path: string
      /** Human-readable failure reason. */
      error: string
      success: false
    }
|
||||
|
||||
/**
 * Escapes a filename for use inside the quoted `filename="…"` parameter of a
 * multipart/form-data Content-Disposition header.
 *
 * Double quotes, CR and LF are percent-encoded (the escaping the HTML
 * multipart/form-data encoding algorithm prescribes), so a filename
 * containing them can neither end the quoted value early nor inject extra
 * header lines.
 */
function escapeMultipartFilename(filename: string): string {
  return filename
    .replace(/\r/g, '%0D')
    .replace(/\n/g, '%0A')
    .replace(/"/g, '%22')
}

/**
 * Upload a single file to the Files API (BYOC mode)
 *
 * Size validation is performed after reading the file to avoid TOCTOU race
 * conditions where the file size could change between initial check and upload.
 *
 * The request is a hand-built multipart/form-data body with two parts: the
 * file itself (named after the basename of `relativePath`) and a `purpose`
 * field set to `user_data`.
 *
 * Read, size, HTTP and network failures are reported through the returned
 * result. 401, 403, 413 and cancellation are final; other non-success
 * statuses and axios errors are retried.
 *
 * @param filePath - Absolute path to the file to upload
 * @param relativePath - Relative path for the file (used as filename in API)
 * @param config - Files API configuration
 * @param opts - Optional settings; `opts.signal` cancels an in-flight upload
 * @returns Upload result with success/failure status
 */
export async function uploadFile(
  filePath: string,
  relativePath: string,
  config: FilesApiConfig,
  opts?: { signal?: AbortSignal },
): Promise<UploadResult> {
  const baseUrl = config.baseUrl || getDefaultApiBaseUrl()
  const url = `${baseUrl}/v1/files`

  const headers = {
    Authorization: `Bearer ${config.oauthToken}`,
    'anthropic-version': ANTHROPIC_VERSION,
    'anthropic-beta': FILES_API_BETA_HEADER,
  }

  logDebug(`Uploading file ${filePath} as ${relativePath}`)

  // Read file content first (outside retry loop since it's not a network operation)
  let content: Buffer
  try {
    content = await fs.readFile(filePath)
  } catch (error) {
    logEvent('tengu_file_upload_failed', {
      error_type:
        'file_read' as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
    })
    return {
      path: relativePath,
      error: errorMessage(error),
      success: false,
    }
  }

  const fileSize = content.length

  if (fileSize > MAX_FILE_SIZE_BYTES) {
    logEvent('tengu_file_upload_failed', {
      error_type:
        'file_too_large' as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
    })
    return {
      path: relativePath,
      error: `File exceeds maximum size of ${MAX_FILE_SIZE_BYTES} bytes (actual: ${fileSize})`,
      success: false,
    }
  }

  // Use crypto.randomUUID for boundary to avoid collisions when uploads start same millisecond
  const boundary = `----FormBoundary${randomUUID()}`
  // Escaped so quotes or line breaks in the name cannot corrupt the header.
  const filename = escapeMultipartFilename(path.basename(relativePath))

  // Build the multipart body
  const bodyParts: Buffer[] = []

  // File part
  bodyParts.push(
    Buffer.from(
      `--${boundary}\r\n` +
        `Content-Disposition: form-data; name="file"; filename="${filename}"\r\n` +
        `Content-Type: application/octet-stream\r\n\r\n`,
    ),
  )
  bodyParts.push(content)
  bodyParts.push(Buffer.from('\r\n'))

  // Purpose part
  bodyParts.push(
    Buffer.from(
      `--${boundary}\r\n` +
        `Content-Disposition: form-data; name="purpose"\r\n\r\n` +
        `user_data\r\n`,
    ),
  )

  // End boundary
  bodyParts.push(Buffer.from(`--${boundary}--\r\n`))

  const body = Buffer.concat(bodyParts)

  try {
    return await retryWithBackoff(`Upload file ${relativePath}`, async () => {
      try {
        const response = await axios.post(url, body, {
          headers: {
            ...headers,
            'Content-Type': `multipart/form-data; boundary=${boundary}`,
            'Content-Length': body.length.toString(),
          },
          timeout: 120000, // 2 minute timeout for uploads
          signal: opts?.signal,
          // Let 4xx through so the status can be inspected below; 5xx still throws.
          validateStatus: status => status < 500,
        })

        if (response.status === 200 || response.status === 201) {
          const fileId = response.data?.id
          if (!fileId) {
            // A success status without an ID is treated as retriable.
            return {
              done: false,
              error: 'Upload succeeded but no file ID returned',
            }
          }
          logDebug(`Uploaded file ${filePath} -> ${fileId} (${fileSize} bytes)`)
          return {
            done: true,
            value: {
              path: relativePath,
              fileId,
              size: fileSize,
              success: true as const,
            },
          }
        }

        // Non-retriable errors - throw to exit retry loop
        if (response.status === 401) {
          logEvent('tengu_file_upload_failed', {
            error_type:
              'auth' as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
          })
          throw new UploadNonRetriableError(
            'Authentication failed: invalid or missing API key',
          )
        }

        if (response.status === 403) {
          logEvent('tengu_file_upload_failed', {
            error_type:
              'forbidden' as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
          })
          throw new UploadNonRetriableError('Access denied for upload')
        }

        if (response.status === 413) {
          logEvent('tengu_file_upload_failed', {
            error_type:
              'size' as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
          })
          throw new UploadNonRetriableError('File too large for upload')
        }

        return { done: false, error: `status ${response.status}` }
      } catch (error) {
        // Non-retriable errors propagate up
        if (error instanceof UploadNonRetriableError) {
          throw error
        }
        if (axios.isCancel(error)) {
          throw new UploadNonRetriableError('Upload canceled')
        }
        // Network errors are retriable
        if (axios.isAxiosError(error)) {
          return { done: false, error: error.message }
        }
        throw error
      }
    })
  } catch (error) {
    // Non-retriable failures already logged their own analytics event (if any).
    if (error instanceof UploadNonRetriableError) {
      return {
        path: relativePath,
        error: error.message,
        success: false,
      }
    }
    // Everything else: retries exhausted or an unexpected error.
    logEvent('tengu_file_upload_failed', {
      error_type:
        'network' as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
    })
    return {
      path: relativePath,
      error: errorMessage(error),
      success: false,
    }
  }
}
|
||||
|
||||
/**
 * Marks an upload failure that must not be retried (auth failure, forbidden,
 * payload too large, cancellation). `uploadFile` checks for this class to
 * leave its retry loop and return the message as the upload error.
 */
class UploadNonRetriableError extends Error {
  override name = 'UploadNonRetriableError'

  constructor(message: string) {
    super(message)
  }
}
|
||||
|
||||
/**
 * Uploads a batch of files, several at a time (BYOC mode).
 *
 * Individual failures do not abort the batch; each file gets its own entry in
 * the result, and a summary line with the success count and elapsed time is
 * written to the debug log.
 *
 * @param files - Array of files to upload (path and relativePath)
 * @param config - Files API configuration
 * @param concurrency - Maximum concurrent uploads (default: 5)
 * @returns Array of upload results in the same order as input
 */
export async function uploadSessionFiles(
  files: Array<{ path: string; relativePath: string }>,
  config: FilesApiConfig,
  concurrency: number = DEFAULT_CONCURRENCY,
): Promise<UploadResult[]> {
  if (files.length === 0) {
    return []
  }

  logDebug(`Uploading ${files.length} file(s) for session ${config.sessionId}`)

  const uploadOne = (file: {
    path: string
    relativePath: string
  }): Promise<UploadResult> => uploadFile(file.path, file.relativePath, config)

  const startedAt = Date.now()
  const results = await parallelWithLimit(files, uploadOne, concurrency)
  const elapsedMs = Date.now() - startedAt

  const successCount = count(results, result => result.success)
  logDebug(`Uploaded ${successCount}/${files.length} file(s) in ${elapsedMs}ms`)

  return results
}
|
||||
|
||||
// ============================================================================
|
||||
// List Files Functions (1P/Cloud mode)
|
||||
// ============================================================================
|
||||
|
||||
/**
 * Metadata for a single file, as produced by listFilesCreatedAfter.
 */
export type FileMetadata = {
  /** Identifier of the file (taken from the API item's `id`). */
  fileId: string
  /** Name of the file (taken from the API item's `filename`). */
  filename: string
  /** Size of the file (taken from the API item's `size_bytes`). */
  size: number
}
|
||||
|
||||
/**
 * List files created after a given timestamp (1P/Cloud mode).
 *
 * Sends GET `{baseUrl}/v1/files` with an `after_created_at` query parameter.
 * While a page reports `has_more`, the id of the last item on that page is
 * sent as the `after_id` cursor of the next request.
 *
 * Each page request runs inside retryWithBackoff: a 200 completes the
 * attempt, 401 and 403 throw immediately, and any other status below 500 or
 * any axios error (including 5xx, which axios rejects here) is reported back
 * as a failed attempt for retryWithBackoff to handle.
 *
 * @param afterCreatedAt - ISO 8601 timestamp to filter files created after
 * @param config - Files API configuration (`baseUrl` and `oauthToken` are read)
 * @returns Metadata for every file collected across all pages
 * @throws Error when the API answers 401 or 403, plus whatever
 *         retryWithBackoff raises once it gives up
 */
export async function listFilesCreatedAfter(
  afterCreatedAt: string,
  config: FilesApiConfig,
): Promise<FileMetadata[]> {
  const baseUrl = config.baseUrl || getDefaultApiBaseUrl()
  const headers = {
    Authorization: `Bearer ${config.oauthToken}`,
    'anthropic-version': ANTHROPIC_VERSION,
    'anthropic-beta': FILES_API_BETA_HEADER,
  }

  logDebug(`Listing files created after ${afterCreatedAt}`)

  const allFiles: FileMetadata[] = []
  let afterId: string | undefined

  // Paginate through results
  while (true) {
    const params: Record<string, string> = {
      after_created_at: afterCreatedAt,
    }
    if (afterId) {
      params.after_id = afterId
    }

    const page = await retryWithBackoff(
      `List files after ${afterCreatedAt}`,
      async () => {
        try {
          const response = await axios.get(`${baseUrl}/v1/files`, {
            headers,
            params,
            timeout: 60000,
            // Let 4xx responses through so they can be classified below;
            // 5xx still rejects and lands in the catch as an axios error.
            validateStatus: status => status < 500,
          })

          if (response.status === 200) {
            return { done: true, value: response.data }
          }

          if (response.status === 401) {
            logEvent('tengu_file_list_failed', {
              error_type:
                'auth' as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
            })
            throw new Error('Authentication failed: invalid or missing API key')
          }
          if (response.status === 403) {
            logEvent('tengu_file_list_failed', {
              error_type:
                'forbidden' as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
            })
            throw new Error('Access denied to list files')
          }

          return { done: false, error: `status ${response.status}` }
        } catch (error) {
          // The 401/403 errors thrown above are plain Errors: propagate them
          // instead of reporting them as a retryable network failure.
          if (!axios.isAxiosError(error)) {
            throw error
          }
          logEvent('tengu_file_list_failed', {
            error_type:
              'network' as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
          })
          return { done: false, error: error.message }
        }
      },
    )

    const files = page.data || []
    for (const f of files) {
      allFiles.push({
        filename: f.filename,
        fileId: f.id,
        size: f.size_bytes,
      })
    }

    if (!page.has_more) {
      break
    }

    // Use the last file's ID as cursor for next page
    const lastFile = files.at(-1)
    if (!lastFile?.id) {
      break
    }
    // A page fetched with `after_id` should never end on that same id. If it
    // does, the cursor is not advancing and continuing would loop forever.
    if (lastFile.id === afterId) {
      logDebug(
        `Pagination cursor did not advance past ${afterId}; stopping file listing`,
      )
      break
    }
    afterId = lastFile.id
  }

  logDebug(`Listed ${allFiles.length} files created after ${afterCreatedAt}`)
  return allFiles
}
|
||||
|
||||
// ============================================================================
|
||||
// Parse Functions
|
||||
// ============================================================================
|
||||
|
||||
/**
 * Parse file attachment specs from CLI arguments.
 * Format: <file_id>:<relative_path>
 *
 * Each input string is first split on single spaces, so one argument may
 * carry several specs. A spec is split at its FIRST colon, which means the
 * relative path may itself contain colons. Specs that have no colon, an empty
 * file id, or an empty path are reported through logDebugError and skipped.
 *
 * @param fileSpecs - Array of file spec strings
 * @returns Parsed file attachments, in input order, without the invalid specs
 */
export function parseFileSpecs(fileSpecs: string[]): File[] {
  const files: File[] = []

  // Sandbox-gateway may pass multiple specs as a single space-separated string
  const expandedSpecs = fileSpecs.flatMap(s => s.split(' ').filter(Boolean))

  for (const spec of expandedSpecs) {
    const colonIndex = spec.indexOf(':')

    // A missing colon is treated like empty parts, so every malformed spec
    // takes the same logged-and-skipped path instead of vanishing silently.
    const fileId = colonIndex === -1 ? '' : spec.substring(0, colonIndex)
    const relativePath =
      colonIndex === -1 ? '' : spec.substring(colonIndex + 1)

    if (!fileId || !relativePath) {
      logDebugError(
        `Invalid file spec: ${spec}. Both file_id and path are required`,
      )
      continue
    }

    files.push({ fileId, relativePath })
  }

  return files
}
|
||||
@@ -0,0 +1,60 @@
|
||||
import axios from 'axios'
|
||||
import { getOauthConfig } from '../../constants/oauth.js'
|
||||
import { getGlobalConfig, saveGlobalConfig } from '../../utils/config.js'
|
||||
import { getAuthHeaders } from '../../utils/http.js'
|
||||
import { logError } from '../../utils/log.js'
|
||||
import { getClaudeCodeUserAgent } from '../../utils/userAgent.js'
|
||||
|
||||
/**
 * Fetch the user's first Claude Code token date and persist it in the global
 * config (described upstream as being called after a successful login).
 *
 * Does nothing when `claudeCodeFirstTokenDate` is already present in the
 * config (any value other than `undefined`, including `null`). A missing
 * `first_token_date` in the response is stored as `null`; a value that does
 * not parse as a date is logged and NOT stored. Never throws: every failure
 * is routed to logError.
 */
export async function fetchAndStoreClaudeCodeFirstTokenDate(): Promise<void> {
  try {
    // Already cached (possibly as null) — no request needed.
    if (getGlobalConfig().claudeCodeFirstTokenDate !== undefined) {
      return
    }

    const authHeaders = getAuthHeaders()
    if (authHeaders.error) {
      logError(new Error(`Failed to get auth headers: ${authHeaders.error}`))
      return
    }

    const endpoint = `${getOauthConfig().BASE_API_URL}/api/organization/claude_code_first_token_date`
    const requestHeaders = {
      ...authHeaders.headers,
      'User-Agent': getClaudeCodeUserAgent(),
    }
    const response = await axios.get(endpoint, {
      headers: requestHeaders,
      timeout: 10000,
    })

    const firstTokenDate = response.data?.first_token_date ?? null

    // Only non-null values need validating; null is stored as-is.
    const isUnparseable =
      firstTokenDate !== null && isNaN(new Date(firstTokenDate).getTime())
    if (isUnparseable) {
      logError(
        new Error(
          `Received invalid first_token_date from API: ${firstTokenDate}`,
        ),
      )
      // Don't save invalid dates
      return
    }

    saveGlobalConfig(current => ({
      ...current,
      claudeCodeFirstTokenDate: firstTokenDate,
    }))
  } catch (error) {
    logError(error)
  }
}
|
||||
@@ -0,0 +1,357 @@
|
||||
import axios from 'axios'
|
||||
import memoize from 'lodash-es/memoize.js'
|
||||
import {
|
||||
type AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
|
||||
logEvent,
|
||||
} from 'src/services/analytics/index.js'
|
||||
import { getOauthAccountInfo, isConsumerSubscriber } from 'src/utils/auth.js'
|
||||
import { logForDebugging } from 'src/utils/debug.js'
|
||||
import { gracefulShutdown } from 'src/utils/gracefulShutdown.js'
|
||||
import { isEssentialTrafficOnly } from 'src/utils/privacyLevel.js'
|
||||
import { writeToStderr } from 'src/utils/process.js'
|
||||
import { getOauthConfig } from '../../constants/oauth.js'
|
||||
import { getGlobalConfig, saveGlobalConfig } from '../../utils/config.js'
|
||||
import {
|
||||
getAuthHeaders,
|
||||
getUserAgent,
|
||||
withOAuth401Retry,
|
||||
} from '../../utils/http.js'
|
||||
import { logError } from '../../utils/log.js'
|
||||
import { getClaudeCodeUserAgent } from '../../utils/userAgent.js'
|
||||
|
||||
// How long a cached Grove config entry counts as fresh: 24 hours, in ms.
const GROVE_CACHE_EXPIRATION_MS = 1000 * 60 * 60 * 24

/** Account-level Grove settings as returned by the account settings endpoint. */
export type AccountSettings = {
  /** The user's choice; `null` means no choice has been made yet. */
  grove_enabled: boolean | null
  /** Date string for when the notice was last viewed, or `null` if never. */
  grove_notice_viewed_at: string | null
}

/** Grove notice configuration as returned by getGroveNoticeConfig. */
export type GroveConfig = {
  grove_enabled: boolean
  /** Set to `false` by getGroveNoticeConfig when the API omits it. */
  domain_excluded: boolean
  /** Set to `true` by getGroveNoticeConfig when the API omits it. */
  notice_is_grace_period: boolean
  /** Compared against whole days since the notice was viewed; `null` disables reminders. */
  notice_reminder_frequency: number | null
}

/**
 * Result type that distinguishes between API failure and success.
 * - success: true means API call succeeded (data may still contain null fields)
 * - success: false means API call failed after retry
 */
export type ApiResult<T> =
  | { success: true; data: T }
  | { success: false }
|
||||
|
||||
/**
 * Fetch the current Grove settings for the user account.
 *
 * Resolves to an ApiResult rather than throwing: `{ success: false }` when
 * only essential traffic is allowed or when the request (wrapped in
 * withOAuth401Retry) fails, `{ success: true, data }` otherwise.
 *
 * Memoized so repeated calls reuse the same result. The cache is cleared on a
 * failed request here, and also by markGroveNoticeViewed() and
 * updateGroveSettings() after they change server-side state.
 */
export const getGroveSettings = memoize(
  async (): Promise<ApiResult<AccountSettings>> => {
    // Grove is a notification feature; during an outage, skipping it is correct.
    if (isEssentialTrafficOnly()) {
      return { success: false }
    }

    // Auth headers are rebuilt on every invocation so a retry after a 401
    // picks up whatever getAuthHeaders() returns at that point.
    const requestSettings = () => {
      const authHeaders = getAuthHeaders()
      if (authHeaders.error) {
        throw new Error(`Failed to get auth headers: ${authHeaders.error}`)
      }
      return axios.get<AccountSettings>(
        `${getOauthConfig().BASE_API_URL}/api/oauth/account/settings`,
        {
          headers: {
            ...authHeaders.headers,
            'User-Agent': getClaudeCodeUserAgent(),
          },
        },
      )
    }

    try {
      const response = await withOAuth401Retry(requestSettings)
      return { success: true, data: response.data }
    } catch (err) {
      logError(err)
      // Don't cache failures — transient network issues would lock the user
      // out of privacy settings for the entire session (deadlock: dialog needs
      // success to render the toggle, toggle calls updateGroveSettings which
      // is the only other place the cache is cleared).
      getGroveSettings.cache.clear?.()
      return { success: false }
    }
  },
)
|
||||
|
||||
/**
 * Tell the server that the user has viewed the Grove notice.
 *
 * POSTs an empty body to the `grove_notice_viewed` endpoint through
 * withOAuth401Retry and, on success, clears the getGroveSettings memo cache.
 * Never throws: failures are passed to logError and the cache is left as is.
 */
export async function markGroveNoticeViewed(): Promise<void> {
  const postNoticeViewed = () => {
    const authHeaders = getAuthHeaders()
    if (authHeaders.error) {
      throw new Error(`Failed to get auth headers: ${authHeaders.error}`)
    }
    return axios.post(
      `${getOauthConfig().BASE_API_URL}/api/oauth/account/grove_notice_viewed`,
      {},
      {
        headers: {
          ...authHeaders.headers,
          'User-Agent': getClaudeCodeUserAgent(),
        },
      },
    )
  }

  try {
    await withOAuth401Retry(postNoticeViewed)
    // This mutates grove_notice_viewed_at server-side — Grove.tsx:87 reads it
    // to decide whether to show the dialog. Without invalidation a same-session
    // remount would read stale viewed_at:null and re-show the dialog.
    getGroveSettings.cache.clear?.()
  } catch (err) {
    logError(err)
  }
}
|
||||
|
||||
/**
 * Persist the user's Grove choice on the account.
 *
 * PATCHes `{ grove_enabled }` to the account settings endpoint through
 * withOAuth401Retry and, on success, clears the getGroveSettings memo cache.
 * Never throws: failures are passed to logError.
 *
 * @param groveEnabled - The new value for the account's `grove_enabled` flag
 */
export async function updateGroveSettings(
  groveEnabled: boolean,
): Promise<void> {
  const patchSettings = () => {
    const authHeaders = getAuthHeaders()
    if (authHeaders.error) {
      throw new Error(`Failed to get auth headers: ${authHeaders.error}`)
    }
    const body = { grove_enabled: groveEnabled }
    return axios.patch(
      `${getOauthConfig().BASE_API_URL}/api/oauth/account/settings`,
      body,
      {
        headers: {
          ...authHeaders.headers,
          'User-Agent': getClaudeCodeUserAgent(),
        },
      },
    )
  }

  try {
    await withOAuth401Retry(patchSettings)
    // Invalidate memoized settings so the post-toggle confirmation
    // read in privacy-settings.tsx picks up the new value.
    getGroveSettings.cache.clear?.()
  } catch (err) {
    logError(err)
  }
}
|
||||
|
||||
/**
 * Report whether the user qualifies for Grove, answering from cache only.
 *
 * Nothing in here awaits the network. The answer comes from the per-account
 * entry in the global config's `groveConfigCache`:
 * - not a consumer subscriber, or no account UUID: `false`
 * - no cache entry: start a background fetch and return `false` (so the
 *   dialog is skipped until a later call finds a populated cache)
 * - stale entry (older than GROVE_CACHE_EXPIRATION_MS): start a background
 *   refresh and return the cached value
 * - fresh entry: return the cached value
 */
export async function isQualifiedForGrove(): Promise<boolean> {
  if (!isConsumerSubscriber()) {
    return false
  }

  const accountId = getOauthAccountInfo()?.accountUuid
  if (!accountId) {
    return false
  }

  const cachedEntry = getGlobalConfig().groveConfigCache?.[accountId]
  const now = Date.now()

  // Cold start: fire-and-forget the fetch; its result is not awaited.
  if (!cachedEntry) {
    logForDebugging(
      'Grove: No cache, fetching config in background (dialog skipped this session)',
    )
    void fetchAndStoreGroveConfig(accountId)
    return false
  }

  const isStale = now - cachedEntry.timestamp > GROVE_CACHE_EXPIRATION_MS
  if (isStale) {
    // Serve the stale value now and refresh in the background.
    logForDebugging(
      'Grove: Cache stale, returning cached data and refreshing in background',
    )
    void fetchAndStoreGroveConfig(accountId)
  } else {
    logForDebugging('Grove: Using fresh cached config')
  }

  return cachedEntry.grove_enabled
}
|
||||
|
||||
/**
 * Fetch the Grove notice config and cache its `grove_enabled` flag for the
 * given account in the global config.
 *
 * Skips the write when the fetch failed, or when the existing cache entry
 * already holds the same value and is still within GROVE_CACHE_EXPIRATION_MS.
 * Never throws: errors are written to the debug log.
 *
 * @param accountId - Key under which the entry is stored in `groveConfigCache`
 */
async function fetchAndStoreGroveConfig(accountId: string): Promise<void> {
  try {
    const result = await getGroveNoticeConfig()
    if (!result.success) {
      return
    }

    const { grove_enabled: groveEnabled } = result.data
    const existing = getGlobalConfig().groveConfigCache?.[accountId]

    // Avoid rewriting the config when nothing would change and the entry
    // has not expired yet.
    const isUnchangedAndFresh =
      existing?.grove_enabled === groveEnabled &&
      Date.now() - existing.timestamp <= GROVE_CACHE_EXPIRATION_MS
    if (isUnchangedAndFresh) {
      return
    }

    saveGlobalConfig(current => ({
      ...current,
      groveConfigCache: {
        ...current.groveConfigCache,
        [accountId]: {
          grove_enabled: groveEnabled,
          timestamp: Date.now(),
        },
      },
    }))
  } catch (err) {
    logForDebugging(`Grove: Failed to fetch and store config: ${err}`)
  }
}
|
||||
|
||||
/**
 * Fetch the Grove notice configuration from the API.
 *
 * Resolves to an ApiResult rather than throwing: `{ success: false }` when
 * only essential traffic is allowed or when the request (wrapped in
 * withOAuth401Retry, 3 s timeout) fails. On success, a missing
 * `domain_excluded` becomes `false` and a missing `notice_is_grace_period`
 * becomes `true`.
 *
 * Memoized; unlike getGroveSettings, nothing in this block clears the cache,
 * so a failed result is also reused by later calls.
 */
export const getGroveNoticeConfig = memoize(
  async (): Promise<ApiResult<GroveConfig>> => {
    // Grove is a notification feature; during an outage, skipping it is correct.
    if (isEssentialTrafficOnly()) {
      return { success: false }
    }

    const requestConfig = () => {
      const authHeaders = getAuthHeaders()
      if (authHeaders.error) {
        throw new Error(`Failed to get auth headers: ${authHeaders.error}`)
      }
      return axios.get<GroveConfig>(
        `${getOauthConfig().BASE_API_URL}/api/claude_code_grove`,
        {
          headers: {
            ...authHeaders.headers,
            'User-Agent': getUserAgent(),
          },
          timeout: 3000, // Short timeout - if slow, skip Grove dialog
        },
      )
    }

    try {
      const response = await withOAuth401Retry(requestConfig)

      // Map the API response to the GroveConfig type
      const {
        grove_enabled: groveEnabled,
        domain_excluded: domainExcluded,
        notice_is_grace_period: isGracePeriod,
        notice_reminder_frequency: reminderFrequency,
      } = response.data

      const data: GroveConfig = {
        grove_enabled: groveEnabled,
        domain_excluded: domainExcluded ?? false,
        notice_is_grace_period: isGracePeriod ?? true,
        notice_reminder_frequency: reminderFrequency,
      }
      return { success: true, data }
    } catch (err) {
      logForDebugging(`Failed to fetch Grove notice config: ${err}`)
      return { success: false }
    }
  },
)
|
||||
|
||||
/**
 * Decide whether the Grove dialog should be shown.
 *
 * Rules, in order:
 * 1. Either API result failed: hide.
 * 2. The user has already chosen (`grove_enabled` is not null): hide.
 * 3. `showIfAlreadyViewed` is set: show.
 * 4. The grace period is over: show.
 * 5. A reminder frequency is configured and the notice has been viewed:
 *    show once at least that many whole days have passed since the view.
 * 6. Otherwise: show only if the notice has never been viewed.
 *
 * @param settingsResult - Outcome of the account settings request
 * @param configResult - Outcome of the Grove notice config request
 * @param showIfAlreadyViewed - Show regardless of earlier views, as long as
 *        no choice has been made
 * @returns `true` when the dialog should be displayed
 */
export function calculateShouldShowGrove(
  settingsResult: ApiResult<AccountSettings>,
  configResult: ApiResult<GroveConfig>,
  showIfAlreadyViewed: boolean,
): boolean {
  // Hide dialog on API failure (after retry)
  if (!settingsResult.success || !configResult.success) {
    return false
  }

  const { grove_enabled: choice, grove_notice_viewed_at: viewedAt } =
    settingsResult.data
  const {
    notice_is_grace_period: inGracePeriod,
    notice_reminder_frequency: reminderFrequency,
  } = configResult.data

  // A non-null choice means the user already decided.
  if (choice !== null) {
    return false
  }
  if (showIfAlreadyViewed || !inGracePeriod) {
    return true
  }

  // Without a reminder schedule or a recorded view, show only if the notice
  // has never been viewed.
  if (reminderFrequency === null || !viewedAt) {
    return viewedAt === null || viewedAt === undefined
  }

  // Check if we need to remind the user to accept the terms and choose
  // whether to help improve Claude.
  const msPerDay = 1000 * 60 * 60 * 24
  const daysSinceViewed = Math.floor(
    (Date.now() - new Date(viewedAt).getTime()) / msPerDay,
  )
  return daysSinceViewed >= reminderFrequency
}
|
||||
|
||||
/**
 * Grove check for non-interactive runs.
 *
 * Fetches account settings and notice config in parallel. If
 * calculateShouldShowGrove says the notice is due, it logs
 * `tengu_grove_print_viewed` and then either:
 * - grace period still active: writes an informational notice to stderr and
 *   marks the notice as viewed, or
 * - grace period over: writes an "action required" notice to stderr and
 *   calls gracefulShutdown(1).
 */
export async function checkGroveForNonInteractive(): Promise<void> {
  const [settingsResult, configResult] = await Promise.all([
    getGroveSettings(),
    getGroveNoticeConfig(),
  ])

  // Nothing to do when the user has already chosen, the notice is not due,
  // or either API call failed (the calculation returns false in that case).
  if (!calculateShouldShowGrove(settingsResult, configResult, false)) {
    return
  }

  // Reaching this point implies both calls succeeded; the ternary only
  // narrows the type for the compiler.
  const config = configResult.success ? configResult.data : null
  logEvent('tengu_grove_print_viewed', {
    dismissable:
      config?.notice_is_grace_period as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
  })

  const gracePeriodActive = config === null || config.notice_is_grace_period
  if (!gracePeriodActive) {
    // Grace period has ended - show error message and exit
    writeToStderr(
      '\n[ACTION REQUIRED] An update to our Consumer Terms and Privacy Policy has taken effect on October 8, 2025. You must run `claude` to review the updated terms.\n\n',
    )
    await gracefulShutdown(1)
    return
  }

  // Grace period is still active - show informational message and continue
  writeToStderr(
    '\nAn update to our Consumer Terms and Privacy Policy will take effect on October 8, 2025. Run `claude` to review the updated terms.\n\n',
  )
  await markGroveNoticeViewed()
}
|
||||
@@ -0,0 +1,788 @@
|
||||
import { feature } from 'bun:bundle'
|
||||
import { APIError } from '@anthropic-ai/sdk'
|
||||
import type {
|
||||
BetaStopReason,
|
||||
BetaUsage as Usage,
|
||||
} from '@anthropic-ai/sdk/resources/beta/messages/messages.mjs'
|
||||
import {
|
||||
addToTotalDurationState,
|
||||
consumePostCompaction,
|
||||
getIsNonInteractiveSession,
|
||||
getLastApiCompletionTimestamp,
|
||||
getTeleportedSessionInfo,
|
||||
markFirstTeleportMessageLogged,
|
||||
setLastApiCompletionTimestamp,
|
||||
} from 'src/bootstrap/state.js'
|
||||
import type { QueryChainTracking } from 'src/Tool.js'
|
||||
import { isConnectorTextBlock } from 'src/types/connectorText.js'
|
||||
import type { AssistantMessage } from 'src/types/message.js'
|
||||
import { logForDebugging } from 'src/utils/debug.js'
|
||||
import type { EffortLevel } from 'src/utils/effort.js'
|
||||
import { logError } from 'src/utils/log.js'
|
||||
import { getAPIProviderForStatsig } from 'src/utils/model/providers.js'
|
||||
import type { PermissionMode } from 'src/utils/permissions/PermissionMode.js'
|
||||
import { jsonStringify } from 'src/utils/slowOperations.js'
|
||||
import { logOTelEvent } from 'src/utils/telemetry/events.js'
|
||||
import {
|
||||
endLLMRequestSpan,
|
||||
isBetaTracingEnabled,
|
||||
type Span,
|
||||
} from 'src/utils/telemetry/sessionTracing.js'
|
||||
import type { NonNullableUsage } from '../../entrypoints/sdk/sdkUtilityTypes.js'
|
||||
import { consumeInvokingRequestId } from '../../utils/agentContext.js'
|
||||
import {
|
||||
type AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
|
||||
logEvent,
|
||||
} from '../analytics/index.js'
|
||||
import { sanitizeToolNameForAnalytics } from '../analytics/metadata.js'
|
||||
import { EMPTY_USAGE } from './emptyUsage.js'
|
||||
import { classifyAPIError } from './errors.js'
|
||||
import { extractConnectionErrorDetails } from './errorUtils.js'
|
||||
|
||||
export type { NonNullableUsage }
|
||||
export { EMPTY_USAGE }
|
||||
|
||||
// Names the strategy used for global prompt caching ('none' included).
export type GlobalCacheStrategy =
  | 'tool_based'
  | 'system_prompt'
  | 'none'
|
||||
|
||||
/**
 * Extract a human-readable message from an arbitrary thrown value.
 *
 * For an APIError whose body carries a nested `error.message`, that nested
 * message wins. Otherwise an Error yields its `message`, and anything else is
 * converted with String().
 */
function getErrorMessage(error: unknown): string {
  if (error instanceof APIError) {
    const nestedMessage = (
      error.error as { error?: { message?: string } } | undefined
    )?.error?.message
    if (nestedMessage) {
      return nestedMessage
    }
  }

  if (error instanceof Error) {
    return error.message
  }
  return String(error)
}
|
||||
|
||||
/** Identifiers of the AI gateways this module can recognise. */
type KnownGateway =
  | 'litellm'
  | 'helicone'
  | 'portkey'
  | 'cloudflare-ai-gateway'
  | 'kong'
  | 'braintrust'
  | 'databricks'

// Response-header name prefixes that identify a gateway. Matching is done
// with startsWith against header names.
const GATEWAY_FINGERPRINTS: Partial<
  Record<KnownGateway, { prefixes: string[] }>
> = {
  // https://docs.litellm.ai/docs/proxy/response_headers
  litellm: { prefixes: ['x-litellm-'] },
  // https://docs.helicone.ai/helicone-headers/header-directory
  helicone: { prefixes: ['helicone-'] },
  // https://portkey.ai/docs/api-reference/response-schema
  portkey: { prefixes: ['x-portkey-'] },
  // https://developers.cloudflare.com/ai-gateway/evaluations/add-human-feedback-api/
  'cloudflare-ai-gateway': { prefixes: ['cf-aig-'] },
  // https://developer.konghq.com/ai-gateway/ — X-Kong-Upstream-Latency, X-Kong-Proxy-Latency
  kong: { prefixes: ['x-kong-'] },
  // https://www.braintrust.dev/docs/guides/proxy — x-bt-used-endpoint, x-bt-cached
  braintrust: { prefixes: ['x-bt-'] },
}

// Gateways that use provider-owned domains (not self-hosted), so the
// ANTHROPIC_BASE_URL hostname is a reliable signal even without a
// distinctive response header.
const GATEWAY_HOST_SUFFIXES: Partial<Record<KnownGateway, string[]>> = {
  // https://docs.databricks.com/aws/en/ai-gateway/
  databricks: [
    '.cloud.databricks.com',
    '.azuredatabricks.net',
    '.gcp.databricks.com',
  ],
}

/**
 * Identify a known AI gateway from response headers and/or the base URL.
 *
 * Headers are checked first: the first gateway (in GATEWAY_FINGERPRINTS
 * order) with a prefix matching any header name wins. If no header matches,
 * the lower-cased hostname of `baseUrl` is compared against
 * GATEWAY_HOST_SUFFIXES. A base URL that fails to parse is ignored.
 *
 * @returns The detected gateway, or `undefined` when nothing matches
 */
function detectGateway({
  headers,
  baseUrl,
}: {
  headers?: globalThis.Headers
  baseUrl?: string
}): KnownGateway | undefined {
  if (headers) {
    // Header names are already lowercase from the Headers API
    const headerNames: string[] = []
    headers.forEach((_value, name) => {
      headerNames.push(name)
    })

    const headerMatch = Object.entries(GATEWAY_FINGERPRINTS).find(
      ([, { prefixes }]) =>
        prefixes.some(prefix =>
          headerNames.some(name => name.startsWith(prefix)),
        ),
    )
    if (headerMatch) {
      return headerMatch[0] as KnownGateway
    }
  }

  if (baseUrl) {
    let host: string
    try {
      host = new URL(baseUrl).hostname.toLowerCase()
    } catch {
      // malformed URL — ignore
      return undefined
    }

    const hostMatch = Object.entries(GATEWAY_HOST_SUFFIXES).find(
      ([, suffixes]) => suffixes.some(suffix => host.endsWith(suffix)),
    )
    if (hostMatch) {
      return hostMatch[0] as KnownGateway
    }
  }

  return undefined
}
|
||||
|
||||
/**
 * Collect analytics metadata from Anthropic-related environment variables.
 *
 * Each key is present only when its variable is set to a non-empty value:
 * - `baseUrl` from ANTHROPIC_BASE_URL
 * - `envModel` from ANTHROPIC_MODEL
 * - `envSmallFastModel` from ANTHROPIC_SMALL_FAST_MODEL
 */
function getAnthropicEnvMetadata() {
  const {
    ANTHROPIC_BASE_URL: baseUrl,
    ANTHROPIC_MODEL: envModel,
    ANTHROPIC_SMALL_FAST_MODEL: envSmallFastModel,
  } = process.env

  return {
    ...(baseUrl
      ? {
          baseUrl:
            baseUrl as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
        }
      : {}),
    ...(envModel
      ? {
          envModel:
            envModel as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
        }
      : {}),
    ...(envSmallFastModel
      ? {
          envSmallFastModel:
            envSmallFastModel as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
        }
      : {}),
  }
}
|
||||
|
||||
/**
 * Age of the current build in whole minutes.
 *
 * @returns Minutes elapsed since `MACRO.BUILD_TIME`, rounded down, or
 *          `undefined` when the build time is missing or does not parse as
 *          a date
 */
function getBuildAgeMinutes(): number | undefined {
  const rawBuildTime = MACRO.BUILD_TIME
  if (!rawBuildTime) {
    return undefined
  }

  const builtAtMs = new Date(rawBuildTime).getTime()
  return isNaN(builtAtMs)
    ? undefined
    : Math.floor((Date.now() - builtAtMs) / 60000)
}
|
||||
|
||||
/**
 * Emit the `tengu_api_query` analytics event for an outgoing API request.
 *
 * Always logged: model, message count, temperature, provider, build age,
 * permission mode, query source, thinking type, effort value and fast-mode
 * flag. Logged only when supplied: betas (comma-joined), query chain id and
 * depth, previous request id. Metadata derived from Anthropic environment
 * variables is appended last.
 */
export function logAPIQuery({
  model,
  messagesLength,
  temperature,
  betas,
  permissionMode,
  querySource,
  queryTracking,
  thinkingType,
  effortValue,
  fastMode,
  previousRequestId,
}: {
  model: string
  messagesLength: number
  temperature: number
  betas?: string[]
  permissionMode?: PermissionMode
  querySource: string
  queryTracking?: QueryChainTracking
  thinkingType?: 'adaptive' | 'enabled' | 'disabled'
  effortValue?: EffortLevel | null
  fastMode?: boolean
  previousRequestId?: string | null
}): void {
  // Optional fragments are built up front and spread into the event at the
  // same positions the keys have always occupied.
  const betasField = betas?.length
    ? {
        betas: betas.join(
          ',',
        ) as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
      }
    : {}
  const queryChainFields = queryTracking
    ? {
        queryChainId:
          queryTracking.chainId as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
        queryDepth: queryTracking.depth,
      }
    : {}
  const previousRequestField = previousRequestId
    ? {
        previousRequestId:
          previousRequestId as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
      }
    : {}

  logEvent('tengu_api_query', {
    model: model as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
    messagesLength,
    temperature,
    provider: getAPIProviderForStatsig(),
    buildAgeMins: getBuildAgeMinutes(),
    ...betasField,
    permissionMode:
      permissionMode as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
    querySource:
      querySource as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
    ...queryChainFields,
    thinkingType:
      thinkingType as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
    effortValue:
      effortValue as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
    fastMode,
    ...previousRequestField,
    ...getAnthropicEnvMetadata(),
  })
}
|
||||
|
||||
/**
 * Record a failed API request across every logging channel.
 *
 * In order, this:
 * 1. detects a known gateway from the error's (or the supplied) response
 *    headers and ANTHROPIC_BASE_URL,
 * 2. writes connection-error details and the client request id to the debug
 *    log when available,
 * 3. passes the error to logError and emits the `tengu_api_error` analytics
 *    event,
 * 4. emits the `api_error` OTel event,
 * 5. ends the LLM request span as failed,
 * 6. for a teleported session that has not logged its first message yet,
 *    emits `tengu_teleport_first_message_error` and marks it as logged.
 *
 * The HTTP status is reported only when the error is an APIError that
 * actually carries one. Errors without a status (e.g. connection failures)
 * omit `status` / `status_code` and pass `statusCode: undefined` to the span,
 * rather than the string "undefined" or NaN.
 */
export function logAPIError({
  error,
  model,
  messageCount,
  messageTokens,
  durationMs,
  durationMsIncludingRetries,
  attempt,
  requestId,
  clientRequestId,
  didFallBackToNonStreaming,
  promptCategory,
  headers,
  queryTracking,
  querySource,
  llmSpan,
  fastMode,
  previousRequestId,
}: {
  error: unknown
  model: string
  messageCount: number
  messageTokens?: number
  durationMs: number
  durationMsIncludingRetries: number
  attempt: number
  requestId?: string | null
  /** Client-generated ID sent as x-client-request-id header (survives timeouts) */
  clientRequestId?: string
  didFallBackToNonStreaming?: boolean
  promptCategory?: string
  headers?: globalThis.Headers
  queryTracking?: QueryChainTracking
  querySource?: string
  /** The span from startLLMRequestSpan - pass this to correctly match responses to requests */
  llmSpan?: Span
  fastMode?: boolean
  previousRequestId?: string | null
}): void {
  // Prefer the headers attached to the error itself; fall back to the
  // caller-supplied ones.
  const gateway = detectGateway({
    headers:
      error instanceof APIError && error.headers ? error.headers : headers,
    baseUrl: process.env.ANTHROPIC_BASE_URL,
  })

  const errStr = getErrorMessage(error)
  // An APIError may have no HTTP status (e.g. the request never reached the
  // server). Guard against stringifying `undefined` into "undefined".
  const status =
    error instanceof APIError && error.status != null
      ? String(error.status)
      : undefined
  const errorType = classifyAPIError(error)

  // Log detailed connection error info to debug logs (visible via --debug)
  const connectionDetails = extractConnectionErrorDetails(error)
  if (connectionDetails) {
    const sslLabel = connectionDetails.isSSLError ? ' (SSL error)' : ''
    logForDebugging(
      `Connection error details: code=${connectionDetails.code}${sslLabel}, message=${connectionDetails.message}`,
      { level: 'error' },
    )
  }

  const invocation = consumeInvokingRequestId()

  if (clientRequestId) {
    logForDebugging(
      `API error x-client-request-id=${clientRequestId} (give this to the API team for server-log lookup)`,
      { level: 'error' },
    )
  }

  logError(error as Error)
  logEvent('tengu_api_error', {
    model: model as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
    error: errStr as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
    status:
      status as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
    errorType:
      errorType as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
    messageCount,
    messageTokens,
    durationMs,
    durationMsIncludingRetries,
    attempt,
    provider: getAPIProviderForStatsig(),
    requestId:
      (requestId as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS) ||
      undefined,
    ...(invocation
      ? {
          invokingRequestId:
            invocation.invokingRequestId as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
          invocationKind:
            invocation.invocationKind as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
        }
      : {}),
    clientRequestId:
      (clientRequestId as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS) ||
      undefined,
    didFallBackToNonStreaming,
    ...(promptCategory
      ? {
          promptCategory:
            promptCategory as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
        }
      : {}),
    ...(gateway
      ? {
          gateway:
            gateway as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
        }
      : {}),
    ...(queryTracking
      ? {
          queryChainId:
            queryTracking.chainId as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
          queryDepth: queryTracking.depth,
        }
      : {}),
    ...(querySource
      ? {
          querySource:
            querySource as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
        }
      : {}),
    fastMode,
    ...(previousRequestId
      ? {
          previousRequestId:
            previousRequestId as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
        }
      : {}),
    ...getAnthropicEnvMetadata(),
  })

  // Log API error event for OTLP
  void logOTelEvent('api_error', {
    model: model,
    error: errStr,
    // Only attach a status code when one exists.
    ...(status ? { status_code: status } : {}),
    duration_ms: String(durationMs),
    attempt: String(attempt),
    speed: fastMode ? 'fast' : 'normal',
  })

  // Pass the span to correctly match responses to requests when beta tracing is enabled
  endLLMRequestSpan(llmSpan, {
    success: false,
    statusCode: status ? parseInt(status, 10) : undefined,
    error: errStr,
    attempt,
  })

  // Log first error for teleported sessions (reliability tracking)
  const teleportInfo = getTeleportedSessionInfo()
  if (teleportInfo?.isTeleported && !teleportInfo.hasLoggedFirstMessage) {
    logEvent('tengu_teleport_first_message_error', {
      session_id:
        teleportInfo.sessionId as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
      error_type:
        errorType as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
    })
    markFirstTeleportMessageLogged()
  }
}
|
||||
|
||||
/**
 * Emits the `tengu_api_success` analytics event for one completed API call.
 *
 * Optional fields (betas, invocation info, gateway, query tracking, content
 * lengths, ...) are only attached to the event when they carry a value.
 * After logging, the completion time captured at the start of this function
 * is stored with setLastApiCompletionTimestamp so the next call can report
 * `timeSinceLastApiCallMs`.
 */
function logAPISuccess({
  model,
  preNormalizedModel,
  messageCount,
  messageTokens,
  usage,
  durationMs,
  durationMsIncludingRetries,
  attempt,
  ttftMs,
  requestId,
  stopReason,
  costUSD,
  didFallBackToNonStreaming,
  querySource,
  gateway,
  queryTracking,
  permissionMode,
  globalCacheStrategy,
  textContentLength,
  thinkingContentLength,
  toolUseContentLengths,
  connectorTextBlockCount,
  fastMode,
  previousRequestId,
  betas,
}: {
  model: string
  preNormalizedModel: string
  messageCount: number
  messageTokens: number
  usage: Usage
  durationMs: number
  durationMsIncludingRetries: number
  attempt: number
  ttftMs: number | null
  requestId: string | null
  stopReason: BetaStopReason | null
  costUSD: number
  didFallBackToNonStreaming: boolean
  querySource: string
  gateway?: KnownGateway
  queryTracking?: QueryChainTracking
  permissionMode?: PermissionMode
  globalCacheStrategy?: GlobalCacheStrategy
  textContentLength?: number
  thinkingContentLength?: number
  toolUseContentLengths?: Record<string, number>
  connectorTextBlockCount?: number
  fastMode?: boolean
  previousRequestId?: string | null
  betas?: string[]
}): void {
  const isNonInteractiveSession = getIsNonInteractiveSession()
  const isPostCompaction = consumePostCompaction()
  const hasPrintFlag = ['-p', '--print'].some(flag =>
    process.argv.includes(flag),
  )

  // One timestamp serves both the "time since last call" metric and the
  // value stored at the end of this function.
  const now = Date.now()
  const lastCompletion = getLastApiCompletionTimestamp()
  const timeSinceLastApiCallMs =
    lastCompletion === null ? undefined : now - lastCompletion

  const invocation = consumeInvokingRequestId()

  // NOTE: property order is significant — later spreads (e.g. the env
  // metadata) can override earlier keys.
  logEvent('tengu_api_success', {
    model: model as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
    // Only report the pre-normalized model when normalization changed it.
    ...(preNormalizedModel !== model
      ? {
          preNormalizedModel:
            preNormalizedModel as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
        }
      : {}),
    ...(betas?.length
      ? {
          betas: betas.join(
            ',',
          ) as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
        }
      : {}),
    messageCount,
    messageTokens,
    inputTokens: usage.input_tokens,
    outputTokens: usage.output_tokens,
    cachedInputTokens: usage.cache_read_input_tokens ?? 0,
    uncachedInputTokens: usage.cache_creation_input_tokens ?? 0,
    durationMs,
    durationMsIncludingRetries,
    attempt,
    ttftMs: ttftMs ?? undefined,
    buildAgeMins: getBuildAgeMinutes(),
    provider: getAPIProviderForStatsig(),
    requestId:
      (requestId as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS) ??
      undefined,
    ...(invocation
      ? {
          invokingRequestId:
            invocation.invokingRequestId as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
          invocationKind:
            invocation.invocationKind as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
        }
      : {}),
    stop_reason:
      (stopReason as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS) ??
      undefined,
    costUSD,
    didFallBackToNonStreaming,
    isNonInteractiveSession,
    print: hasPrintFlag,
    isTTY: process.stdout.isTTY ?? false,
    querySource:
      querySource as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
    ...(gateway
      ? {
          gateway:
            gateway as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
        }
      : {}),
    ...(queryTracking
      ? {
          queryChainId:
            queryTracking.chainId as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
          queryDepth: queryTracking.depth,
        }
      : {}),
    permissionMode:
      permissionMode as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
    ...(globalCacheStrategy
      ? {
          globalCacheStrategy:
            globalCacheStrategy as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
        }
      : {}),
    // Content-length fields use `!== undefined` so a legitimate 0 is logged.
    ...(textContentLength !== undefined
      ? ({
          textContentLength,
        } as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS)
      : {}),
    ...(thinkingContentLength !== undefined
      ? ({
          thinkingContentLength,
        } as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS)
      : {}),
    ...(toolUseContentLengths !== undefined
      ? ({
          toolUseContentLengths: jsonStringify(
            toolUseContentLengths,
          ) as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
        } as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS)
      : {}),
    ...(connectorTextBlockCount !== undefined
      ? ({
          connectorTextBlockCount,
        } as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS)
      : {}),
    fastMode,
    // Log cache_deleted_input_tokens for cache editing analysis. The casts
    // are required because the field is intentionally absent from
    // NonNullableUsage (excluded from external builds). It is set by
    // updateUsage() when cache editing is active.
    ...(feature('CACHED_MICROCOMPACT') &&
    ((usage as unknown as { cache_deleted_input_tokens?: number })
      .cache_deleted_input_tokens ?? 0) > 0
      ? {
          cacheDeletedInputTokens: (
            usage as unknown as { cache_deleted_input_tokens: number }
          ).cache_deleted_input_tokens,
        }
      : {}),
    ...(previousRequestId
      ? {
          previousRequestId:
            previousRequestId as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
        }
      : {}),
    ...(isPostCompaction ? { isPostCompaction } : {}),
    ...getAnthropicEnvMetadata(),
    timeSinceLastApiCallMs,
  })

  setLastApiCompletionTimestamp(now)
}
|
||||
|
||||
/**
 * Records a successful API call end-to-end:
 *
 * 1. Detects the gateway from the response headers / ANTHROPIC_BASE_URL.
 * 2. Measures the response content (text, thinking, per-tool input sizes,
 *    connector-text block count) when `newMessages` is provided.
 * 3. Computes durations and adds them to the total-duration state.
 * 4. Emits the `tengu_api_success` analytics event (via logAPISuccess) and
 *    the `api_request` OTLP event.
 * 5. Ends the LLM request span, attaching model/thinking output only when
 *    beta tracing is enabled.
 * 6. Logs the first successful message of a teleported session once.
 */
export function logAPISuccessAndDuration({
  model,
  preNormalizedModel,
  start,
  startIncludingRetries,
  ttftMs,
  usage,
  attempt,
  messageCount,
  messageTokens,
  requestId,
  stopReason,
  didFallBackToNonStreaming,
  querySource,
  headers,
  costUSD,
  queryTracking,
  permissionMode,
  newMessages,
  llmSpan,
  globalCacheStrategy,
  requestSetupMs,
  attemptStartTimes,
  fastMode,
  previousRequestId,
  betas,
}: {
  model: string
  preNormalizedModel: string
  start: number
  startIncludingRetries: number
  ttftMs: number | null
  usage: NonNullableUsage
  attempt: number
  messageCount: number
  messageTokens: number
  requestId: string | null
  stopReason: BetaStopReason | null
  didFallBackToNonStreaming: boolean
  querySource: string
  headers?: globalThis.Headers
  costUSD: number
  queryTracking?: QueryChainTracking
  permissionMode?: PermissionMode
  /** Assistant messages from the response - used to extract model_output and thinking_output
   * when beta tracing is enabled */
  newMessages?: AssistantMessage[]
  /** The span from startLLMRequestSpan - pass this to correctly match responses to requests */
  llmSpan?: Span
  /** Strategy used for global prompt caching: 'tool_based', 'system_prompt', or 'none' */
  globalCacheStrategy?: GlobalCacheStrategy
  /** Time spent in pre-request setup before the successful attempt */
  requestSetupMs?: number
  /** Timestamps (Date.now()) of each attempt start — used for retry sub-spans in Perfetto */
  attemptStartTimes?: number[]
  fastMode?: boolean
  /** Request ID from the previous API call in this session */
  previousRequestId?: string | null
  betas?: string[]
}): void {
  const gateway = detectGateway({
    headers,
    baseUrl: process.env.ANTHROPIC_BASE_URL,
  })

  let textContentLength: number | undefined
  let thinkingContentLength: number | undefined
  let toolUseContentLengths: Record<string, number> | undefined
  let connectorTextBlockCount: number | undefined

  if (newMessages) {
    let textChars = 0
    let thinkingChars = 0
    let connectorBlocks = 0
    // Tracked separately from the record below so "any tool use seen" does
    // not depend on which keys ended up in the record.
    let sawToolUse = false
    const charsByTool: Record<string, number> = {}

    for (const { message } of newMessages) {
      for (const block of message.content) {
        if (block.type === 'text') {
          textChars += block.text.length
          continue
        }
        // Connector-text blocks are checked before thinking/tool blocks.
        if (feature('CONNECTOR_TEXT') && isConnectorTextBlock(block)) {
          connectorBlocks++
          continue
        }
        if (block.type === 'thinking') {
          thinkingChars += block.thinking.length
          continue
        }
        if (
          block.type === 'tool_use' ||
          block.type === 'server_tool_use' ||
          block.type === 'mcp_tool_use'
        ) {
          // Size of the serialized tool input, accumulated per sanitized name.
          const inputChars = jsonStringify(block.input).length
          const toolKey = sanitizeToolNameForAnalytics(block.name)
          charsByTool[toolKey] = (charsByTool[toolKey] ?? 0) + inputChars
          sawToolUse = true
        }
      }
    }

    // Text length is always reported (even 0); the others only when present.
    textContentLength = textChars
    thinkingContentLength = thinkingChars > 0 ? thinkingChars : undefined
    toolUseContentLengths = sawToolUse ? charsByTool : undefined
    connectorTextBlockCount = connectorBlocks > 0 ? connectorBlocks : undefined
  }

  const durationMs = Date.now() - start
  const durationMsIncludingRetries = Date.now() - startIncludingRetries
  addToTotalDurationState(durationMsIncludingRetries, durationMs)

  logAPISuccess({
    model,
    preNormalizedModel,
    messageCount,
    messageTokens,
    usage,
    durationMs,
    durationMsIncludingRetries,
    attempt,
    ttftMs,
    requestId,
    stopReason,
    costUSD,
    didFallBackToNonStreaming,
    querySource,
    gateway,
    queryTracking,
    permissionMode,
    globalCacheStrategy,
    textContentLength,
    thinkingContentLength,
    toolUseContentLengths,
    connectorTextBlockCount,
    fastMode,
    previousRequestId,
    betas,
  })

  // Log API request event for OTLP
  void logOTelEvent('api_request', {
    model,
    input_tokens: String(usage.input_tokens),
    output_tokens: String(usage.output_tokens),
    cache_read_tokens: String(usage.cache_read_input_tokens),
    cache_creation_tokens: String(usage.cache_creation_input_tokens),
    cost_usd: String(costUSD),
    duration_ms: String(durationMs),
    speed: fastMode ? 'fast' : 'normal',
  })

  // Model output, thinking output, and the tool-call flag are only extracted
  // when beta tracing is enabled; otherwise they stay undefined.
  let modelOutput: string | undefined
  let thinkingOutput: string | undefined
  let hasToolCall: boolean | undefined

  if (isBetaTracingEnabled() && newMessages) {
    const responseBlocks = newMessages.flatMap(m => m.message.content)

    // Model output - visible to all users
    const textParts = responseBlocks
      .filter(c => c.type === 'text')
      .map(c => (c as { type: 'text'; text: string }).text)
    modelOutput = textParts.join('\n') || undefined

    // Thinking output - Ant-only (build-time gated)
    if (process.env.USER_TYPE === 'ant') {
      const thinkingParts = responseBlocks
        .filter(c => c.type === 'thinking')
        .map(c => (c as { type: 'thinking'; thinking: string }).thinking)
      thinkingOutput = thinkingParts.join('\n') || undefined
    }

    // Check if any tool_use blocks were in the output
    hasToolCall = responseBlocks.some(c => c.type === 'tool_use')
  }

  // Pass the span to correctly match responses to requests when beta tracing is enabled
  endLLMRequestSpan(llmSpan, {
    success: true,
    inputTokens: usage.input_tokens,
    outputTokens: usage.output_tokens,
    cacheReadTokens: usage.cache_read_input_tokens,
    cacheCreationTokens: usage.cache_creation_input_tokens,
    attempt,
    modelOutput,
    thinkingOutput,
    hasToolCall,
    ttftMs: ttftMs ?? undefined,
    requestSetupMs,
    attemptStartTimes,
  })

  // Log first successful message for teleported sessions (reliability tracking)
  const teleportInfo = getTeleportedSessionInfo()
  const isFirstTeleportMessage =
    teleportInfo?.isTeleported && !teleportInfo.hasLoggedFirstMessage
  if (isFirstTeleportMessage) {
    logEvent('tengu_teleport_first_message_success', {
      session_id:
        teleportInfo.sessionId as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
    })
    markFirstTeleportMessageLogged()
  }
}
|
||||
@@ -0,0 +1,159 @@
|
||||
import axios from 'axios'
|
||||
import { hasProfileScope, isClaudeAISubscriber } from '../../utils/auth.js'
|
||||
import { getGlobalConfig, saveGlobalConfig } from '../../utils/config.js'
|
||||
import { logForDebugging } from '../../utils/debug.js'
|
||||
import { errorMessage } from '../../utils/errors.js'
|
||||
import { getAuthHeaders, withOAuth401Retry } from '../../utils/http.js'
|
||||
import { logError } from '../../utils/log.js'
|
||||
import { memoizeWithTTLAsync } from '../../utils/memoize.js'
|
||||
import { isEssentialTrafficOnly } from '../../utils/privacyLevel.js'
|
||||
import { getClaudeCodeUserAgent } from '../../utils/userAgent.js'
|
||||
|
||||
/** Response body of the organization `metrics_enabled` endpoint. */
type MetricsEnabledResponse = {
  metrics_logging_enabled: boolean
}
|
||||
|
||||
/**
 * Result handed to callers of checkMetricsEnabled.
 * `hasError` is true only when the API check itself failed; in that case
 * `enabled` is reported as false.
 */
type MetricsStatus = {
  enabled: boolean
  hasError: boolean
}
|
||||
|
||||
// In-memory TTL (1 hour) — dedupes calls within a single process.
const CACHE_TTL_MS = 60 * 60 * 1000

// Disk TTL (24 hours) — org settings rarely change. When the disk cache is
// fresher than this, the network is skipped entirely (no background refresh).
// This is what collapses N `claude -p` invocations into ~1 API call/day.
const DISK_CACHE_TTL_MS = 24 * 60 * 60 * 1000
|
||||
|
||||
/**
 * Performs the raw GET against the `metrics_enabled` endpoint and returns the
 * response body. Caching is layered on top by the memoized wrapper.
 *
 * @throws Error when auth headers cannot be obtained, or whatever axios
 *   throws for a failed / timed-out (5s) request.
 */
async function _fetchMetricsEnabled(): Promise<MetricsEnabledResponse> {
  const auth = getAuthHeaders()
  if (auth.error) {
    throw new Error(`Auth error: ${auth.error}`)
  }

  const endpoint =
    'https://api.anthropic.com/api/claude_code/organizations/metrics_enabled'
  const { data } = await axios.get<MetricsEnabledResponse>(endpoint, {
    headers: {
      'Content-Type': 'application/json',
      'User-Agent': getClaudeCodeUserAgent(),
      // Auth headers are spread last so they take precedence on any clash.
      ...auth.headers,
    },
    timeout: 5000,
  })
  return data
}
|
||||
|
||||
/**
 * Asks the API whether metrics logging is enabled and maps the outcome to a
 * MetricsStatus. Never throws: failures are logged and reported as
 * `{ enabled: false, hasError: true }`.
 */
async function _checkMetricsEnabledAPI(): Promise<MetricsStatus> {
  // Incident kill switch: skip the network call when nonessential traffic is
  // disabled. Returning enabled:false sheds load at the consumer
  // (bigqueryExporter skips export). Same shape as the non-subscriber
  // early return in checkMetricsEnabled.
  if (isEssentialTrafficOnly()) {
    return { enabled: false, hasError: false }
  }

  try {
    const { metrics_logging_enabled: enabled } = await withOAuth401Retry(
      _fetchMetricsEnabled,
      { also403Revoked: true },
    )

    logForDebugging(`Metrics opt-out API response: enabled=${enabled}`)

    return { enabled, hasError: false }
  } catch (error) {
    logForDebugging(
      `Failed to check metrics opt-out status: ${errorMessage(error)}`,
    )
    logError(error)
    return { enabled: false, hasError: true }
  }
}
|
||||
|
||||
// In-process memoized wrapper around the API check; results are reused for
// CACHE_TTL_MS. Errors are already folded into the MetricsStatus value by
// _checkMetricsEnabledAPI, so the memoized function itself does not reject.
const memoizedCheckMetrics = memoizeWithTTLAsync(
  _checkMetricsEnabledAPI,
  CACHE_TTL_MS,
)
|
||||
|
||||
/**
 * Fetches the status (through the in-memory memoized check) and persists it
 * to the global config when it changed or the stored entry has aged out.
 *
 * Error results are returned but never persisted — a transient failure must
 * not overwrite a known-good disk value.
 *
 * NOTE(review): the essential-traffic kill switch in _checkMetricsEnabledAPI
 * yields `hasError: false`, so that answer appears to be persisted here as
 * well — confirm this is intended.
 */
async function refreshMetricsStatus(): Promise<MetricsStatus> {
  const status = await memoizedCheckMetrics()
  if (status.hasError) {
    return status
  }

  const onDisk = getGlobalConfig().metricsStatusCache
  // Skip the write when the value is unchanged AND the timestamp is still
  // fresh — avoids config churn when concurrent callers race past a stale
  // disk entry and all try to write.
  const diskEntryIsCurrent =
    onDisk !== undefined &&
    onDisk.enabled === status.enabled &&
    Date.now() - onDisk.timestamp < DISK_CACHE_TTL_MS

  if (!diskEntryIsCurrent) {
    saveGlobalConfig(current => ({
      ...current,
      metricsStatusCache: {
        enabled: status.enabled,
        timestamp: Date.now(),
      },
    }))
  }
  return status
}
|
||||
|
||||
/**
 * Check if metrics are enabled for the current organization.
 *
 * Two-tier cache:
 * - Disk (24h TTL): survives process restarts. A fresh disk entry means zero
 *   network traffic; a stale one is returned immediately while a background
 *   refresh runs.
 * - In-memory (1h TTL): dedupes the background refresh within a process.
 *
 * The caller (bigqueryExporter) tolerates stale reads — a missed export or
 * an extra one during the 24h window is acceptable.
 */
export async function checkMetricsEnabled(): Promise<MetricsStatus> {
  // Service key OAuth sessions lack user:profile scope → would 403.
  // API key users (non-subscribers) fall through and use x-api-key auth.
  // This check runs before the disk read so auth-state-derived answers are
  // never persisted — only real API responses go to disk. Otherwise a
  // service-key session would poison the cache for a later full-OAuth session.
  if (isClaudeAISubscriber() && !hasProfileScope()) {
    return { enabled: false, hasError: false }
  }

  const diskEntry = getGlobalConfig().metricsStatusCache
  if (!diskEntry) {
    // First-ever run on this machine: block on the network to populate disk.
    return refreshMetricsStatus()
  }

  const isStale = Date.now() - diskEntry.timestamp > DISK_CACHE_TTL_MS
  if (isStale) {
    // saveGlobalConfig's fallback path (config.ts:731) can throw if both the
    // locked and fallback writes fail — catch here so the fire-and-forget
    // refresh doesn't become an unhandled rejection.
    void refreshMetricsStatus().catch(logError)
  }

  // Serve the disk value right away, stale or not.
  return { enabled: diskEntry.enabled, hasError: false }
}
|
||||
|
||||
/**
 * Test-only hook: empties the in-memory memoization cache so the next check
 * calls the API function again. Does not touch the disk cache.
 */
export const _clearMetricsEnabledCacheForTesting = (): void => {
  const { cache } = memoizedCheckMetrics
  cache.clear()
}
|
||||
@@ -0,0 +1,137 @@
|
||||
import axios from 'axios'
|
||||
import { getOauthConfig } from '../../constants/oauth.js'
|
||||
import { getOauthAccountInfo } from '../../utils/auth.js'
|
||||
import { getGlobalConfig, saveGlobalConfig } from '../../utils/config.js'
|
||||
import { logError } from '../../utils/log.js'
|
||||
import { isEssentialTrafficOnly } from '../../utils/privacyLevel.js'
|
||||
import { getOAuthHeaders, prepareApiRequest } from '../../utils/teleport/api.js'
|
||||
|
||||
/**
 * Overage credit grant details as returned by the backend's
 * `overage_credit_grant` endpoint.
 */
export type OverageCreditGrantInfo = {
  available: boolean
  eligible: boolean
  granted: boolean
  /** Grant amount in minor currency units (formatGrantAmount divides USD by 100); null when absent. */
  amount_minor_units: number | null
  /** Currency code, compared case-insensitively against 'USD' when formatting; null when absent. */
  currency: string | null
}
|
||||
|
||||
/** One per-organization entry of the on-disk overage credit grant cache. */
type CachedGrantEntry = {
  info: OverageCreditGrantInfo
  /** Date.now() value recorded when the entry was written. */
  timestamp: number
}
|
||||
|
||||
// Cached grant entries older than this (1 hour) are treated as stale.
const CACHE_TTL_MS = 60 * 60 * 1000
|
||||
|
||||
/**
 * Fetch the current user's overage credit grant eligibility from the backend.
 * The backend resolves tier-specific amounts and role-based claim permission,
 * so the CLI only reads the response and does not replicate that logic.
 *
 * @returns The grant info, or null when preparing or performing the request
 *   failed (the error is logged, not rethrown).
 */
async function fetchOverageCreditGrant(): Promise<OverageCreditGrantInfo | null> {
  try {
    const { accessToken, orgUUID } = await prepareApiRequest()
    const baseUrl = getOauthConfig().BASE_API_URL
    const { data } = await axios.get<OverageCreditGrantInfo>(
      `${baseUrl}/api/oauth/organizations/${orgUUID}/overage_credit_grant`,
      { headers: getOAuthHeaders(accessToken) },
    )
    return data
  } catch (err) {
    logError(err)
    return null
  }
}
|
||||
|
||||
/**
 * Get cached grant info for the current organization.
 *
 * Returns null when there is no organization, no cache entry, or the entry
 * is older than CACHE_TTL_MS. Callers should render nothing (not block) on
 * null — refreshOverageCreditGrantCache populates the cache lazily.
 */
export function getCachedOverageCreditGrant(): OverageCreditGrantInfo | null {
  const orgId = getOauthAccountInfo()?.organizationUuid
  if (!orgId) return null

  const entry = getGlobalConfig().overageCreditGrantCache?.[orgId]
  if (!entry) return null

  const ageMs = Date.now() - entry.timestamp
  return ageMs > CACHE_TTL_MS ? null : entry.info
}
|
||||
|
||||
/**
 * Drop the current org's cached entry so the next read refetches.
 * Entries belonging to other orgs are left intact. No config write happens
 * when there is no current org or no entry for it.
 */
export function invalidateOverageCreditGrantCache(): void {
  const orgId = getOauthAccountInfo()?.organizationUuid
  if (!orgId) return

  const currentCache = getGlobalConfig().overageCreditGrantCache
  const hasEntry = Boolean(currentCache) && orgId in currentCache!
  if (!hasEntry) return

  saveGlobalConfig(prev => {
    // Rebuild the map from `prev` (not from the read above) without this org.
    const { [orgId]: _removed, ...remaining } = {
      ...prev.overageCreditGrantCache,
    }
    return { ...prev, overageCreditGrantCache: remaining }
  })
}
|
||||
|
||||
/**
 * Fetch and cache grant info for the current organization. Fire-and-forget;
 * call when an upsell surface is about to render and the cache is empty.
 *
 * Does nothing when only essential traffic is allowed, when there is no
 * organization, or when the fetch fails.
 */
export async function refreshOverageCreditGrantCache(): Promise<void> {
  if (isEssentialTrafficOnly()) return

  const orgId = getOauthAccountInfo()?.organizationUuid
  if (!orgId) return

  const info = await fetchOverageCreditGrant()
  if (!info) return

  // Skip rewriting info if the grant data is unchanged — avoids config write
  // amplification (inc-4552 pattern). The timestamp is still refreshed once
  // it has aged out, so the TTL-based staleness check in
  // getCachedOverageCreditGrant doesn't keep re-triggering API calls on every
  // component mount.
  saveGlobalConfig(prev => {
    // Derive from prev (lock-fresh) rather than a pre-lock getGlobalConfig()
    // read — saveConfigWithLock re-reads config from disk under the file
    // lock, so another CLI instance may have written between any outer read
    // and lock acquire.
    const prevCached = prev.overageCreditGrantCache?.[orgId]
    const existing = prevCached?.info

    const comparedFields = [
      'available',
      'eligible',
      'granted',
      'amount_minor_units',
      'currency',
    ] as const
    const dataUnchanged =
      existing && comparedFields.every(field => existing[field] === info[field])

    // Unchanged data with a still-fresh timestamp: skip the write entirely.
    const timestampFresh =
      dataUnchanged &&
      prevCached &&
      Date.now() - prevCached.timestamp <= CACHE_TTL_MS
    if (timestampFresh) {
      return prev
    }

    const entry: CachedGrantEntry = {
      info: dataUnchanged ? existing : info,
      timestamp: Date.now(),
    }
    return {
      ...prev,
      overageCreditGrantCache: {
        ...prev.overageCreditGrantCache,
        [orgId]: entry,
      },
    }
  })
}
|
||||
|
||||
/**
 * Format the grant amount for display.
 *
 * @returns A dollar string such as `$5` or `$19.99`, or null when the amount
 *   or currency is missing, or the currency is not USD (the only currency
 *   handled for now; the backend may expand later).
 */
export function formatGrantAmount(info: OverageCreditGrantInfo): string | null {
  const { amount_minor_units: minorUnits, currency } = info
  if (minorUnits == null || !currency) return null
  if (currency.toUpperCase() !== 'USD') return null

  const dollars = minorUnits / 100
  // Whole-dollar amounts are shown without cents.
  const rendered = Number.isInteger(dollars)
    ? String(dollars)
    : dollars.toFixed(2)
  return `$${rendered}`
}
|
||||
|
||||
export type { CachedGrantEntry as OverageCreditGrantCacheEntry }
|
||||
@@ -0,0 +1,727 @@
|
||||
import type { BetaToolUnion } from '@anthropic-ai/sdk/resources/beta/messages/messages.mjs'
|
||||
import type { TextBlockParam } from '@anthropic-ai/sdk/resources/index.mjs'
|
||||
import { createPatch } from 'diff'
|
||||
import { mkdir, writeFile } from 'fs/promises'
|
||||
import { join } from 'path'
|
||||
import type { AgentId } from 'src/types/ids.js'
|
||||
import type { Message } from 'src/types/message.js'
|
||||
import { logForDebugging } from 'src/utils/debug.js'
|
||||
import { djb2Hash } from 'src/utils/hash.js'
|
||||
import { logError } from 'src/utils/log.js'
|
||||
import { getClaudeTempDir } from 'src/utils/permissions/filesystem.js'
|
||||
import { jsonStringify } from 'src/utils/slowOperations.js'
|
||||
import type { QuerySource } from '../../constants/querySource.js'
|
||||
import {
|
||||
type AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
|
||||
logEvent,
|
||||
} from '../analytics/index.js'
|
||||
|
||||
/**
 * Builds a path inside the Claude temp directory of the form
 * `cache-break-<4 random [a-z0-9] chars>.diff`.
 */
function getCacheBreakDiffPath(): string {
  const alphabet = 'abcdefghijklmnopqrstuvwxyz0123456789'
  const suffix = Array.from(
    { length: 4 },
    () => alphabet[Math.floor(Math.random() * alphabet.length)],
  ).join('')
  return join(getClaudeTempDir(), `cache-break-${suffix}.diff`)
}
|
||||
|
||||
/**
 * Per-tracking-key snapshot of the request inputs recorded for cache break
 * detection. Instances are stored in `previousStateBySource`.
 */
type PreviousState = {
  systemHash: number
  toolsHash: number
  /** Hash of system blocks WITH cache_control intact. Catches scope/TTL flips
   * (global↔org, 1h↔5m) that stripCacheControl erases from systemHash. */
  cacheControlHash: number
  toolNames: string[]
  /** Per-tool schema hash. Diffed to name which tool's description changed
   * when toolSchemasChanged but added=removed=0 (77% of tool breaks per
   * BQ 2026-03-22). AgentTool/SkillTool embed dynamic agent/command lists. */
  perToolHashes: Record<string, number>
  systemCharCount: number
  model: string
  fastMode: boolean
  /** 'tool_based' | 'system_prompt' | 'none' — flips when MCP tools are
   * discovered/removed. */
  globalCacheStrategy: string
  /** Sorted beta header list. Diffed to show which headers were added/removed. */
  betas: string[]
  /** AFK_MODE_BETA_HEADER presence — should NOT break cache anymore
   * (sticky-on latched in claude.ts). Tracked to verify the fix. */
  autoModeActive: boolean
  /** Overage state flip — should NOT break cache anymore (eligibility is
   * latched session-stable in should1hCacheTTL). Tracked to verify the fix. */
  isUsingOverage: boolean
  /** Cache-editing beta header presence — should NOT break cache anymore
   * (sticky-on latched in claude.ts). Tracked to verify the fix. */
  cachedMCEnabled: boolean
  /** Resolved effort (env → options → model default). Goes into output_config
   * or anthropic_internal.effort_override. */
  effortValue: string
  /** Hash of getExtraBodyParams() — catches CLAUDE_CODE_EXTRA_BODY and
   * anthropic_internal changes. */
  extraBodyHash: number
  callCount: number
  pendingChanges: PendingChanges | null
  prevCacheReadTokens: number | null
  /** Set when cached microcompact sends cache_edits deletions. Cache reads
   * will legitimately drop — this is expected, not a break. */
  cacheDeletionsPending: boolean
  buildDiffableContent: () => string
}
|
||||
|
||||
/**
 * Record of which tracked request inputs changed, with the before/after
 * detail needed to describe the change. Held in PreviousState.pendingChanges.
 */
type PendingChanges = {
  // Per-input "did it change" flags.
  systemPromptChanged: boolean
  toolSchemasChanged: boolean
  modelChanged: boolean
  fastModeChanged: boolean
  cacheControlChanged: boolean
  globalCacheStrategyChanged: boolean
  betasChanged: boolean
  autoModeChanged: boolean
  overageChanged: boolean
  cachedMCChanged: boolean
  effortChanged: boolean
  extraBodyChanged: boolean
  // Magnitudes of tool-set and system-prompt changes.
  addedToolCount: number
  removedToolCount: number
  systemCharDelta: number
  // Names involved in tool-set / schema changes.
  addedTools: string[]
  removedTools: string[]
  changedToolSchemas: string[]
  // Before/after values for scalar inputs.
  previousModel: string
  newModel: string
  prevGlobalCacheStrategy: string
  newGlobalCacheStrategy: string
  addedBetas: string[]
  removedBetas: string[]
  prevEffortValue: string
  newEffortValue: string
  buildPrevDiffableContent: () => string
}
|
||||
|
||||
// Tracking state, keyed by the string returned from getTrackingKey.
const previousStateBySource = new Map<string, PreviousState>()

// Cap the number of tracked sources to prevent unbounded memory growth.
// Each entry stores a ~300KB+ diffableContent string (serialized system prompt
// + tool schemas). Without a cap, spawning many subagents (each with a unique
// agentId key) causes the map to grow indefinitely.
const MAX_TRACKED_SOURCES = 10

// A querySource is tracked when it starts with one of these prefixes.
const TRACKED_SOURCE_PREFIXES = [
  'repl_main_thread',
  'sdk',
  'agent:custom',
  'agent:default',
  'agent:builtin',
]

// Minimum absolute token drop required to trigger a cache break warning.
// Small drops (e.g., a few thousand tokens) can happen due to normal variation
// and aren't worth alerting on.
const MIN_CACHE_MISS_TOKENS = 2_000

// Anthropic's server-side prompt cache TTL thresholds to test.
// Cache breaks after these durations are likely due to TTL expiration
// rather than client-side changes.
const CACHE_TTL_5MIN_MS = 5 * 60 * 1000
export const CACHE_TTL_1HOUR_MS = 60 * 60 * 1000
|
||||
|
||||
/**
 * Whether a model is excluded from cache break detection. Currently any
 * model whose name contains 'haiku' (haiku has different caching behavior).
 */
function isExcludedModel(model: string): boolean {
  return model.indexOf('haiku') !== -1
}
|
||||
|
||||
/**
 * Returns the tracking key for a querySource, or null if untracked.
 *
 * - 'compact' shares the same server-side cache as repl_main_thread (same
 *   cacheSafeParams), so it maps to the 'repl_main_thread' key.
 * - A source matching one of TRACKED_SOURCE_PREFIXES is keyed by the unique
 *   agentId when one is given, otherwise by the querySource itself. Keying
 *   by agentId isolates state per subagent and prevents false positive cache
 *   break notifications when several instances of the same agent type run
 *   concurrently.
 * - Everything else (speculation, session_memory, prompt_suggestion, etc.)
 *   is untracked: these are short-lived forked agents that run 1-3 turns
 *   with a fresh agentId each time, so there is nothing meaningful to
 *   compare against. Their cache metrics are still logged via
 *   tengu_api_success for analytics.
 */
function getTrackingKey(
  querySource: QuerySource,
  agentId?: AgentId,
): string | null {
  if (querySource === 'compact') return 'repl_main_thread'

  const isTracked = TRACKED_SOURCE_PREFIXES.some(prefix =>
    querySource.startsWith(prefix),
  )
  return isTracked ? (agentId || querySource) : null
}
|
||||
|
||||
/**
 * Returns a new array in which every entry that has a `cache_control`
 * property is replaced by a shallow copy without it. Entries that lack the
 * property are passed through as the same object reference. Neither the input
 * array nor its entries are mutated.
 */
function stripCacheControl(
  items: ReadonlyArray<Record<string, unknown>>,
): unknown[] {
  const withoutCacheControl = (entry: Record<string, unknown>): unknown => {
    if ('cache_control' in entry) {
      const { cache_control: _dropped, ...remaining } = entry
      return remaining
    }
    return entry
  }
  return items.map(entry => withoutCacheControl(entry))
}
|
||||
|
||||
/**
 * Hashes an arbitrary value to a number by serializing it with jsonStringify
 * and hashing the resulting string.
 *
 * Under Bun the built-in Bun.hash is used; when it yields a bigint, the value
 * is masked to its low 32 bits before being converted to a number. On non-Bun
 * runtimes (e.g. Node.js via npm global install) djb2Hash is used instead.
 */
function computeHash(data: unknown): number {
  const serialized = jsonStringify(data)
  if (typeof Bun === 'undefined') {
    // Fallback for runtimes without the Bun global.
    return djb2Hash(serialized)
  }
  const bunHash = Bun.hash(serialized)
  if (typeof bunHash === 'bigint') {
    return Number(bunHash & 0xffffffffn)
  }
  return bunHash
}
|
||||
|
||||
/**
 * Makes a tool name safe to report. MCP tool names are user-controlled
 * (server config) and may leak filepaths, so any name starting with 'mcp__'
 * collapses to 'mcp'. Built-in names are a fixed vocabulary and pass through
 * unchanged.
 */
function sanitizeToolName(name: string): string {
  if (name.startsWith('mcp__')) {
    return 'mcp'
  }
  return name
}
|
||||
|
||||
/**
 * Hashes each tool individually and returns the hashes keyed by tool name.
 * `names[i]` is taken as the name of `strippedTools[i]`; where that name is
 * missing the key falls back to `__idx_<i>`. If two tools share a name, the
 * later one's hash wins.
 */
function computePerToolHashes(
  strippedTools: ReadonlyArray<unknown>,
  names: string[],
): Record<string, number> {
  const perTool: Record<string, number> = {}
  for (const [index, tool] of strippedTools.entries()) {
    const key = names[index] ?? `__idx_${index}`
    perTool[key] = computeHash(tool)
  }
  return perTool
}
|
||||
|
||||
/** Sums the length of the `text` of every block in the system prompt. */
function getSystemCharCount(system: TextBlockParam[]): number {
  return system.reduce((sum, block) => sum + block.text.length, 0)
}
|
||||
|
||||
/**
 * Renders the model, the system prompt and the tool definitions as one
 * plain-text document; the cache break diff is produced from two such
 * renderings.
 *
 * System blocks are joined with blank lines. Each tool that has a `name` is
 * rendered as its name, description and JSON-serialized input_schema; a tool
 * without a name renders as 'unknown'. The rendered tools are sorted as
 * strings before being joined.
 */
function buildDiffableContent(
  system: TextBlockParam[],
  tools: BetaToolUnion[],
  model: string,
): string {
  const describeTool = (tool: BetaToolUnion): string => {
    if (!('name' in tool)) return 'unknown'
    const description = 'description' in tool ? tool.description : ''
    const inputSchema =
      'input_schema' in tool ? jsonStringify(tool.input_schema) : ''
    return `${tool.name}\n description: ${description}\n input_schema: ${inputSchema}`
  }

  const systemSection = system.map(block => block.text).join('\n\n')
  const toolSection = tools
    .map(tool => describeTool(tool))
    .sort()
    .join('\n\n')

  return [
    `Model: ${model}`,
    '=== System Prompt ===',
    systemSection,
    `=== Tools (${tools.length}) ===`,
    `${toolSection}\n`,
  ].join('\n\n')
}
|
||||
|
||||
/**
 * Extended tracking snapshot: everything that could affect the server-side
 * cache key and that can be observed from the client. Beyond the first four
 * fields everything is optional so call sites can add fields incrementally;
 * an undefined field compares as stable.
 */
export type PromptStateSnapshot = {
  /** System prompt blocks; hashed without cache_control, plus a separate hash of the cache_control values. */
  system: TextBlockParam[]
  /** Tool definitions; hashed as a whole and, when the whole changes, per tool. */
  toolSchemas: BetaToolUnion[]
  /** Origin of the query; together with agentId it selects the tracking key. */
  querySource: QuerySource
  model: string
  /** When given for a tracked source, tracking state is kept per agent. */
  agentId?: AgentId
  /** Treated as false when omitted. */
  fastMode?: boolean
  /** Treated as '' when omitted. */
  globalCacheStrategy?: string
  /** Compared after sorting, so order does not matter; treated as empty when omitted. */
  betas?: readonly string[]
  /** Treated as false when omitted. */
  autoModeActive?: boolean
  /** Treated as false when omitted. */
  isUsingOverage?: boolean
  /** Treated as false when omitted. */
  cachedMCEnabled?: boolean
  /** Compared by its string form; treated as '' when omitted. */
  effortValue?: string | number
  /** Compared by hash; an omitted value hashes as 0. */
  extraBodyParams?: unknown
}
|
||||
|
||||
/**
 * Phase 1 (pre-call): captures the prompt/tool state that is about to be sent
 * and works out what changed since the previous call for the same tracking
 * key.
 *
 * Does NOT fire events. The detected changes are stored on the tracking entry
 * as `pendingChanges` for phase 2 to use. The first call for a key only seeds
 * the entry, evicting the oldest entries once the map holds
 * MAX_TRACKED_SOURCES of them. Untracked query sources are ignored, and any
 * error is logged rather than thrown.
 */
export function recordPromptState(snapshot: PromptStateSnapshot): void {
  try {
    const {
      system,
      toolSchemas,
      querySource,
      model,
      agentId,
      fastMode,
      globalCacheStrategy = '',
      betas = [],
      autoModeActive = false,
      isUsingOverage = false,
      cachedMCEnabled = false,
      effortValue,
      extraBodyParams,
    } = snapshot

    const key = getTrackingKey(querySource, agentId)
    if (!key) return

    const asRecords = (blocks: unknown) =>
      blocks as ReadonlyArray<Record<string, unknown>>
    const strippedSystem = stripCacheControl(asRecords(system))
    const strippedTools = stripCacheControl(asRecords(toolSchemas))

    const systemHash = computeHash(strippedSystem)
    const toolsHash = computeHash(strippedTools)
    // The cache_control values of the system blocks get a hash of their own.
    // Scope flips (global↔org/none) and TTL flips (1h↔5m) leave the text
    // identical, so the stripped hash cannot see them.
    const cacheControlHash = computeHash(
      system.map(block =>
        'cache_control' in block ? block.cache_control : null,
      ),
    )
    const toolNames = toolSchemas.map(tool =>
      'name' in tool ? tool.name : 'unknown',
    )
    // Per-tool hashes are computed on demand: in the common case (tools
    // unchanged) this skips N extra jsonStringify calls.
    const computeToolHashes = () =>
      computePerToolHashes(strippedTools, toolNames)
    const systemCharCount = getSystemCharCount(system)
    const lazyDiffableContent = () =>
      buildDiffableContent(system, toolSchemas, model)
    const isFastMode = fastMode ?? false
    const sortedBetas = [...betas].sort()
    const effortStr = effortValue === undefined ? '' : String(effortValue)
    const extraBodyHash =
      extraBodyParams === undefined ? 0 : computeHash(extraBodyParams)

    // Fields stored identically whether the entry is being created or updated.
    const current = {
      systemHash,
      toolsHash,
      cacheControlHash,
      toolNames,
      systemCharCount,
      model,
      fastMode: isFastMode,
      globalCacheStrategy,
      betas: sortedBetas,
      autoModeActive,
      isUsingOverage,
      cachedMCEnabled,
      effortValue: effortStr,
      extraBodyHash,
    }

    const prev = previousStateBySource.get(key)

    if (!prev) {
      // Make room first: drop entries in insertion order while at capacity.
      while (previousStateBySource.size >= MAX_TRACKED_SOURCES) {
        const oldest = previousStateBySource.keys().next().value
        if (oldest !== undefined) previousStateBySource.delete(oldest)
      }

      previousStateBySource.set(key, {
        ...current,
        callCount: 1,
        pendingChanges: null,
        prevCacheReadTokens: null,
        cacheDeletionsPending: false,
        buildDiffableContent: lazyDiffableContent,
        perToolHashes: computeToolHashes(),
      })
      return
    }

    prev.callCount++

    const flags = {
      systemPromptChanged: systemHash !== prev.systemHash,
      toolSchemasChanged: toolsHash !== prev.toolsHash,
      modelChanged: model !== prev.model,
      fastModeChanged: isFastMode !== prev.fastMode,
      cacheControlChanged: cacheControlHash !== prev.cacheControlHash,
      globalCacheStrategyChanged:
        globalCacheStrategy !== prev.globalCacheStrategy,
      betasChanged:
        sortedBetas.length !== prev.betas.length ||
        sortedBetas.some((beta, i) => beta !== prev.betas[i]),
      autoModeChanged: autoModeActive !== prev.autoModeActive,
      overageChanged: isUsingOverage !== prev.isUsingOverage,
      cachedMCChanged: cachedMCEnabled !== prev.cachedMCEnabled,
      effortChanged: effortStr !== prev.effortValue,
      extraBodyChanged: extraBodyHash !== prev.extraBodyHash,
    }

    if (!Object.values(flags).some(Boolean)) {
      prev.pendingChanges = null
    } else {
      const prevToolSet = new Set(prev.toolNames)
      const newToolSet = new Set(toolNames)
      const prevBetaSet = new Set(prev.betas)
      const newBetaSet = new Set(sortedBetas)
      const addedTools = toolNames.filter(name => !prevToolSet.has(name))
      const removedTools = prev.toolNames.filter(
        name => !newToolSet.has(name),
      )

      // Tools present before and now whose individual hash differs.
      const changedToolSchemas: string[] = []
      if (flags.toolSchemasChanged) {
        const newHashes = computeToolHashes()
        for (const name of toolNames) {
          if (
            prevToolSet.has(name) &&
            newHashes[name] !== prev.perToolHashes[name]
          ) {
            changedToolSchemas.push(name)
          }
        }
        prev.perToolHashes = newHashes
      }

      prev.pendingChanges = {
        ...flags,
        addedToolCount: addedTools.length,
        removedToolCount: removedTools.length,
        addedTools,
        removedTools,
        changedToolSchemas,
        systemCharDelta: systemCharCount - prev.systemCharCount,
        previousModel: prev.model,
        newModel: model,
        prevGlobalCacheStrategy: prev.globalCacheStrategy,
        newGlobalCacheStrategy: globalCacheStrategy,
        addedBetas: sortedBetas.filter(beta => !prevBetaSet.has(beta)),
        removedBetas: prev.betas.filter(beta => !newBetaSet.has(beta)),
        prevEffortValue: prev.effortValue,
        newEffortValue: effortStr,
        // Captured before the entry is overwritten below, so it still renders
        // the previous prompt state.
        buildPrevDiffableContent: prev.buildDiffableContent,
      }
    }

    // Roll the entry forward to the state just recorded.
    Object.assign(prev, current)
    prev.buildDiffableContent = lazyDiffableContent
  } catch (e: unknown) {
    logError(e)
  }
}
|
||||
|
||||
/**
 * Phase 2 (post-call): inspects the cache token counts of an API response to
 * decide whether a cache break actually occurred and, if it did, uses the
 * pending changes recorded in phase 1 to explain why.
 *
 * A break is reported when cache read tokens dropped by more than 5% from the
 * previous call AND the absolute drop is at least MIN_CACHE_MISS_TOKENS.
 * Nothing is reported for untracked sources, excluded models, the first call
 * for a key (no baseline yet), or the call that follows a notified cache
 * deletion. A reported break emits the `tengu_prompt_cache_break` event,
 * writes a diff of the previous and current prompt state when phase 1 recorded
 * changes, and logs a summary at warn level. Errors are logged, not thrown.
 *
 * @param querySource Origin of the query; selects the tracking entry.
 * @param cacheReadTokens Cache read tokens of this response.
 * @param cacheCreationTokens Cache creation tokens of this response.
 * @param messages Messages of the conversation; the timestamp of the most
 *   recent assistant message is used to estimate the gap for TTL attribution.
 * @param agentId Optional agent id; selects a per-agent tracking entry.
 * @param requestId Opaque server-generated request id, forwarded to analytics.
 */
export async function checkResponseForCacheBreak(
  querySource: QuerySource,
  cacheReadTokens: number,
  cacheCreationTokens: number,
  messages: Message[],
  agentId?: AgentId,
  requestId?: string | null,
): Promise<void> {
  try {
    const key = getTrackingKey(querySource, agentId)
    const state = key ? previousStateBySource.get(key) : undefined
    if (!state) return

    // Excluded models (e.g., haiku) have different caching behavior.
    if (isExcludedModel(state.model)) return

    // The baseline always advances, even when this call is skipped below.
    const prevCacheRead = state.prevCacheReadTokens
    state.prevCacheReadTokens = cacheReadTokens

    // Time since the last call, for TTL detection: taken from the most recent
    // assistant message in the messages array (before the current response).
    const lastAssistantMessage = messages.findLast(m => m.type === 'assistant')
    let timeSinceLastAssistantMsg: number | null = null
    if (lastAssistantMessage) {
      timeSinceLastAssistantMsg =
        Date.now() - new Date(lastAssistantMessage.timestamp).getTime()
    }

    // First call for this key: there is no previous value to compare against.
    if (prevCacheRead === null) return

    const changes = state.pendingChanges

    // Cache deletions via cached microcompact intentionally shrink the cached
    // prefix, so the drop is expected. The baseline was already reset above;
    // do not flag a break, the remaining state is still valid.
    if (state.cacheDeletionsPending) {
      state.cacheDeletionsPending = false
      logForDebugging(
        `[PROMPT CACHE] cache deletion applied, cache read: ${prevCacheRead} → ${cacheReadTokens} (expected drop)`,
      )
      state.pendingChanges = null
      return
    }

    // Not a break unless cache read dropped >5% AND by at least the minimum
    // absolute amount.
    const tokenDrop = prevCacheRead - cacheReadTokens
    const dropIsNegligible =
      cacheReadTokens >= prevCacheRead * 0.95 ||
      tokenDrop < MIN_CACHE_MISS_TOKENS
    if (dropIsNegligible) {
      state.pendingChanges = null
      return
    }

    // Explanation assembled from the pending changes, if there are any.
    const parts: string[] = []
    if (changes) {
      if (changes.modelChanged) {
        parts.push(
          `model changed (${changes.previousModel} → ${changes.newModel})`,
        )
      }

      if (changes.systemPromptChanged) {
        const charDelta = changes.systemCharDelta
        let charInfo = ''
        if (charDelta > 0) {
          charInfo = ` (+${charDelta} chars)`
        } else if (charDelta !== 0) {
          charInfo = ` (${charDelta} chars)`
        }
        parts.push(`system prompt changed${charInfo}`)
      }

      if (changes.toolSchemasChanged) {
        const toolSetChanged =
          changes.addedToolCount > 0 || changes.removedToolCount > 0
        const toolDiff = toolSetChanged
          ? ` (+${changes.addedToolCount}/-${changes.removedToolCount} tools)`
          : ' (tool prompt/schema changed, same tool set)'
        parts.push(`tools changed${toolDiff}`)
      }

      if (changes.fastModeChanged) {
        parts.push('fast mode toggled')
      }

      if (changes.globalCacheStrategyChanged) {
        parts.push(
          `global cache strategy changed (${changes.prevGlobalCacheStrategy || 'none'} → ${changes.newGlobalCacheStrategy || 'none'})`,
        )
      }

      // Reported as a standalone cause only when nothing else explains it;
      // otherwise the scope/TTL flip is a consequence, not the root cause.
      const cacheControlIsRootCause =
        changes.cacheControlChanged &&
        !changes.globalCacheStrategyChanged &&
        !changes.systemPromptChanged
      if (cacheControlIsRootCause) {
        parts.push('cache_control changed (scope or TTL)')
      }

      if (changes.betasChanged) {
        const betaDiffs: string[] = []
        if (changes.addedBetas.length) {
          betaDiffs.push(`+${changes.addedBetas.join(',')}`)
        }
        if (changes.removedBetas.length) {
          betaDiffs.push(`-${changes.removedBetas.join(',')}`)
        }
        const betaInfo = betaDiffs.length ? ` (${betaDiffs.join(' ')})` : ''
        parts.push(`betas changed${betaInfo}`)
      }

      if (changes.autoModeChanged) {
        parts.push('auto mode toggled')
      }

      if (changes.overageChanged) {
        parts.push('overage state changed (TTL latched, no flip)')
      }

      if (changes.cachedMCChanged) {
        parts.push('cached microcompact toggled')
      }

      if (changes.effortChanged) {
        parts.push(
          `effort changed (${changes.prevEffortValue || 'default'} → ${changes.newEffortValue || 'default'})`,
        )
      }

      if (changes.extraBodyChanged) {
        parts.push('extra body params changed')
      }
    }

    // Does the time gap suggest a TTL expiration?
    const lastAssistantMsgOver5minAgo =
      timeSinceLastAssistantMsg !== null &&
      timeSinceLastAssistantMsg > CACHE_TTL_5MIN_MS
    const lastAssistantMsgOver1hAgo =
      timeSinceLastAssistantMsg !== null &&
      timeSinceLastAssistantMsg > CACHE_TTL_1HOUR_MS

    // Post PR #19823 BQ analysis (bq-queries/prompt-caching/cache_break_pr19823_analysis.sql):
    // when all client-side flags are false and the gap is under TTL, ~90% of
    // breaks are server-side routing/eviction or billed/inference
    // disagreement. Label accordingly instead of implying a CC bug hunt.
    let reason = 'unknown cause'
    if (parts.length > 0) {
      reason = parts.join(', ')
    } else if (lastAssistantMsgOver1hAgo) {
      reason = 'possible 1h TTL expiry (prompt unchanged)'
    } else if (lastAssistantMsgOver5minAgo) {
      reason = 'possible 5min TTL expiry (prompt unchanged)'
    } else if (timeSinceLastAssistantMsg !== null) {
      reason = 'likely server-side (prompt unchanged, <5min gap)'
    }

    // Tool names are sanitized before being reported: built-in names are a
    // fixed vocabulary, MCP tools collapse to 'mcp' (user-configured, could
    // leak paths).
    const sanitizedNames = (names: string[] | undefined): string =>
      (names ?? []).map(sanitizeToolName).join(',')

    logEvent('tengu_prompt_cache_break', {
      systemPromptChanged: changes?.systemPromptChanged ?? false,
      toolSchemasChanged: changes?.toolSchemasChanged ?? false,
      modelChanged: changes?.modelChanged ?? false,
      fastModeChanged: changes?.fastModeChanged ?? false,
      cacheControlChanged: changes?.cacheControlChanged ?? false,
      globalCacheStrategyChanged: changes?.globalCacheStrategyChanged ?? false,
      betasChanged: changes?.betasChanged ?? false,
      autoModeChanged: changes?.autoModeChanged ?? false,
      overageChanged: changes?.overageChanged ?? false,
      cachedMCChanged: changes?.cachedMCChanged ?? false,
      effortChanged: changes?.effortChanged ?? false,
      extraBodyChanged: changes?.extraBodyChanged ?? false,
      addedToolCount: changes?.addedToolCount ?? 0,
      removedToolCount: changes?.removedToolCount ?? 0,
      systemCharDelta: changes?.systemCharDelta ?? 0,
      addedTools: sanitizedNames(
        changes?.addedTools,
      ) as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
      removedTools: sanitizedNames(
        changes?.removedTools,
      ) as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
      changedToolSchemas: sanitizedNames(
        changes?.changedToolSchemas,
      ) as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
      // Beta header names and cache strategy are fixed enum-like values,
      // not code or filepaths. requestId is an opaque server-generated ID.
      addedBetas: (changes?.addedBetas ?? []).join(
        ',',
      ) as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
      removedBetas: (changes?.removedBetas ?? []).join(
        ',',
      ) as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
      prevGlobalCacheStrategy: (changes?.prevGlobalCacheStrategy ??
        '') as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
      newGlobalCacheStrategy: (changes?.newGlobalCacheStrategy ??
        '') as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
      callNumber: state.callCount,
      prevCacheReadTokens: prevCacheRead,
      cacheReadTokens,
      cacheCreationTokens,
      timeSinceLastAssistantMsg: timeSinceLastAssistantMsg ?? -1,
      lastAssistantMsgOver5minAgo,
      lastAssistantMsgOver1hAgo,
      requestId: (requestId ??
        '') as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
    })

    // Diff file for ant debugging via --debug. Its path goes into the summary
    // log so ants can find it (DevBar UI removed — event data flows reliably
    // to BQ for analytics).
    const diffPath = changes?.buildPrevDiffableContent
      ? await writeCacheBreakDiff(
          changes.buildPrevDiffableContent(),
          state.buildDiffableContent(),
        )
      : undefined

    const diffSuffix = diffPath ? `, diff: ${diffPath}` : ''
    const summary = `[PROMPT CACHE BREAK] ${reason} [source=${querySource}, call #${state.callCount}, cache read: ${prevCacheRead} → ${cacheReadTokens}, creation: ${cacheCreationTokens}${diffSuffix}]`

    logForDebugging(summary, { level: 'warn' })

    state.pendingChanges = null
  } catch (e: unknown) {
    logError(e)
  }
}
|
||||
|
||||
/**
 * Call when cached microcompact sends cache_edits deletions.
 *
 * Marks the tracking entry (if one exists for this source/agent) so that the
 * lower cache read tokens of the next API response are treated as expected
 * rather than as a cache break. Does nothing for untracked sources.
 */
export function notifyCacheDeletion(
  querySource: QuerySource,
  agentId?: AgentId,
): void {
  const key = getTrackingKey(querySource, agentId)
  if (!key) return
  const state = previousStateBySource.get(key)
  if (!state) return
  state.cacheDeletionsPending = true
}
|
||||
|
||||
/**
 * Call after compaction to reset the cache read baseline.
 *
 * Compaction legitimately reduces the message count, so cache read tokens
 * drop naturally on the next call. Clearing the baseline makes that call
 * count as a first call instead of a break. Does nothing for untracked
 * sources or when no entry exists yet.
 */
export function notifyCompaction(
  querySource: QuerySource,
  agentId?: AgentId,
): void {
  const key = getTrackingKey(querySource, agentId)
  if (!key) return
  const state = previousStateBySource.get(key)
  if (!state) return
  state.prevCacheReadTokens = null
}
|
||||
|
||||
/**
 * Drops the tracking entry stored under the given agentId, if there is one.
 * Tracked sources that carry an agentId are keyed by that id.
 */
export function cleanupAgentTracking(agentId: AgentId): void {
  previousStateBySource.delete(agentId)
}
|
||||
|
||||
/** Discards all tracking state, for every source and agent. */
export function resetPromptCacheBreakDetection(): void {
  previousStateBySource.clear()
}
|
||||
|
||||
/**
 * Writes a patch (built with createPatch, labelled 'before'/'after') between
 * two renderings of the prompt state to the cache break diff path. The Claude
 * temp directory is created first if it does not exist.
 *
 * Best-effort: returns the path that was written, or undefined if any step
 * fails.
 */
async function writeCacheBreakDiff(
  prevContent: string,
  newContent: string,
): Promise<string | undefined> {
  try {
    const targetPath = getCacheBreakDiffPath()
    await mkdir(getClaudeTempDir(), { recursive: true })
    await writeFile(
      targetPath,
      createPatch('prompt-state', prevContent, newContent, 'before', 'after'),
    )
    return targetPath
  } catch {
    return undefined
  }
}
|
||||
@@ -0,0 +1,281 @@
|
||||
import axios from 'axios'
|
||||
import { getOauthConfig } from '../../constants/oauth.js'
|
||||
import {
|
||||
getOauthAccountInfo,
|
||||
getSubscriptionType,
|
||||
isClaudeAISubscriber,
|
||||
} from '../../utils/auth.js'
|
||||
import { getGlobalConfig, saveGlobalConfig } from '../../utils/config.js'
|
||||
import { logForDebugging } from '../../utils/debug.js'
|
||||
import { logError } from '../../utils/log.js'
|
||||
import { isEssentialTrafficOnly } from '../../utils/privacyLevel.js'
|
||||
import { getOAuthHeaders, prepareApiRequest } from '../../utils/teleport/api.js'
|
||||
import type {
|
||||
ReferralCampaign,
|
||||
ReferralEligibilityResponse,
|
||||
ReferralRedemptionsResponse,
|
||||
ReferrerRewardInfo,
|
||||
} from '../oauth/types.js'
|
||||
|
||||
// How long a cached eligibility entry counts as fresh: 24 hours (eligibility
// changes only on subscription/experiment changes).
const CACHE_EXPIRATION_MS = 24 * 60 * 60_000

// The eligibility fetch that is currently in flight, if any. Shared between
// concurrent callers to prevent duplicate API calls.
let fetchInProgress: Promise<ReferralEligibilityResponse | null> | null = null
|
||||
|
||||
/**
 * Fetches the referral eligibility of the current organization for a
 * campaign.
 *
 * Sends an authenticated GET to the organization's `referral/eligibility`
 * endpoint with a 5 second timeout (this call is used for background
 * fetches).
 *
 * @param campaign Campaign to query; defaults to 'claude_code_guest_pass'.
 * @returns The eligibility payload returned by the API.
 * @throws Propagates any error raised by prepareApiRequest or by the axios
 *   request; nothing is caught here.
 */
export async function fetchReferralEligibility(
  campaign: ReferralCampaign = 'claude_code_guest_pass',
): Promise<ReferralEligibilityResponse> {
  const { accessToken, orgUUID } = await prepareApiRequest()

  const headers = {
    ...getOAuthHeaders(accessToken),
    'x-organization-uuid': orgUUID,
  }

  const url = `${getOauthConfig().BASE_API_URL}/api/oauth/organizations/${orgUUID}/referral/eligibility`

  // Type the response the same way fetchReferralRedemptions does, so that
  // `data` is checked against the declared return type instead of being `any`.
  const response = await axios.get<ReferralEligibilityResponse>(url, {
    headers,
    params: { campaign },
    timeout: 5000, // 5 second timeout for background fetch
  })

  return response.data
}
|
||||
|
||||
/**
 * Fetches the referral redemptions of the current organization for a
 * campaign.
 *
 * Sends an authenticated GET to the organization's `referral/redemptions`
 * endpoint with a 10 second timeout.
 *
 * @param campaign Campaign to query; defaults to 'claude_code_guest_pass'.
 * @returns The redemptions payload returned by the API.
 * @throws Propagates any error raised by prepareApiRequest or by the axios
 *   request; nothing is caught here.
 */
export async function fetchReferralRedemptions(
  campaign: string = 'claude_code_guest_pass',
): Promise<ReferralRedemptionsResponse> {
  const { accessToken, orgUUID } = await prepareApiRequest()

  const authHeaders = {
    ...getOAuthHeaders(accessToken),
    'x-organization-uuid': orgUUID,
  }
  const endpoint = `${getOauthConfig().BASE_API_URL}/api/oauth/organizations/${orgUUID}/referral/redemptions`

  const { data } = await axios.get<ReferralRedemptionsResponse>(endpoint, {
    headers: authHeaders,
    params: { campaign },
    timeout: 10000, // 10 second timeout
  })
  return data
}
|
||||
|
||||
/**
 * Prechecks for whether the user can access the guest passes feature: the
 * account must have an organization UUID, be a Claude AI subscriber, and have
 * the 'max' subscription type.
 */
function shouldCheckForPasses(): boolean {
  if (!getOauthAccountInfo()?.organizationUuid) {
    return false
  }
  if (!isClaudeAISubscriber()) {
    return false
  }
  return getSubscriptionType() === 'max'
}
|
||||
|
||||
/**
 * Checks the cached passes eligibility in GlobalConfig and reports both the
 * cached state and the status of the cache. Never touches the network.
 *
 * - Prechecks fail or no organization: not eligible, no refresh needed.
 * - No cached entry for the organization: not eligible, refresh needed.
 * - Cached entry: its `eligible` value; refresh needed once the entry is
 *   older than CACHE_EXPIRATION_MS.
 */
export function checkCachedPassesEligibility(): {
  eligible: boolean
  needsRefresh: boolean
  hasCache: boolean
} {
  const unavailable = { eligible: false, needsRefresh: false, hasCache: false }

  if (!shouldCheckForPasses()) {
    return unavailable
  }

  const orgId = getOauthAccountInfo()?.organizationUuid
  if (!orgId) {
    return unavailable
  }

  const cachedEntry = getGlobalConfig().passesEligibilityCache?.[orgId]
  if (!cachedEntry) {
    // Nothing cached yet, so a fetch is needed.
    return { eligible: false, needsRefresh: true, hasCache: false }
  }

  const ageMs = Date.now() - cachedEntry.timestamp
  return {
    eligible: cachedEntry.eligible,
    needsRefresh: ageMs > CACHE_EXPIRATION_MS,
    hasCache: true,
  }
}
|
||||
|
||||
/** Display symbols for the currency codes that have a dedicated symbol. */
const CURRENCY_SYMBOLS: Record<string, string> = {
  USD: '$',
  EUR: '€',
  GBP: '£',
  BRL: 'R$',
  CAD: 'CA$',
  AUD: 'A$',
  NZD: 'NZ$',
  SGD: 'S$',
}

/**
 * Formats a referrer reward as a human-readable credit amount.
 *
 * The amount is `amount_minor_units / 100`, shown without decimals when it is
 * a whole number and with two decimals otherwise. It is prefixed with the
 * symbol from CURRENCY_SYMBOLS, or with the currency code followed by a space
 * when the code has no symbol (e.g. 'JPY 1').
 *
 * @param reward Reward whose `currency` and `amount_minor_units` are formatted.
 * @returns The formatted amount, e.g. '$5' or '€10.50'.
 */
export function formatCreditAmount(reward: ReferrerRewardInfo): string {
  // Own-property check: a plain lookup would resolve codes such as
  // 'constructor' or 'toString' to members inherited from Object.prototype,
  // bypassing the fallback and rendering a function into the output.
  const hasSymbol = Object.prototype.hasOwnProperty.call(
    CURRENCY_SYMBOLS,
    reward.currency,
  )
  const symbol = hasSymbol
    ? CURRENCY_SYMBOLS[reward.currency]
    : `${reward.currency} `
  const amount = reward.amount_minor_units / 100
  const formatted = Number.isInteger(amount)
    ? amount.toString()
    : amount.toFixed(2)
  return `${symbol}${formatted}`
}
|
||||
|
||||
/**
 * Reads the referrer reward info from the cached eligibility entry of the
 * current organization.
 *
 * Returns null when there is no organization, no cached entry, or no reward
 * on the entry. (Per the original note, a reward is present when the user is
 * in a v1 campaign.)
 */
export function getCachedReferrerReward(): ReferrerRewardInfo | null {
  const orgId = getOauthAccountInfo()?.organizationUuid
  if (!orgId) {
    return null
  }
  const entry = getGlobalConfig().passesEligibilityCache?.[orgId]
  return entry?.referrer_reward ?? null
}
|
||||
|
||||
/**
 * Reads the remaining passes count from the cached eligibility entry of the
 * current organization.
 *
 * Returns null when there is no organization, no cached entry, or no count
 * on the entry.
 */
export function getCachedRemainingPasses(): number | null {
  const orgId = getOauthAccountInfo()?.organizationUuid
  if (!orgId) {
    return null
  }
  const entry = getGlobalConfig().passesEligibilityCache?.[orgId]
  return entry?.remaining_passes ?? null
}
|
||||
|
||||
/**
 * Fetches passes eligibility and stores it, with a timestamp, in GlobalConfig
 * under the current organization.
 *
 * Concurrent callers share one request: while a fetch is in flight its
 * promise is returned instead of starting another.
 *
 * @returns The fetched response, or null when there is no organization or the
 *   fetch/store fails (the failure is logged, not thrown).
 */
export async function fetchAndStorePassesEligibility(): Promise<ReferralEligibilityResponse | null> {
  if (fetchInProgress) {
    logForDebugging('Passes: Reusing in-flight eligibility fetch')
    return fetchInProgress
  }

  const orgId = getOauthAccountInfo()?.organizationUuid
  if (!orgId) {
    return null
  }

  const fetchAndCache =
    async (): Promise<ReferralEligibilityResponse | null> => {
      try {
        const response = await fetchReferralEligibility()
        const entryToCache = { ...response, timestamp: Date.now() }

        saveGlobalConfig(current => ({
          ...current,
          passesEligibilityCache: {
            ...current.passesEligibilityCache,
            [orgId]: entryToCache,
          },
        }))

        logForDebugging(
          `Passes eligibility cached for org ${orgId}: ${response.eligible}`,
        )
        return response
      } catch (error) {
        logForDebugging('Failed to fetch and cache passes eligibility')
        logError(error as Error)
        return null
      } finally {
        // Whatever the outcome, the next call may start a new fetch.
        fetchInProgress = null
      }
    }

  // Publish the promise so concurrent calls can share it.
  fetchInProgress = fetchAndCache()
  return fetchInProgress
}
|
||||
|
||||
/**
 * Returns cached passes eligibility data, fetching in the background when
 * needed. Main entry point for all eligibility checks.
 *
 * This function never blocks on the network:
 * - no cache (cold start): starts a background fetch and returns null, so the
 *   passes command is unavailable until the next session;
 * - stale cache: returns the stale data and starts a background refresh;
 * - fresh cache: returns it immediately.
 *
 * Returns null as well when the prechecks fail or there is no organization.
 * The cache entry's `timestamp` is not part of the returned response.
 */
export async function getCachedOrFetchPassesEligibility(): Promise<ReferralEligibilityResponse | null> {
  if (!shouldCheckForPasses()) {
    return null
  }

  const orgId = getOauthAccountInfo()?.organizationUuid
  if (!orgId) {
    return null
  }

  const cachedEntry = getGlobalConfig().passesEligibilityCache?.[orgId]
  const now = Date.now()

  if (!cachedEntry) {
    logForDebugging(
      'Passes: No cache, fetching eligibility in background (command unavailable this session)',
    )
    void fetchAndStorePassesEligibility()
    return null
  }

  const isStale = now - cachedEntry.timestamp > CACHE_EXPIRATION_MS
  if (isStale) {
    logForDebugging(
      'Passes: Cache stale, returning cached data and refreshing in background',
    )
    void fetchAndStorePassesEligibility() // Background refresh
  } else {
    logForDebugging('Passes: Using fresh cached eligibility data')
  }

  // Stale or fresh, the caller gets the cached entry minus its timestamp.
  const { timestamp: _cachedAt, ...response } = cachedEntry
  return response as ReferralEligibilityResponse
}
|
||||
|
||||
/**
 * Prefetches passes eligibility on startup. The lookup is started without
 * being awaited, and is skipped entirely when nonessential traffic is
 * disabled.
 */
export async function prefetchPassesEligibility(): Promise<void> {
  if (!isEssentialTrafficOnly()) {
    void getCachedOrFetchPassesEligibility()
  }
}
|
||||
@@ -0,0 +1,514 @@
|
||||
import axios, { type AxiosError } from 'axios'
|
||||
import type { UUID } from 'crypto'
|
||||
import { getOauthConfig } from '../../constants/oauth.js'
|
||||
import type { Entry, TranscriptMessage } from '../../types/logs.js'
|
||||
import { logForDebugging } from '../../utils/debug.js'
|
||||
import { logForDiagnosticsNoPII } from '../../utils/diagLogs.js'
|
||||
import { isEnvTruthy } from '../../utils/envUtils.js'
|
||||
import { logError } from '../../utils/log.js'
|
||||
import { sequential } from '../../utils/sequential.js'
|
||||
import { getSessionIngressAuthToken } from '../../utils/sessionIngressAuth.js'
|
||||
import { sleep } from '../../utils/sleep.js'
|
||||
import { jsonStringify } from '../../utils/slowOperations.js'
|
||||
import { getOAuthHeaders } from '../../utils/teleport/api.js'
|
||||
|
||||
/**
 * Shape this module assumes for the body of an error response from the
 * session endpoints. Every field is optional; `error.message` is read when
 * reporting a 409 conflict.
 */
interface SessionIngressError {
  error?: {
    /** Human-readable description supplied by the server, if any */
    message?: string
    /** Server-supplied error category, if any */
    type?: string
  }
}
|
||||
|
||||
// Module-level state: UUID of the most recent entry recorded per session ID.
// Sent back to the server as the Last-Uuid header on the next append.
const lastUuidMap = new Map<string, UUID>()

// Retry policy for appends: number of attempts and the first backoff delay
const MAX_RETRIES = 10
const BASE_DELAY_MS = 500

// Signature shared by every per-session serialized append function
type SequentialAppend = (
  entry: TranscriptMessage,
  url: string,
  headers: Record<string, string>,
) => Promise<boolean>

// Per-session sequential wrappers to prevent concurrent log writes
const sequentialAppendBySession = new Map<string, SequentialAppend>()
|
||||
|
||||
/**
 * Returns the serialized append function for a session, creating and caching
 * it on first use.
 *
 * The function is built with sequential(), so appends for one session are
 * processed one at a time; each call delegates to appendSessionLogImpl.
 */
function getOrCreateSequentialAppend(sessionId: string) {
  let appendFn = sequentialAppendBySession.get(sessionId)
  if (appendFn === undefined) {
    appendFn = sequential(
      async (
        entry: TranscriptMessage,
        url: string,
        headers: Record<string, string>,
      ) => appendSessionLogImpl(sessionId, entry, url, headers),
    )
    sequentialAppendBySession.set(sessionId, appendFn)
  }
  return appendFn
}
|
||||
|
||||
/**
 * Internal implementation of appendSessionLog with retry logic.
 *
 * Makes up to MAX_RETRIES PUT attempts. Failed attempts are separated by an
 * exponential backoff (BASE_DELAY_MS doubling per attempt, capped at 8000ms).
 *
 * Behavior per response:
 * - 200/201: remember entry.uuid as the session's last UUID; return true.
 * - 409: if the server's x-last-uuid header equals entry.uuid, the entry was
 *   already stored (return true). Otherwise adopt the server's last UUID —
 *   from the header, or by re-fetching the session — and retry right away
 *   without backoff. Return false when no last UUID can be determined.
 * - 401: return false immediately.
 * - Any other status below 500 (429, etc.): log, then retry after backoff.
 * - Thrown errors (network failures, 5xx): log, then retry after backoff.
 *
 * @param sessionId - Key into lastUuidMap for this session
 * @param entry - Transcript entry sent as the PUT body
 * @param url - Endpoint to PUT to (also re-fetched on a header-less 409)
 * @param headers - Base request headers; Last-Uuid is added to a copy
 * @returns true once the entry is known to be stored, false otherwise
 */
async function appendSessionLogImpl(
  sessionId: string,
  entry: TranscriptMessage,
  url: string,
  headers: Record<string, string>,
): Promise<boolean> {
  let attempt = 0
  while (attempt < MAX_RETRIES) {
    attempt += 1

    try {
      // Optimistic concurrency: tell the server which entry we think is last
      const knownLastUuid = lastUuidMap.get(sessionId)
      const requestHeaders: Record<string, string> = knownLastUuid
        ? { ...headers, 'Last-Uuid': knownLastUuid }
        : { ...headers }

      // 5xx rejects (handled in catch); everything below 500 resolves
      const response = await axios.put(url, entry, {
        headers: requestHeaders,
        validateStatus: code => code < 500,
      })

      switch (response.status) {
        case 200:
        case 201:
          lastUuidMap.set(sessionId, entry.uuid)
          logForDebugging(
            `Successfully persisted session log entry for session ${sessionId}`,
          )
          return true

        case 409: {
          const serverLastUuid = response.headers['x-last-uuid']

          // The server's head is our entry: an earlier attempt stored it but
          // we never saw the success, so lastUuidMap was stale.
          if (serverLastUuid === entry.uuid) {
            lastUuidMap.set(sessionId, entry.uuid)
            logForDebugging(
              `Session entry ${entry.uuid} already present on server, recovering from stale state`,
            )
            logForDiagnosticsNoPII('info', 'session_persist_recovered_from_409')
            return true
          }

          // Another writer (e.g. an in-flight request from a killed process)
          // advanced the chain. Adopt the server's head and try again.
          if (serverLastUuid) {
            lastUuidMap.set(sessionId, serverLastUuid as UUID)
            logForDebugging(
              `Session 409: adopting server lastUuid=${serverLastUuid} from header, retrying entry ${entry.uuid}`,
            )
          } else {
            // No x-last-uuid header (e.g. v1 endpoint): re-fetch the session
            // to discover the current head of the append chain.
            const logs = await fetchSessionLogsFromUrl(sessionId, url, headers)
            const adoptedUuid = findLastUuid(logs)
            if (!adoptedUuid) {
              // Can't determine server state — give up
              const errorData = response.data as SessionIngressError
              const errorMessage =
                errorData.error?.message || 'Concurrent modification detected'
              logError(
                new Error(
                  `Session persistence conflict: UUID mismatch for session ${sessionId}, entry ${entry.uuid}. ${errorMessage}`,
                ),
              )
              logForDiagnosticsNoPII(
                'error',
                'session_persist_fail_concurrent_modification',
              )
              return false
            }
            lastUuidMap.set(sessionId, adoptedUuid)
            logForDebugging(
              `Session 409: re-fetched ${logs!.length} entries, adopting lastUuid=${adoptedUuid}, retrying entry ${entry.uuid}`,
            )
          }

          logForDiagnosticsNoPII('info', 'session_persist_409_adopt_server_uuid')
          continue // retry immediately with the updated lastUuid (no backoff)
        }

        case 401:
          logForDebugging('Session token expired or invalid')
          logForDiagnosticsNoPII('error', 'session_persist_fail_bad_token')
          return false // Non-retryable

        default:
          break
      }

      // Other 4xx (429, etc.) - retryable
      logForDebugging(
        `Failed to persist session log: ${response.status} ${response.statusText}`,
      )
      logForDiagnosticsNoPII('error', 'session_persist_fail_status', {
        status: response.status,
        attempt,
      })
    } catch (error) {
      // Network errors, 5xx - retryable
      const axiosError = error as AxiosError<SessionIngressError>
      logError(new Error(`Error persisting session log: ${axiosError.message}`))
      logForDiagnosticsNoPII('error', 'session_persist_fail_status', {
        status: axiosError.status,
        attempt,
      })
    }

    const outOfAttempts = attempt === MAX_RETRIES
    if (outOfAttempts) {
      logForDebugging(`Remote persistence failed after ${MAX_RETRIES} attempts`)
      logForDiagnosticsNoPII(
        'error',
        'session_persist_error_retries_exhausted',
        { attempt },
      )
      return false
    }

    // Exponential backoff: 500ms, 1s, 2s, 4s, then capped at 8s
    const delayMs = Math.min(BASE_DELAY_MS * 2 ** (attempt - 1), 8000)
    logForDebugging(
      `Remote persistence attempt ${attempt}/${MAX_RETRIES} failed, retrying in ${delayMs}ms…`,
    )
    await sleep(delayMs)
  }

  // Reached when the last attempt ended in a 409 `continue`
  return false
}
|
||||
|
||||
/**
 * Append a log entry to the session, authenticating with the session
 * ingress token (sent as a Bearer token).
 *
 * Appends for the same session run one at a time through the per-session
 * sequential wrapper; the Last-Uuid optimistic-concurrency header is added
 * further down, in appendSessionLogImpl.
 *
 * @returns false right away when no session token is available; otherwise
 *   the result of the serialized append.
 */
export async function appendSessionLog(
  sessionId: string,
  entry: TranscriptMessage,
  url: string,
): Promise<boolean> {
  const sessionToken = getSessionIngressAuthToken()
  if (sessionToken) {
    const headers: Record<string, string> = {
      Authorization: `Bearer ${sessionToken}`,
      'Content-Type': 'application/json',
    }
    return getOrCreateSequentialAppend(sessionId)(entry, url, headers)
  }

  logForDebugging('No session token available for session persistence')
  logForDiagnosticsNoPII('error', 'session_persist_fail_jwt_no_token')
  return false
}
|
||||
|
||||
/**
 * Get all session logs for hydration, authenticating with the session
 * ingress token.
 *
 * As a side effect, records the UUID of the last uuid-bearing entry in
 * lastUuidMap so the next append sends a matching Last-Uuid header. The
 * lookup goes through findLastUuid, so trailing entries that have no uuid
 * are skipped, the same way the 409 recovery path in appendSessionLogImpl
 * discovers the head of the chain.
 *
 * @param sessionId - Session whose logs are fetched; also the lastUuidMap key
 * @param url - Endpoint to fetch the logs from
 * @returns The fetched entries, or null when no token is available or the
 *   fetch fails
 */
export async function getSessionLogs(
  sessionId: string,
  url: string,
): Promise<Entry[] | null> {
  const sessionToken = getSessionIngressAuthToken()
  if (!sessionToken) {
    logForDebugging('No session token available for fetching session logs')
    logForDiagnosticsNoPII('error', 'session_get_fail_no_token')
    return null
  }

  const headers = { Authorization: `Bearer ${sessionToken}` }
  const logs = await fetchSessionLogsFromUrl(sessionId, url, headers)

  // findLastUuid tolerates null/empty input and returns undefined for it
  const lastUuid = findLastUuid(logs)
  if (lastUuid) {
    lastUuidMap.set(sessionId, lastUuid)
  }

  return logs
}
|
||||
|
||||
/**
 * Get all session logs for hydration via OAuth.
 * Used for teleporting sessions from the Sessions API.
 *
 * Unlike getSessionLogs, this authenticates with OAuth headers plus the
 * organization UUID and does not touch lastUuidMap.
 *
 * @returns The fetched entries, or null when the fetch fails
 */
export async function getSessionLogsViaOAuth(
  sessionId: string,
  accessToken: string,
  orgUUID: string,
): Promise<Entry[] | null> {
  const endpoint = `${getOauthConfig().BASE_API_URL}/v1/session_ingress/session/${sessionId}`
  logForDebugging(`[session-ingress] Fetching session logs from: ${endpoint}`)

  const oauthHeaders = {
    ...getOAuthHeaders(accessToken),
    'x-organization-uuid': orgUUID,
  }
  return fetchSessionLogsFromUrl(sessionId, endpoint, oauthHeaders)
}
|
||||
|
||||
/**
 * Response shape from GET /v1/code/sessions/{id}/teleport-events.
 *
 * Each event's payload IS the Entry (TranscriptMessage struct): the CLI
 * writes it via AddWorkerEvent, the server stores it opaquely, and it is
 * read back here unchanged.
 */
type TeleportEventsResponse = {
  data: {
    event_id: string
    event_type: string
    is_compaction: boolean
    /** The stored Entry, or null when the server has no payload to return */
    payload: Entry | null
    created_at: string
  }[]
  /**
   * Cursor for the next page. Unset when there are no more pages — this IS
   * the end-of-stream signal (there is no separate has_more field).
   */
  next_cursor?: string
}
|
||||
|
||||
/**
 * Get worker events (transcript) via the CCR v2 Sessions API. Replaces
 * getSessionLogsViaOAuth once session-ingress is retired.
 *
 * The server dispatches per-session: Spanner for v2-native sessions,
 * threadstore for pre-backfill session_* IDs. The cursor is opaque to us —
 * echo it back until next_cursor is unset.
 *
 * Paginated: each request asks for 1000 events and at most 100 pages are
 * fetched. When that cap is reached with more pages still pending, the
 * truncated transcript is returned and the condition is logged.
 *
 * @param sessionId - Session whose events are fetched
 * @param accessToken - OAuth access token used to build the request headers
 * @param orgUUID - Sent as the x-organization-uuid header
 * @returns All non-empty payloads in page order. null when the request
 *   fails, the first page is a 404, the status is unexpected, or the body
 *   has an invalid shape. A 404 after the first page returns what was
 *   collected so far.
 * @throws Error when the server responds 401 (session expired)
 */
export async function getTeleportEvents(
  sessionId: string,
  accessToken: string,
  orgUUID: string,
): Promise<Entry[] | null> {
  const baseUrl = `${getOauthConfig().BASE_API_URL}/v1/code/sessions/${sessionId}/teleport-events`
  const headers = {
    ...getOAuthHeaders(accessToken),
    'x-organization-uuid': orgUUID,
  }

  logForDebugging(`[teleport] Fetching events from: ${baseUrl}`)

  const all: Entry[] = []
  let cursor: string | undefined
  let pages = 0
  // True only when the server signalled end-of-stream. Distinguishes "the
  // final page happened to be page maxPages" from "stopped at the cap with
  // more data pending", which a bare `pages >= maxPages` check cannot do.
  let reachedEnd = false

  // Infinite-loop guard: 1000/page × 100 pages = 100k events. Larger than
  // session-ingress's 50k one-shot. If we hit this, something's wrong
  // (server not advancing cursor) — bail rather than hang.
  const maxPages = 100

  while (pages < maxPages) {
    const params: Record<string, string | number> = { limit: 1000 }
    if (cursor !== undefined) {
      params.cursor = cursor
    }

    let response
    try {
      response = await axios.get<TeleportEventsResponse>(baseUrl, {
        headers,
        params,
        timeout: 20000,
        validateStatus: status => status < 500,
      })
    } catch (e) {
      const err = e as AxiosError
      logError(new Error(`Teleport events fetch failed: ${err.message}`))
      logForDiagnosticsNoPII('error', 'teleport_events_fetch_fail')
      return null
    }

    if (response.status === 404) {
      // 404 on page 0 is ambiguous during the migration window:
      //   (a) Session genuinely not found (not in Spanner AND not in
      //       threadstore) — nothing to fetch.
      //   (b) Route-level 404: endpoint not deployed yet, or session is
      //       a threadstore session not yet backfilled into Spanner.
      // We can't tell them apart from the response alone. Returning null
      // lets the caller fall back to session-ingress, which will correctly
      // return empty for case (a) and data for case (b). Once the backfill
      // is complete and session-ingress is gone, the fallback also returns
      // null → same "Failed to fetch session logs" error as today.
      //
      // 404 mid-pagination (pages > 0) means session was deleted between
      // pages — return what we have.
      logForDebugging(
        `[teleport] Session ${sessionId} not found (page ${pages})`,
      )
      logForDiagnosticsNoPII('warn', 'teleport_events_not_found')
      return pages === 0 ? null : all
    }

    if (response.status === 401) {
      logForDiagnosticsNoPII('error', 'teleport_events_bad_token')
      throw new Error(
        'Your session has expired. Please run /login to sign in again.',
      )
    }

    if (response.status !== 200) {
      logError(
        new Error(
          `Teleport events returned ${response.status}: ${jsonStringify(response.data)}`,
        ),
      )
      logForDiagnosticsNoPII('error', 'teleport_events_bad_status')
      return null
    }

    const { data, next_cursor } = response.data
    if (!Array.isArray(data)) {
      logError(
        new Error(
          `Teleport events invalid response shape: ${jsonStringify(response.data)}`,
        ),
      )
      logForDiagnosticsNoPII('error', 'teleport_events_invalid_shape')
      return null
    }

    // payload IS the Entry. A missing payload happens for threadstore
    // non-generic events (server skips them) or encryption failures — skip
    // here too. `!= null` also skips an omitted field, so `undefined` never
    // ends up in the result (same reasoning as the next_cursor check below).
    for (const ev of data) {
      if (ev.payload != null) {
        all.push(ev.payload)
      }
    }

    pages++
    // == null covers both `null` and `undefined` — the proto omits the
    // field at end-of-stream, but some serializers emit `null`. Strict
    // `=== undefined` would loop forever on `null` (cursor=null in query
    // params stringifies to "null", which the server rejects or echoes).
    if (next_cursor == null) {
      reachedEnd = true
      break
    }
    cursor = next_cursor
  }

  if (!reachedEnd) {
    // The loop ran out of pages while the server still had a next_cursor.
    // Don't fail — return what we have. Better to teleport with a
    // truncated transcript than not at all.
    logError(
      new Error(`Teleport events hit page cap (${maxPages}) for ${sessionId}`),
    )
    logForDiagnosticsNoPII('warn', 'teleport_events_page_cap')
  }

  logForDebugging(
    `[teleport] Fetched ${all.length} events over ${pages} page(s) for ${sessionId}`,
  )
  return all
}
|
||||
|
||||
/**
 * Shared implementation for fetching session logs from a URL.
 *
 * Only the HTTP request itself is wrapped in try/catch. The status handling
 * runs outside it so that the 401 "session expired" error reaches the
 * caller; inside the try it would be caught by this function's own handler
 * and turned into a null result.
 *
 * @param sessionId - Used in log messages only
 * @param url - Endpoint to GET
 * @param headers - Request headers (authentication is the caller's job)
 * @returns The `loglines` array on 200, an empty array on 404, and null on
 *   a request failure, an unexpected status, or a malformed body
 * @throws Error when the server responds 401 (token expired or invalid)
 */
async function fetchSessionLogsFromUrl(
  sessionId: string,
  url: string,
  headers: Record<string, string>,
): Promise<Entry[] | null> {
  let response
  try {
    response = await axios.get(url, {
      headers,
      timeout: 20000,
      // 5xx rejects and lands in the catch below; lower statuses resolve
      validateStatus: status => status < 500,
      params: isEnvTruthy(process.env.CLAUDE_AFTER_LAST_COMPACT)
        ? { after_last_compact: true }
        : undefined,
    })
  } catch (error) {
    const axiosError = error as AxiosError<SessionIngressError>
    logError(new Error(`Error fetching session logs: ${axiosError.message}`))
    logForDiagnosticsNoPII('error', 'session_get_fail_status', {
      status: axiosError.status,
    })
    return null
  }

  if (response.status === 200) {
    const data = response.data

    // Validate the response structure
    if (!data || typeof data !== 'object' || !Array.isArray(data.loglines)) {
      logError(
        new Error(
          `Invalid session logs response format: ${jsonStringify(data)}`,
        ),
      )
      logForDiagnosticsNoPII('error', 'session_get_fail_invalid_response')
      return null
    }

    const logs = data.loglines as Entry[]
    logForDebugging(
      `Fetched ${logs.length} session logs for session ${sessionId}`,
    )
    return logs
  }

  if (response.status === 404) {
    // Nothing stored yet for this session: an empty log, not a failure
    logForDebugging(`No existing logs for session ${sessionId}`)
    logForDiagnosticsNoPII('warn', 'session_get_no_logs_for_session')
    return []
  }

  if (response.status === 401) {
    logForDebugging('Auth token expired or invalid')
    logForDiagnosticsNoPII('error', 'session_get_fail_bad_token')
    throw new Error(
      'Your session has expired. Please run /login to sign in again.',
    )
  }

  logForDebugging(
    `Failed to fetch session logs: ${response.status} ${response.statusText}`,
  )
  logForDiagnosticsNoPII('error', 'session_get_fail_status', {
    status: response.status,
  })
  return null
}
|
||||
|
||||
/**
 * Finds the uuid of the last entry that has a non-empty one.
 * Some entry types (SummaryMessage, TagMessage) don't carry a uuid, so the
 * final element of the array is not always the answer.
 *
 * @returns That uuid, or undefined when logs is null/empty or no entry has
 *   a truthy uuid
 */
function findLastUuid(logs: Entry[] | null): UUID | undefined {
  if (!logs) {
    return undefined
  }

  // Forward scan that keeps overwriting: the value left at the end belongs
  // to the last matching entry.
  let lastSeen: UUID | undefined
  for (const candidate of logs) {
    if ('uuid' in candidate && candidate.uuid) {
      lastSeen = candidate.uuid as UUID
    }
  }
  return lastSeen
}
|
||||
|
||||
/**
 * Clear cached state for a session: its remembered last UUID and its
 * sequential append wrapper.
 */
export function clearSession(sessionId: string): void {
  for (const store of [lastUuidMap, sequentialAppendBySession]) {
    store.delete(sessionId)
  }
}
|
||||
|
||||
/**
 * Clear all cached session state (all sessions): every remembered last UUID
 * and every sequential append wrapper.
 * Use this on /clear to free sub-agent session entries.
 */
export function clearAllSessions(): void {
  for (const store of [lastUuidMap, sequentialAppendBySession]) {
    store.clear()
  }
}
|
||||
@@ -0,0 +1,38 @@
|
||||
import axios from 'axios'
|
||||
import { getOauthConfig } from '../../constants/oauth.js'
|
||||
import { isClaudeAISubscriber } from '../../utils/auth.js'
|
||||
import { logForDebugging } from '../../utils/debug.js'
|
||||
import { getOAuthHeaders, prepareApiRequest } from '../../utils/teleport/api.js'
|
||||
|
||||
/**
 * Body returned by GET /v1/ultrareview/quota.
 * NOTE(review): field meanings are taken from their names only — confirm
 * against the server contract.
 */
export type UltrareviewQuotaResponse = {
  /** Reviews already consumed */
  reviews_used: number
  /** Total reviews allowed */
  reviews_limit: number
  /** Reviews still available */
  reviews_remaining: number
  /** Presumably true once usage is past the limit — TODO confirm */
  is_overage: boolean
}
|
||||
|
||||
/**
 * Peek the ultrareview quota for display and nudge decisions. Consume
 * happens server-side at session creation.
 *
 * @returns The quota, or null when the user is not a subscriber or anything
 *   in the request path throws (the failure is only debug-logged)
 */
export async function fetchUltrareviewQuota(): Promise<UltrareviewQuotaResponse | null> {
  if (isClaudeAISubscriber()) {
    try {
      const { accessToken, orgUUID } = await prepareApiRequest()
      const quotaUrl = `${getOauthConfig().BASE_API_URL}/v1/ultrareview/quota`
      const requestHeaders = {
        ...getOAuthHeaders(accessToken),
        'x-organization-uuid': orgUUID,
      }
      const { data } = await axios.get<UltrareviewQuotaResponse>(quotaUrl, {
        headers: requestHeaders,
        timeout: 5000,
      })
      return data
    } catch (error) {
      // Best-effort: fall through to the null result below
      logForDebugging(`fetchUltrareviewQuota failed: ${error}`)
    }
  }
  return null
}
|
||||
@@ -0,0 +1,63 @@
|
||||
import axios from 'axios'
|
||||
import { getOauthConfig } from '../../constants/oauth.js'
|
||||
import {
|
||||
getClaudeAIOAuthTokens,
|
||||
hasProfileScope,
|
||||
isClaudeAISubscriber,
|
||||
} from '../../utils/auth.js'
|
||||
import { getAuthHeaders } from '../../utils/http.js'
|
||||
import { getClaudeCodeUserAgent } from '../../utils/userAgent.js'
|
||||
import { isOAuthTokenExpired } from '../oauth/client.js'
|
||||
|
||||
/** Usage of a single rate-limit window as reported by the usage endpoint. */
export type RateLimit = {
  /** A percentage from 0 to 100, or null */
  utilization: number | null
  /** ISO 8601 timestamp, or null */
  resets_at: string | null
}
|
||||
|
||||
/**
 * Extra-usage section of the usage response.
 * NOTE(review): units of the numeric fields are not visible here — confirm
 * against the server contract.
 */
export type ExtraUsage = {
  is_enabled: boolean
  monthly_limit: number | null
  used_credits: number | null
  utilization: number | null
}
|
||||
|
||||
/**
 * Body returned by GET /api/oauth/usage. Every section is optional and may
 * also be null.
 */
export type Utilization = {
  five_hour?: RateLimit | null
  seven_day?: RateLimit | null
  seven_day_oauth_apps?: RateLimit | null
  seven_day_opus?: RateLimit | null
  seven_day_sonnet?: RateLimit | null
  extra_usage?: ExtraUsage | null
}
|
||||
|
||||
/**
 * Fetch the current usage figures from /api/oauth/usage.
 *
 * @returns
 * - `{}` when the user is not a Claude AI subscriber or lacks profile scope
 * - `null` when OAuth tokens exist but are expired (the call is skipped to
 *   avoid a 401)
 * - otherwise the response body
 * @throws Error when auth headers cannot be built; request failures from
 *   axios also propagate, since nothing here catches them
 */
export async function fetchUtilization(): Promise<Utilization | null> {
  const eligible = isClaudeAISubscriber() && hasProfileScope()
  if (!eligible) {
    return {}
  }

  // Skip API call if OAuth token is expired to avoid 401 errors
  const tokens = getClaudeAIOAuthTokens()
  const tokenExpired = tokens && isOAuthTokenExpired(tokens.expiresAt)
  if (tokenExpired) {
    return null
  }

  const authResult = getAuthHeaders()
  if (authResult.error) {
    throw new Error(`Auth error: ${authResult.error}`)
  }

  const requestHeaders = {
    'Content-Type': 'application/json',
    'User-Agent': getClaudeCodeUserAgent(),
    ...authResult.headers,
  }
  const usageUrl = `${getOauthConfig().BASE_API_URL}/api/oauth/usage`

  const { data } = await axios.get<Utilization>(usageUrl, {
    headers: requestHeaders,
    timeout: 5000, // 5 second timeout
  })
  return data
}
|
||||
@@ -0,0 +1,822 @@
|
||||
import { feature } from 'bun:bundle'
|
||||
import type Anthropic from '@anthropic-ai/sdk'
|
||||
import {
|
||||
APIConnectionError,
|
||||
APIError,
|
||||
APIUserAbortError,
|
||||
} from '@anthropic-ai/sdk'
|
||||
import type { QuerySource } from 'src/constants/querySource.js'
|
||||
import type { SystemAPIErrorMessage } from 'src/types/message.js'
|
||||
import { isAwsCredentialsProviderError } from 'src/utils/aws.js'
|
||||
import { logForDebugging } from 'src/utils/debug.js'
|
||||
import { logError } from 'src/utils/log.js'
|
||||
import { createSystemAPIErrorMessage } from 'src/utils/messages.js'
|
||||
import { getAPIProviderForStatsig } from 'src/utils/model/providers.js'
|
||||
import {
|
||||
clearApiKeyHelperCache,
|
||||
clearAwsCredentialsCache,
|
||||
clearGcpCredentialsCache,
|
||||
getClaudeAIOAuthTokens,
|
||||
handleOAuth401Error,
|
||||
isClaudeAISubscriber,
|
||||
isEnterpriseSubscriber,
|
||||
} from '../../utils/auth.js'
|
||||
import { isEnvTruthy } from '../../utils/envUtils.js'
|
||||
import { errorMessage } from '../../utils/errors.js'
|
||||
import {
|
||||
type CooldownReason,
|
||||
handleFastModeOverageRejection,
|
||||
handleFastModeRejectedByAPI,
|
||||
isFastModeCooldown,
|
||||
isFastModeEnabled,
|
||||
triggerFastModeCooldown,
|
||||
} from '../../utils/fastMode.js'
|
||||
import { isNonCustomOpusModel } from '../../utils/model/model.js'
|
||||
import { disableKeepAlive } from '../../utils/proxy.js'
|
||||
import { sleep } from '../../utils/sleep.js'
|
||||
import type { ThinkingConfig } from '../../utils/thinking.js'
|
||||
import { getFeatureValue_CACHED_MAY_BE_STALE } from '../analytics/growthbook.js'
|
||||
import {
|
||||
type AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
|
||||
logEvent,
|
||||
} from '../analytics/index.js'
|
||||
import {
|
||||
checkMockRateLimitError,
|
||||
isMockRateLimitError,
|
||||
} from '../rateLimitMocking.js'
|
||||
import { REPEATED_529_ERROR_MESSAGE } from './errors.js'
|
||||
import { extractConnectionErrorDetails } from './errorUtils.js'
|
||||
|
||||
/** Factory for the error thrown when a wait is interrupted by the abort signal. */
function abortError(): APIUserAbortError {
  return new APIUserAbortError()
}

// Retry tuning.
// NOTE(review): DEFAULT_MAX_RETRIES and FLOOR_OUTPUT_TOKENS are consumed
// outside the part of the file reviewed here — meanings inferred from names.
const DEFAULT_MAX_RETRIES = 10
const FLOOR_OUTPUT_TOKENS = 3000
// Consecutive 529s tolerated before the fallback path is considered
const MAX_529_RETRIES = 3
export const BASE_DELAY_MS = 500
|
||||
|
||||
// Query sources whose result the user is actively waiting on. Only these
// are retried on a 529. Everything else (summaries, titles, suggestions,
// classifiers) gives up at once: during a capacity cascade every retry
// multiplies gateway load 3-10×, and those failures are invisible to the
// user anyway. A new source is NOT retried unless it is added here — add it
// only if the user blocks on its result.
const FOREGROUND_529_RETRY_SOURCES = new Set<QuerySource>([
  // Main REPL thread and its output-style variants
  'repl_main_thread',
  'repl_main_thread:outputStyle:custom',
  'repl_main_thread:outputStyle:Explanatory',
  'repl_main_thread:outputStyle:Learning',
  'sdk',
  // Agents
  'agent:custom',
  'agent:default',
  'agent:builtin',
  'compact',
  'hook_agent',
  'hook_prompt',
  'verification_agent',
  'side_question',
  // Security classifiers — must complete for auto-mode correctness.
  // yoloClassifier.ts uses 'auto_mode' (not 'yolo_classifier' — that's
  // type-only). bash_classifier is ant-only; it is feature-gated so the
  // string tree-shakes out of external builds (excluded-strings.txt).
  'auto_mode',
  ...(feature('BASH_CLASSIFIER') ? (['bash_classifier'] as const) : []),
])
|
||||
|
||||
/**
 * Whether a 529 for this query source should be retried.
 *
 * @returns true for untagged calls (undefined source — the conservative
 *   choice) and for sources listed in FOREGROUND_529_RETRY_SOURCES
 */
function shouldRetry529(querySource: QuerySource | undefined): boolean {
  if (querySource === undefined) {
    return true
  }
  return FOREGROUND_529_RETRY_SOURCES.has(querySource)
}
|
||||
|
||||
// CLAUDE_CODE_UNATTENDED_RETRY: for unattended sessions (ant-only). Retries
// 429/529 indefinitely with a higher backoff, yielding periodic keep-alive
// messages so the host environment does not mark the session idle mid-wait.
// TODO(ANT-344): the keep-alive via SystemAPIErrorMessage yields is a stopgap
// until there's a dedicated keep-alive channel.
const PERSISTENT_MAX_BACKOFF_MS = 5 * 60_000 // 5 minutes
const PERSISTENT_RESET_CAP_MS = 6 * 60 * 60_000 // 6 hours
const HEARTBEAT_INTERVAL_MS = 30_000 // 30 seconds
|
||||
|
||||
/**
 * Whether persistent (unattended) retry mode is on.
 *
 * Requires both the UNATTENDED_RETRY build feature and a truthy
 * CLAUDE_CODE_UNATTENDED_RETRY environment variable.
 */
function isPersistentRetryEnabled(): boolean {
  // feature() is called directly in the condition, as in the original ternary
  if (feature('UNATTENDED_RETRY')) {
    return isEnvTruthy(process.env.CLAUDE_CODE_UNATTENDED_RETRY)
  }
  return false
}
|
||||
|
||||
/**
 * Whether the error is a capacity-type failure: anything is529Error()
 * accepts, or an APIError with status 429.
 */
function isTransientCapacityError(error: unknown): boolean {
  if (is529Error(error)) {
    return true
  }
  return error instanceof APIError && error.status === 429
}
|
||||
|
||||
/**
 * Whether the error is an APIConnectionError whose extracted details carry
 * the code ECONNRESET or EPIPE.
 */
function isStaleConnectionError(error: unknown): boolean {
  if (error instanceof APIConnectionError) {
    const code = extractConnectionErrorDetails(error)?.code
    return code === 'ECONNRESET' || code === 'EPIPE'
  }
  return false
}
|
||||
|
||||
/**
 * Per-call state handed to the operation on every attempt. withRetry
 * mutates it between attempts (for example, switching fastMode off after a
 * fast-mode rejection).
 */
export interface RetryContext {
  // NOTE(review): set outside the part of the file reviewed here
  maxTokensOverride?: number
  model: string
  thinkingConfig: ThinkingConfig
  // Copied from the options only when fast mode is enabled
  fastMode?: boolean
}
|
||||
|
||||
/** Caller-supplied configuration for withRetry. */
interface RetryOptions {
  maxRetries?: number
  model: string
  fallbackModel?: string
  thinkingConfig: ThinkingConfig
  fastMode?: boolean
  /** Checked before each attempt; an aborted signal ends the loop */
  signal?: AbortSignal
  /** Decides whether 529s are retried (see shouldRetry529) */
  querySource?: QuerySource
  /**
   * Starting value for the consecutive-529 counter. Pass it when this retry
   * loop is the non-streaming fallback after a streaming 529: that first
   * 529 should count toward MAX_529_RETRIES, so the number of 529s before
   * fallback is the same whichever request mode hit the overload.
   */
  initialConsecutive529Errors?: number
}
|
||||
|
||||
/**
 * Wraps an error that will not be retried, together with the retry context
 * in effect when it occurred. The message and, when available, the stack
 * trace are taken from the wrapped error.
 */
export class CannotRetryError extends Error {
  public readonly originalError: unknown
  public readonly retryContext: RetryContext

  constructor(originalError: unknown, retryContext: RetryContext) {
    super(errorMessage(originalError))
    this.originalError = originalError
    this.retryContext = retryContext
    // NOTE(review): the name is 'RetryError', not the class name. Kept as-is
    // because consumers may match on this string — confirm before changing.
    this.name = 'RetryError'

    // Preserve the original stack trace if available
    if (originalError instanceof Error && originalError.stack) {
      this.stack = originalError.stack
    }
  }
}
|
||||
|
||||
/**
 * Signals that the request should be re-issued with the fallback model.
 * Carries both model names so the handler knows what to switch from and to.
 */
export class FallbackTriggeredError extends Error {
  public readonly originalModel: string
  public readonly fallbackModel: string

  constructor(originalModel: string, fallbackModel: string) {
    super(`Model fallback triggered: ${originalModel} -> ${fallbackModel}`)
    this.originalModel = originalModel
    this.fallbackModel = fallbackModel
    this.name = 'FallbackTriggeredError'
  }
}
|
||||
|
||||
export async function* withRetry<T>(
|
||||
getClient: () => Promise<Anthropic>,
|
||||
operation: (
|
||||
client: Anthropic,
|
||||
attempt: number,
|
||||
context: RetryContext,
|
||||
) => Promise<T>,
|
||||
options: RetryOptions,
|
||||
): AsyncGenerator<SystemAPIErrorMessage, T> {
|
||||
const maxRetries = getMaxRetries(options)
|
||||
const retryContext: RetryContext = {
|
||||
model: options.model,
|
||||
thinkingConfig: options.thinkingConfig,
|
||||
...(isFastModeEnabled() && { fastMode: options.fastMode }),
|
||||
}
|
||||
let client: Anthropic | null = null
|
||||
let consecutive529Errors = options.initialConsecutive529Errors ?? 0
|
||||
let lastError: unknown
|
||||
let persistentAttempt = 0
|
||||
for (let attempt = 1; attempt <= maxRetries + 1; attempt++) {
|
||||
if (options.signal?.aborted) {
|
||||
throw new APIUserAbortError()
|
||||
}
|
||||
|
||||
// Capture whether fast mode is active before this attempt
|
||||
// (fallback may change the state mid-loop)
|
||||
const wasFastModeActive = isFastModeEnabled()
|
||||
? retryContext.fastMode && !isFastModeCooldown()
|
||||
: false
|
||||
|
||||
try {
|
||||
// Check for mock rate limits (used by /mock-limits command for Ant employees)
|
||||
if (process.env.USER_TYPE === 'ant') {
|
||||
const mockError = checkMockRateLimitError(
|
||||
retryContext.model,
|
||||
wasFastModeActive,
|
||||
)
|
||||
if (mockError) {
|
||||
throw mockError
|
||||
}
|
||||
}
|
||||
|
||||
// Get a fresh client instance on first attempt or after authentication errors
|
||||
// - 401 for first-party API authentication failures
|
||||
// - 403 "OAuth token has been revoked" (another process refreshed the token)
|
||||
// - Bedrock-specific auth errors (403 or CredentialsProviderError)
|
||||
// - Vertex-specific auth errors (credential refresh failures, 401)
|
||||
// - ECONNRESET/EPIPE: stale keep-alive socket; disable pooling and reconnect
|
||||
const isStaleConnection = isStaleConnectionError(lastError)
|
||||
if (
|
||||
isStaleConnection &&
|
||||
getFeatureValue_CACHED_MAY_BE_STALE(
|
||||
'tengu_disable_keepalive_on_econnreset',
|
||||
false,
|
||||
)
|
||||
) {
|
||||
logForDebugging(
|
||||
'Stale connection (ECONNRESET/EPIPE) — disabling keep-alive for retry',
|
||||
)
|
||||
disableKeepAlive()
|
||||
}
|
||||
|
||||
if (
|
||||
client === null ||
|
||||
(lastError instanceof APIError && lastError.status === 401) ||
|
||||
isOAuthTokenRevokedError(lastError) ||
|
||||
isBedrockAuthError(lastError) ||
|
||||
isVertexAuthError(lastError) ||
|
||||
isStaleConnection
|
||||
) {
|
||||
// On 401 "token expired" or 403 "token revoked", force a token refresh
|
||||
if (
|
||||
(lastError instanceof APIError && lastError.status === 401) ||
|
||||
isOAuthTokenRevokedError(lastError)
|
||||
) {
|
||||
const failedAccessToken = getClaudeAIOAuthTokens()?.accessToken
|
||||
if (failedAccessToken) {
|
||||
await handleOAuth401Error(failedAccessToken)
|
||||
}
|
||||
}
|
||||
client = await getClient()
|
||||
}
|
||||
|
||||
return await operation(client, attempt, retryContext)
|
||||
} catch (error) {
|
||||
lastError = error
|
||||
logForDebugging(
|
||||
`API error (attempt ${attempt}/${maxRetries + 1}): ${error instanceof APIError ? `${error.status} ${error.message}` : errorMessage(error)}`,
|
||||
{ level: 'error' },
|
||||
)
|
||||
|
||||
// Fast mode fallback: on 429/529, either wait and retry (short delays)
|
||||
// or fall back to standard speed (long delays) to avoid cache thrashing.
|
||||
// Skip in persistent mode: the short-retry path below loops with fast
|
||||
// mode still active, so its `continue` never reaches the attempt clamp
|
||||
// and the for-loop terminates. Persistent sessions want the chunked
|
||||
// keep-alive path instead of fast-mode cache-preservation anyway.
|
||||
if (
|
||||
wasFastModeActive &&
|
||||
!isPersistentRetryEnabled() &&
|
||||
error instanceof APIError &&
|
||||
(error.status === 429 || is529Error(error))
|
||||
) {
|
||||
// If the 429 is specifically because extra usage (overage) is not
|
||||
// available, permanently disable fast mode with a specific message.
|
||||
const overageReason = error.headers?.get(
|
||||
'anthropic-ratelimit-unified-overage-disabled-reason',
|
||||
)
|
||||
if (overageReason !== null && overageReason !== undefined) {
|
||||
handleFastModeOverageRejection(overageReason)
|
||||
retryContext.fastMode = false
|
||||
continue
|
||||
}
|
||||
|
||||
const retryAfterMs = getRetryAfterMs(error)
|
||||
if (retryAfterMs !== null && retryAfterMs < SHORT_RETRY_THRESHOLD_MS) {
|
||||
// Short retry-after: wait and retry with fast mode still active
|
||||
// to preserve prompt cache (same model name on retry).
|
||||
await sleep(retryAfterMs, options.signal, { abortError })
|
||||
continue
|
||||
}
|
||||
// Long or unknown retry-after: enter cooldown (switches to standard
|
||||
// speed model), with a minimum floor to avoid flip-flopping.
|
||||
const cooldownMs = Math.max(
|
||||
retryAfterMs ?? DEFAULT_FAST_MODE_FALLBACK_HOLD_MS,
|
||||
MIN_COOLDOWN_MS,
|
||||
)
|
||||
const cooldownReason: CooldownReason = is529Error(error)
|
||||
? 'overloaded'
|
||||
: 'rate_limit'
|
||||
triggerFastModeCooldown(Date.now() + cooldownMs, cooldownReason)
|
||||
if (isFastModeEnabled()) {
|
||||
retryContext.fastMode = false
|
||||
}
|
||||
continue
|
||||
}
|
||||
|
||||
// Fast mode fallback: if the API rejects the fast mode parameter
|
||||
// (e.g., org doesn't have fast mode enabled), permanently disable fast
|
||||
// mode and retry at standard speed.
|
||||
if (wasFastModeActive && isFastModeNotEnabledError(error)) {
|
||||
handleFastModeRejectedByAPI()
|
||||
retryContext.fastMode = false
|
||||
continue
|
||||
}
|
||||
|
||||
// Non-foreground sources bail immediately on 529 — no retry amplification
|
||||
// during capacity cascades. User never sees these fail.
|
||||
if (is529Error(error) && !shouldRetry529(options.querySource)) {
|
||||
logEvent('tengu_api_529_background_dropped', {
|
||||
query_source:
|
||||
options.querySource as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
|
||||
})
|
||||
throw new CannotRetryError(error, retryContext)
|
||||
}
|
||||
|
||||
// Track consecutive 529 errors
|
||||
if (
|
||||
is529Error(error) &&
|
||||
// If FALLBACK_FOR_ALL_PRIMARY_MODELS is not set, fall through only if the primary model is a non-custom Opus model.
|
||||
// TODO: Revisit if the isNonCustomOpusModel check should still exist, or if isNonCustomOpusModel is a stale artifact of when Claude Code was hardcoded on Opus.
|
||||
(process.env.FALLBACK_FOR_ALL_PRIMARY_MODELS ||
|
||||
(!isClaudeAISubscriber() && isNonCustomOpusModel(options.model)))
|
||||
) {
|
||||
consecutive529Errors++
|
||||
if (consecutive529Errors >= MAX_529_RETRIES) {
|
||||
// Check if fallback model is specified
|
||||
if (options.fallbackModel) {
|
||||
logEvent('tengu_api_opus_fallback_triggered', {
|
||||
original_model:
|
||||
options.model as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
|
||||
fallback_model:
|
||||
options.fallbackModel as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
|
||||
provider: getAPIProviderForStatsig(),
|
||||
})
|
||||
|
||||
// Throw special error to indicate fallback was triggered
|
||||
throw new FallbackTriggeredError(
|
||||
options.model,
|
||||
options.fallbackModel,
|
||||
)
|
||||
}
|
||||
|
||||
if (
|
||||
process.env.USER_TYPE === 'external' &&
|
||||
!process.env.IS_SANDBOX &&
|
||||
!isPersistentRetryEnabled()
|
||||
) {
|
||||
logEvent('tengu_api_custom_529_overloaded_error', {})
|
||||
throw new CannotRetryError(
|
||||
new Error(REPEATED_529_ERROR_MESSAGE),
|
||||
retryContext,
|
||||
)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Only retry if the error indicates we should
|
||||
const persistent =
|
||||
isPersistentRetryEnabled() && isTransientCapacityError(error)
|
||||
if (attempt > maxRetries && !persistent) {
|
||||
throw new CannotRetryError(error, retryContext)
|
||||
}
|
||||
|
||||
// AWS/GCP errors aren't always APIError, but can be retried
|
||||
const handledCloudAuthError =
|
||||
handleAwsCredentialError(error) || handleGcpCredentialError(error)
|
||||
if (
|
||||
!handledCloudAuthError &&
|
||||
(!(error instanceof APIError) || !shouldRetry(error))
|
||||
) {
|
||||
throw new CannotRetryError(error, retryContext)
|
||||
}
|
||||
|
||||
// Handle max tokens context overflow errors by adjusting max_tokens for the next attempt
|
||||
// NOTE: With extended-context-window beta, this 400 error should not occur.
|
||||
// The API now returns 'model_context_window_exceeded' stop_reason instead.
|
||||
// Keeping for backward compatibility.
|
||||
if (error instanceof APIError) {
|
||||
const overflowData = parseMaxTokensContextOverflowError(error)
|
||||
if (overflowData) {
|
||||
const { inputTokens, contextLimit } = overflowData
|
||||
|
||||
const safetyBuffer = 1000
|
||||
const availableContext = Math.max(
|
||||
0,
|
||||
contextLimit - inputTokens - safetyBuffer,
|
||||
)
|
||||
if (availableContext < FLOOR_OUTPUT_TOKENS) {
|
||||
logError(
|
||||
new Error(
|
||||
`availableContext ${availableContext} is less than FLOOR_OUTPUT_TOKENS ${FLOOR_OUTPUT_TOKENS}`,
|
||||
),
|
||||
)
|
||||
throw error
|
||||
}
|
||||
// Ensure we have enough tokens for thinking + at least 1 output token
|
||||
const minRequired =
|
||||
(retryContext.thinkingConfig.type === 'enabled'
|
||||
? retryContext.thinkingConfig.budgetTokens
|
||||
: 0) + 1
|
||||
const adjustedMaxTokens = Math.max(
|
||||
FLOOR_OUTPUT_TOKENS,
|
||||
availableContext,
|
||||
minRequired,
|
||||
)
|
||||
retryContext.maxTokensOverride = adjustedMaxTokens
|
||||
|
||||
logEvent('tengu_max_tokens_context_overflow_adjustment', {
|
||||
inputTokens,
|
||||
contextLimit,
|
||||
adjustedMaxTokens,
|
||||
attempt,
|
||||
})
|
||||
|
||||
continue
|
||||
}
|
||||
}
|
||||
|
||||
// For other errors, proceed with normal retry logic
|
||||
// Get retry-after header if available
|
||||
const retryAfter = getRetryAfter(error)
|
||||
let delayMs: number
|
||||
if (persistent && error instanceof APIError && error.status === 429) {
|
||||
persistentAttempt++
|
||||
// Window-based limits (e.g. 5hr Max/Pro) include a reset timestamp.
|
||||
// Wait until reset rather than polling every 5 min uselessly.
|
||||
const resetDelay = getRateLimitResetDelayMs(error)
|
||||
delayMs =
|
||||
resetDelay ??
|
||||
Math.min(
|
||||
getRetryDelay(
|
||||
persistentAttempt,
|
||||
retryAfter,
|
||||
PERSISTENT_MAX_BACKOFF_MS,
|
||||
),
|
||||
PERSISTENT_RESET_CAP_MS,
|
||||
)
|
||||
} else if (persistent) {
|
||||
persistentAttempt++
|
||||
// Retry-After is a server directive and bypasses maxDelayMs inside
|
||||
// getRetryDelay (intentional — honoring it is correct). Cap at the
|
||||
// 6hr reset-cap here so a pathological header can't wait unbounded.
|
||||
delayMs = Math.min(
|
||||
getRetryDelay(
|
||||
persistentAttempt,
|
||||
retryAfter,
|
||||
PERSISTENT_MAX_BACKOFF_MS,
|
||||
),
|
||||
PERSISTENT_RESET_CAP_MS,
|
||||
)
|
||||
} else {
|
||||
delayMs = getRetryDelay(attempt, retryAfter)
|
||||
}
|
||||
|
||||
// In persistent mode the for-loop `attempt` is clamped at maxRetries+1;
|
||||
// use persistentAttempt for telemetry/yields so they show the true count.
|
||||
const reportedAttempt = persistent ? persistentAttempt : attempt
|
||||
logEvent('tengu_api_retry', {
|
||||
attempt: reportedAttempt,
|
||||
delayMs: delayMs,
|
||||
error: (error as APIError)
|
||||
.message as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
|
||||
status: (error as APIError).status,
|
||||
provider: getAPIProviderForStatsig(),
|
||||
})
|
||||
|
||||
if (persistent) {
|
||||
if (delayMs > 60_000) {
|
||||
logEvent('tengu_api_persistent_retry_wait', {
|
||||
status: (error as APIError).status,
|
||||
delayMs,
|
||||
attempt: reportedAttempt,
|
||||
provider: getAPIProviderForStatsig(),
|
||||
})
|
||||
}
|
||||
// Chunk long sleeps so the host sees periodic stdout activity and
|
||||
// does not mark the session idle. Each yield surfaces as
|
||||
// {type:'system', subtype:'api_retry'} on stdout via QueryEngine.
|
||||
let remaining = delayMs
|
||||
while (remaining > 0) {
|
||||
if (options.signal?.aborted) throw new APIUserAbortError()
|
||||
if (error instanceof APIError) {
|
||||
yield createSystemAPIErrorMessage(
|
||||
error,
|
||||
remaining,
|
||||
reportedAttempt,
|
||||
maxRetries,
|
||||
)
|
||||
}
|
||||
const chunk = Math.min(remaining, HEARTBEAT_INTERVAL_MS)
|
||||
await sleep(chunk, options.signal, { abortError })
|
||||
remaining -= chunk
|
||||
}
|
||||
// Clamp so the for-loop never terminates. Backoff uses the separate
|
||||
// persistentAttempt counter which keeps growing to the 5-min cap.
|
||||
if (attempt >= maxRetries) attempt = maxRetries
|
||||
} else {
|
||||
if (error instanceof APIError) {
|
||||
yield createSystemAPIErrorMessage(error, delayMs, attempt, maxRetries)
|
||||
}
|
||||
await sleep(delayMs, options.signal, { abortError })
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
throw new CannotRetryError(lastError, retryContext)
|
||||
}
|
||||
|
||||
/**
 * Extracts the raw `retry-after` header value from a thrown error.
 *
 * Two header shapes are supported: a plain object keyed by header name, and
 * an object exposing a `get(name)` accessor. The plain-object lookup is tried
 * first; the accessor is consulted when that lookup yields nothing (or an
 * empty string).
 *
 * @param error - The thrown value. May be anything, including null/undefined.
 * @returns The header value, or null when the error carries no such header.
 */
function getRetryAfter(error: unknown): string | null {
  // `error` is `unknown`, so a null/undefined rejection must not crash here.
  const headers = (error as { headers?: unknown } | null | undefined)?.headers
  if (headers === null || headers === undefined) {
    return null
  }

  const direct = (headers as { 'retry-after'?: string })['retry-after']
  if (direct) {
    return direct
  }

  // Fall back to the accessor form; called as a method so `this` is preserved.
  const viaAccessor = (
    headers as { get?: (name: string) => string | null | undefined }
  ).get?.('retry-after')
  return viaAccessor ?? null
}
|
||||
|
||||
/**
 * Computes the wait before the next retry, in milliseconds.
 *
 * A `retry-after` header that parses as an integer number of seconds wins
 * outright and is NOT limited by `maxDelayMs`. Otherwise the delay is an
 * exponential backoff (`BASE_DELAY_MS * 2^(attempt - 1)`) capped at
 * `maxDelayMs`, plus up to 25% random jitter added on top of the capped value.
 *
 * @param attempt - 1-based attempt number driving the exponential backoff.
 * @param retryAfterHeader - Raw `retry-after` header value, if any.
 * @param maxDelayMs - Cap applied to the backoff before jitter is added.
 * @returns Delay in milliseconds.
 */
export function getRetryDelay(
  attempt: number,
  retryAfterHeader?: string | null,
  maxDelayMs = 32000,
): number {
  const headerSeconds = retryAfterHeader ? parseInt(retryAfterHeader, 10) : NaN
  if (!isNaN(headerSeconds)) {
    return headerSeconds * 1000
  }

  const cappedBackoff = Math.min(
    BASE_DELAY_MS * 2 ** (attempt - 1),
    maxDelayMs,
  )
  return cappedBackoff + Math.random() * 0.25 * cappedBackoff
}
|
||||
|
||||
/**
 * Parses the 400 error raised when input length plus `max_tokens` exceeds the
 * context limit.
 *
 * Expected message fragment:
 * "input length and `max_tokens` exceed context limit: 188059 + 20000 > 200000"
 *
 * @param error - The API error to inspect.
 * @returns The three numbers from the message, or undefined when the error is
 *   not a 400, has no message, or the message does not match the format.
 */
export function parseMaxTokensContextOverflowError(error: APIError):
  | {
      inputTokens: number
      maxTokens: number
      contextLimit: number
    }
  | undefined {
  const { status, message } = error
  if (status !== 400 || !message) {
    return undefined
  }

  // Cheap substring test before running the regex.
  const marker = 'input length and `max_tokens` exceed context limit'
  if (!message.includes(marker)) {
    return undefined
  }

  const match = message.match(
    /input length and `max_tokens` exceed context limit: (\d+) \+ (\d+) > (\d+)/,
  )
  if (!match || match.length !== 4) {
    return undefined
  }

  const [, rawInput, rawMax, rawLimit] = match
  if (!rawInput || !rawMax || !rawLimit) {
    logError(
      new Error(
        'Unable to parse max_tokens from max_tokens exceed context limit error message',
      ),
    )
    return undefined
  }

  const inputTokens = parseInt(rawInput, 10)
  const maxTokens = parseInt(rawMax, 10)
  const contextLimit = parseInt(rawLimit, 10)
  const anyUnparsed = [inputTokens, maxTokens, contextLimit].some(value =>
    isNaN(value),
  )
  return anyUnparsed ? undefined : { inputTokens, maxTokens, contextLimit }
}
|
||||
|
||||
// TODO: Switch to a response-header check once the API exposes a dedicated
// fast-mode rejection header (e.g., x-fast-mode-rejected). Matching on the
// error message text is fragile and breaks if the API wording changes.
/**
 * Reports whether an error is the API's 400 rejection stating that fast mode
 * is not enabled.
 *
 * @param error - Any thrown value.
 * @returns true only for an APIError with status 400 whose message contains
 *   "Fast mode is not enabled".
 */
function isFastModeNotEnabledError(error: unknown): boolean {
  if (error instanceof APIError && error.status === 400) {
    return error.message?.includes('Fast mode is not enabled') ?? false
  }
  return false
}
|
||||
|
||||
/**
 * Reports whether an error represents an "overloaded" (529) API response.
 *
 * @param error - Any thrown value.
 * @returns true for an APIError with status 529, or one whose message carries
 *   the `"type":"overloaded_error"` marker; false for anything else.
 */
export function is529Error(error: unknown): boolean {
  if (error instanceof APIError) {
    if (error.status === 529) {
      return true
    }
    // The SDK sometimes fails to pass the 529 status code through during
    // streaming, so the overloaded marker in the message is checked as well.
    return error.message?.includes('"type":"overloaded_error"') ?? false
  }
  return false
}
|
||||
|
||||
/**
 * Reports whether an error is the 403 response saying the OAuth token was
 * revoked.
 *
 * @param error - Any thrown value.
 * @returns true only for an APIError with status 403 whose message contains
 *   "OAuth token has been revoked".
 */
function isOAuthTokenRevokedError(error: unknown): boolean {
  if (!(error instanceof APIError) || error.status !== 403) {
    return false
  }
  return error.message?.includes('OAuth token has been revoked') ?? false
}
|
||||
|
||||
/**
 * Reports whether an error looks like an AWS credential failure while Bedrock
 * is in use (gated on the CLAUDE_CODE_USE_BEDROCK environment variable).
 *
 * @param error - Any thrown value.
 * @returns true when Bedrock is enabled and the error is either an AWS
 *   credentials-provider error or an APIError with status 403.
 */
function isBedrockAuthError(error: unknown): boolean {
  if (!isEnvTruthy(process.env.CLAUDE_CODE_USE_BEDROCK)) {
    return false
  }
  // AWS libs reject without making an API call when .aws holds a past
  // Expiration value.
  if (isAwsCredentialsProviderError(error)) {
    return true
  }
  // Otherwise, API calls made with expired tokens come back as a generic 403:
  // "The security token included in the request is invalid"
  return error instanceof APIError && error.status === 403
}
|
||||
|
||||
/**
 * Clears the AWS credentials cache when the error is a Bedrock auth failure.
 *
 * @param error - Any thrown value.
 * @returns true if the cache was cleared, false if nothing was done.
 */
function handleAwsCredentialError(error: unknown): boolean {
  const isAuthFailure = isBedrockAuthError(error)
  if (isAuthFailure) {
    clearAwsCredentialsCache()
  }
  return isAuthFailure
}
|
||||
|
||||
// google-auth-library throws a plain Error (there is no typed name like AWS's
// CredentialsProviderError), so common SDK-level credential-failure messages
// are matched by substring instead.
/**
 * Reports whether an error's message matches a known google-auth-library
 * credential failure.
 *
 * @param error - Any thrown value.
 * @returns true when `error` is an Error whose message contains one of the
 *   known credential-failure fragments.
 */
function isGoogleAuthLibraryCredentialError(error: unknown): boolean {
  if (!(error instanceof Error)) return false
  const text = error.message
  const credentialFailureFragments = [
    'Could not load the default credentials',
    'Could not refresh access token',
    'invalid_grant',
  ]
  return credentialFailureFragments.some(fragment => text.includes(fragment))
}
|
||||
|
||||
/**
 * Reports whether an error looks like a GCP credential failure while Vertex
 * is in use (gated on the CLAUDE_CODE_USE_VERTEX environment variable).
 *
 * @param error - Any thrown value.
 * @returns true when Vertex is enabled and the error is either a
 *   google-auth-library credential error or an APIError with status 401.
 */
function isVertexAuthError(error: unknown): boolean {
  if (!isEnvTruthy(process.env.CLAUDE_CODE_USE_VERTEX)) {
    return false
  }
  // SDK-level: google-auth-library fails in prepareOptions() before the HTTP
  // call is made.
  if (isGoogleAuthLibraryCredentialError(error)) {
    return true
  }
  // Server-side: Vertex answers 401 for expired/invalid tokens.
  return error instanceof APIError && error.status === 401
}
|
||||
|
||||
/**
 * Clears the GCP credentials cache when the error is a Vertex auth failure.
 *
 * @param error - Any thrown value.
 * @returns true if the cache was cleared, false if nothing was done.
 */
function handleGcpCredentialError(error: unknown): boolean {
  const isAuthFailure = isVertexAuthError(error)
  if (isAuthFailure) {
    clearGcpCredentialsCache()
  }
  return isAuthFailure
}
|
||||
|
||||
/**
 * Decides whether a failed API call is worth retrying.
 *
 * The checks run in a fixed order and the first one that matches wins, so
 * earlier rules deliberately override later ones (for example, the
 * persistent-mode and remote-mode rules bypass the `x-should-retry` header).
 *
 * Side effect: a 401 that reaches the status checks clears the API key helper
 * cache before reporting the error as retryable.
 *
 * @param error - The API error raised by the failed attempt.
 * @returns true when the caller should retry, false otherwise.
 */
function shouldRetry(error: APIError): boolean {
  // Mock errors come from the /mock-limits testing command: never retry them.
  if (isMockRateLimitError(error)) {
    return false
  }

  // Persistent mode: 429/529 are always retryable, bypassing the subscriber
  // gates and the x-should-retry header.
  const persistentCapacityRetry =
    isPersistentRetryEnabled() && isTransientCapacityError(error)
  if (persistentCapacityRetry) {
    return true
  }

  const { status } = error

  // CCR mode: auth uses infrastructure-provided JWTs, so a 401/403 is a
  // transient blip (auth service flap, network hiccup) rather than bad
  // credentials. This bypasses x-should-retry:false — the server assumes the
  // same bad key would be retried, but the key is fine.
  const isAuthStatus = status === 401 || status === 403
  if (isEnvTruthy(process.env.CLAUDE_CODE_REMOTE) && isAuthStatus) {
    return true
  }

  // Overloaded errors are detected from the message content first: the SDK
  // sometimes fails to pass the 529 status code through during streaming.
  if (error.message?.includes('"type":"overloaded_error"')) {
    return true
  }

  // Max-tokens context overflow errors can be handled by the retry loop.
  if (parseMaxTokensContextOverflowError(error)) {
    return true
  }

  // Rate-limit retries are pointless for ClaudeAI subscribers, except
  // Enterprise ones, who typically use PAYG instead of rate limits.
  // Kept lazy so the subscriber lookups only run when a rule needs them.
  const mayRetryRateLimits = (): boolean =>
    !isClaudeAISubscriber() || isEnterpriseSubscriber()

  // Note: x-should-retry is not a standard header.
  const retryDirective = error.headers?.get('x-should-retry')

  // If the server explicitly says to retry, obey — unless this is a Max/Pro
  // subscriber, for whom "retry" is true but only several hours from now.
  if (retryDirective === 'true' && mayRetryRateLimits()) {
    return true
  }

  // Ants may ignore x-should-retry: false for 5xx server errors only; for any
  // other status (401, 403, 400, 429, etc.) the header is respected.
  if (retryDirective === 'false') {
    const isServerFault = status !== undefined && status >= 500
    if (process.env.USER_TYPE !== 'ant' || !isServerFault) {
      return false
    }
  }

  if (error instanceof APIConnectionError) {
    return true
  }

  if (!status) {
    return false
  }

  switch (status) {
    case 408: // request timeout
    case 409: // lock timeout
      return true
    case 429: // rate limit
      return mayRetryRateLimits()
    case 401:
      // Clear the API key cache and allow a retry. OAuth token handling is
      // done in the main retry loop via handleOAuth401Error.
      clearApiKeyHelperCache()
      return true
  }

  // 403 "token revoked" is retried with the same refresh logic as a 401.
  if (isOAuthTokenRevokedError(error)) {
    return true
  }

  // Internal server errors are retryable; everything else is not.
  return status >= 500
}
|
||||
|
||||
/**
 * Returns the default retry budget.
 *
 * The CLAUDE_CODE_MAX_RETRIES environment variable overrides the built-in
 * default when it parses (base 10) to a non-negative integer. Anything else —
 * unset, empty, non-numeric, or negative — falls back to DEFAULT_MAX_RETRIES.
 *
 * @returns The maximum number of retries to use when the caller gives none.
 */
export function getDefaultMaxRetries(): number {
  const configured = process.env.CLAUDE_CODE_MAX_RETRIES
  if (configured) {
    const parsed = parseInt(configured, 10)
    // parseInt yields NaN for non-numeric text, and every numeric comparison
    // against NaN is false, so a typo must not leak out as the retry budget.
    // A negative budget is equally meaningless.
    if (!Number.isNaN(parsed) && parsed >= 0) {
      return parsed
    }
  }
  return DEFAULT_MAX_RETRIES
}
|
||||
/**
 * Resolves the retry budget for a call: the caller's explicit `maxRetries`
 * when it is set (including 0), otherwise the default budget.
 *
 * @param options - Retry options supplied by the caller.
 * @returns The maximum number of retries to use.
 */
function getMaxRetries(options: RetryOptions): number {
  const { maxRetries } = options
  return maxRetries ?? getDefaultMaxRetries()
}
|
||||
|
||||
// Fast-mode fallback timing, all in milliseconds.

// Cooldown used when a fast-mode 429/529 carries no retry-after value.
const DEFAULT_FAST_MODE_FALLBACK_HOLD_MS = 30 * 60_000 // 30 minutes

// A retry-after below this is waited out with fast mode still active.
const SHORT_RETRY_THRESHOLD_MS = 20_000 // 20 seconds

// Floor applied to every fast-mode cooldown to avoid flip-flopping.
const MIN_COOLDOWN_MS = 10 * 60_000 // 10 minutes
|
||||
|
||||
/**
 * Reads the error's `retry-after` header as a duration in milliseconds.
 *
 * @param error - The API error to inspect.
 * @returns The header's integer seconds value times 1000, or null when the
 *   header is missing, empty, or does not start with an integer.
 */
function getRetryAfterMs(error: APIError): number | null {
  const headerValue = getRetryAfter(error)
  if (!headerValue) {
    return null
  }
  const seconds = parseInt(headerValue, 10)
  return isNaN(seconds) ? null : seconds * 1000
}
|
||||
|
||||
/**
 * Computes how long to wait until the rate-limit window resets, based on the
 * `anthropic-ratelimit-unified-reset` header (a Unix timestamp in seconds).
 *
 * @param error - The API error to inspect.
 * @returns Milliseconds until the reset, capped at PERSISTENT_RESET_CAP_MS,
 *   or null when the header is missing, not a finite number, or not in the
 *   future.
 */
function getRateLimitResetDelayMs(error: APIError): number | null {
  const rawReset = error.headers?.get?.('anthropic-ratelimit-unified-reset')
  const resetEpochSec = rawReset ? Number(rawReset) : NaN
  if (!Number.isFinite(resetEpochSec)) {
    return null
  }
  const msUntilReset = resetEpochSec * 1000 - Date.now()
  return msUntilReset > 0
    ? Math.min(msUntilReset, PERSISTENT_RESET_CAP_MS)
    : null
}
|
||||
Reference in New Issue
Block a user