init claude-code

This commit is contained in:
2026-04-01 17:32:37 +02:00
commit 73b208c009
1902 changed files with 513237 additions and 0 deletions
+119
View File
@@ -0,0 +1,119 @@
import axios from 'axios'
import { getOauthConfig } from '../../constants/oauth.js'
import { getOAuthHeaders, prepareApiRequest } from '../../utils/teleport/api.js'
// Kinds of requests a non-admin member can file for their admin to act on.
export type AdminRequestType = 'limit_increase' | 'seat_upgrade'
// Lifecycle states an admin request moves through.
export type AdminRequestStatus = 'pending' | 'approved' | 'dismissed'
// Extra payload carried only by seat-upgrade requests.
export type AdminRequestSeatUpgradeDetails = {
  message?: string | null
  current_seat_tier?: string | null
}
// Creation payload, discriminated on request_type: limit increases carry no
// details, seat upgrades carry AdminRequestSeatUpgradeDetails.
export type AdminRequestCreateParams =
  | {
      request_type: 'limit_increase'
      details: null
    }
  | {
      request_type: 'seat_upgrade'
      details: AdminRequestSeatUpgradeDetails
    }
// A persisted admin request as returned by the API: common fields plus the
// same request_type-discriminated details union used at creation time.
export type AdminRequest = {
  uuid: string
  status: AdminRequestStatus
  requester_uuid?: string | null
  created_at: string
} & (
  | {
      request_type: 'limit_increase'
      details: null
    }
  | {
      request_type: 'seat_upgrade'
      details: AdminRequestSeatUpgradeDetails
    }
)
/**
 * Create an admin request (limit increase or seat upgrade).
 *
 * For Team/Enterprise users who don't have billing/admin permissions,
 * this creates a request that their admin can act on.
 *
 * If a pending request of the same type already exists for this user,
 * returns the existing request instead of creating a new one.
 */
export async function createAdminRequest(
  params: AdminRequestCreateParams,
): Promise<AdminRequest> {
  const { accessToken, orgUUID } = await prepareApiRequest()
  const requestHeaders = {
    ...getOAuthHeaders(accessToken),
    'x-organization-uuid': orgUUID,
  }
  const endpoint = `${getOauthConfig().BASE_API_URL}/api/oauth/organizations/${orgUUID}/admin_requests`
  const { data } = await axios.post<AdminRequest>(endpoint, params, {
    headers: requestHeaders,
  })
  return data
}
/**
 * List the current user's admin requests of a given type, filtered by status.
 *
 * @param requestType - The admin request type to look up.
 * @param statuses - Statuses to include; each is sent as a repeated
 *   `statuses` query parameter.
 * @returns The matching requests, or null when the API returns no body.
 */
export async function getMyAdminRequests(
  requestType: AdminRequestType,
  statuses: AdminRequestStatus[],
): Promise<AdminRequest[] | null> {
  const { accessToken, orgUUID } = await prepareApiRequest()
  const headers = {
    ...getOAuthHeaders(accessToken),
    'x-organization-uuid': orgUUID,
  }
  // Build the query with URLSearchParams so values are safely encoded;
  // repeated `statuses` keys are the API's expected multi-value format.
  const query = new URLSearchParams({ request_type: requestType })
  for (const status of statuses) {
    query.append('statuses', status)
  }
  const url = `${getOauthConfig().BASE_API_URL}/api/oauth/organizations/${orgUUID}/admin_requests/me?${query.toString()}`
  const response = await axios.get<AdminRequest[] | null>(url, {
    headers,
  })
  return response.data
}
// Eligibility verdict for one admin request type in the current org.
type AdminRequestEligibilityResponse = {
  request_type: AdminRequestType
  is_allowed: boolean
}
/**
 * Check if a specific admin request type is allowed for this org.
 */
export async function checkAdminRequestEligibility(
  requestType: AdminRequestType,
): Promise<AdminRequestEligibilityResponse | null> {
  const { accessToken, orgUUID } = await prepareApiRequest()
  const requestHeaders = {
    ...getOAuthHeaders(accessToken),
    'x-organization-uuid': orgUUID,
  }
  const base = getOauthConfig().BASE_API_URL
  const endpoint = `${base}/api/oauth/organizations/${orgUUID}/admin_requests/eligibility?request_type=${requestType}`
  const { data } = await axios.get<AdminRequestEligibilityResponse>(endpoint, {
    headers: requestHeaders,
  })
  return data
}
+141
View File
@@ -0,0 +1,141 @@
import axios from 'axios'
import isEqual from 'lodash-es/isEqual.js'
import {
getAnthropicApiKey,
getClaudeAIOAuthTokens,
hasProfileScope,
} from 'src/utils/auth.js'
import { z } from 'zod'
import { getOauthConfig, OAUTH_BETA_HEADER } from '../../constants/oauth.js'
import { getGlobalConfig, saveGlobalConfig } from '../../utils/config.js'
import { logForDebugging } from '../../utils/debug.js'
import { withOAuth401Retry } from '../../utils/http.js'
import { lazySchema } from '../../utils/lazySchema.js'
import { logError } from '../../utils/log.js'
import { getAPIProvider } from '../../utils/model/providers.js'
import { isEssentialTrafficOnly } from '../../utils/privacyLevel.js'
import { getClaudeCodeUserAgent } from '../../utils/userAgent.js'
// Expected shape of /api/claude_cli/bootstrap responses. Wrapped in
// lazySchema so the zod object is only built on first use.
const bootstrapResponseSchema = lazySchema(() =>
  z.object({
    // Opaque server-provided blob cached client-side; may be absent.
    client_data: z.record(z.unknown()).nullish(),
    // Extra model choices, transformed into the {value, label, description}
    // option shape consumed downstream.
    additional_model_options: z
      .array(
        z
          .object({
            model: z.string(),
            name: z.string(),
            description: z.string(),
          })
          .transform(({ model, name, description }) => ({
            value: model,
            label: name,
            description,
          })),
      )
      .nullish(),
  }),
)
type BootstrapResponse = z.infer<ReturnType<typeof bootstrapResponseSchema>>
/**
 * Fetch /api/claude_cli/bootstrap and validate the response.
 *
 * Returns null without throwing when the fetch is skipped (nonessential
 * traffic disabled, non-first-party provider, no usable credentials) or
 * when the response fails schema validation. Network errors are logged
 * with their status/code and rethrown to the caller.
 */
async function fetchBootstrapAPI(): Promise<BootstrapResponse | null> {
  if (isEssentialTrafficOnly()) {
    logForDebugging('[Bootstrap] Skipped: Nonessential traffic disabled')
    return null
  }
  if (getAPIProvider() !== 'firstParty') {
    logForDebugging('[Bootstrap] Skipped: 3P provider')
    return null
  }
  // OAuth preferred (requires user:profile scope — service-key OAuth tokens
  // lack it and would 403). Fall back to API key auth for console users.
  const apiKey = getAnthropicApiKey()
  const hasUsableOAuth =
    getClaudeAIOAuthTokens()?.accessToken && hasProfileScope()
  if (!hasUsableOAuth && !apiKey) {
    logForDebugging('[Bootstrap] Skipped: no usable OAuth or API key')
    return null
  }
  const endpoint = `${getOauthConfig().BASE_API_URL}/api/claude_cli/bootstrap`
  // withOAuth401Retry handles the refresh-and-retry. API key users fail
  // through on 401 (no refresh mechanism — no OAuth token to pass).
  try {
    return await withOAuth401Retry(async () => {
      // Re-read OAuth each call so the retry picks up the refreshed token.
      const token = getClaudeAIOAuthTokens()?.accessToken
      let authHeaders: Record<string, string>
      if (token && hasProfileScope()) {
        authHeaders = {
          Authorization: `Bearer ${token}`,
          'anthropic-beta': OAUTH_BETA_HEADER,
        }
      } else if (apiKey) {
        authHeaders = { 'x-api-key': apiKey }
      } else {
        logForDebugging('[Bootstrap] No auth available on retry, aborting')
        return null
      }
      logForDebugging('[Bootstrap] Fetching')
      const response = await axios.get<unknown>(endpoint, {
        headers: {
          'Content-Type': 'application/json',
          'User-Agent': getClaudeCodeUserAgent(),
          ...authHeaders,
        },
        timeout: 5000,
      })
      // Schema validation failure is treated as "no data", not an error.
      const parsed = bootstrapResponseSchema().safeParse(response.data)
      if (!parsed.success) {
        logForDebugging(
          `[Bootstrap] Response failed validation: ${parsed.error.message}`,
        )
        return null
      }
      logForDebugging('[Bootstrap] Fetch ok')
      return parsed.data
    })
  } catch (error) {
    logForDebugging(
      `[Bootstrap] Fetch failed: ${axios.isAxiosError(error) ? (error.response?.status ?? error.code) : 'unknown'}`,
    )
    throw error
  }
}
/**
 * Fetch bootstrap data from the API and persist to disk cache.
 * Errors are logged and swallowed — bootstrap is best-effort.
 */
export async function fetchBootstrapData(): Promise<void> {
  try {
    const result = await fetchBootstrapAPI()
    if (!result) return
    const clientData = result.client_data ?? null
    const additionalModelOptions = result.additional_model_options ?? []
    // Only persist if data actually changed — avoids a config write on every startup.
    const existing = getGlobalConfig()
    const unchanged =
      isEqual(existing.clientDataCache, clientData) &&
      isEqual(existing.additionalModelOptionsCache, additionalModelOptions)
    if (unchanged) {
      logForDebugging('[Bootstrap] Cache unchanged, skipping write')
      return
    }
    logForDebugging('[Bootstrap] Cache updated, persisting to disk')
    saveGlobalConfig(current => ({
      ...current,
      clientDataCache: clientData,
      additionalModelOptionsCache: additionalModelOptions,
    }))
  } catch (error) {
    logError(error)
  }
}
File diff suppressed because it is too large Load Diff
+389
View File
@@ -0,0 +1,389 @@
import Anthropic, { type ClientOptions } from '@anthropic-ai/sdk'
import { randomUUID } from 'crypto'
import type { GoogleAuth } from 'google-auth-library'
import {
checkAndRefreshOAuthTokenIfNeeded,
getAnthropicApiKey,
getApiKeyFromApiKeyHelper,
getClaudeAIOAuthTokens,
isClaudeAISubscriber,
refreshAndGetAwsCredentials,
refreshGcpCredentialsIfNeeded,
} from 'src/utils/auth.js'
import { getUserAgent } from 'src/utils/http.js'
import { getSmallFastModel } from 'src/utils/model/model.js'
import {
getAPIProvider,
isFirstPartyAnthropicBaseUrl,
} from 'src/utils/model/providers.js'
import { getProxyFetchOptions } from 'src/utils/proxy.js'
import {
getIsNonInteractiveSession,
getSessionId,
} from '../../bootstrap/state.js'
import { getOauthConfig } from '../../constants/oauth.js'
import { isDebugToStdErr, logForDebugging } from '../../utils/debug.js'
import {
getAWSRegion,
getVertexRegionForModel,
isEnvTruthy,
} from '../../utils/envUtils.js'
/**
* Environment variables for different client types:
*
* Direct API:
* - ANTHROPIC_API_KEY: Required for direct API access
*
* AWS Bedrock:
* - AWS credentials configured via aws-sdk defaults
* - AWS_REGION or AWS_DEFAULT_REGION: Sets the AWS region for all models (default: us-east-1)
* - ANTHROPIC_SMALL_FAST_MODEL_AWS_REGION: Optional. Override AWS region specifically for the small fast model (Haiku)
*
* Foundry (Azure):
* - ANTHROPIC_FOUNDRY_RESOURCE: Your Azure resource name (e.g., 'my-resource')
* For the full endpoint: https://{resource}.services.ai.azure.com/anthropic/v1/messages
* - ANTHROPIC_FOUNDRY_BASE_URL: Optional. Alternative to resource - provide full base URL directly
* (e.g., 'https://my-resource.services.ai.azure.com')
*
* Authentication (one of the following):
* - ANTHROPIC_FOUNDRY_API_KEY: Your Microsoft Foundry API key (if using API key auth)
* - Azure AD authentication: If no API key is provided, uses DefaultAzureCredential
* which supports multiple auth methods (environment variables, managed identity,
* Azure CLI, etc.). See: https://docs.microsoft.com/en-us/javascript/api/@azure/identity
*
* Vertex AI:
* - Model-specific region variables (highest priority):
* - VERTEX_REGION_CLAUDE_3_5_HAIKU: Region for Claude 3.5 Haiku model
* - VERTEX_REGION_CLAUDE_HAIKU_4_5: Region for Claude Haiku 4.5 model
* - VERTEX_REGION_CLAUDE_3_5_SONNET: Region for Claude 3.5 Sonnet model
* - VERTEX_REGION_CLAUDE_3_7_SONNET: Region for Claude 3.7 Sonnet model
* - CLOUD_ML_REGION: Optional. The default GCP region to use for all models
* If specific model region not specified above
* - ANTHROPIC_VERTEX_PROJECT_ID: Required. Your GCP project ID
* - Standard GCP credentials configured via google-auth-library
*
* Priority for determining region:
* 1. Hardcoded model-specific environment variables
* 2. Global CLOUD_ML_REGION variable
* 3. Default region from config
* 4. Fallback region (us-east5)
*/
/**
 * Build an Anthropic SDK logger that routes every level to stderr via
 * console.error, each line tagged with its level.
 */
function createStderrLogger(): ClientOptions['logger'] {
  const emit =
    (level: string) =>
    (msg: unknown, ...args: unknown[]) =>
      // biome-ignore lint/suspicious/noConsole:: intentional console output -- SDK logger must use console
      console.error(`[Anthropic SDK ${level}]`, msg, ...args)
  return {
    error: emit('ERROR'),
    warn: emit('WARN'),
    info: emit('INFO'),
    debug: emit('DEBUG'),
  }
}
/**
 * Construct an Anthropic SDK client for the configured provider.
 *
 * Provider selection (checked in order via env flags):
 * - CLAUDE_CODE_USE_BEDROCK → AnthropicBedrock
 * - CLAUDE_CODE_USE_FOUNDRY → AnthropicFoundry
 * - CLAUDE_CODE_USE_VERTEX  → AnthropicVertex
 * - otherwise the first-party Anthropic client (OAuth token for
 *   Claude.ai subscribers, API key for console users).
 *
 * @param apiKey - Explicit API key; falls back to getAnthropicApiKey().
 * @param maxRetries - Retry count passed through to the SDK.
 * @param model - Used only to pick per-model regions (Bedrock/Vertex).
 * @param fetchOverride - Custom fetch implementation (wrapped by buildFetch).
 * @param source - Caller tag included in request debug logs.
 */
export async function getAnthropicClient({
  apiKey,
  maxRetries,
  model,
  fetchOverride,
  source,
}: {
  apiKey?: string
  maxRetries: number
  model?: string
  fetchOverride?: ClientOptions['fetch']
  source?: string
}): Promise<Anthropic> {
  const containerId = process.env.CLAUDE_CODE_CONTAINER_ID
  const remoteSessionId = process.env.CLAUDE_CODE_REMOTE_SESSION_ID
  const clientApp = process.env.CLAUDE_AGENT_SDK_CLIENT_APP
  const customHeaders = getCustomHeaders()
  const defaultHeaders: { [key: string]: string } = {
    'x-app': 'cli',
    'User-Agent': getUserAgent(),
    'X-Claude-Code-Session-Id': getSessionId(),
    ...customHeaders,
    ...(containerId ? { 'x-claude-remote-container-id': containerId } : {}),
    ...(remoteSessionId
      ? { 'x-claude-remote-session-id': remoteSessionId }
      : {}),
    // SDK consumers can identify their app/library for backend analytics
    ...(clientApp ? { 'x-client-app': clientApp } : {}),
  }
  // Log API client configuration for HFI debugging
  logForDebugging(
    `[API:request] Creating client, ANTHROPIC_CUSTOM_HEADERS present: ${!!process.env.ANTHROPIC_CUSTOM_HEADERS}, has Authorization header: ${!!customHeaders['Authorization']}`,
  )
  // Add additional protection header if enabled via env var
  const additionalProtectionEnabled = isEnvTruthy(
    process.env.CLAUDE_CODE_ADDITIONAL_PROTECTION,
  )
  if (additionalProtectionEnabled) {
    defaultHeaders['x-anthropic-additional-protection'] = 'true'
  }
  logForDebugging('[API:auth] OAuth token check starting')
  await checkAndRefreshOAuthTokenIfNeeded()
  logForDebugging('[API:auth] OAuth token check complete')
  // Non-subscribers authenticate via Authorization header (env token or
  // apiKeyHelper) instead of the SDK's OAuth authToken path.
  if (!isClaudeAISubscriber()) {
    await configureApiKeyHeaders(defaultHeaders, getIsNonInteractiveSession())
  }
  const resolvedFetch = buildFetch(fetchOverride, source)
  // Options shared by every provider-specific client constructed below.
  const ARGS = {
    defaultHeaders,
    maxRetries,
    timeout: parseInt(process.env.API_TIMEOUT_MS || String(600 * 1000), 10),
    dangerouslyAllowBrowser: true,
    fetchOptions: getProxyFetchOptions({
      forAnthropicAPI: true,
    }) as ClientOptions['fetchOptions'],
    ...(resolvedFetch && {
      fetch: resolvedFetch,
    }),
  }
  if (isEnvTruthy(process.env.CLAUDE_CODE_USE_BEDROCK)) {
    const { AnthropicBedrock } = await import('@anthropic-ai/bedrock-sdk')
    // Use region override for small fast model if specified
    const awsRegion =
      model === getSmallFastModel() &&
      process.env.ANTHROPIC_SMALL_FAST_MODEL_AWS_REGION
        ? process.env.ANTHROPIC_SMALL_FAST_MODEL_AWS_REGION
        : getAWSRegion()
    const bedrockArgs: ConstructorParameters<typeof AnthropicBedrock>[0] = {
      ...ARGS,
      awsRegion,
      ...(isEnvTruthy(process.env.CLAUDE_CODE_SKIP_BEDROCK_AUTH) && {
        skipAuth: true,
      }),
      ...(isDebugToStdErr() && { logger: createStderrLogger() }),
    }
    // Add API key authentication if available
    if (process.env.AWS_BEARER_TOKEN_BEDROCK) {
      bedrockArgs.skipAuth = true
      // Add the Bearer token for Bedrock API key authentication
      bedrockArgs.defaultHeaders = {
        ...bedrockArgs.defaultHeaders,
        Authorization: `Bearer ${process.env.AWS_BEARER_TOKEN_BEDROCK}`,
      }
    } else if (!isEnvTruthy(process.env.CLAUDE_CODE_SKIP_BEDROCK_AUTH)) {
      // Refresh auth and get credentials with cache clearing
      const cachedCredentials = await refreshAndGetAwsCredentials()
      if (cachedCredentials) {
        bedrockArgs.awsAccessKey = cachedCredentials.accessKeyId
        bedrockArgs.awsSecretKey = cachedCredentials.secretAccessKey
        bedrockArgs.awsSessionToken = cachedCredentials.sessionToken
      }
    }
    // we have always been lying about the return type - this doesn't support batching or models
    return new AnthropicBedrock(bedrockArgs) as unknown as Anthropic
  }
  if (isEnvTruthy(process.env.CLAUDE_CODE_USE_FOUNDRY)) {
    const { AnthropicFoundry } = await import('@anthropic-ai/foundry-sdk')
    // Determine Azure AD token provider based on configuration
    // SDK reads ANTHROPIC_FOUNDRY_API_KEY by default
    let azureADTokenProvider: (() => Promise<string>) | undefined
    if (!process.env.ANTHROPIC_FOUNDRY_API_KEY) {
      if (isEnvTruthy(process.env.CLAUDE_CODE_SKIP_FOUNDRY_AUTH)) {
        // Mock token provider for testing/proxy scenarios (similar to Vertex mock GoogleAuth)
        azureADTokenProvider = () => Promise.resolve('')
      } else {
        // Use real Azure AD authentication with DefaultAzureCredential
        const {
          DefaultAzureCredential: AzureCredential,
          getBearerTokenProvider,
        } = await import('@azure/identity')
        azureADTokenProvider = getBearerTokenProvider(
          new AzureCredential(),
          'https://cognitiveservices.azure.com/.default',
        )
      }
    }
    const foundryArgs: ConstructorParameters<typeof AnthropicFoundry>[0] = {
      ...ARGS,
      ...(azureADTokenProvider && { azureADTokenProvider }),
      ...(isDebugToStdErr() && { logger: createStderrLogger() }),
    }
    // we have always been lying about the return type - this doesn't support batching or models
    return new AnthropicFoundry(foundryArgs) as unknown as Anthropic
  }
  if (isEnvTruthy(process.env.CLAUDE_CODE_USE_VERTEX)) {
    // Refresh GCP credentials if gcpAuthRefresh is configured and credentials are expired
    // This is similar to how we handle AWS credential refresh for Bedrock
    if (!isEnvTruthy(process.env.CLAUDE_CODE_SKIP_VERTEX_AUTH)) {
      await refreshGcpCredentialsIfNeeded()
    }
    const [{ AnthropicVertex }, { GoogleAuth }] = await Promise.all([
      import('@anthropic-ai/vertex-sdk'),
      import('google-auth-library'),
    ])
    // TODO: Cache either GoogleAuth instance or AuthClient to improve performance
    // Currently we create a new GoogleAuth instance for every getAnthropicClient() call
    // This could cause repeated authentication flows and metadata server checks
    // However, caching needs careful handling of:
    // - Credential refresh/expiration
    // - Environment variable changes (GOOGLE_APPLICATION_CREDENTIALS, project vars)
    // - Cross-request auth state management
    // See: https://github.com/googleapis/google-auth-library-nodejs/issues/390 for caching challenges
    // Prevent metadata server timeout by providing projectId as fallback
    // google-auth-library checks project ID in this order:
    // 1. Environment variables (GCLOUD_PROJECT, GOOGLE_CLOUD_PROJECT, etc.)
    // 2. Credential files (service account JSON, ADC file)
    // 3. gcloud config
    // 4. GCE metadata server (causes 12s timeout outside GCP)
    //
    // We only set projectId if user hasn't configured other discovery methods
    // to avoid interfering with their existing auth setup
    // Check project environment variables in same order as google-auth-library
    // See: https://github.com/googleapis/google-auth-library-nodejs/blob/main/src/auth/googleauth.ts
    const hasProjectEnvVar =
      process.env['GCLOUD_PROJECT'] ||
      process.env['GOOGLE_CLOUD_PROJECT'] ||
      process.env['gcloud_project'] ||
      process.env['google_cloud_project']
    // Check for credential file paths (service account or ADC)
    // Note: We're checking both standard and lowercase variants to be safe,
    // though we should verify what google-auth-library actually checks
    const hasKeyFile =
      process.env['GOOGLE_APPLICATION_CREDENTIALS'] ||
      process.env['google_application_credentials']
    const googleAuth = isEnvTruthy(process.env.CLAUDE_CODE_SKIP_VERTEX_AUTH)
      ? ({
          // Mock GoogleAuth for testing/proxy scenarios
          getClient: () => ({
            getRequestHeaders: () => ({}),
          }),
        } as unknown as GoogleAuth)
      : new GoogleAuth({
          scopes: ['https://www.googleapis.com/auth/cloud-platform'],
          // Only use ANTHROPIC_VERTEX_PROJECT_ID as last resort fallback
          // This prevents the 12-second metadata server timeout when:
          // - No project env vars are set AND
          // - No credential keyfile is specified AND
          // - ADC file exists but lacks project_id field
          //
          // Risk: If auth project != API target project, this could cause billing/audit issues
          // Mitigation: Users can set GOOGLE_CLOUD_PROJECT to override
          ...(hasProjectEnvVar || hasKeyFile
            ? {}
            : {
                projectId: process.env.ANTHROPIC_VERTEX_PROJECT_ID,
              }),
        })
    const vertexArgs: ConstructorParameters<typeof AnthropicVertex>[0] = {
      ...ARGS,
      region: getVertexRegionForModel(model),
      googleAuth,
      ...(isDebugToStdErr() && { logger: createStderrLogger() }),
    }
    // we have always been lying about the return type - this doesn't support batching or models
    return new AnthropicVertex(vertexArgs) as unknown as Anthropic
  }
  // Determine authentication method based on available tokens
  const clientConfig: ConstructorParameters<typeof Anthropic>[0] = {
    apiKey: isClaudeAISubscriber() ? null : apiKey || getAnthropicApiKey(),
    authToken: isClaudeAISubscriber()
      ? getClaudeAIOAuthTokens()?.accessToken
      : undefined,
    // Set baseURL from OAuth config when using staging OAuth
    ...(process.env.USER_TYPE === 'ant' &&
    isEnvTruthy(process.env.USE_STAGING_OAUTH)
      ? { baseURL: getOauthConfig().BASE_API_URL }
      : {}),
    ...ARGS,
    ...(isDebugToStdErr() && { logger: createStderrLogger() }),
  }
  return new Anthropic(clientConfig)
}
/**
 * Populate the Authorization header from ANTHROPIC_AUTH_TOKEN or, failing
 * that, from the user-configured apiKeyHelper. Mutates `headers` in place;
 * leaves it untouched when neither source yields a token.
 */
async function configureApiKeyHeaders(
  headers: Record<string, string>,
  isNonInteractiveSession: boolean,
): Promise<void> {
  const envToken = process.env.ANTHROPIC_AUTH_TOKEN
  if (envToken) {
    headers['Authorization'] = `Bearer ${envToken}`
    return
  }
  // Only consult the (potentially slow) helper when the env var is unset.
  const helperToken = await getApiKeyFromApiKeyHelper(isNonInteractiveSession)
  if (helperToken) {
    headers['Authorization'] = `Bearer ${helperToken}`
  }
}
/**
 * Parse ANTHROPIC_CUSTOM_HEADERS (curl-style "Name: Value" pairs, one per
 * line) into a header map. Lines without a colon or with an empty name are
 * skipped; names and values are trimmed.
 */
function getCustomHeaders(): Record<string, string> {
  const parsed: Record<string, string> = {}
  const raw = process.env.ANTHROPIC_CUSTOM_HEADERS
  if (!raw) return parsed
  // Support multiple headers separated by newlines.
  for (const line of raw.split(/\n|\r\n/)) {
    if (!line.trim()) continue
    // Split on the first ':' only, then trim — avoids regex backtracking
    // on malformed long header lines.
    const sep = line.indexOf(':')
    if (sep === -1) continue
    const name = line.slice(0, sep).trim()
    if (!name) continue
    parsed[name] = line.slice(sep + 1).trim()
  }
  return parsed
}
export const CLIENT_REQUEST_ID_HEADER = 'x-client-request-id'
/**
 * Wrap the fetch implementation so first-party API requests carry a
 * client-generated request ID and every request emits a debug log line.
 */
function buildFetch(
  fetchOverride: ClientOptions['fetch'],
  source: string | undefined,
): ClientOptions['fetch'] {
  // eslint-disable-next-line eslint-plugin-n/no-unsupported-features/node-builtins
  const baseFetch = fetchOverride ?? globalThis.fetch
  // Only send to the first-party API — Bedrock/Vertex/Foundry don't log it
  // and unknown headers risk rejection by strict proxies (inc-4029 class).
  const shouldInjectId =
    getAPIProvider() === 'firstParty' && isFirstPartyAnthropicBaseUrl()
  return (input, init) => {
    // eslint-disable-next-line eslint-plugin-n/no-unsupported-features/node-builtins
    const mergedHeaders = new Headers(init?.headers)
    // Generate a client-side request ID so timeouts (which return no server
    // request ID) can still be correlated with server logs by the API team.
    // Callers that want to track the ID themselves can pre-set the header.
    if (shouldInjectId && !mergedHeaders.has(CLIENT_REQUEST_ID_HEADER)) {
      mergedHeaders.set(CLIENT_REQUEST_ID_HEADER, randomUUID())
    }
    try {
      // eslint-disable-next-line eslint-plugin-n/no-unsupported-features/node-builtins
      const requestUrl = input instanceof Request ? input.url : String(input)
      const requestId = mergedHeaders.get(CLIENT_REQUEST_ID_HEADER)
      const idSuffix = requestId
        ? ` ${CLIENT_REQUEST_ID_HEADER}=${requestId}`
        : ''
      logForDebugging(
        `[API REQUEST] ${new URL(requestUrl).pathname}${idSuffix} source=${source ?? 'unknown'}`,
      )
    } catch {
      // never let logging crash the fetch
    }
    return baseFetch(input, { ...init, headers: mergedHeaders })
  }
}
+226
View File
@@ -0,0 +1,226 @@
import type { ClientOptions } from '@anthropic-ai/sdk'
import { createHash } from 'crypto'
import { promises as fs } from 'fs'
import { dirname, join } from 'path'
import { getSessionId } from 'src/bootstrap/state.js'
import { getClaudeConfigHomeDir } from '../../utils/envUtils.js'
import { jsonParse, jsonStringify } from '../../utils/slowOperations.js'
/** Hex-encoded SHA-256 digest of `str`. */
function hashString(str: string): string {
  const hasher = createHash('sha256')
  hasher.update(str)
  return hasher.digest('hex')
}
// Cache last few API requests for ant users (e.g., for /issue command)
const MAX_CACHED_REQUESTS = 5
// Most-recent-last ring of captured requests; trimmed to MAX_CACHED_REQUESTS.
const cachedApiRequests: Array<{ timestamp: string; request: unknown }> = []
// Per-session bookkeeping used by dumpRequest to avoid re-writing data.
type DumpState = {
  initialized: boolean
  // How many messages of the conversation have already been dumped.
  messageCountSeen: number
  lastInitDataHash: string
  // Cheap proxy for change detection — skips the expensive stringify+hash
  // when model/tools/system are structurally identical to the last call.
  lastInitFingerprint: string
}
// Track state per session to avoid duplicating data
const dumpState = new Map<string, DumpState>()
/** Snapshot copy of the recently cached API requests. */
export function getLastApiRequests(): Array<{
  timestamp: string
  request: unknown
}> {
  return cachedApiRequests.slice()
}
/** Drop every cached API request (mutates the shared cache in place). */
export function clearApiRequestCache(): void {
  cachedApiRequests.splice(0, cachedApiRequests.length)
}
/** Forget per-session dump bookkeeping for one agent/session. */
export function clearDumpState(agentIdOrSessionId: string): void {
  dumpState.delete(agentIdOrSessionId)
}
/** Forget dump bookkeeping for every tracked agent/session. */
export function clearAllDumpState(): void {
  dumpState.clear()
}
/**
 * Record an outgoing API request for later inspection. No-op unless
 * USER_TYPE is 'ant'; only the newest MAX_CACHED_REQUESTS entries are kept.
 */
export function addApiRequestToCache(requestData: unknown): void {
  if (process.env.USER_TYPE !== 'ant') return
  // Evict the oldest entries before pushing so the cap is never exceeded.
  while (cachedApiRequests.length >= MAX_CACHED_REQUESTS) {
    cachedApiRequests.shift()
  }
  cachedApiRequests.push({
    timestamp: new Date().toISOString(),
    request: requestData,
  })
}
/**
 * Path of the JSONL dump file for a session (defaults to the current one).
 */
export function getDumpPromptsPath(agentIdOrSessionId?: string): string {
  const id = agentIdOrSessionId ?? getSessionId()
  return join(getClaudeConfigHomeDir(), 'dump-prompts', `${id}.jsonl`)
}
/**
 * Fire-and-forget append of JSONL lines; creates the parent directory on
 * demand and swallows all I/O errors (best-effort debug tooling).
 */
function appendToFile(filePath: string, entries: string[]): void {
  if (!entries.length) return
  void (async () => {
    try {
      await fs.mkdir(dirname(filePath), { recursive: true })
      await fs.appendFile(filePath, `${entries.join('\n')}\n`)
    } catch {
      // best effort — never surface dump I/O failures
    }
  })()
}
/**
 * Cheap structural fingerprint of a request's model, tool names, and total
 * system-prompt length — used to skip the expensive stringify+hash when
 * nothing has changed between calls.
 */
function initFingerprint(req: Record<string, unknown>): string {
  const tools = req.tools as Array<{ name?: string }> | undefined
  const system = req.system as unknown[] | string | undefined
  let systemLength = 0
  if (typeof system === 'string') {
    systemLength = system.length
  } else if (Array.isArray(system)) {
    for (const block of system) {
      systemLength += (block as { text?: string }).text?.length ?? 0
    }
  }
  const toolNames = (tools ?? []).map(t => t.name ?? '').join(',')
  return `${req.model}|${toolNames}|${systemLength}`
}
/**
 * Parse one outgoing request body and append JSONL entries to the dump file
 * (writes only when USER_TYPE is 'ant'): an `init` entry on the first
 * request, a `system_update` entry when the non-message payload changes,
 * and one `message` entry per new user message since the last call.
 * Parse failures are silently ignored.
 *
 * @param body - Raw JSON request body string.
 * @param ts - ISO timestamp captured when the request was issued.
 * @param state - Per-session bookkeeping, mutated in place.
 * @param filePath - JSONL dump file path for this session.
 */
function dumpRequest(
  body: string,
  ts: string,
  state: DumpState,
  filePath: string,
): void {
  try {
    const req = jsonParse(body) as Record<string, unknown>
    addApiRequestToCache(req)
    if (process.env.USER_TYPE !== 'ant') return
    const entries: string[] = []
    const messages = (req.messages ?? []) as Array<{ role?: string }>
    // Write init data (system, tools, metadata) on first request,
    // and a system_update entry whenever it changes.
    // Cheap fingerprint first: system+tools don't change between turns,
    // so skip the 300ms stringify when the shape is unchanged.
    const fingerprint = initFingerprint(req)
    if (!state.initialized || fingerprint !== state.lastInitFingerprint) {
      const { messages: _, ...initData } = req
      const initDataStr = jsonStringify(initData)
      const initDataHash = hashString(initDataStr)
      state.lastInitFingerprint = fingerprint
      if (!state.initialized) {
        state.initialized = true
        state.lastInitDataHash = initDataHash
        // Reuse initDataStr rather than re-serializing initData inside a wrapper.
        // timestamp from toISOString() contains no chars needing JSON escaping.
        entries.push(
          `{"type":"init","timestamp":"${ts}","data":${initDataStr}}`,
        )
      } else if (initDataHash !== state.lastInitDataHash) {
        state.lastInitDataHash = initDataHash
        entries.push(
          `{"type":"system_update","timestamp":"${ts}","data":${initDataStr}}`,
        )
      }
    }
    // Write only new user messages (assistant messages captured in response)
    for (const msg of messages.slice(state.messageCountSeen)) {
      if (msg.role === 'user') {
        entries.push(
          jsonStringify({ type: 'message', timestamp: ts, data: msg }),
        )
      }
    }
    state.messageCountSeen = messages.length
    appendToFile(filePath, entries)
  } catch {
    // Ignore parsing errors
  }
}
/**
 * Build a fetch wrapper that mirrors API traffic for one agent/session into
 * a JSONL dump file (debug tooling; writes only when USER_TYPE is 'ant').
 *
 * POST request bodies are parsed off the critical path via setImmediate;
 * successful responses are cloned and appended asynchronously. SSE bodies
 * are decoded and split into their individual `data:` JSON chunks first.
 */
export function createDumpPromptsFetch(
  agentIdOrSessionId: string,
): ClientOptions['fetch'] {
  const filePath = getDumpPromptsPath(agentIdOrSessionId)
  return async (input: RequestInfo | URL, init?: RequestInit) => {
    const state = dumpState.get(agentIdOrSessionId) ?? {
      initialized: false,
      messageCountSeen: 0,
      lastInitDataHash: '',
      lastInitFingerprint: '',
    }
    dumpState.set(agentIdOrSessionId, state)
    let timestamp: string | undefined
    if (init?.method === 'POST' && init.body) {
      timestamp = new Date().toISOString()
      // Parsing + stringifying the request (system prompt + tool schemas = MBs)
      // takes hundreds of ms. Defer so it doesn't block the actual API call —
      // this is debug tooling for /issue, not on the critical path.
      setImmediate(dumpRequest, init.body as string, timestamp, state, filePath)
    }
    // eslint-disable-next-line eslint-plugin-n/no-unsupported-features/node-builtins
    const response = await globalThis.fetch(input, init)
    // Save response async
    if (timestamp && response.ok && process.env.USER_TYPE === 'ant') {
      // Clone so the caller can still consume the original body stream.
      const cloned = response.clone()
      void (async () => {
        try {
          const isStreaming = cloned.headers
            .get('content-type')
            ?.includes('text/event-stream')
          let data: unknown
          if (isStreaming && cloned.body) {
            // Parse SSE stream into chunks
            const reader = cloned.body.getReader()
            const decoder = new TextDecoder()
            let buffer = ''
            try {
              while (true) {
                const { done, value } = await reader.read()
                if (done) break
                buffer += decoder.decode(value, { stream: true })
              }
            } finally {
              reader.releaseLock()
            }
            const chunks: unknown[] = []
            // SSE events are separated by blank lines; payload lines are
            // prefixed with "data: ".
            for (const event of buffer.split('\n\n')) {
              for (const line of event.split('\n')) {
                if (line.startsWith('data: ') && line !== 'data: [DONE]') {
                  try {
                    chunks.push(jsonParse(line.slice(6)))
                  } catch {
                    // Ignore parse errors
                  }
                }
              }
            }
            data = { stream: true, chunks }
          } else {
            data = await cloned.json()
          }
          await fs.appendFile(
            filePath,
            jsonStringify({ type: 'response', timestamp, data }) + '\n',
          )
        } catch {
          // Best effort
        }
      })()
    }
    return response
  }
}
+22
View File
@@ -0,0 +1,22 @@
import type { NonNullableUsage } from '../../entrypoints/sdk/sdkUtilityTypes.js'
/**
 * Zero-initialized usage object. Extracted from logging.ts so that
 * bridge/replBridge.ts can import it without transitively pulling in
 * api/errors.ts → utils/messages.ts → BashTool.tsx → the world.
 *
 * NOTE: Readonly<T> is shallow — nested objects (server_tool_use,
 * cache_creation, iterations) are still mutable; treat the whole value
 * as immutable and spread-copy before accumulating into it.
 */
export const EMPTY_USAGE: Readonly<NonNullableUsage> = {
  input_tokens: 0,
  cache_creation_input_tokens: 0,
  cache_read_input_tokens: 0,
  output_tokens: 0,
  server_tool_use: { web_search_requests: 0, web_fetch_requests: 0 },
  service_tier: 'standard',
  cache_creation: {
    ephemeral_1h_input_tokens: 0,
    ephemeral_5m_input_tokens: 0,
  },
  inference_geo: '',
  iterations: [],
  speed: 'standard',
}
+260
View File
@@ -0,0 +1,260 @@
import type { APIError } from '@anthropic-ai/sdk'
// SSL/TLS error codes from OpenSSL (used by both Node.js and Bun)
// See: https://www.openssl.org/docs/man3.1/man3/X509_STORE_CTX_get_error.html
const SSL_ERROR_CODES = new Set([
  // Certificate verification errors
  'UNABLE_TO_VERIFY_LEAF_SIGNATURE',
  'UNABLE_TO_GET_ISSUER_CERT',
  'UNABLE_TO_GET_ISSUER_CERT_LOCALLY',
  'CERT_SIGNATURE_FAILURE',
  'CERT_NOT_YET_VALID',
  'CERT_HAS_EXPIRED',
  'CERT_REVOKED',
  'CERT_REJECTED',
  'CERT_UNTRUSTED',
  // Self-signed certificate errors
  'DEPTH_ZERO_SELF_SIGNED_CERT',
  'SELF_SIGNED_CERT_IN_CHAIN',
  // Chain errors
  'CERT_CHAIN_TOO_LONG',
  'PATH_LENGTH_EXCEEDED',
  // Hostname/altname errors
  'ERR_TLS_CERT_ALTNAME_INVALID',
  'HOSTNAME_MISMATCH',
  // TLS handshake errors
  'ERR_TLS_HANDSHAKE_TIMEOUT',
  'ERR_SSL_WRONG_VERSION_NUMBER',
  'ERR_SSL_DECRYPTION_FAILED_OR_BAD_RECORD_MAC',
])
// Root-cause info extracted from an error's `cause` chain.
export type ConnectionErrorDetails = {
  code: string
  message: string
  isSSLError: boolean
}
/**
 * Extracts connection error details from the error cause chain.
 * The Anthropic SDK wraps underlying errors in the `cause` property.
 * This function walks the cause chain to find the root error code/message.
 */
export function extractConnectionErrorDetails(
  error: unknown,
): ConnectionErrorDetails | null {
  if (!error || typeof error !== 'object') {
    return null
  }
  const maxDepth = 5 // Prevent infinite loops
  let current: unknown = error
  // Walk the cause chain to find the root error with a code
  for (let depth = 0; current && depth < maxDepth; depth++) {
    if (!(current instanceof Error)) break
    if ('code' in current && typeof current.code === 'string') {
      return {
        code: current.code,
        message: current.message,
        isSSLError: SSL_ERROR_CODES.has(current.code),
      }
    }
    // Move to the next cause in the chain (guarding self-referential causes)
    if (!('cause' in current) || current.cause === current) break
    current = current.cause
  }
  return null
}
/**
 * Returns an actionable hint for SSL/TLS errors, intended for contexts outside
 * the main API client (OAuth token exchange, preflight connectivity checks)
 * where `formatAPIError` doesn't apply.
 *
 * Motivation: enterprise users behind TLS-intercepting proxies (Zscaler et al.)
 * see OAuth complete in-browser but the CLI's token exchange silently fails
 * with a raw SSL code. Surfacing the likely fix saves a support round-trip.
 */
export function getSSLErrorHint(error: unknown): string | null {
  const details = extractConnectionErrorDetails(error)
  if (details?.isSSLError) {
    return `SSL certificate error (${details.code}). If you are behind a corporate proxy or TLS-intercepting firewall, set NODE_EXTRA_CA_CERTS to your CA bundle path, or ask IT to allowlist *.anthropic.com. Run /doctor for details.`
  }
  return null
}
/**
 * Strips HTML content (e.g., CloudFlare error pages) from a message string,
 * returning the page's <title> text or empty string if HTML is detected.
 * Returns the original message unchanged if no HTML is found.
 */
function sanitizeMessageHTML(message: string): string {
  const looksLikeHTML =
    message.includes('<!DOCTYPE html') || message.includes('<html')
  if (!looksLikeHTML) {
    return message
  }
  const title = /<title>([^<]+)<\/title>/.exec(message)?.[1]
  return title ? title.trim() : ''
}
/**
 * Return a user-friendly version of an API error's message: HTML bodies
 * (e.g. CloudFlare error pages) are reduced to their title, plain messages
 * pass through unchanged.
 */
export function sanitizeAPIError(apiError: APIError): string {
  const { message } = apiError
  // Message is sometimes undefined on these errors.
  // TODO: figure out why
  return message ? sanitizeMessageHTML(message) : ''
}
/**
 * Shapes of deserialized API errors from session JSONL.
 *
 * After JSON round-tripping, the SDK's APIError loses its `.message`
 * property. The actual message lives at different nesting levels depending
 * on the provider:
 *
 * - Bedrock/proxy: `{ error: { message: "..." } }`
 * - Standard Anthropic API: `{ error: { error: { message: "..." } } }`
 *   (the outer `.error` is the response body, the inner `.error` is the API error)
 *
 * See also: `getErrorMessage` in `logging.ts` which handles the same shapes.
 */
type NestedAPIError = {
  error?: {
    message?: string
    error?: { message?: string }
  }
}
/**
 * Type guard: true when `value` is an object whose `error` property is
 * itself a non-null object (the deserialized API error envelope).
 */
function hasNestedError(value: unknown): value is NestedAPIError {
  if (typeof value !== 'object' || value === null) {
    return false
  }
  if (!('error' in value)) {
    return false
  }
  const inner = (value as { error: unknown }).error
  return typeof inner === 'object' && inner !== null
}
/**
 * Extract a human-readable message from a deserialized API error that lacks
 * a top-level `.message`.
 *
 * Tries the deeper (more specific) shape first:
 * 1. `error.error.error.message` — standard Anthropic API shape
 * 2. `error.error.message` — Bedrock shape
 *
 * @returns The first non-empty sanitized message found, else null.
 */
function extractNestedErrorMessage(error: APIError): string | null {
  if (!hasNestedError(error)) {
    return null
  }
  // Assign through the narrowed alias so TypeScript sees the nested shape
  // instead of the SDK's `Object | undefined`.
  const narrowed: NestedAPIError = error
  const outer = narrowed.error
  // Candidate messages, deepest first for specificity.
  const candidates = [outer?.error?.message, outer?.message]
  for (const candidate of candidates) {
    if (typeof candidate === 'string' && candidate.length > 0) {
      const clean = sanitizeMessageHTML(candidate)
      if (clean.length > 0) {
        return clean
      }
    }
  }
  return null
}
/**
 * Render an APIError as a single user-facing line.
 *
 * Precedence: connection-level failures from the cause chain (timeout,
 * SSL), then the SDK's generic "Connection error.", then fallbacks for
 * deserialized errors missing `.message`, then HTML sanitization of the
 * raw message.
 */
export function formatAPIError(error: APIError): string {
  const connectionDetails = extractConnectionErrorDetails(error)
  if (connectionDetails) {
    const { code, isSSLError } = connectionDetails
    if (code === 'ETIMEDOUT') {
      return 'Request timed out. Check your internet connection and proxy settings'
    }
    if (isSSLError) {
      // Several codes share a message; build the table once per call.
      const verifyFailed =
        'Unable to connect to API: SSL certificate verification failed. Check your proxy or corporate SSL certificates'
      const selfSigned =
        'Unable to connect to API: Self-signed certificate detected. Check your proxy or corporate SSL certificates'
      const hostnameMismatch =
        'Unable to connect to API: SSL certificate hostname mismatch'
      const sslMessages: Record<string, string> = {
        UNABLE_TO_VERIFY_LEAF_SIGNATURE: verifyFailed,
        UNABLE_TO_GET_ISSUER_CERT: verifyFailed,
        UNABLE_TO_GET_ISSUER_CERT_LOCALLY: verifyFailed,
        CERT_HAS_EXPIRED:
          'Unable to connect to API: SSL certificate has expired',
        CERT_REVOKED:
          'Unable to connect to API: SSL certificate has been revoked',
        DEPTH_ZERO_SELF_SIGNED_CERT: selfSigned,
        SELF_SIGNED_CERT_IN_CHAIN: selfSigned,
        ERR_TLS_CERT_ALTNAME_INVALID: hostnameMismatch,
        HOSTNAME_MISMATCH: hostnameMismatch,
        CERT_NOT_YET_VALID:
          'Unable to connect to API: SSL certificate is not yet valid',
      }
      return (
        sslMessages[code] ?? `Unable to connect to API: SSL error (${code})`
      )
    }
  }
  if (error.message === 'Connection error.') {
    // Include a non-SSL error code when we have one, for debugging.
    return connectionDetails?.code
      ? `Unable to connect to API (${connectionDetails.code})`
      : 'Unable to connect to API. Check your internet connection'
  }
  // Deserialized-from-JSONL errors (e.g. --resume) may lack `.message`.
  // Return a safe fallback so callers that access `.length` never crash.
  if (!error.message) {
    const nested = extractNestedErrorMessage(error)
    return nested ?? `API error (status ${error.status ?? 'unknown'})`
  }
  const sanitized = sanitizeAPIError(error)
  // Prefer the sanitized form only when sanitization actually changed it.
  return sanitized.length > 0 && sanitized !== error.message
    ? sanitized
    : error.message
}
File diff suppressed because it is too large Load Diff
+748
View File
@@ -0,0 +1,748 @@
/**
* Files API client for managing files
*
* This module provides functionality to download and upload files to Anthropic Public Files API.
* Used by the Claude Code agent to download file attachments at session startup.
*
* API Reference: https://docs.anthropic.com/en/api/files-content
*/
import axios from 'axios'
import { randomUUID } from 'crypto'
import * as fs from 'fs/promises'
import * as path from 'path'
import { count } from '../../utils/array.js'
import { getCwd } from '../../utils/cwd.js'
import { logForDebugging } from '../../utils/debug.js'
import { errorMessage } from '../../utils/errors.js'
import { logError } from '../../utils/log.js'
import { sleep } from '../../utils/sleep.js'
import {
type AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
logEvent,
} from '../analytics/index.js'
// Files API is currently in beta. oauth-2025-04-20 enables Bearer OAuth
// on public-api routes (auth.py: "oauth_auth" not in beta_versions → 404).
const FILES_API_BETA_HEADER = 'files-api-2025-04-14,oauth-2025-04-20'
const ANTHROPIC_VERSION = '2023-06-01'
// API base URL - uses ANTHROPIC_BASE_URL set by env-manager for the appropriate environment
// Falls back to public API for standalone usage
function getDefaultApiBaseUrl(): string {
  const fromEnv =
    process.env.ANTHROPIC_BASE_URL || process.env.CLAUDE_CODE_API_BASE_URL
  // `||` (not `??`) on purpose: an empty-string env var also falls through.
  return fromEnv || 'https://api.anthropic.com'
}
// Error-level debug log, namespaced with the [files-api] prefix.
function logDebugError(message: string): void {
  logForDebugging('[files-api] ' + message, { level: 'error' })
}
// Info-level debug log, namespaced with the [files-api] prefix.
function logDebug(message: string): void {
  logForDebugging('[files-api] ' + message)
}
/**
 * File specification parsed from CLI args
 * Format: --file=<file_id>:<relative_path>
 */
export type File = {
  // Server-assigned ID, e.g. "file_011CNha8iCJcU1wXNR6q4V8w"
  fileId: string
  // Destination path relative to the session's uploads directory
  relativePath: string
}
/**
 * Configuration for the files API client
 */
export type FilesApiConfig = {
  /** OAuth token for authentication (from session JWT) */
  oauthToken: string
  /** Base URL for the API (default: https://api.anthropic.com) */
  baseUrl?: string
  /** Session ID for creating session-specific directories */
  sessionId: string
}
/**
 * Result of a file download operation
 */
export type DownloadResult = {
  fileId: string
  // Full local path written to; empty string when the path was invalid
  path: string
  success: boolean
  // Populated only on failure
  error?: string
  // Populated only on success
  bytesWritten?: number
}
// Max attempts per network operation (downloads, uploads, list calls)
const MAX_RETRIES = 3
// First retry delay; doubles on each subsequent attempt (exponential backoff)
const BASE_DELAY_MS = 500
const MAX_FILE_SIZE_BYTES = 500 * 1024 * 1024 // 500MB
/**
 * Result type for retry operations - signals whether to continue retrying
 */
type RetryResult<T> = { done: true; value: T } | { done: false; error?: string }
/**
 * Executes an operation with exponential backoff retry logic.
 *
 * @param operation - Operation name for logging
 * @param attemptFn - Runs one attempt (1-based); returns `{done: true}` with
 *   the value on success, `{done: false}` (with optional error text) to retry
 * @returns The successful result value
 * @throws Error when all MAX_RETRIES attempts fail; non-retriable errors
 *   thrown by `attemptFn` propagate immediately
 */
async function retryWithBackoff<T>(
  operation: string,
  attemptFn: (attempt: number) => Promise<RetryResult<T>>,
): Promise<T> {
  let lastError = ''
  let attempt = 0
  while (++attempt <= MAX_RETRIES) {
    const outcome = await attemptFn(attempt)
    if (outcome.done) {
      return outcome.value
    }
    lastError = outcome.error || `${operation} failed`
    logDebug(
      `${operation} attempt ${attempt}/${MAX_RETRIES} failed: ${lastError}`,
    )
    if (attempt === MAX_RETRIES) {
      break
    }
    // Exponential backoff: BASE, 2*BASE, 4*BASE, ...
    const delayMs = BASE_DELAY_MS * 2 ** (attempt - 1)
    logDebug(`Retrying ${operation} in ${delayMs}ms...`)
    await sleep(delayMs)
  }
  throw new Error(`${lastError} after ${MAX_RETRIES} attempts`)
}
/**
 * Downloads a single file from the Anthropic Public Files API
 *
 * Retries transient failures (5xx, network errors) with exponential backoff;
 * 404/401/403 abort immediately by throwing out of the retry loop.
 *
 * @param fileId - The file ID (e.g., "file_011CNha8iCJcU1wXNR6q4V8w")
 * @param config - Files API configuration
 * @returns The file content as a Buffer
 * @throws Error on non-retriable statuses (404/401/403), non-axios errors,
 *   or after all retries are exhausted
 */
export async function downloadFile(
  fileId: string,
  config: FilesApiConfig,
): Promise<Buffer> {
  const baseUrl = config.baseUrl || getDefaultApiBaseUrl()
  const url = `${baseUrl}/v1/files/${fileId}/content`
  const headers = {
    Authorization: `Bearer ${config.oauthToken}`,
    'anthropic-version': ANTHROPIC_VERSION,
    'anthropic-beta': FILES_API_BETA_HEADER,
  }
  logDebug(`Downloading file ${fileId} from ${url}`)
  return retryWithBackoff(`Download file ${fileId}`, async () => {
    try {
      const response = await axios.get(url, {
        headers,
        responseType: 'arraybuffer',
        timeout: 60000, // 60 second timeout for large files
        // 5xx responses throw (axios error) and become retriable below;
        // everything under 500 is handled explicitly here.
        validateStatus: status => status < 500,
      })
      if (response.status === 200) {
        logDebug(`Downloaded file ${fileId} (${response.data.length} bytes)`)
        return { done: true, value: Buffer.from(response.data) }
      }
      // Non-retriable errors - throw immediately
      if (response.status === 404) {
        throw new Error(`File not found: ${fileId}`)
      }
      if (response.status === 401) {
        throw new Error('Authentication failed: invalid or missing API key')
      }
      if (response.status === 403) {
        throw new Error(`Access denied to file: ${fileId}`)
      }
      // Any other <500 status: let retryWithBackoff try again.
      return { done: false, error: `status ${response.status}` }
    } catch (error) {
      // Only axios (network/5xx) errors are retriable; rethrow the rest.
      if (!axios.isAxiosError(error)) {
        throw error
      }
      return { done: false, error: error.message }
    }
  })
}
/**
 * Builds the full download path {basePath}/{sessionId}/uploads/{relativePath},
 * normalizing the relative path and stripping redundant uploads prefixes.
 *
 * @returns The absolute destination path, or null when the relative path
 *   would traverse above the workspace.
 */
export function buildDownloadPath(
  basePath: string,
  sessionId: string,
  relativePath: string,
): string | null {
  const normalized = path.normalize(relativePath)
  // Reject traversal above the workspace root.
  if (normalized.startsWith('..')) {
    logDebugError(
      `Invalid file path: ${relativePath}. Path must not traverse above workspace`,
    )
    return null
  }
  const uploadsBase = path.join(basePath, sessionId, 'uploads')
  // Callers sometimes pass paths that already contain the uploads prefix;
  // strip it so we don't nest uploads/uploads/.
  const redundantPrefixes = [
    uploadsBase + path.sep,
    path.sep + 'uploads' + path.sep,
  ]
  let cleanPath = normalized
  for (const prefix of redundantPrefixes) {
    if (normalized.startsWith(prefix)) {
      cleanPath = normalized.slice(prefix.length)
      break
    }
  }
  return path.join(uploadsBase, cleanPath)
}
/**
 * Downloads a file and saves it to the session-specific workspace directory.
 *
 * Never throws: failures (invalid path, download error, write error) are
 * reported via the returned DownloadResult.
 *
 * @param attachment - The file attachment to download
 * @param config - Files API configuration
 * @returns Download result with success/failure status
 */
export async function downloadAndSaveFile(
  attachment: File,
  config: FilesApiConfig,
): Promise<DownloadResult> {
  const { fileId, relativePath } = attachment
  const fullPath = buildDownloadPath(getCwd(), config.sessionId, relativePath)
  if (fullPath === null) {
    return {
      fileId,
      path: '',
      success: false,
      error: `Invalid file path: ${relativePath}`,
    }
  }
  try {
    const content = await downloadFile(fileId, config)
    // Create parent directories as needed, then write the payload.
    await fs.mkdir(path.dirname(fullPath), { recursive: true })
    await fs.writeFile(fullPath, content)
    logDebug(`Saved file ${fileId} to ${fullPath} (${content.length} bytes)`)
    return {
      fileId,
      path: fullPath,
      success: true,
      bytesWritten: content.length,
    }
  } catch (error) {
    logDebugError(`Failed to download file ${fileId}: ${errorMessage(error)}`)
    if (error instanceof Error) {
      logError(error)
    }
    return {
      fileId,
      path: fullPath,
      success: false,
      error: errorMessage(error),
    }
  }
}
// Default concurrency limit for parallel downloads
const DEFAULT_CONCURRENCY = 5
/**
 * Execute an async function over items with bounded concurrency.
 *
 * @param items - Items to process
 * @param fn - Async function applied to each item (receives the item index)
 * @param concurrency - Maximum concurrent operations
 * @returns Results in the same order as input items
 */
async function parallelWithLimit<T, R>(
  items: T[],
  fn: (item: T, index: number) => Promise<R>,
  concurrency: number,
): Promise<R[]> {
  const results: R[] = new Array(items.length)
  let nextIndex = 0
  // Each worker repeatedly claims the next unprocessed index until none remain.
  const runWorker = async (): Promise<void> => {
    for (;;) {
      const i = nextIndex++
      if (i >= items.length) {
        return
      }
      const item = items[i]
      // Sparse-array holes read as undefined and are skipped.
      if (item === undefined) {
        continue
      }
      results[i] = await fn(item, i)
    }
  }
  const workerCount = Math.min(concurrency, items.length)
  await Promise.all(Array.from({ length: workerCount }, () => runWorker()))
  return results
}
/**
 * Downloads all file attachments for a session in parallel.
 *
 * @param files - List of file attachments to download
 * @param config - Files API configuration
 * @param concurrency - Maximum concurrent downloads (default: 5)
 * @returns Array of download results in the same order as input
 */
export async function downloadSessionFiles(
  files: File[],
  config: FilesApiConfig,
  concurrency: number = DEFAULT_CONCURRENCY,
): Promise<DownloadResult[]> {
  if (!files.length) {
    return []
  }
  logDebug(
    `Downloading ${files.length} file(s) for session ${config.sessionId}`,
  )
  const startedAt = Date.now()
  const results = await parallelWithLimit(
    files,
    f => downloadAndSaveFile(f, config),
    concurrency,
  )
  const elapsedMs = Date.now() - startedAt
  const successCount = count(results, r => r.success)
  logDebug(
    `Downloaded ${successCount}/${files.length} file(s) in ${elapsedMs}ms`,
  )
  return results
}
// ============================================================================
// Upload Functions (BYOC mode)
// ============================================================================
/**
 * Result of a file upload operation
 *
 * Discriminated on `success`: the success arm carries the server-assigned
 * file ID and byte size, the failure arm carries an error description.
 */
export type UploadResult =
  | {
      path: string
      fileId: string
      size: number
      success: true
    }
  | {
      path: string
      error: string
      success: false
    }
/**
 * Upload a single file to the Files API (BYOC mode)
 *
 * Size validation is performed after reading the file to avoid TOCTOU race
 * conditions where the file size could change between initial check and upload.
 *
 * Never throws for expected failure modes: read errors, oversize files,
 * auth/permission/size rejections, cancellation, and exhausted retries all
 * come back as a failed UploadResult.
 *
 * @param filePath - Absolute path to the file to upload
 * @param relativePath - Relative path for the file; its basename becomes the
 *   multipart filename sent to the API
 * @param config - Files API configuration
 * @param opts - Optional abort signal to cancel the in-flight request
 * @returns Upload result with success/failure status
 */
export async function uploadFile(
  filePath: string,
  relativePath: string,
  config: FilesApiConfig,
  opts?: { signal?: AbortSignal },
): Promise<UploadResult> {
  const baseUrl = config.baseUrl || getDefaultApiBaseUrl()
  const url = `${baseUrl}/v1/files`
  const headers = {
    Authorization: `Bearer ${config.oauthToken}`,
    'anthropic-version': ANTHROPIC_VERSION,
    'anthropic-beta': FILES_API_BETA_HEADER,
  }
  logDebug(`Uploading file ${filePath} as ${relativePath}`)
  // Read file content first (outside retry loop since it's not a network operation)
  let content: Buffer
  try {
    content = await fs.readFile(filePath)
  } catch (error) {
    logEvent('tengu_file_upload_failed', {
      error_type:
        'file_read' as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
    })
    return {
      path: relativePath,
      error: errorMessage(error),
      success: false,
    }
  }
  const fileSize = content.length
  if (fileSize > MAX_FILE_SIZE_BYTES) {
    logEvent('tengu_file_upload_failed', {
      error_type:
        'file_too_large' as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
    })
    return {
      path: relativePath,
      error: `File exceeds maximum size of ${MAX_FILE_SIZE_BYTES} bytes (actual: ${fileSize})`,
      success: false,
    }
  }
  // Use crypto.randomUUID for boundary to avoid collisions when uploads start same millisecond
  const boundary = `----FormBoundary${randomUUID()}`
  // BUG FIX: the Content-Disposition header previously contained the literal
  // text `$(unknown)` and this computed basename was never used. Escape
  // quotes and CRLF so a hostile path component cannot break out of the
  // header (multipart injection).
  const filename = path
    .basename(relativePath)
    .replace(/"/g, '%22')
    .replace(/[\r\n]/g, '')
  // Build the multipart body
  const bodyParts: Buffer[] = []
  // File part
  bodyParts.push(
    Buffer.from(
      `--${boundary}\r\n` +
        `Content-Disposition: form-data; name="file"; filename="${filename}"\r\n` +
        `Content-Type: application/octet-stream\r\n\r\n`,
    ),
  )
  bodyParts.push(content)
  bodyParts.push(Buffer.from('\r\n'))
  // Purpose part
  bodyParts.push(
    Buffer.from(
      `--${boundary}\r\n` +
        `Content-Disposition: form-data; name="purpose"\r\n\r\n` +
        `user_data\r\n`,
    ),
  )
  // End boundary
  bodyParts.push(Buffer.from(`--${boundary}--\r\n`))
  const body = Buffer.concat(bodyParts)
  try {
    return await retryWithBackoff(`Upload file ${relativePath}`, async () => {
      try {
        const response = await axios.post(url, body, {
          headers: {
            ...headers,
            'Content-Type': `multipart/form-data; boundary=${boundary}`,
            'Content-Length': body.length.toString(),
          },
          timeout: 120000, // 2 minute timeout for uploads
          signal: opts?.signal,
          // 5xx responses throw and become retriable; <500 handled below.
          validateStatus: status => status < 500,
        })
        if (response.status === 200 || response.status === 201) {
          const fileId = response.data?.id
          if (!fileId) {
            return {
              done: false,
              error: 'Upload succeeded but no file ID returned',
            }
          }
          logDebug(`Uploaded file ${filePath} -> ${fileId} (${fileSize} bytes)`)
          return {
            done: true,
            value: {
              path: relativePath,
              fileId,
              size: fileSize,
              success: true as const,
            },
          }
        }
        // Non-retriable errors - throw to exit retry loop
        if (response.status === 401) {
          logEvent('tengu_file_upload_failed', {
            error_type:
              'auth' as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
          })
          throw new UploadNonRetriableError(
            'Authentication failed: invalid or missing API key',
          )
        }
        if (response.status === 403) {
          logEvent('tengu_file_upload_failed', {
            error_type:
              'forbidden' as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
          })
          throw new UploadNonRetriableError('Access denied for upload')
        }
        if (response.status === 413) {
          logEvent('tengu_file_upload_failed', {
            error_type:
              'size' as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
          })
          throw new UploadNonRetriableError('File too large for upload')
        }
        return { done: false, error: `status ${response.status}` }
      } catch (error) {
        // Non-retriable errors propagate up
        if (error instanceof UploadNonRetriableError) {
          throw error
        }
        if (axios.isCancel(error)) {
          throw new UploadNonRetriableError('Upload canceled')
        }
        // Network errors are retriable
        if (axios.isAxiosError(error)) {
          return { done: false, error: error.message }
        }
        throw error
      }
    })
  } catch (error) {
    if (error instanceof UploadNonRetriableError) {
      return {
        path: relativePath,
        error: error.message,
        success: false,
      }
    }
    logEvent('tengu_file_upload_failed', {
      error_type:
        'network' as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
    })
    return {
      path: relativePath,
      error: errorMessage(error),
      success: false,
    }
  }
}
/**
 * Error class for non-retriable upload failures (auth, forbidden, 413,
 * cancellation). Thrown inside the retry callback to abort retryWithBackoff
 * immediately; caught by uploadFile and converted to a failed UploadResult.
 */
class UploadNonRetriableError extends Error {
  constructor(message: string) {
    super(message)
    this.name = 'UploadNonRetriableError'
  }
}
/**
 * Upload multiple files in parallel with concurrency limit (BYOC mode).
 *
 * @param files - Array of files to upload (path and relativePath)
 * @param config - Files API configuration
 * @param concurrency - Maximum concurrent uploads (default: 5)
 * @returns Array of upload results in the same order as input
 */
export async function uploadSessionFiles(
  files: Array<{ path: string; relativePath: string }>,
  config: FilesApiConfig,
  concurrency: number = DEFAULT_CONCURRENCY,
): Promise<UploadResult[]> {
  if (!files.length) {
    return []
  }
  logDebug(`Uploading ${files.length} file(s) for session ${config.sessionId}`)
  const startedAt = Date.now()
  const results = await parallelWithLimit(
    files,
    f => uploadFile(f.path, f.relativePath, config),
    concurrency,
  )
  const elapsedMs = Date.now() - startedAt
  const successCount = count(results, r => r.success)
  logDebug(`Uploaded ${successCount}/${files.length} file(s) in ${elapsedMs}ms`)
  return results
}
// ============================================================================
// List Files Functions (1P/Cloud mode)
// ============================================================================
/**
 * File metadata returned from listFilesCreatedAfter
 */
export type FileMetadata = {
  filename: string
  // Server-assigned ID (API field: id)
  fileId: string
  // Size in bytes (API field: size_bytes)
  size: number
}
/**
 * List files created after a given timestamp (1P/Cloud mode).
 * Uses the public GET /v1/files endpoint with after_created_at query param.
 * Handles pagination via after_id cursor when has_more is true.
 *
 * @param afterCreatedAt - ISO 8601 timestamp to filter files created after
 * @param config - Files API configuration
 * @returns Array of file metadata for files created after the timestamp
 * @throws Error on 401/403 or when a page request exhausts its retries
 */
export async function listFilesCreatedAfter(
  afterCreatedAt: string,
  config: FilesApiConfig,
): Promise<FileMetadata[]> {
  const baseUrl = config.baseUrl || getDefaultApiBaseUrl()
  const headers = {
    Authorization: `Bearer ${config.oauthToken}`,
    'anthropic-version': ANTHROPIC_VERSION,
    'anthropic-beta': FILES_API_BETA_HEADER,
  }
  logDebug(`Listing files created after ${afterCreatedAt}`)
  const allFiles: FileMetadata[] = []
  let afterId: string | undefined
  // Paginate through results
  while (true) {
    const params: Record<string, string> = {
      after_created_at: afterCreatedAt,
    }
    if (afterId) {
      params.after_id = afterId
    }
    // Each page request gets its own retry budget.
    const page = await retryWithBackoff(
      `List files after ${afterCreatedAt}`,
      async () => {
        try {
          const response = await axios.get(`${baseUrl}/v1/files`, {
            headers,
            params,
            timeout: 60000,
            // 5xx responses throw and become retriable below.
            validateStatus: status => status < 500,
          })
          if (response.status === 200) {
            return { done: true, value: response.data }
          }
          // Non-retriable auth failures abort the whole listing.
          if (response.status === 401) {
            logEvent('tengu_file_list_failed', {
              error_type:
                'auth' as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
            })
            throw new Error('Authentication failed: invalid or missing API key')
          }
          if (response.status === 403) {
            logEvent('tengu_file_list_failed', {
              error_type:
                'forbidden' as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
            })
            throw new Error('Access denied to list files')
          }
          return { done: false, error: `status ${response.status}` }
        } catch (error) {
          if (!axios.isAxiosError(error)) {
            throw error
          }
          logEvent('tengu_file_list_failed', {
            error_type:
              'network' as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
          })
          return { done: false, error: error.message }
        }
      },
    )
    // NOTE(review): `page` is untyped here; assumes the response shape
    // { data: [{ filename, id, size_bytes }], has_more } — confirm against
    // the Files API reference.
    const files = page.data || []
    for (const f of files) {
      allFiles.push({
        filename: f.filename,
        fileId: f.id,
        size: f.size_bytes,
      })
    }
    if (!page.has_more) {
      break
    }
    // Use the last file's ID as cursor for next page
    const lastFile = files.at(-1)
    if (!lastFile?.id) {
      break
    }
    afterId = lastFile.id
  }
  logDebug(`Listed ${allFiles.length} files created after ${afterCreatedAt}`)
  return allFiles
}
// ============================================================================
// Parse Functions
// ============================================================================
/**
 * Parse file attachment specs from CLI arguments.
 * Format: <file_id>:<relative_path>. Specs without a colon are silently
 * skipped; specs with an empty ID or path are skipped with a debug log.
 *
 * @param fileSpecs - Array of file spec strings
 * @returns Parsed file attachments
 */
export function parseFileSpecs(fileSpecs: string[]): File[] {
  // Sandbox-gateway may pass multiple specs as a single space-separated string
  const expandedSpecs = fileSpecs.flatMap(s => s.split(' ').filter(Boolean))
  const files: File[] = []
  for (const spec of expandedSpecs) {
    const separator = spec.indexOf(':')
    if (separator < 0) {
      continue
    }
    const fileId = spec.slice(0, separator)
    const relativePath = spec.slice(separator + 1)
    if (fileId && relativePath) {
      files.push({ fileId, relativePath })
    } else {
      logDebugError(
        `Invalid file spec: ${spec}. Both file_id and path are required`,
      )
    }
  }
  return files
}
+60
View File
@@ -0,0 +1,60 @@
import axios from 'axios'
import { getOauthConfig } from '../../constants/oauth.js'
import { getGlobalConfig, saveGlobalConfig } from '../../utils/config.js'
import { getAuthHeaders } from '../../utils/http.js'
import { logError } from '../../utils/log.js'
import { getClaudeCodeUserAgent } from '../../utils/userAgent.js'
/**
* Fetch the user's first Claude Code token date and store in config.
* This is called after successful login to cache when they started using Claude Code.
*/
export async function fetchAndStoreClaudeCodeFirstTokenDate(): Promise<void> {
try {
const config = getGlobalConfig()
if (config.claudeCodeFirstTokenDate !== undefined) {
return
}
const authHeaders = getAuthHeaders()
if (authHeaders.error) {
logError(new Error(`Failed to get auth headers: ${authHeaders.error}`))
return
}
const oauthConfig = getOauthConfig()
const url = `${oauthConfig.BASE_API_URL}/api/organization/claude_code_first_token_date`
const response = await axios.get(url, {
headers: {
...authHeaders.headers,
'User-Agent': getClaudeCodeUserAgent(),
},
timeout: 10000,
})
const firstTokenDate = response.data?.first_token_date ?? null
// Validate the date if it's not null
if (firstTokenDate !== null) {
const dateTime = new Date(firstTokenDate).getTime()
if (isNaN(dateTime)) {
logError(
new Error(
`Received invalid first_token_date from API: ${firstTokenDate}`,
),
)
// Don't save invalid dates
return
}
}
saveGlobalConfig(current => ({
...current,
claudeCodeFirstTokenDate: firstTokenDate,
}))
} catch (error) {
logError(error)
}
}
+357
View File
@@ -0,0 +1,357 @@
import axios from 'axios'
import memoize from 'lodash-es/memoize.js'
import {
type AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
logEvent,
} from 'src/services/analytics/index.js'
import { getOauthAccountInfo, isConsumerSubscriber } from 'src/utils/auth.js'
import { logForDebugging } from 'src/utils/debug.js'
import { gracefulShutdown } from 'src/utils/gracefulShutdown.js'
import { isEssentialTrafficOnly } from 'src/utils/privacyLevel.js'
import { writeToStderr } from 'src/utils/process.js'
import { getOauthConfig } from '../../constants/oauth.js'
import { getGlobalConfig, saveGlobalConfig } from '../../utils/config.js'
import {
getAuthHeaders,
getUserAgent,
withOAuth401Retry,
} from '../../utils/http.js'
import { logError } from '../../utils/log.js'
import { getClaudeCodeUserAgent } from '../../utils/userAgent.js'
// Cache expiration: 24 hours
const GROVE_CACHE_EXPIRATION_MS = 24 * 60 * 60 * 1000
// Per-account Grove settings from /api/oauth/account/settings.
export type AccountSettings = {
  // null = the user has not made a choice yet
  grove_enabled: boolean | null
  // Timestamp of the last notice view, or null if never viewed
  grove_notice_viewed_at: string | null
}
// Grove rollout configuration from /api/claude_code_grove.
export type GroveConfig = {
  grove_enabled: boolean
  domain_excluded: boolean
  notice_is_grace_period: boolean
  notice_reminder_frequency: number | null
}
/**
 * Result type that distinguishes between API failure and success.
 * - success: true means API call succeeded (data may still contain null fields)
 * - success: false means API call failed after retry
 */
export type ApiResult<T> = { success: true; data: T } | { success: false }
/**
 * Get the current Grove settings for the user account.
 * Returns ApiResult to distinguish between API failure and success.
 * Uses existing OAuth 401 retry, then returns failure if that doesn't help.
 *
 * Memoized for the session to avoid redundant per-render requests.
 * Cache is invalidated in updateGroveSettings() so post-toggle reads are fresh.
 */
export const getGroveSettings = memoize(
  async (): Promise<ApiResult<AccountSettings>> => {
    // Grove is a notification feature; during an outage, skipping it is correct.
    if (isEssentialTrafficOnly()) {
      return { success: false }
    }
    try {
      // Auth headers are re-read inside the callback so the 401 retry can
      // pick up a refreshed token.
      const response = await withOAuth401Retry(() => {
        const authHeaders = getAuthHeaders()
        if (authHeaders.error) {
          throw new Error(`Failed to get auth headers: ${authHeaders.error}`)
        }
        return axios.get<AccountSettings>(
          `${getOauthConfig().BASE_API_URL}/api/oauth/account/settings`,
          {
            headers: {
              ...authHeaders.headers,
              'User-Agent': getClaudeCodeUserAgent(),
            },
          },
        )
      })
      return { success: true, data: response.data }
    } catch (err) {
      logError(err)
      // Don't cache failures — transient network issues would lock the user
      // out of privacy settings for the entire session (deadlock: dialog needs
      // success to render the toggle, toggle calls updateGroveSettings which
      // is the only other place the cache is cleared).
      getGroveSettings.cache.clear?.()
      return { success: false }
    }
  },
)
/**
* Mark that the Grove notice has been viewed by the user
*/
export async function markGroveNoticeViewed(): Promise<void> {
try {
await withOAuth401Retry(() => {
const authHeaders = getAuthHeaders()
if (authHeaders.error) {
throw new Error(`Failed to get auth headers: ${authHeaders.error}`)
}
return axios.post(
`${getOauthConfig().BASE_API_URL}/api/oauth/account/grove_notice_viewed`,
{},
{
headers: {
...authHeaders.headers,
'User-Agent': getClaudeCodeUserAgent(),
},
},
)
})
// This mutates grove_notice_viewed_at server-side — Grove.tsx:87 reads it
// to decide whether to show the dialog. Without invalidation a same-session
// remount would read stale viewed_at:null and re-show the dialog.
getGroveSettings.cache.clear?.()
} catch (err) {
logError(err)
}
}
/**
* Update Grove settings for the user account
*/
export async function updateGroveSettings(
groveEnabled: boolean,
): Promise<void> {
try {
await withOAuth401Retry(() => {
const authHeaders = getAuthHeaders()
if (authHeaders.error) {
throw new Error(`Failed to get auth headers: ${authHeaders.error}`)
}
return axios.patch(
`${getOauthConfig().BASE_API_URL}/api/oauth/account/settings`,
{
grove_enabled: groveEnabled,
},
{
headers: {
...authHeaders.headers,
'User-Agent': getClaudeCodeUserAgent(),
},
},
)
})
// Invalidate memoized settings so the post-toggle confirmation
// read in privacy-settings.tsx picks up the new value.
getGroveSettings.cache.clear?.()
} catch (err) {
logError(err)
}
}
/**
* Check if user is qualified for Grove (non-blocking, cache-first).
*
* This function never blocks on network - it returns cached data immediately
* and fetches in the background if needed. On cold start (no cache), it returns
* false and the Grove dialog won't show until the next session.
*/
export async function isQualifiedForGrove(): Promise<boolean> {
if (!isConsumerSubscriber()) {
return false
}
const accountId = getOauthAccountInfo()?.accountUuid
if (!accountId) {
return false
}
const globalConfig = getGlobalConfig()
const cachedEntry = globalConfig.groveConfigCache?.[accountId]
const now = Date.now()
// No cache - trigger background fetch and return false (non-blocking)
// The Grove dialog won't show this session, but will next time if eligible
if (!cachedEntry) {
logForDebugging(
'Grove: No cache, fetching config in background (dialog skipped this session)',
)
void fetchAndStoreGroveConfig(accountId)
return false
}
// Cache exists but is stale - return cached value and refresh in background
if (now - cachedEntry.timestamp > GROVE_CACHE_EXPIRATION_MS) {
logForDebugging(
'Grove: Cache stale, returning cached data and refreshing in background',
)
void fetchAndStoreGroveConfig(accountId)
return cachedEntry.grove_enabled
}
// Cache is fresh - return it immediately
logForDebugging('Grove: Using fresh cached config')
return cachedEntry.grove_enabled
}
/**
 * Fetch Grove config from the API and persist it in the per-account cache.
 * Skips the write when the cached entry is already fresh and unchanged.
 * Best-effort: failures are only logged at debug level.
 */
async function fetchAndStoreGroveConfig(accountId: string): Promise<void> {
  try {
    const result = await getGroveNoticeConfig()
    if (!result.success) {
      return
    }
    const groveEnabled = result.data.grove_enabled
    const existing = getGlobalConfig().groveConfigCache?.[accountId]
    // Avoid a redundant config write when nothing changed and the entry
    // has not expired yet.
    const existingIsFresh =
      existing !== undefined &&
      existing.grove_enabled === groveEnabled &&
      Date.now() - existing.timestamp <= GROVE_CACHE_EXPIRATION_MS
    if (existingIsFresh) {
      return
    }
    saveGlobalConfig(current => ({
      ...current,
      groveConfigCache: {
        ...current.groveConfigCache,
        [accountId]: {
          grove_enabled: groveEnabled,
          timestamp: Date.now(),
        },
      },
    }))
  } catch (err) {
    logForDebugging(`Grove: Failed to fetch and store config: ${err}`)
  }
}
/**
 * Get Grove Statsig configuration from the API.
 * Returns ApiResult to distinguish between API failure and success.
 * Uses existing OAuth 401 retry, then returns failure if that doesn't help.
 *
 * Memoized for the session. Like getGroveSettings, failures are NOT cached:
 * a transient network error must not pin `{success: false}` for the rest of
 * the session.
 */
export const getGroveNoticeConfig = memoize(
  async (): Promise<ApiResult<GroveConfig>> => {
    // Grove is a notification feature; during an outage, skipping it is correct.
    if (isEssentialTrafficOnly()) {
      return { success: false }
    }
    try {
      const response = await withOAuth401Retry(() => {
        const authHeaders = getAuthHeaders()
        if (authHeaders.error) {
          throw new Error(`Failed to get auth headers: ${authHeaders.error}`)
        }
        return axios.get<GroveConfig>(
          `${getOauthConfig().BASE_API_URL}/api/claude_code_grove`,
          {
            headers: {
              ...authHeaders.headers,
              // NOTE(review): the other Grove endpoints send
              // getClaudeCodeUserAgent(); confirm the plain getUserAgent()
              // here is intentional.
              'User-Agent': getUserAgent(),
            },
            timeout: 3000, // Short timeout - if slow, skip Grove dialog
          },
        )
      })
      // Map the API response to the GroveConfig type
      const {
        grove_enabled,
        domain_excluded,
        notice_is_grace_period,
        notice_reminder_frequency,
      } = response.data
      return {
        success: true,
        data: {
          grove_enabled,
          domain_excluded: domain_excluded ?? false,
          notice_is_grace_period: notice_is_grace_period ?? true,
          notice_reminder_frequency,
        },
      }
    } catch (err) {
      logForDebugging(`Failed to fetch Grove notice config: ${err}`)
      // Consistency fix: mirror getGroveSettings and drop the memoized entry
      // so a transient failure is retried on the next call instead of being
      // cached as {success: false} for the whole session.
      getGroveNoticeConfig.cache.clear?.()
      return { success: false }
    }
  },
)
/**
* Determines whether the Grove dialog should be shown.
* Returns false if either API call failed (after retry) - we hide the dialog on API failure.
*/
/**
 * Determines whether the Grove dialog should be shown.
 *
 * Returns false if either API call failed (after retry) — the dialog is
 * hidden on API failure. Otherwise the decision cascades: a user who has
 * already chosen never sees it; `showIfAlreadyViewed` or an expired grace
 * period force it; during the grace period it reappears only on the
 * configured reminder cadence (or once, if never viewed).
 */
export function calculateShouldShowGrove(
  settingsResult: ApiResult<AccountSettings>,
  configResult: ApiResult<GroveConfig>,
  showIfAlreadyViewed: boolean,
): boolean {
  // Hide dialog on API failure (after retry)
  if (!settingsResult.success) return false
  if (!configResult.success) return false

  const settings = settingsResult.data
  const config = configResult.data

  // The user already made a Grove choice — nothing left to ask.
  if (settings.grove_enabled !== null) return false
  if (showIfAlreadyViewed) return true
  // Grace period over — the notice is mandatory.
  if (!config.notice_is_grace_period) return true

  // Check if we need to remind the user to accept the terms and choose
  // whether to help improve Claude.
  const frequency = config.notice_reminder_frequency
  const viewedAt = settings.grove_notice_viewed_at
  if (frequency !== null && viewedAt) {
    const elapsedDays = Math.floor(
      (Date.now() - new Date(viewedAt).getTime()) / (1000 * 60 * 60 * 24),
    )
    return elapsedDays >= frequency
  }
  // Show if never viewed before
  return viewedAt === null || viewedAt === undefined
}
/**
 * Grove handling for non-interactive runs: if the user still owes a Grove
 * choice, print a notice to stderr. During the grace period this is
 * informational (the run continues and the view is recorded); after it,
 * the process exits with status 1 and directs the user to run `claude`.
 */
export async function checkGroveForNonInteractive(): Promise<void> {
  const [settingsResult, configResult] = await Promise.all([
    getGroveSettings(),
    getGroveNoticeConfig(),
  ])
  // Check if user hasn't made a choice yet (returns false on API failure)
  if (!calculateShouldShowGrove(settingsResult, configResult, false)) {
    return
  }
  // Only reachable when both API calls succeeded.
  const config = configResult.success ? configResult.data : null
  logEvent('tengu_grove_print_viewed', {
    dismissable:
      config?.notice_is_grace_period as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
  })
  if (config === null || config.notice_is_grace_period) {
    // Grace period is still active - show informational message and continue
    writeToStderr(
      '\nAn update to our Consumer Terms and Privacy Policy will take effect on October 8, 2025. Run `claude` to review the updated terms.\n\n',
    )
    await markGroveNoticeViewed()
    return
  }
  // Grace period has ended - show error message and exit
  writeToStderr(
    '\n[ACTION REQUIRED] An update to our Consumer Terms and Privacy Policy has taken effect on October 8, 2025. You must run `claude` to review the updated terms.\n\n',
  )
  await gracefulShutdown(1)
}
+788
View File
@@ -0,0 +1,788 @@
import { feature } from 'bun:bundle'
import { APIError } from '@anthropic-ai/sdk'
import type {
BetaStopReason,
BetaUsage as Usage,
} from '@anthropic-ai/sdk/resources/beta/messages/messages.mjs'
import {
addToTotalDurationState,
consumePostCompaction,
getIsNonInteractiveSession,
getLastApiCompletionTimestamp,
getTeleportedSessionInfo,
markFirstTeleportMessageLogged,
setLastApiCompletionTimestamp,
} from 'src/bootstrap/state.js'
import type { QueryChainTracking } from 'src/Tool.js'
import { isConnectorTextBlock } from 'src/types/connectorText.js'
import type { AssistantMessage } from 'src/types/message.js'
import { logForDebugging } from 'src/utils/debug.js'
import type { EffortLevel } from 'src/utils/effort.js'
import { logError } from 'src/utils/log.js'
import { getAPIProviderForStatsig } from 'src/utils/model/providers.js'
import type { PermissionMode } from 'src/utils/permissions/PermissionMode.js'
import { jsonStringify } from 'src/utils/slowOperations.js'
import { logOTelEvent } from 'src/utils/telemetry/events.js'
import {
endLLMRequestSpan,
isBetaTracingEnabled,
type Span,
} from 'src/utils/telemetry/sessionTracing.js'
import type { NonNullableUsage } from '../../entrypoints/sdk/sdkUtilityTypes.js'
import { consumeInvokingRequestId } from '../../utils/agentContext.js'
import {
type AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
logEvent,
} from '../analytics/index.js'
import { sanitizeToolNameForAnalytics } from '../analytics/metadata.js'
import { EMPTY_USAGE } from './emptyUsage.js'
import { classifyAPIError } from './errors.js'
import { extractConnectionErrorDetails } from './errorUtils.js'
export type { NonNullableUsage }
export { EMPTY_USAGE }
/** Strategy used for global prompt caching; surfaced in `tengu_api_success` analytics. */
export type GlobalCacheStrategy = 'tool_based' | 'system_prompt' | 'none'
/**
 * Extract a human-readable message from any thrown value.
 * Prefers the nested message in an Anthropic APIError body, then
 * Error.message, then plain string coercion.
 */
function getErrorMessage(error: unknown): string {
  if (error instanceof APIError) {
    const body = error.error as { error?: { message?: string } } | undefined
    const nested = body?.error?.message
    if (nested) {
      return nested
    }
  }
  if (error instanceof Error) {
    return error.message
  }
  return String(error)
}
/** AI gateways this module can recognize for analytics attribution. */
type KnownGateway =
  | 'litellm'
  | 'helicone'
  | 'portkey'
  | 'cloudflare-ai-gateway'
  | 'kong'
  | 'braintrust'
  | 'databricks'
// Gateway fingerprints for detecting AI gateways from response headers.
// Each entry lists lowercase header-name prefixes that only that gateway emits.
const GATEWAY_FINGERPRINTS: Partial<
  Record<KnownGateway, { prefixes: string[] }>
> = {
  // https://docs.litellm.ai/docs/proxy/response_headers
  litellm: {
    prefixes: ['x-litellm-'],
  },
  // https://docs.helicone.ai/helicone-headers/header-directory
  helicone: {
    prefixes: ['helicone-'],
  },
  // https://portkey.ai/docs/api-reference/response-schema
  portkey: {
    prefixes: ['x-portkey-'],
  },
  // https://developers.cloudflare.com/ai-gateway/evaluations/add-human-feedback-api/
  'cloudflare-ai-gateway': {
    prefixes: ['cf-aig-'],
  },
  // https://developer.konghq.com/ai-gateway/ — X-Kong-Upstream-Latency, X-Kong-Proxy-Latency
  kong: {
    prefixes: ['x-kong-'],
  },
  // https://www.braintrust.dev/docs/guides/proxy — x-bt-used-endpoint, x-bt-cached
  braintrust: {
    prefixes: ['x-bt-'],
  },
}
// Gateways that use provider-owned domains (not self-hosted), so the
// ANTHROPIC_BASE_URL hostname is a reliable signal even without a
// distinctive response header.
const GATEWAY_HOST_SUFFIXES: Partial<Record<KnownGateway, string[]>> = {
  // https://docs.databricks.com/aws/en/ai-gateway/
  databricks: [
    '.cloud.databricks.com',
    '.azuredatabricks.net',
    '.gcp.databricks.com',
  ],
}
/**
 * Best-effort detection of a known AI gateway sitting between the CLI and
 * the API. Checks response-header prefixes first (strongest signal), then
 * falls back to the base-URL hostname for gateways on provider-owned
 * domains. Returns undefined when nothing matches.
 */
function detectGateway({
  headers,
  baseUrl,
}: {
  headers?: globalThis.Headers
  baseUrl?: string
}): KnownGateway | undefined {
  if (headers) {
    // Header names are already lowercase from the Headers API
    const names: string[] = []
    headers.forEach((_, key) => names.push(key))
    const headerMatch = Object.entries(GATEWAY_FINGERPRINTS).find(
      ([, { prefixes }]) => names.some(h => prefixes.some(p => h.startsWith(p))),
    )
    if (headerMatch) {
      return headerMatch[0] as KnownGateway
    }
  }
  if (baseUrl) {
    try {
      const host = new URL(baseUrl).hostname.toLowerCase()
      const hostMatch = Object.entries(GATEWAY_HOST_SUFFIXES).find(
        ([, suffixes]) => suffixes.some(s => host.endsWith(s)),
      )
      if (hostMatch) {
        return hostMatch[0] as KnownGateway
      }
    } catch {
      // malformed URL — ignore
    }
  }
  return undefined
}
/**
 * Analytics metadata for Anthropic-related environment overrides.
 * Each key appears only when the corresponding env var is set, so logged
 * events omit unset overrides rather than carrying undefined values.
 */
function getAnthropicEnvMetadata() {
  const {
    ANTHROPIC_BASE_URL: baseUrl,
    ANTHROPIC_MODEL: envModel,
    ANTHROPIC_SMALL_FAST_MODEL: envSmallFastModel,
  } = process.env
  return {
    ...(baseUrl
      ? {
          baseUrl:
            baseUrl as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
        }
      : {}),
    ...(envModel
      ? {
          envModel:
            envModel as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
        }
      : {}),
    ...(envSmallFastModel
      ? {
          envSmallFastModel:
            envSmallFastModel as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
        }
      : {}),
  }
}
/**
 * Minutes elapsed since this binary was built, or undefined when the
 * build-time macro is absent or not a parseable date.
 */
function getBuildAgeMinutes(): number | undefined {
  if (!MACRO.BUILD_TIME) {
    return undefined
  }
  const builtAt = new Date(MACRO.BUILD_TIME).getTime()
  if (Number.isNaN(builtAt)) {
    return undefined
  }
  return Math.floor((Date.now() - builtAt) / 60000)
}
/**
 * Log a `tengu_api_query` analytics event just before an API request is sent.
 *
 * Captures model, message count, sampling temperature, enabled beta flags,
 * permission mode, query provenance (source + chain tracking), thinking and
 * effort settings, fast-mode flag, the previous request ID, and any
 * ANTHROPIC_* env overrides. Optional fields use conditional spreads so
 * unset values are omitted from the event rather than logged as undefined.
 */
export function logAPIQuery({
  model,
  messagesLength,
  temperature,
  betas,
  permissionMode,
  querySource,
  queryTracking,
  thinkingType,
  effortValue,
  fastMode,
  previousRequestId,
}: {
  model: string
  messagesLength: number
  temperature: number
  betas?: string[]
  permissionMode?: PermissionMode
  querySource: string
  queryTracking?: QueryChainTracking
  thinkingType?: 'adaptive' | 'enabled' | 'disabled'
  effortValue?: EffortLevel | null
  fastMode?: boolean
  previousRequestId?: string | null
}): void {
  logEvent('tengu_api_query', {
    model: model as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
    messagesLength,
    temperature: temperature,
    provider: getAPIProviderForStatsig(),
    buildAgeMins: getBuildAgeMinutes(),
    // Betas collapsed into one comma-separated metadata string
    ...(betas?.length
      ? {
          betas: betas.join(
            ',',
          ) as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
        }
      : {}),
    permissionMode:
      permissionMode as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
    querySource:
      querySource as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
    ...(queryTracking
      ? {
          queryChainId:
            queryTracking.chainId as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
          queryDepth: queryTracking.depth,
        }
      : {}),
    thinkingType:
      thinkingType as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
    effortValue:
      effortValue as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
    fastMode,
    ...(previousRequestId
      ? {
          previousRequestId:
            previousRequestId as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
        }
      : {}),
    ...getAnthropicEnvMetadata(),
  })
}
/**
 * Record a failed API request across every telemetry channel:
 * - debug log (connection-error details, client request ID for server lookup)
 * - error log (`logError`)
 * - `tengu_api_error` analytics event with model, classified error type,
 *   HTTP status, timing, retry attempt, gateway detection, and query context
 * - OTLP `api_error` event
 * - closes the LLM tracing span as unsuccessful
 * - for teleported sessions, logs the first-message error once
 */
export function logAPIError({
  error,
  model,
  messageCount,
  messageTokens,
  durationMs,
  durationMsIncludingRetries,
  attempt,
  requestId,
  clientRequestId,
  didFallBackToNonStreaming,
  promptCategory,
  headers,
  queryTracking,
  querySource,
  llmSpan,
  fastMode,
  previousRequestId,
}: {
  error: unknown
  model: string
  messageCount: number
  messageTokens?: number
  durationMs: number
  durationMsIncludingRetries: number
  attempt: number
  requestId?: string | null
  /** Client-generated ID sent as x-client-request-id header (survives timeouts) */
  clientRequestId?: string
  didFallBackToNonStreaming?: boolean
  promptCategory?: string
  headers?: globalThis.Headers
  queryTracking?: QueryChainTracking
  querySource?: string
  /** The span from startLLMRequestSpan - pass this to correctly match responses to requests */
  llmSpan?: Span
  fastMode?: boolean
  previousRequestId?: string | null
}): void {
  // Prefer headers attached to the APIError itself over the streaming headers
  const gateway = detectGateway({
    headers:
      error instanceof APIError && error.headers ? error.headers : headers,
    baseUrl: process.env.ANTHROPIC_BASE_URL,
  })
  const errStr = getErrorMessage(error)
  const status = error instanceof APIError ? String(error.status) : undefined
  const errorType = classifyAPIError(error)
  // Log detailed connection error info to debug logs (visible via --debug)
  const connectionDetails = extractConnectionErrorDetails(error)
  if (connectionDetails) {
    const sslLabel = connectionDetails.isSSLError ? ' (SSL error)' : ''
    logForDebugging(
      `Connection error details: code=${connectionDetails.code}${sslLabel}, message=${connectionDetails.message}`,
      { level: 'error' },
    )
  }
  const invocation = consumeInvokingRequestId()
  if (clientRequestId) {
    logForDebugging(
      `API error x-client-request-id=${clientRequestId} (give this to the API team for server-log lookup)`,
      { level: 'error' },
    )
  }
  logError(error as Error)
  logEvent('tengu_api_error', {
    model: model as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
    error: errStr as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
    status:
      status as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
    errorType:
      errorType as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
    messageCount,
    messageTokens,
    durationMs,
    durationMsIncludingRetries,
    attempt,
    provider: getAPIProviderForStatsig(),
    requestId:
      (requestId as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS) ||
      undefined,
    ...(invocation
      ? {
          invokingRequestId:
            invocation.invokingRequestId as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
          invocationKind:
            invocation.invocationKind as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
        }
      : {}),
    clientRequestId:
      (clientRequestId as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS) ||
      undefined,
    didFallBackToNonStreaming,
    ...(promptCategory
      ? {
          promptCategory:
            promptCategory as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
        }
      : {}),
    ...(gateway
      ? {
          gateway:
            gateway as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
        }
      : {}),
    ...(queryTracking
      ? {
          queryChainId:
            queryTracking.chainId as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
          queryDepth: queryTracking.depth,
        }
      : {}),
    ...(querySource
      ? {
          querySource:
            querySource as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
        }
      : {}),
    fastMode,
    ...(previousRequestId
      ? {
          previousRequestId:
            previousRequestId as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
        }
      : {}),
    ...getAnthropicEnvMetadata(),
  })
  // Log API error event for OTLP
  // NOTE(review): String(status) yields the literal "undefined" when status
  // is absent (non-APIError failures) — confirm that's intended for OTLP.
  void logOTelEvent('api_error', {
    model: model,
    error: errStr,
    status_code: String(status),
    duration_ms: String(durationMs),
    attempt: String(attempt),
    speed: fastMode ? 'fast' : 'normal',
  })
  // Pass the span to correctly match responses to requests when beta tracing is enabled
  endLLMRequestSpan(llmSpan, {
    success: false,
    statusCode: status ? parseInt(status) : undefined,
    error: errStr,
    attempt,
  })
  // Log first error for teleported sessions (reliability tracking)
  const teleportInfo = getTeleportedSessionInfo()
  if (teleportInfo?.isTeleported && !teleportInfo.hasLoggedFirstMessage) {
    logEvent('tengu_teleport_first_message_error', {
      session_id:
        teleportInfo.sessionId as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
      error_type:
        errorType as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
    })
    markFirstTeleportMessageLogged()
  }
}
/**
 * Emit the `tengu_api_success` analytics event for one completed API call.
 *
 * Bundles model info, token usage (input/output/cached/uncached), timing
 * (duration, TTFT, time since the previous call), retry attempt, cost,
 * session flags (non-interactive, --print, TTY, post-compaction), query
 * provenance, content-length breakdowns, gateway detection, and env
 * overrides. Also advances the last-API-completion timestamp used to
 * compute `timeSinceLastApiCallMs` on the next call. Optional fields are
 * conditionally spread so absent values are omitted from the event.
 */
function logAPISuccess({
  model,
  preNormalizedModel,
  messageCount,
  messageTokens,
  usage,
  durationMs,
  durationMsIncludingRetries,
  attempt,
  ttftMs,
  requestId,
  stopReason,
  costUSD,
  didFallBackToNonStreaming,
  querySource,
  gateway,
  queryTracking,
  permissionMode,
  globalCacheStrategy,
  textContentLength,
  thinkingContentLength,
  toolUseContentLengths,
  connectorTextBlockCount,
  fastMode,
  previousRequestId,
  betas,
}: {
  model: string
  preNormalizedModel: string
  messageCount: number
  messageTokens: number
  usage: Usage
  durationMs: number
  durationMsIncludingRetries: number
  attempt: number
  ttftMs: number | null
  requestId: string | null
  stopReason: BetaStopReason | null
  costUSD: number
  didFallBackToNonStreaming: boolean
  querySource: string
  gateway?: KnownGateway
  queryTracking?: QueryChainTracking
  permissionMode?: PermissionMode
  globalCacheStrategy?: GlobalCacheStrategy
  textContentLength?: number
  thinkingContentLength?: number
  toolUseContentLengths?: Record<string, number>
  connectorTextBlockCount?: number
  fastMode?: boolean
  previousRequestId?: string | null
  betas?: string[]
}): void {
  const isNonInteractiveSession = getIsNonInteractiveSession()
  const isPostCompaction = consumePostCompaction()
  const hasPrintFlag =
    process.argv.includes('-p') || process.argv.includes('--print')
  const now = Date.now()
  const lastCompletion = getLastApiCompletionTimestamp()
  // undefined on the first API call of the session
  const timeSinceLastApiCallMs =
    lastCompletion !== null ? now - lastCompletion : undefined
  const invocation = consumeInvokingRequestId()
  logEvent('tengu_api_success', {
    model: model as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
    // Only logged when normalization actually changed the model name
    ...(preNormalizedModel !== model
      ? {
          preNormalizedModel:
            preNormalizedModel as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
        }
      : {}),
    ...(betas?.length
      ? {
          betas: betas.join(
            ',',
          ) as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
        }
      : {}),
    messageCount,
    messageTokens,
    inputTokens: usage.input_tokens,
    outputTokens: usage.output_tokens,
    cachedInputTokens: usage.cache_read_input_tokens ?? 0,
    uncachedInputTokens: usage.cache_creation_input_tokens ?? 0,
    durationMs: durationMs,
    durationMsIncludingRetries: durationMsIncludingRetries,
    attempt: attempt,
    ttftMs: ttftMs ?? undefined,
    buildAgeMins: getBuildAgeMinutes(),
    provider: getAPIProviderForStatsig(),
    requestId:
      (requestId as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS) ??
      undefined,
    ...(invocation
      ? {
          invokingRequestId:
            invocation.invokingRequestId as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
          invocationKind:
            invocation.invocationKind as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
        }
      : {}),
    stop_reason:
      (stopReason as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS) ??
      undefined,
    costUSD,
    didFallBackToNonStreaming,
    isNonInteractiveSession,
    print: hasPrintFlag,
    isTTY: process.stdout.isTTY ?? false,
    querySource:
      querySource as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
    ...(gateway
      ? {
          gateway:
            gateway as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
        }
      : {}),
    ...(queryTracking
      ? {
          queryChainId:
            queryTracking.chainId as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
          queryDepth: queryTracking.depth,
        }
      : {}),
    permissionMode:
      permissionMode as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
    ...(globalCacheStrategy
      ? {
          globalCacheStrategy:
            globalCacheStrategy as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
        }
      : {}),
    ...(textContentLength !== undefined
      ? ({
          textContentLength,
        } as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS)
      : {}),
    ...(thinkingContentLength !== undefined
      ? ({
          thinkingContentLength,
        } as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS)
      : {}),
    ...(toolUseContentLengths !== undefined
      ? ({
          toolUseContentLengths: jsonStringify(
            toolUseContentLengths,
          ) as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
        } as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS)
      : {}),
    ...(connectorTextBlockCount !== undefined
      ? ({
          connectorTextBlockCount,
        } as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS)
      : {}),
    fastMode,
    // Log cache_deleted_input_tokens for cache editing analysis. Casts needed
    // because the field is intentionally not on NonNullableUsage (excluded from
    // external builds). Set by updateUsage() when cache editing is active.
    ...(feature('CACHED_MICROCOMPACT') &&
    ((usage as unknown as { cache_deleted_input_tokens?: number })
      .cache_deleted_input_tokens ?? 0) > 0
      ? {
          cacheDeletedInputTokens: (
            usage as unknown as { cache_deleted_input_tokens: number }
          ).cache_deleted_input_tokens,
        }
      : {}),
    ...(previousRequestId
      ? {
          previousRequestId:
            previousRequestId as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
        }
      : {}),
    ...(isPostCompaction ? { isPostCompaction } : {}),
    ...getAnthropicEnvMetadata(),
    timeSinceLastApiCallMs,
  })
  setLastApiCompletionTimestamp(now)
}
/**
 * Post-process a successful API response and fan it out to telemetry:
 * computes durations (with/without retries) and accumulates them into
 * session totals, extracts per-block content lengths from the new
 * assistant messages, detects any AI gateway, emits the analytics event
 * (via logAPISuccess), an OTLP `api_request` event, closes the LLM tracing
 * span (including model/thinking output when beta tracing is enabled),
 * and logs the first successful message for teleported sessions.
 */
export function logAPISuccessAndDuration({
  model,
  preNormalizedModel,
  start,
  startIncludingRetries,
  ttftMs,
  usage,
  attempt,
  messageCount,
  messageTokens,
  requestId,
  stopReason,
  didFallBackToNonStreaming,
  querySource,
  headers,
  costUSD,
  queryTracking,
  permissionMode,
  newMessages,
  llmSpan,
  globalCacheStrategy,
  requestSetupMs,
  attemptStartTimes,
  fastMode,
  previousRequestId,
  betas,
}: {
  model: string
  preNormalizedModel: string
  start: number
  startIncludingRetries: number
  ttftMs: number | null
  usage: NonNullableUsage
  attempt: number
  messageCount: number
  messageTokens: number
  requestId: string | null
  stopReason: BetaStopReason | null
  didFallBackToNonStreaming: boolean
  querySource: string
  headers?: globalThis.Headers
  costUSD: number
  queryTracking?: QueryChainTracking
  permissionMode?: PermissionMode
  /** Assistant messages from the response - used to extract model_output and thinking_output
   * when beta tracing is enabled */
  newMessages?: AssistantMessage[]
  /** The span from startLLMRequestSpan - pass this to correctly match responses to requests */
  llmSpan?: Span
  /** Strategy used for global prompt caching: 'tool_based', 'system_prompt', or 'none' */
  globalCacheStrategy?: GlobalCacheStrategy
  /** Time spent in pre-request setup before the successful attempt */
  requestSetupMs?: number
  /** Timestamps (Date.now()) of each attempt start — used for retry sub-spans in Perfetto */
  attemptStartTimes?: number[]
  fastMode?: boolean
  /** Request ID from the previous API call in this session */
  previousRequestId?: string | null
  betas?: string[]
}): void {
  const gateway = detectGateway({
    headers,
    baseUrl: process.env.ANTHROPIC_BASE_URL,
  })
  // Tally content-block sizes from the response for analytics. Thinking,
  // tool-use, and connector counters are reported as undefined when zero.
  let textContentLength: number | undefined
  let thinkingContentLength: number | undefined
  let toolUseContentLengths: Record<string, number> | undefined
  let connectorTextBlockCount: number | undefined
  if (newMessages) {
    let textLen = 0
    let thinkingLen = 0
    let hasToolUse = false
    const toolLengths: Record<string, number> = {}
    let connectorCount = 0
    for (const msg of newMessages) {
      for (const block of msg.message.content) {
        if (block.type === 'text') {
          textLen += block.text.length
        } else if (feature('CONNECTOR_TEXT') && isConnectorTextBlock(block)) {
          connectorCount++
        } else if (block.type === 'thinking') {
          thinkingLen += block.thinking.length
        } else if (
          block.type === 'tool_use' ||
          block.type === 'server_tool_use' ||
          block.type === 'mcp_tool_use'
        ) {
          // Tool input size bucketed per sanitized tool name
          const inputLen = jsonStringify(block.input).length
          const sanitizedName = sanitizeToolNameForAnalytics(block.name)
          toolLengths[sanitizedName] =
            (toolLengths[sanitizedName] ?? 0) + inputLen
          hasToolUse = true
        }
      }
    }
    textContentLength = textLen
    thinkingContentLength = thinkingLen > 0 ? thinkingLen : undefined
    toolUseContentLengths = hasToolUse ? toolLengths : undefined
    connectorTextBlockCount = connectorCount > 0 ? connectorCount : undefined
  }
  const durationMs = Date.now() - start
  const durationMsIncludingRetries = Date.now() - startIncludingRetries
  addToTotalDurationState(durationMsIncludingRetries, durationMs)
  logAPISuccess({
    model,
    preNormalizedModel,
    messageCount,
    messageTokens,
    usage,
    durationMs,
    durationMsIncludingRetries,
    attempt,
    ttftMs,
    requestId,
    stopReason,
    costUSD,
    didFallBackToNonStreaming,
    querySource,
    gateway,
    queryTracking,
    permissionMode,
    globalCacheStrategy,
    textContentLength,
    thinkingContentLength,
    toolUseContentLengths,
    connectorTextBlockCount,
    fastMode,
    previousRequestId,
    betas,
  })
  // Log API request event for OTLP
  void logOTelEvent('api_request', {
    model,
    input_tokens: String(usage.input_tokens),
    output_tokens: String(usage.output_tokens),
    cache_read_tokens: String(usage.cache_read_input_tokens),
    cache_creation_tokens: String(usage.cache_creation_input_tokens),
    cost_usd: String(costUSD),
    duration_ms: String(durationMs),
    speed: fastMode ? 'fast' : 'normal',
  })
  // Extract model output, thinking output, and tool call flag when beta tracing is enabled
  let modelOutput: string | undefined
  let thinkingOutput: string | undefined
  let hasToolCall: boolean | undefined
  if (isBetaTracingEnabled() && newMessages) {
    // Model output - visible to all users
    modelOutput =
      newMessages
        .flatMap(m =>
          m.message.content
            .filter(c => c.type === 'text')
            .map(c => (c as { type: 'text'; text: string }).text),
        )
        .join('\n') || undefined
    // Thinking output - Ant-only (build-time gated)
    if (process.env.USER_TYPE === 'ant') {
      thinkingOutput =
        newMessages
          .flatMap(m =>
            m.message.content
              .filter(c => c.type === 'thinking')
              .map(c => (c as { type: 'thinking'; thinking: string }).thinking),
          )
          .join('\n') || undefined
    }
    // Check if any tool_use blocks were in the output
    hasToolCall = newMessages.some(m =>
      m.message.content.some(c => c.type === 'tool_use'),
    )
  }
  // Pass the span to correctly match responses to requests when beta tracing is enabled
  endLLMRequestSpan(llmSpan, {
    success: true,
    inputTokens: usage.input_tokens,
    outputTokens: usage.output_tokens,
    cacheReadTokens: usage.cache_read_input_tokens,
    cacheCreationTokens: usage.cache_creation_input_tokens,
    attempt,
    modelOutput,
    thinkingOutput,
    hasToolCall,
    ttftMs: ttftMs ?? undefined,
    requestSetupMs,
    attemptStartTimes,
  })
  // Log first successful message for teleported sessions (reliability tracking)
  const teleportInfo = getTeleportedSessionInfo()
  if (teleportInfo?.isTeleported && !teleportInfo.hasLoggedFirstMessage) {
    logEvent('tengu_teleport_first_message_success', {
      session_id:
        teleportInfo.sessionId as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
    })
    markFirstTeleportMessageLogged()
  }
}
+159
View File
@@ -0,0 +1,159 @@
import axios from 'axios'
import { hasProfileScope, isClaudeAISubscriber } from '../../utils/auth.js'
import { getGlobalConfig, saveGlobalConfig } from '../../utils/config.js'
import { logForDebugging } from '../../utils/debug.js'
import { errorMessage } from '../../utils/errors.js'
import { getAuthHeaders, withOAuth401Retry } from '../../utils/http.js'
import { logError } from '../../utils/log.js'
import { memoizeWithTTLAsync } from '../../utils/memoize.js'
import { isEssentialTrafficOnly } from '../../utils/privacyLevel.js'
import { getClaudeCodeUserAgent } from '../../utils/userAgent.js'
/** Response body of the organizations/metrics_enabled endpoint. */
type MetricsEnabledResponse = {
  metrics_logging_enabled: boolean
}
/** Result surfaced to callers: the enabled flag plus whether the lookup errored. */
type MetricsStatus = {
  enabled: boolean
  hasError: boolean
}
// In-memory TTL — dedupes calls within a single process
const CACHE_TTL_MS = 60 * 60 * 1000
// Disk TTL — org settings rarely change. When disk cache is fresher than this,
// we skip the network entirely (no background refresh). This is what collapses
// N `claude -p` invocations into ~1 API call/day.
const DISK_CACHE_TTL_MS = 24 * 60 * 60 * 1000
/**
* Internal function to call the API and check if metrics are enabled
* This is wrapped by memoizeWithTTLAsync to add caching behavior
*/
/**
 * Call the metrics_enabled endpoint directly — no caching at this layer.
 * Wrapped by memoizeWithTTLAsync to add in-process caching behavior.
 * @throws when auth headers are unavailable or the HTTP request fails.
 */
async function _fetchMetricsEnabled(): Promise<MetricsEnabledResponse> {
  const auth = getAuthHeaders()
  if (auth.error) {
    throw new Error(`Auth error: ${auth.error}`)
  }
  const endpoint = `https://api.anthropic.com/api/claude_code/organizations/metrics_enabled`
  const response = await axios.get<MetricsEnabledResponse>(endpoint, {
    headers: {
      'Content-Type': 'application/json',
      'User-Agent': getClaudeCodeUserAgent(),
      ...auth.headers,
    },
    timeout: 5000,
  })
  return response.data
}
/**
 * Check with the API whether metrics logging is enabled for this org.
 * Never throws: failures are logged and reported as { enabled: false,
 * hasError: true } so callers can distinguish "off" from "unknown".
 */
async function _checkMetricsEnabledAPI(): Promise<MetricsStatus> {
  // Incident kill switch: skip the network call when nonessential traffic is
  // disabled. Returning enabled:false sheds load at the consumer
  // (bigqueryExporter skips export). Matches the non-subscriber shape.
  if (isEssentialTrafficOnly()) {
    return { enabled: false, hasError: false }
  }
  try {
    const data = await withOAuth401Retry(_fetchMetricsEnabled, {
      also403Revoked: true,
    })
    logForDebugging(
      `Metrics opt-out API response: enabled=${data.metrics_logging_enabled}`,
    )
    return { enabled: data.metrics_logging_enabled, hasError: false }
  } catch (error) {
    logForDebugging(
      `Failed to check metrics opt-out status: ${errorMessage(error)}`,
    )
    logError(error)
    return { enabled: false, hasError: true }
  }
}
// In-process memoized check (1h TTL) — dedupes repeat calls within one CLI process.
const memoizedCheckMetrics = memoizeWithTTLAsync(
  _checkMetricsEnabledAPI,
  CACHE_TTL_MS,
)
/**
* Fetch (in-memory memoized) and persist to disk on change.
* Errors are not persisted — a transient failure should not overwrite a
* known-good disk value.
*/
/**
 * Fetch (in-memory memoized) and persist to disk on change.
 * Errors are not persisted — a transient failure should not overwrite a
 * known-good disk value.
 */
async function refreshMetricsStatus(): Promise<MetricsStatus> {
  const status = await memoizedCheckMetrics()
  if (status.hasError) {
    return status
  }
  const diskEntry = getGlobalConfig().metricsStatusCache
  const sameValue = diskEntry !== undefined && diskEntry.enabled === status.enabled
  // Skip write when unchanged AND timestamp still fresh — avoids config churn
  // when concurrent callers race past a stale disk entry and all try to write.
  if (sameValue && Date.now() - diskEntry.timestamp < DISK_CACHE_TTL_MS) {
    return status
  }
  saveGlobalConfig(current => ({
    ...current,
    metricsStatusCache: {
      enabled: status.enabled,
      timestamp: Date.now(),
    },
  }))
  return status
}
/**
* Check if metrics are enabled for the current organization.
*
* Two-tier cache:
* - Disk (24h TTL): survives process restarts. Fresh disk cache → zero network.
* - In-memory (1h TTL): dedupes the background refresh within a process.
*
* The caller (bigqueryExporter) tolerates stale reads — a missed export or
* an extra one during the 24h window is acceptable.
*/
/**
 * Check if metrics are enabled for the current organization.
 *
 * Two-tier cache:
 * - Disk (24h TTL): survives process restarts. Fresh disk cache → zero network.
 * - In-memory (1h TTL): dedupes the background refresh within a process.
 *
 * The caller (bigqueryExporter) tolerates stale reads — a missed export or
 * an extra one during the 24h window is acceptable.
 */
export async function checkMetricsEnabled(): Promise<MetricsStatus> {
  // Service key OAuth sessions lack user:profile scope → would 403.
  // API key users (non-subscribers) fall through and use x-api-key auth.
  // This check runs before the disk read so we never persist auth-state-derived
  // answers — only real API responses go to disk. Otherwise a service-key
  // session would poison the cache for a later full-OAuth session.
  if (isClaudeAISubscriber() && !hasProfileScope()) {
    return { enabled: false, hasError: false }
  }
  const diskEntry = getGlobalConfig().metricsStatusCache
  if (!diskEntry) {
    // First-ever run on this machine: block on the network to populate disk.
    return refreshMetricsStatus()
  }
  if (Date.now() - diskEntry.timestamp > DISK_CACHE_TTL_MS) {
    // Stale: serve the cached value but refresh in the background.
    // saveGlobalConfig's fallback path (config.ts:731) can throw if both
    // locked and fallback writes fail — catch here so fire-and-forget
    // doesn't become an unhandled rejection.
    void refreshMetricsStatus().catch(logError)
  }
  return { enabled: diskEntry.enabled, hasError: false }
}
// Export for testing purposes only
/** Reset the in-memory TTL cache so tests can force a fresh API call. */
export const _clearMetricsEnabledCacheForTesting = (): void => {
  memoizedCheckMetrics.cache.clear()
}
+137
View File
@@ -0,0 +1,137 @@
import axios from 'axios'
import { getOauthConfig } from '../../constants/oauth.js'
import { getOauthAccountInfo } from '../../utils/auth.js'
import { getGlobalConfig, saveGlobalConfig } from '../../utils/config.js'
import { logError } from '../../utils/log.js'
import { isEssentialTrafficOnly } from '../../utils/privacyLevel.js'
import { getOAuthHeaders, prepareApiRequest } from '../../utils/teleport/api.js'
/** Backend-resolved overage credit grant state for the current user/org. */
export type OverageCreditGrantInfo = {
  available: boolean
  eligible: boolean
  granted: boolean
  // null when no tier-specific amount applies
  amount_minor_units: number | null
  currency: string | null
}
/** Per-org cache entry: grant info plus the write time used for TTL checks. */
type CachedGrantEntry = {
  info: OverageCreditGrantInfo
  timestamp: number
}
const CACHE_TTL_MS = 60 * 60 * 1000 // 1 hour
/**
* Fetch the current user's overage credit grant eligibility from the backend.
* The backend resolves tier-specific amounts and role-based claim permission,
* so the CLI just reads the response without replicating that logic.
*/
/**
 * Fetch the current user's overage credit grant eligibility from the backend.
 * The backend resolves tier-specific amounts and role-based claim permission,
 * so the CLI just reads the response without replicating that logic.
 * Returns null on any failure (logged, never thrown).
 */
async function fetchOverageCreditGrant(): Promise<OverageCreditGrantInfo | null> {
  try {
    const { accessToken, orgUUID } = await prepareApiRequest()
    const url = `${getOauthConfig().BASE_API_URL}/api/oauth/organizations/${orgUUID}/overage_credit_grant`
    const { data } = await axios.get<OverageCreditGrantInfo>(url, {
      headers: getOAuthHeaders(accessToken),
    })
    return data
  } catch (err) {
    logError(err)
    return null
  }
}
/**
* Get cached grant info. Returns null if no cache or cache is stale.
* Callers should render nothing (not block) when this returns null —
* refreshOverageCreditGrantCache fires lazily to populate it.
*/
/**
 * Get cached grant info. Returns null if no cache or cache is stale.
 * Callers should render nothing (not block) when this returns null —
 * refreshOverageCreditGrantCache fires lazily to populate it.
 */
export function getCachedOverageCreditGrant(): OverageCreditGrantInfo | null {
  const orgId = getOauthAccountInfo()?.organizationUuid
  if (!orgId) {
    return null
  }
  const entry = getGlobalConfig().overageCreditGrantCache?.[orgId]
  // Missing or expired entries both read as "no cache".
  if (!entry || Date.now() - entry.timestamp > CACHE_TTL_MS) {
    return null
  }
  return entry.info
}
/**
* Drop the current org's cached entry so the next read refetches.
* Leaves other orgs' entries intact.
*/
/**
 * Drop the current org's cached entry so the next read refetches.
 * Leaves other orgs' entries intact; no-op when nothing is cached.
 */
export function invalidateOverageCreditGrantCache(): void {
  const orgId = getOauthAccountInfo()?.organizationUuid
  if (!orgId) {
    return
  }
  const cache = getGlobalConfig().overageCreditGrantCache
  if (!cache || !(orgId in cache)) {
    return
  }
  saveGlobalConfig(prev => {
    // Rebuild the map without this org's entry.
    const { [orgId]: _removed, ...remaining } =
      prev.overageCreditGrantCache ?? {}
    return { ...prev, overageCreditGrantCache: remaining }
  })
}
/**
 * Fetch and cache grant info. Fire-and-forget; call when an upsell surface
 * is about to render and the cache is empty.
 *
 * No-ops when nonessential traffic is disabled, when the user has no org,
 * or when the fetch fails (fetchOverageCreditGrant logs and returns null).
 */
export async function refreshOverageCreditGrantCache(): Promise<void> {
  if (isEssentialTrafficOnly()) return
  const orgId = getOauthAccountInfo()?.organizationUuid
  if (!orgId) return
  const info = await fetchOverageCreditGrant()
  if (!info) return
  // Skip rewriting info if grant data is unchanged — avoids config write
  // amplification (inc-4552 pattern). Still refresh the timestamp so the
  // TTL-based staleness check in getCachedOverageCreditGrant doesn't keep
  // re-triggering API calls on every component mount.
  saveGlobalConfig(prev => {
    // Derive from prev (lock-fresh) rather than a pre-lock getGlobalConfig()
    // read — saveConfigWithLock re-reads config from disk under the file lock,
    // so another CLI instance may have written between any outer read and lock
    // acquire.
    const prevCached = prev.overageCreditGrantCache?.[orgId]
    const existing = prevCached?.info
    // Field-by-field compare; dataUnchanged is falsy when no prior entry exists.
    const dataUnchanged =
      existing &&
      existing.available === info.available &&
      existing.eligible === info.eligible &&
      existing.granted === info.granted &&
      existing.amount_minor_units === info.amount_minor_units &&
      existing.currency === info.currency
    // When data is unchanged and timestamp is still fresh, skip the write entirely
    if (
      dataUnchanged &&
      prevCached &&
      Date.now() - prevCached.timestamp <= CACHE_TTL_MS
    ) {
      return prev
    }
    // Reuse the existing info object when unchanged (stable reference),
    // but always stamp the current time to extend freshness.
    const entry: CachedGrantEntry = {
      info: dataUnchanged ? existing : info,
      timestamp: Date.now(),
    }
    return {
      ...prev,
      overageCreditGrantCache: {
        ...prev.overageCreditGrantCache,
        [orgId]: entry,
      },
    }
  })
}
/**
 * Format the grant amount for display. Returns null if amount isn't available
 * (not eligible, or currency we don't know how to format).
 */
export function formatGrantAmount(info: OverageCreditGrantInfo): string | null {
  const { amount_minor_units: minorUnits, currency } = info
  if (minorUnits == null || !currency) return null
  // For now only USD; backend may expand later
  if (currency.toUpperCase() !== 'USD') return null
  const dollars = minorUnits / 100
  if (Number.isInteger(dollars)) return `$${dollars}`
  return `$${dollars.toFixed(2)}`
}
// Public alias for the private cache entry shape — presumably consumed by the
// config typing that persists overageCreditGrantCache; confirm at import sites.
export type { CachedGrantEntry as OverageCreditGrantCacheEntry }
+727
View File
@@ -0,0 +1,727 @@
import type { BetaToolUnion } from '@anthropic-ai/sdk/resources/beta/messages/messages.mjs'
import type { TextBlockParam } from '@anthropic-ai/sdk/resources/index.mjs'
import { createPatch } from 'diff'
import { mkdir, writeFile } from 'fs/promises'
import { join } from 'path'
import type { AgentId } from 'src/types/ids.js'
import type { Message } from 'src/types/message.js'
import { logForDebugging } from 'src/utils/debug.js'
import { djb2Hash } from 'src/utils/hash.js'
import { logError } from 'src/utils/log.js'
import { getClaudeTempDir } from 'src/utils/permissions/filesystem.js'
import { jsonStringify } from 'src/utils/slowOperations.js'
import type { QuerySource } from '../../constants/querySource.js'
import {
type AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
logEvent,
} from '../analytics/index.js'
/** Build a temp-file path like `<claude-tmp>/cache-break-ab3z.diff` with a
 * random 4-char suffix to avoid clobbering earlier diffs. */
function getCacheBreakDiffPath(): string {
  const alphabet = 'abcdefghijklmnopqrstuvwxyz0123456789'
  const suffix = Array.from(
    { length: 4 },
    () => alphabet[Math.floor(Math.random() * alphabet.length)],
  ).join('')
  return join(getClaudeTempDir(), `cache-break-${suffix}.diff`)
}
/** Last-observed request snapshot for one tracking key. Phase 1
 * (recordPromptState) diffs the incoming snapshot against this and updates it;
 * phase 2 (checkResponseForCacheBreak) reads pendingChanges/prevCacheReadTokens
 * to explain an observed cache-token drop. */
type PreviousState = {
  // Hash of system blocks with cache_control stripped.
  systemHash: number
  // Hash of tool schemas with cache_control stripped.
  toolsHash: number
  /** Hash of system blocks WITH cache_control intact. Catches scope/TTL flips
   * (global↔org, 1h↔5m) that stripCacheControl erases from systemHash. */
  cacheControlHash: number
  toolNames: string[]
  /** Per-tool schema hash. Diffed to name which tool's description changed
   * when toolSchemasChanged but added=removed=0 (77% of tool breaks per
   * BQ 2026-03-22). AgentTool/SkillTool embed dynamic agent/command lists. */
  perToolHashes: Record<string, number>
  // Total characters across system blocks; used for delta reporting only.
  systemCharCount: number
  model: string
  fastMode: boolean
  /** 'tool_based' | 'system_prompt' | 'none' — flips when MCP tools are
   * discovered/removed. */
  globalCacheStrategy: string
  /** Sorted beta header list. Diffed to show which headers were added/removed. */
  betas: string[]
  /** AFK_MODE_BETA_HEADER presence — should NOT break cache anymore
   * (sticky-on latched in claude.ts). Tracked to verify the fix. */
  autoModeActive: boolean
  /** Overage state flip — should NOT break cache anymore (eligibility is
   * latched session-stable in should1hCacheTTL). Tracked to verify the fix. */
  isUsingOverage: boolean
  /** Cache-editing beta header presence — should NOT break cache anymore
   * (sticky-on latched in claude.ts). Tracked to verify the fix. */
  cachedMCEnabled: boolean
  /** Resolved effort (env → options → model default). Goes into output_config
   * or anthropic_internal.effort_override. */
  effortValue: string
  /** Hash of getExtraBodyParams() — catches CLAUDE_CODE_EXTRA_BODY and
   * anthropic_internal changes. */
  extraBodyHash: number
  // Number of recorded calls for this key (first record = 1).
  callCount: number
  // Set by phase 1 when any tracked field changed; consumed by phase 2.
  pendingChanges: PendingChanges | null
  // Cache-read tokens from the previous response; null until first response
  // (and reset by notifyCompaction).
  prevCacheReadTokens: number | null
  /** Set when cached microcompact sends cache_edits deletions. Cache reads
   * will legitimately drop — this is expected, not a break. */
  cacheDeletionsPending: boolean
  // Lazy serializer of the full prompt state, used to write a debug diff.
  buildDiffableContent: () => string
}
/** Delta computed by phase 1 between the previous and current snapshots.
 * Held on PreviousState until phase 2 either attributes a cache break to it
 * or discards it (no break observed). */
type PendingChanges = {
  systemPromptChanged: boolean
  toolSchemasChanged: boolean
  modelChanged: boolean
  fastModeChanged: boolean
  cacheControlChanged: boolean
  globalCacheStrategyChanged: boolean
  betasChanged: boolean
  autoModeChanged: boolean
  overageChanged: boolean
  cachedMCChanged: boolean
  effortChanged: boolean
  extraBodyChanged: boolean
  addedToolCount: number
  removedToolCount: number
  // Character-count delta of the system prompt (new minus previous).
  systemCharDelta: number
  addedTools: string[]
  removedTools: string[]
  // Tools present in both snapshots whose per-tool hash differs.
  changedToolSchemas: string[]
  previousModel: string
  newModel: string
  prevGlobalCacheStrategy: string
  newGlobalCacheStrategy: string
  addedBetas: string[]
  removedBetas: string[]
  prevEffortValue: string
  newEffortValue: string
  // Serializer for the PREVIOUS prompt state, captured before the baseline
  // was overwritten — paired with the current one to produce a debug diff.
  buildPrevDiffableContent: () => string
}
// Tracking state per key (see getTrackingKey): main thread, SDK, or agentId.
const previousStateBySource = new Map<string, PreviousState>()
// Cap the number of tracked sources to prevent unbounded memory growth.
// Each entry stores a ~300KB+ diffableContent string (serialized system prompt
// + tool schemas). Without a cap, spawning many subagents (each with a unique
// agentId key) causes the map to grow indefinitely.
const MAX_TRACKED_SOURCES = 10
// Query-source prefixes that participate in cache break detection; everything
// else (short-lived forked agents) is ignored — see getTrackingKey.
const TRACKED_SOURCE_PREFIXES = [
  'repl_main_thread',
  'sdk',
  'agent:custom',
  'agent:default',
  'agent:builtin',
]
// Minimum absolute token drop required to trigger a cache break warning.
// Small drops (e.g., a few thousand tokens) can happen due to normal variation
// and aren't worth alerting on.
const MIN_CACHE_MISS_TOKENS = 2_000
// Anthropic's server-side prompt cache TTL thresholds to test.
// Cache breaks after these durations are likely due to TTL expiration
// rather than client-side changes.
const CACHE_TTL_5MIN_MS = 5 * 60 * 1000
export const CACHE_TTL_1HOUR_MS = 60 * 60 * 1000
// Models to exclude from cache break detection (e.g., haiku has different caching behavior)
function isExcludedModel(model: string): boolean {
  return /haiku/.test(model)
}
/**
 * Returns the tracking key for a querySource, or null if untracked.
 * Compact shares the same server-side cache as repl_main_thread
 * (same cacheSafeParams), so they share tracking state.
 *
 * For subagents with a tracked querySource, uses the unique agentId to
 * isolate tracking state. This prevents false positive cache break
 * notifications when multiple instances of the same agent type run
 * concurrently.
 *
 * Untracked sources (speculation, session_memory, prompt_suggestion, etc.)
 * are short-lived forked agents where cache break detection provides no
 * value — they run 1-3 turns with a fresh agentId each time, so there's
 * nothing meaningful to compare against. Their cache metrics are still
 * logged via tengu_api_success for analytics.
 */
function getTrackingKey(
  querySource: QuerySource,
  agentId?: AgentId,
): string | null {
  if (querySource === 'compact') return 'repl_main_thread'
  const tracked = TRACKED_SOURCE_PREFIXES.some(prefix =>
    querySource.startsWith(prefix),
  )
  if (!tracked) return null
  return agentId || querySource
}
/** Return a copy of `items` with any `cache_control` property removed.
 * Items without the property are passed through by reference. */
function stripCacheControl(
  items: ReadonlyArray<Record<string, unknown>>,
): unknown[] {
  const result: unknown[] = []
  for (const item of items) {
    if ('cache_control' in item) {
      const copy = { ...item }
      delete copy.cache_control
      result.push(copy)
    } else {
      result.push(item)
    }
  }
  return result
}
/** Hash arbitrary data via its JSON serialization. Uses Bun's native hash
 * when available, otherwise a pure-JS djb2 fallback. */
function computeHash(data: unknown): number {
  const serialized = jsonStringify(data)
  if (typeof Bun === 'undefined') {
    // Fallback for non-Bun runtimes (e.g. Node.js via npm global install)
    return djb2Hash(serialized)
  }
  const hashed = Bun.hash(serialized)
  // Bun.hash can return bigint for large inputs; convert to number safely
  if (typeof hashed === 'bigint') {
    return Number(hashed & 0xffffffffn)
  }
  return hashed
}
/** MCP tool names are user-controlled (server config) and may leak filepaths.
 * Collapse them to 'mcp'; built-in names are a fixed vocabulary. */
function sanitizeToolName(name: string): string {
  if (name.startsWith('mcp__')) return 'mcp'
  return name
}
/** Map each stripped tool schema to its hash, keyed by the parallel `names`
 * array (falling back to `__idx_N` when a name is missing). */
function computePerToolHashes(
  strippedTools: ReadonlyArray<unknown>,
  names: string[],
): Record<string, number> {
  const result: Record<string, number> = {}
  strippedTools.forEach((tool, idx) => {
    const key = names[idx] ?? `__idx_${idx}`
    result[key] = computeHash(tool)
  })
  return result
}
/** Total character count across all system prompt text blocks. */
function getSystemCharCount(system: TextBlockParam[]): number {
  return system.reduce((sum, block) => sum + block.text.length, 0)
}
/** Serialize the full prompt state (model + system text + sorted tool
 * descriptions/schemas) into a single string suitable for text diffing. */
function buildDiffableContent(
  system: TextBlockParam[],
  tools: BetaToolUnion[],
  model: string,
): string {
  const systemText = system.map(b => b.text).join('\n\n')
  const toolEntries: string[] = []
  for (const tool of tools) {
    if (!('name' in tool)) {
      toolEntries.push('unknown')
      continue
    }
    const desc = 'description' in tool ? tool.description : ''
    const schema = 'input_schema' in tool ? jsonStringify(tool.input_schema) : ''
    toolEntries.push(`${tool.name}\n description: ${desc}\n input_schema: ${schema}`)
  }
  const toolDetails = toolEntries.sort().join('\n\n')
  return `Model: ${model}\n\n=== System Prompt ===\n\n${systemText}\n\n=== Tools (${tools.length}) ===\n\n${toolDetails}\n`
}
/** Extended tracking snapshot — everything that could affect the server-side
 * cache key that we can observe from the client. All fields are optional so
 * the call site can add incrementally; undefined fields compare as stable. */
export type PromptStateSnapshot = {
  // System prompt blocks exactly as sent to the API (cache_control intact).
  system: TextBlockParam[]
  // Tool definitions exactly as sent to the API.
  toolSchemas: BetaToolUnion[]
  querySource: QuerySource
  model: string
  // Unique per-subagent id; keys tracking state for tracked agent sources.
  agentId?: AgentId
  fastMode?: boolean
  globalCacheStrategy?: string
  // Beta header names; order-insensitive (sorted before comparison).
  betas?: readonly string[]
  autoModeActive?: boolean
  isUsingOverage?: boolean
  cachedMCEnabled?: boolean
  // Stringified before comparison, so number vs string is irrelevant.
  effortValue?: string | number
  // Hashed opaquely; any shape accepted.
  extraBodyParams?: unknown
}
/**
 * Phase 1 (pre-call): Record the current prompt/tool state and detect what changed.
 * Does NOT fire events — just stores pending changes for phase 2 to use.
 *
 * Never throws: any internal failure is logged and swallowed so tracking can
 * never break the API call path it instruments.
 */
export function recordPromptState(snapshot: PromptStateSnapshot): void {
  try {
    const {
      system,
      toolSchemas,
      querySource,
      model,
      agentId,
      fastMode,
      globalCacheStrategy = '',
      betas = [],
      autoModeActive = false,
      isUsingOverage = false,
      cachedMCEnabled = false,
      effortValue,
      extraBodyParams,
    } = snapshot
    const key = getTrackingKey(querySource, agentId)
    if (!key) return
    const strippedSystem = stripCacheControl(
      system as unknown as ReadonlyArray<Record<string, unknown>>,
    )
    const strippedTools = stripCacheControl(
      toolSchemas as unknown as ReadonlyArray<Record<string, unknown>>,
    )
    const systemHash = computeHash(strippedSystem)
    const toolsHash = computeHash(strippedTools)
    // Hash the full system array INCLUDING cache_control — this catches
    // scope flips (global↔org/none) and TTL flips (1h↔5m) that the stripped
    // hash can't see because the text content is identical.
    const cacheControlHash = computeHash(
      system.map(b => ('cache_control' in b ? b.cache_control : null)),
    )
    const toolNames = toolSchemas.map(t => ('name' in t ? t.name : 'unknown'))
    // Only compute per-tool hashes when the aggregate changed — common case
    // (tools unchanged) skips N extra jsonStringify calls.
    const computeToolHashes = () =>
      computePerToolHashes(strippedTools, toolNames)
    const systemCharCount = getSystemCharCount(system)
    // Serialization is deferred: only materialized if a break is later
    // observed and a debug diff needs to be written.
    const lazyDiffableContent = () =>
      buildDiffableContent(system, toolSchemas, model)
    const isFastMode = fastMode ?? false
    const sortedBetas = [...betas].sort()
    const effortStr = effortValue === undefined ? '' : String(effortValue)
    const extraBodyHash =
      extraBodyParams === undefined ? 0 : computeHash(extraBodyParams)
    const prev = previousStateBySource.get(key)
    if (!prev) {
      // First snapshot for this key: seed the baseline, nothing to diff yet.
      // Evict oldest entries if map is at capacity
      while (previousStateBySource.size >= MAX_TRACKED_SOURCES) {
        const oldest = previousStateBySource.keys().next().value
        if (oldest !== undefined) previousStateBySource.delete(oldest)
      }
      previousStateBySource.set(key, {
        systemHash,
        toolsHash,
        cacheControlHash,
        toolNames,
        systemCharCount,
        model,
        fastMode: isFastMode,
        globalCacheStrategy,
        betas: sortedBetas,
        autoModeActive,
        isUsingOverage,
        cachedMCEnabled,
        effortValue: effortStr,
        extraBodyHash,
        callCount: 1,
        pendingChanges: null,
        prevCacheReadTokens: null,
        cacheDeletionsPending: false,
        buildDiffableContent: lazyDiffableContent,
        perToolHashes: computeToolHashes(),
      })
      return
    }
    prev.callCount++
    const systemPromptChanged = systemHash !== prev.systemHash
    const toolSchemasChanged = toolsHash !== prev.toolsHash
    const modelChanged = model !== prev.model
    const fastModeChanged = isFastMode !== prev.fastMode
    const cacheControlChanged = cacheControlHash !== prev.cacheControlHash
    const globalCacheStrategyChanged =
      globalCacheStrategy !== prev.globalCacheStrategy
    const betasChanged =
      sortedBetas.length !== prev.betas.length ||
      sortedBetas.some((b, i) => b !== prev.betas[i])
    const autoModeChanged = autoModeActive !== prev.autoModeActive
    const overageChanged = isUsingOverage !== prev.isUsingOverage
    const cachedMCChanged = cachedMCEnabled !== prev.cachedMCEnabled
    const effortChanged = effortStr !== prev.effortValue
    const extraBodyChanged = extraBodyHash !== prev.extraBodyHash
    if (
      systemPromptChanged ||
      toolSchemasChanged ||
      modelChanged ||
      fastModeChanged ||
      cacheControlChanged ||
      globalCacheStrategyChanged ||
      betasChanged ||
      autoModeChanged ||
      overageChanged ||
      cachedMCChanged ||
      effortChanged ||
      extraBodyChanged
    ) {
      const prevToolSet = new Set(prev.toolNames)
      const newToolSet = new Set(toolNames)
      const prevBetaSet = new Set(prev.betas)
      const newBetaSet = new Set(sortedBetas)
      const addedTools = toolNames.filter(n => !prevToolSet.has(n))
      const removedTools = prev.toolNames.filter(n => !newToolSet.has(n))
      const changedToolSchemas: string[] = []
      if (toolSchemasChanged) {
        // Name tools present in both snapshots whose schema hash differs.
        const newHashes = computeToolHashes()
        for (const name of toolNames) {
          if (!prevToolSet.has(name)) continue
          if (newHashes[name] !== prev.perToolHashes[name]) {
            changedToolSchemas.push(name)
          }
        }
        prev.perToolHashes = newHashes
      }
      // NOTE: any pendingChanges from an earlier call that phase 2 never
      // consumed are overwritten here — only the latest delta is kept.
      prev.pendingChanges = {
        systemPromptChanged,
        toolSchemasChanged,
        modelChanged,
        fastModeChanged,
        cacheControlChanged,
        globalCacheStrategyChanged,
        betasChanged,
        autoModeChanged,
        overageChanged,
        cachedMCChanged,
        effortChanged,
        extraBodyChanged,
        addedToolCount: addedTools.length,
        removedToolCount: removedTools.length,
        addedTools,
        removedTools,
        changedToolSchemas,
        systemCharDelta: systemCharCount - prev.systemCharCount,
        previousModel: prev.model,
        newModel: model,
        prevGlobalCacheStrategy: prev.globalCacheStrategy,
        newGlobalCacheStrategy: globalCacheStrategy,
        addedBetas: sortedBetas.filter(b => !prevBetaSet.has(b)),
        removedBetas: prev.betas.filter(b => !newBetaSet.has(b)),
        prevEffortValue: prev.effortValue,
        newEffortValue: effortStr,
        buildPrevDiffableContent: prev.buildDiffableContent,
      }
    } else {
      prev.pendingChanges = null
    }
    // Advance the baseline to the current snapshot unconditionally — the next
    // call always diffs against the latest observed state.
    prev.systemHash = systemHash
    prev.toolsHash = toolsHash
    prev.cacheControlHash = cacheControlHash
    prev.toolNames = toolNames
    prev.systemCharCount = systemCharCount
    prev.model = model
    prev.fastMode = isFastMode
    prev.globalCacheStrategy = globalCacheStrategy
    prev.betas = sortedBetas
    prev.autoModeActive = autoModeActive
    prev.isUsingOverage = isUsingOverage
    prev.cachedMCEnabled = cachedMCEnabled
    prev.effortValue = effortStr
    prev.extraBodyHash = extraBodyHash
    prev.buildDiffableContent = lazyDiffableContent
  } catch (e: unknown) {
    logError(e)
  }
}
/**
 * Phase 2 (post-call): Check the API response's cache tokens to determine
 * if a cache break actually occurred. If it did, use the pending changes
 * from phase 1 to explain why.
 *
 * @param cacheReadTokens cache-read token count reported by the API response
 * @param cacheCreationTokens cache-creation token count from the same response
 * @param messages conversation so far; used to estimate the gap since the
 *   previous assistant turn for TTL-expiry attribution
 * @param requestId opaque server-generated request id, forwarded to analytics
 *
 * Never throws: failures are logged and swallowed.
 */
export async function checkResponseForCacheBreak(
  querySource: QuerySource,
  cacheReadTokens: number,
  cacheCreationTokens: number,
  messages: Message[],
  agentId?: AgentId,
  requestId?: string | null,
): Promise<void> {
  try {
    const key = getTrackingKey(querySource, agentId)
    if (!key) return
    const state = previousStateBySource.get(key)
    if (!state) return
    // Skip excluded models (e.g., haiku has different caching behavior)
    if (isExcludedModel(state.model)) return
    // Roll the baseline forward immediately so early returns below still
    // leave the next call comparing against this response.
    const prevCacheRead = state.prevCacheReadTokens
    state.prevCacheReadTokens = cacheReadTokens
    // Calculate time since last call for TTL detection by finding the most recent
    // assistant message timestamp in the messages array (before the current response)
    const lastAssistantMessage = messages.findLast(m => m.type === 'assistant')
    const timeSinceLastAssistantMsg = lastAssistantMessage
      ? Date.now() - new Date(lastAssistantMessage.timestamp).getTime()
      : null
    // Skip the first call — no previous value to compare against
    if (prevCacheRead === null) return
    const changes = state.pendingChanges
    // Cache deletions via cached microcompact intentionally reduce the cached
    // prefix. The drop in cache read tokens is expected — reset the baseline
    // so we don't false-positive on the next call.
    if (state.cacheDeletionsPending) {
      state.cacheDeletionsPending = false
      logForDebugging(
        `[PROMPT CACHE] cache deletion applied, cache read: ${prevCacheRead}${cacheReadTokens} (expected drop)`,
      )
      // Don't flag as a break — the remaining state is still valid
      state.pendingChanges = null
      return
    }
    // Detect a cache break: cache read dropped >5% from previous AND
    // the absolute drop exceeds the minimum threshold.
    const tokenDrop = prevCacheRead - cacheReadTokens
    if (
      cacheReadTokens >= prevCacheRead * 0.95 ||
      tokenDrop < MIN_CACHE_MISS_TOKENS
    ) {
      state.pendingChanges = null
      return
    }
    // Build explanation from pending changes (if any)
    const parts: string[] = []
    if (changes) {
      if (changes.modelChanged) {
        parts.push(
          `model changed (${changes.previousModel}${changes.newModel})`,
        )
      }
      if (changes.systemPromptChanged) {
        const charDelta = changes.systemCharDelta
        const charInfo =
          charDelta === 0
            ? ''
            : charDelta > 0
              ? ` (+${charDelta} chars)`
              : ` (${charDelta} chars)`
        parts.push(`system prompt changed${charInfo}`)
      }
      if (changes.toolSchemasChanged) {
        const toolDiff =
          changes.addedToolCount > 0 || changes.removedToolCount > 0
            ? ` (+${changes.addedToolCount}/-${changes.removedToolCount} tools)`
            : ' (tool prompt/schema changed, same tool set)'
        parts.push(`tools changed${toolDiff}`)
      }
      if (changes.fastModeChanged) {
        parts.push('fast mode toggled')
      }
      if (changes.globalCacheStrategyChanged) {
        parts.push(
          `global cache strategy changed (${changes.prevGlobalCacheStrategy || 'none'}${changes.newGlobalCacheStrategy || 'none'})`,
        )
      }
      if (
        changes.cacheControlChanged &&
        !changes.globalCacheStrategyChanged &&
        !changes.systemPromptChanged
      ) {
        // Only report as standalone cause if nothing else explains it —
        // otherwise the scope/TTL flip is a consequence, not the root cause.
        parts.push('cache_control changed (scope or TTL)')
      }
      if (changes.betasChanged) {
        const added = changes.addedBetas.length
          ? `+${changes.addedBetas.join(',')}`
          : ''
        const removed = changes.removedBetas.length
          ? `-${changes.removedBetas.join(',')}`
          : ''
        const diff = [added, removed].filter(Boolean).join(' ')
        parts.push(`betas changed${diff ? ` (${diff})` : ''}`)
      }
      if (changes.autoModeChanged) {
        parts.push('auto mode toggled')
      }
      if (changes.overageChanged) {
        parts.push('overage state changed (TTL latched, no flip)')
      }
      if (changes.cachedMCChanged) {
        parts.push('cached microcompact toggled')
      }
      if (changes.effortChanged) {
        parts.push(
          `effort changed (${changes.prevEffortValue || 'default'}${changes.newEffortValue || 'default'})`,
        )
      }
      if (changes.extraBodyChanged) {
        parts.push('extra body params changed')
      }
    }
    // Check if time gap suggests TTL expiration
    const lastAssistantMsgOver5minAgo =
      timeSinceLastAssistantMsg !== null &&
      timeSinceLastAssistantMsg > CACHE_TTL_5MIN_MS
    const lastAssistantMsgOver1hAgo =
      timeSinceLastAssistantMsg !== null &&
      timeSinceLastAssistantMsg > CACHE_TTL_1HOUR_MS
    // Post PR #19823 BQ analysis (bq-queries/prompt-caching/cache_break_pr19823_analysis.sql):
    // when all client-side flags are false and the gap is under TTL, ~90% of breaks
    // are server-side routing/eviction or billed/inference disagreement. Label
    // accordingly instead of implying a CC bug hunt.
    let reason: string
    if (parts.length > 0) {
      reason = parts.join(', ')
    } else if (lastAssistantMsgOver1hAgo) {
      reason = 'possible 1h TTL expiry (prompt unchanged)'
    } else if (lastAssistantMsgOver5minAgo) {
      reason = 'possible 5min TTL expiry (prompt unchanged)'
    } else if (timeSinceLastAssistantMsg !== null) {
      reason = 'likely server-side (prompt unchanged, <5min gap)'
    } else {
      reason = 'unknown cause'
    }
    logEvent('tengu_prompt_cache_break', {
      systemPromptChanged: changes?.systemPromptChanged ?? false,
      toolSchemasChanged: changes?.toolSchemasChanged ?? false,
      modelChanged: changes?.modelChanged ?? false,
      fastModeChanged: changes?.fastModeChanged ?? false,
      cacheControlChanged: changes?.cacheControlChanged ?? false,
      globalCacheStrategyChanged: changes?.globalCacheStrategyChanged ?? false,
      betasChanged: changes?.betasChanged ?? false,
      autoModeChanged: changes?.autoModeChanged ?? false,
      overageChanged: changes?.overageChanged ?? false,
      cachedMCChanged: changes?.cachedMCChanged ?? false,
      effortChanged: changes?.effortChanged ?? false,
      extraBodyChanged: changes?.extraBodyChanged ?? false,
      addedToolCount: changes?.addedToolCount ?? 0,
      removedToolCount: changes?.removedToolCount ?? 0,
      systemCharDelta: changes?.systemCharDelta ?? 0,
      // Tool names are sanitized: built-in names are a fixed vocabulary,
      // MCP tools collapse to 'mcp' (user-configured, could leak paths).
      addedTools: (changes?.addedTools ?? [])
        .map(sanitizeToolName)
        .join(
          ',',
        ) as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
      removedTools: (changes?.removedTools ?? [])
        .map(sanitizeToolName)
        .join(
          ',',
        ) as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
      changedToolSchemas: (changes?.changedToolSchemas ?? [])
        .map(sanitizeToolName)
        .join(
          ',',
        ) as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
      // Beta header names and cache strategy are fixed enum-like values,
      // not code or filepaths. requestId is an opaque server-generated ID.
      addedBetas: (changes?.addedBetas ?? []).join(
        ',',
      ) as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
      removedBetas: (changes?.removedBetas ?? []).join(
        ',',
      ) as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
      prevGlobalCacheStrategy: (changes?.prevGlobalCacheStrategy ??
        '') as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
      newGlobalCacheStrategy: (changes?.newGlobalCacheStrategy ??
        '') as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
      callNumber: state.callCount,
      prevCacheReadTokens: prevCacheRead,
      cacheReadTokens,
      cacheCreationTokens,
      timeSinceLastAssistantMsg: timeSinceLastAssistantMsg ?? -1,
      lastAssistantMsgOver5minAgo,
      lastAssistantMsgOver1hAgo,
      requestId: (requestId ??
        '') as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
    })
    // Write diff file for ant debugging via --debug. The path is included in
    // the summary log so ants can find it (DevBar UI removed — event data
    // flows reliably to BQ for analytics).
    let diffPath: string | undefined
    if (changes?.buildPrevDiffableContent) {
      diffPath = await writeCacheBreakDiff(
        changes.buildPrevDiffableContent(),
        state.buildDiffableContent(),
      )
    }
    const diffSuffix = diffPath ? `, diff: ${diffPath}` : ''
    const summary = `[PROMPT CACHE BREAK] ${reason} [source=${querySource}, call #${state.callCount}, cache read: ${prevCacheRead}${cacheReadTokens}, creation: ${cacheCreationTokens}${diffSuffix}]`
    logForDebugging(summary, { level: 'warn' })
    state.pendingChanges = null
  } catch (e: unknown) {
    logError(e)
  }
}
/**
* Call when cached microcompact sends cache_edits deletions.
* The next API response will have lower cache read tokens — that's
* expected, not a cache break.
*/
export function notifyCacheDeletion(
querySource: QuerySource,
agentId?: AgentId,
): void {
const key = getTrackingKey(querySource, agentId)
const state = key ? previousStateBySource.get(key) : undefined
if (state) {
state.cacheDeletionsPending = true
}
}
/**
* Call after compaction to reset the cache read baseline.
* Compaction legitimately reduces message count, so cache read tokens
* will naturally drop on the next call — that's not a break.
*/
export function notifyCompaction(
querySource: QuerySource,
agentId?: AgentId,
): void {
const key = getTrackingKey(querySource, agentId)
const state = key ? previousStateBySource.get(key) : undefined
if (state) {
state.prevCacheReadTokens = null
}
}
/** Drop tracking state for a finished subagent so the capped map doesn't
 * retain its (potentially large) serialized prompt snapshot. */
export function cleanupAgentTracking(agentId: AgentId): void {
  previousStateBySource.delete(agentId)
}
/** Clear ALL per-source tracking state (every key, every agent). */
export function resetPromptCacheBreakDetection(): void {
  previousStateBySource.clear()
}
/** Write a unified diff of the previous vs current prompt state to a temp
 * file for debugging. Returns the file path, or undefined on any failure. */
async function writeCacheBreakDiff(
  prevContent: string,
  newContent: string,
): Promise<string | undefined> {
  try {
    const targetPath = getCacheBreakDiffPath()
    await mkdir(getClaudeTempDir(), { recursive: true })
    const patch = createPatch(
      'prompt-state',
      prevContent,
      newContent,
      'before',
      'after',
    )
    await writeFile(targetPath, patch)
    return targetPath
  } catch {
    // Diff output is best-effort debug aid only — swallow errors.
    return undefined
  }
}
+281
View File
@@ -0,0 +1,281 @@
import axios from 'axios'
import { getOauthConfig } from '../../constants/oauth.js'
import {
getOauthAccountInfo,
getSubscriptionType,
isClaudeAISubscriber,
} from '../../utils/auth.js'
import { getGlobalConfig, saveGlobalConfig } from '../../utils/config.js'
import { logForDebugging } from '../../utils/debug.js'
import { logError } from '../../utils/log.js'
import { isEssentialTrafficOnly } from '../../utils/privacyLevel.js'
import { getOAuthHeaders, prepareApiRequest } from '../../utils/teleport/api.js'
import type {
ReferralCampaign,
ReferralEligibilityResponse,
ReferralRedemptionsResponse,
ReferrerRewardInfo,
} from '../oauth/types.js'
// Cache expiration time: 24 hours (eligibility changes only on subscription/experiment changes)
const CACHE_EXPIRATION_MS = 24 * 60 * 60 * 1000
// Track in-flight fetch to prevent duplicate API calls — concurrent callers
// all await this same shared promise; cleared in the fetch's finally block.
let fetchInProgress: Promise<ReferralEligibilityResponse | null> | null = null
/**
 * Fetch referral eligibility for the current org from the OAuth API.
 * Throws on auth/network/timeout failures — callers handle errors.
 */
export async function fetchReferralEligibility(
  campaign: ReferralCampaign = 'claude_code_guest_pass',
): Promise<ReferralEligibilityResponse> {
  const { accessToken, orgUUID } = await prepareApiRequest()
  const endpoint = `${getOauthConfig().BASE_API_URL}/api/oauth/organizations/${orgUUID}/referral/eligibility`
  const { data } = await axios.get(endpoint, {
    headers: {
      ...getOAuthHeaders(accessToken),
      'x-organization-uuid': orgUUID,
    },
    params: { campaign },
    timeout: 5000, // 5 second timeout for background fetch
  })
  return data
}
/**
 * Fetch the current org's referral redemptions from the OAuth API.
 * Throws on auth/network/timeout failures — callers handle errors.
 */
export async function fetchReferralRedemptions(
  campaign: string = 'claude_code_guest_pass',
): Promise<ReferralRedemptionsResponse> {
  const { accessToken, orgUUID } = await prepareApiRequest()
  const endpoint = `${getOauthConfig().BASE_API_URL}/api/oauth/organizations/${orgUUID}/referral/redemptions`
  const { data } = await axios.get<ReferralRedemptionsResponse>(endpoint, {
    headers: {
      ...getOAuthHeaders(accessToken),
      'x-organization-uuid': orgUUID,
    },
    params: { campaign },
    timeout: 10000, // 10 second timeout
  })
  return data
}
/**
 * Prechecks for if user can access guest passes feature
 * (must have an org, be a claude.ai subscriber, and be on the max tier).
 */
function shouldCheckForPasses(): boolean {
  const hasOrg = Boolean(getOauthAccountInfo()?.organizationUuid)
  return hasOrg && isClaudeAISubscriber() && getSubscriptionType() === 'max'
}
/**
 * Check cached passes eligibility from GlobalConfig
 * Returns current cached state and cache status
 */
export function checkCachedPassesEligibility(): {
  eligible: boolean
  needsRefresh: boolean
  hasCache: boolean
} {
  if (!shouldCheckForPasses()) {
    return { eligible: false, needsRefresh: false, hasCache: false }
  }
  const orgId = getOauthAccountInfo()?.organizationUuid
  if (!orgId) {
    return { eligible: false, needsRefresh: false, hasCache: false }
  }
  const entry = getGlobalConfig().passesEligibilityCache?.[orgId]
  if (!entry) {
    // No cached entry, needs fetch
    return { eligible: false, needsRefresh: true, hasCache: false }
  }
  const isExpired = Date.now() - entry.timestamp > CACHE_EXPIRATION_MS
  return {
    eligible: entry.eligible,
    needsRefresh: isExpired,
    hasCache: true,
  }
}
// Display symbols for currencies we know how to format; anything else falls
// back to the ISO code plus a space (see formatCreditAmount).
const CURRENCY_SYMBOLS: Record<string, string> = {
  USD: '$',
  EUR: '€',
  GBP: '£',
  BRL: 'R$',
  CAD: 'CA$',
  AUD: 'A$',
  NZD: 'NZ$',
  SGD: 'S$',
}
/** Format a referrer reward's credit amount for display, e.g. `$5` or
 * `€2.50`; unknown currencies render as `XYZ 5`. */
export function formatCreditAmount(reward: ReferrerRewardInfo): string {
  const symbol = CURRENCY_SYMBOLS[reward.currency] ?? `${reward.currency} `
  const amount = reward.amount_minor_units / 100
  if (Number.isInteger(amount)) return symbol + amount.toString()
  return symbol + amount.toFixed(2)
}
/**
 * Get cached referrer reward info from eligibility cache
 * Returns the reward info if the user is in a v1 campaign, null otherwise
 */
export function getCachedReferrerReward(): ReferrerRewardInfo | null {
  const orgId = getOauthAccountInfo()?.organizationUuid
  if (!orgId) return null
  const entry = getGlobalConfig().passesEligibilityCache?.[orgId]
  return entry?.referrer_reward ?? null
}
/**
 * Get the cached remaining passes count from eligibility cache
 * Returns the number of remaining passes, or null if not available
 */
export function getCachedRemainingPasses(): number | null {
  const orgId = getOauthAccountInfo()?.organizationUuid
  if (!orgId) return null
  const entry = getGlobalConfig().passesEligibilityCache?.[orgId]
  return entry?.remaining_passes ?? null
}
/**
 * Fetch passes eligibility and store in GlobalConfig
 * Returns the fetched response or null on error
 *
 * Concurrent callers share a single in-flight promise (module-level
 * fetchInProgress) so only one API request is ever outstanding.
 */
export async function fetchAndStorePassesEligibility(): Promise<ReferralEligibilityResponse | null> {
  // Return existing promise if fetch is already in progress
  if (fetchInProgress) {
    logForDebugging('Passes: Reusing in-flight eligibility fetch')
    return fetchInProgress
  }
  const orgId = getOauthAccountInfo()?.organizationUuid
  if (!orgId) {
    return null
  }
  // Store the promise to share with concurrent calls
  fetchInProgress = (async () => {
    try {
      const response = await fetchReferralEligibility()
      const cacheEntry = {
        ...response,
        timestamp: Date.now(),
      }
      saveGlobalConfig(current => ({
        ...current,
        passesEligibilityCache: {
          ...current.passesEligibilityCache,
          [orgId]: cacheEntry,
        },
      }))
      logForDebugging(
        `Passes eligibility cached for org ${orgId}: ${response.eligible}`,
      )
      return response
    } catch (error: unknown) {
      logForDebugging('Failed to fetch and cache passes eligibility')
      // Pass the unknown error straight through instead of asserting `as
      // Error` — non-Error throws would make that assertion a lie, and the
      // sibling modules already use `catch (e: unknown) { logError(e) }`.
      logError(error)
      return null
    } finally {
      // Clear the promise when done
      fetchInProgress = null
    }
  })()
  return fetchInProgress
}
/**
 * Get cached passes eligibility data or fetch if needed
 * Main entry point for all eligibility checks
 *
 * This function never blocks on network - it returns cached data immediately
 * and fetches in the background if needed. On cold start (no cache), it returns
 * null and the passes command won't be available until the next session.
 */
export async function getCachedOrFetchPassesEligibility(): Promise<ReferralEligibilityResponse | null> {
  if (!shouldCheckForPasses()) return null
  const orgId = getOauthAccountInfo()?.organizationUuid
  if (!orgId) return null
  const cachedEntry = getGlobalConfig().passesEligibilityCache?.[orgId]
  // No cache - trigger background fetch and return null (non-blocking)
  // The passes command won't be available this session, but will be next time
  if (!cachedEntry) {
    logForDebugging(
      'Passes: No cache, fetching eligibility in background (command unavailable this session)',
    )
    void fetchAndStorePassesEligibility()
    return null
  }
  const { timestamp: cachedAt, ...response } = cachedEntry
  if (Date.now() - cachedAt > CACHE_EXPIRATION_MS) {
    // Cache exists but is stale - return stale cache and trigger background refresh
    logForDebugging(
      'Passes: Cache stale, returning cached data and refreshing in background',
    )
    void fetchAndStorePassesEligibility() // Background refresh
  } else {
    // Cache is fresh - return it immediately
    logForDebugging('Passes: Using fresh cached eligibility data')
  }
  return response as ReferralEligibilityResponse
}
/**
 * Prefetch passes eligibility on startup.
 * No-op when the user has opted out of nonessential network traffic.
 */
export async function prefetchPassesEligibility(): Promise<void> {
  if (isEssentialTrafficOnly()) return
  // Fire-and-forget: warm the cache without blocking startup.
  void getCachedOrFetchPassesEligibility()
}
+514
View File
@@ -0,0 +1,514 @@
import axios, { type AxiosError } from 'axios'
import type { UUID } from 'crypto'
import { getOauthConfig } from '../../constants/oauth.js'
import type { Entry, TranscriptMessage } from '../../types/logs.js'
import { logForDebugging } from '../../utils/debug.js'
import { logForDiagnosticsNoPII } from '../../utils/diagLogs.js'
import { isEnvTruthy } from '../../utils/envUtils.js'
import { logError } from '../../utils/log.js'
import { sequential } from '../../utils/sequential.js'
import { getSessionIngressAuthToken } from '../../utils/sessionIngressAuth.js'
import { sleep } from '../../utils/sleep.js'
import { jsonStringify } from '../../utils/slowOperations.js'
import { getOAuthHeaders } from '../../utils/teleport/api.js'
/** Error envelope returned by the session-ingress service on failure responses. */
interface SessionIngressError {
  error?: {
    message?: string
    type?: string
  }
}
// Module-level state
// Last successfully-persisted entry UUID per session — sent as the
// `Last-Uuid` header for optimistic concurrency control on appends.
const lastUuidMap: Map<string, UUID> = new Map()
// Retry policy for transient persistence failures (exponential backoff).
const MAX_RETRIES = 10
const BASE_DELAY_MS = 500
// Per-session sequential wrappers to prevent concurrent log writes
const sequentialAppendBySession: Map<
  string,
  (
    entry: TranscriptMessage,
    url: string,
    headers: Record<string, string>,
  ) => Promise<boolean>
> = new Map()
/**
 * Gets or creates the per-session sequential wrapper.
 * Guarantees that log appends for a given session run one at a time,
 * preserving the Last-Uuid append chain.
 */
function getOrCreateSequentialAppend(sessionId: string) {
  const existing = sequentialAppendBySession.get(sessionId)
  if (existing) {
    return existing
  }
  const created = sequential(
    async (
      entry: TranscriptMessage,
      url: string,
      headers: Record<string, string>,
    ) => await appendSessionLogImpl(sessionId, entry, url, headers),
  )
  sequentialAppendBySession.set(sessionId, created)
  return created
}
/**
 * Internal implementation of appendSessionLog with retry logic.
 *
 * Retries on transient errors (network, 5xx, 429). On 409, adopts the server's
 * last UUID and retries (handles stale state from killed process's in-flight
 * requests). Fails immediately on 401.
 *
 * @param sessionId Session whose append chain is being extended.
 * @param entry Entry to persist; on success its uuid becomes the chain head.
 * @param url Fully-qualified ingress endpoint for this session.
 * @param headers Base auth/content headers; `Last-Uuid` is added per attempt.
 * @returns true once the server acknowledges the entry, false on give-up.
 */
async function appendSessionLogImpl(
  sessionId: string,
  entry: TranscriptMessage,
  url: string,
  headers: Record<string, string>,
): Promise<boolean> {
  for (let attempt = 1; attempt <= MAX_RETRIES; attempt++) {
    try {
      // Optimistic concurrency: tell the server which entry we believe is
      // the current head of the append chain.
      const lastUuid = lastUuidMap.get(sessionId)
      const requestHeaders = { ...headers }
      if (lastUuid) {
        requestHeaders['Last-Uuid'] = lastUuid
      }
      // 5xx responses throw (handled in catch below); 4xx are inspected here.
      const response = await axios.put(url, entry, {
        headers: requestHeaders,
        validateStatus: status => status < 500,
      })
      if (response.status === 200 || response.status === 201) {
        lastUuidMap.set(sessionId, entry.uuid)
        logForDebugging(
          `Successfully persisted session log entry for session ${sessionId}`,
        )
        return true
      }
      if (response.status === 409) {
        // Check if our entry was actually stored (server returned 409 but entry exists)
        // This handles the scenario where entry was stored but client received an error
        // response, causing lastUuidMap to be stale
        const serverLastUuid = response.headers['x-last-uuid']
        if (serverLastUuid === entry.uuid) {
          // Our entry IS the last entry on server - it was stored successfully previously
          lastUuidMap.set(sessionId, entry.uuid)
          logForDebugging(
            `Session entry ${entry.uuid} already present on server, recovering from stale state`,
          )
          logForDiagnosticsNoPII('info', 'session_persist_recovered_from_409')
          return true
        }
        // Another writer (e.g. in-flight request from a killed process)
        // advanced the server's chain. Try to adopt the server's last UUID
        // from the response header, or re-fetch the session to discover it.
        if (serverLastUuid) {
          lastUuidMap.set(sessionId, serverLastUuid as UUID)
          logForDebugging(
            `Session 409: adopting server lastUuid=${serverLastUuid} from header, retrying entry ${entry.uuid}`,
          )
        } else {
          // Server didn't return x-last-uuid (e.g. v1 endpoint). Re-fetch
          // the session to discover the current head of the append chain.
          const logs = await fetchSessionLogsFromUrl(sessionId, url, headers)
          const adoptedUuid = findLastUuid(logs)
          if (adoptedUuid) {
            lastUuidMap.set(sessionId, adoptedUuid)
            // `logs!` is safe: adoptedUuid is only defined when logs is non-null.
            logForDebugging(
              `Session 409: re-fetched ${logs!.length} entries, adopting lastUuid=${adoptedUuid}, retrying entry ${entry.uuid}`,
            )
          } else {
            // Can't determine server state — give up
            const errorData = response.data as SessionIngressError
            const errorMessage =
              errorData.error?.message || 'Concurrent modification detected'
            logError(
              new Error(
                `Session persistence conflict: UUID mismatch for session ${sessionId}, entry ${entry.uuid}. ${errorMessage}`,
              ),
            )
            logForDiagnosticsNoPII(
              'error',
              'session_persist_fail_concurrent_modification',
            )
            return false
          }
        }
        logForDiagnosticsNoPII('info', 'session_persist_409_adopt_server_uuid')
        continue // retry with updated lastUuid
      }
      if (response.status === 401) {
        logForDebugging('Session token expired or invalid')
        logForDiagnosticsNoPII('error', 'session_persist_fail_bad_token')
        return false // Non-retryable
      }
      // Other 4xx (429, etc.) - retryable
      logForDebugging(
        `Failed to persist session log: ${response.status} ${response.statusText}`,
      )
      logForDiagnosticsNoPII('error', 'session_persist_fail_status', {
        status: response.status,
        attempt,
      })
    } catch (error) {
      // Network errors, 5xx - retryable
      const axiosError = error as AxiosError<SessionIngressError>
      logError(new Error(`Error persisting session log: ${axiosError.message}`))
      logForDiagnosticsNoPII('error', 'session_persist_fail_status', {
        status: axiosError.status,
        attempt,
      })
    }
    if (attempt === MAX_RETRIES) {
      logForDebugging(`Remote persistence failed after ${MAX_RETRIES} attempts`)
      logForDiagnosticsNoPII(
        'error',
        'session_persist_error_retries_exhausted',
        { attempt },
      )
      return false
    }
    // Exponential backoff capped at 8s between attempts.
    const delayMs = Math.min(BASE_DELAY_MS * Math.pow(2, attempt - 1), 8000)
    logForDebugging(
      `Remote persistence attempt ${attempt}/${MAX_RETRIES} failed, retrying in ${delayMs}ms…`,
    )
    await sleep(delayMs)
  }
  return false
}
/**
 * Append a log entry to the session using a JWT token.
 * Uses optimistic concurrency control (Last-Uuid header) and serializes
 * appends per session to avoid racing writes.
 */
export async function appendSessionLog(
  sessionId: string,
  entry: TranscriptMessage,
  url: string,
): Promise<boolean> {
  const sessionToken = getSessionIngressAuthToken()
  if (!sessionToken) {
    logForDebugging('No session token available for session persistence')
    logForDiagnosticsNoPII('error', 'session_persist_fail_jwt_no_token')
    return false
  }
  const headers: Record<string, string> = {
    'Content-Type': 'application/json',
    Authorization: `Bearer ${sessionToken}`,
  }
  // Route through the per-session queue so appends run one at a time.
  return getOrCreateSequentialAppend(sessionId)(entry, url, headers)
}
/**
 * Get all session logs for hydration.
 * Also re-syncs the in-memory Last-Uuid chain head from the fetched logs.
 */
export async function getSessionLogs(
  sessionId: string,
  url: string,
): Promise<Entry[] | null> {
  const sessionToken = getSessionIngressAuthToken()
  if (!sessionToken) {
    logForDebugging('No session token available for fetching session logs')
    logForDiagnosticsNoPII('error', 'session_get_fail_no_token')
    return null
  }
  const logs = await fetchSessionLogsFromUrl(sessionId, url, {
    Authorization: `Bearer ${sessionToken}`,
  })
  if (logs && logs.length > 0) {
    // Adopt the final entry's UUID as the head of the append chain.
    const lastEntry = logs.at(-1)
    if (lastEntry && 'uuid' in lastEntry && lastEntry.uuid) {
      lastUuidMap.set(sessionId, lastEntry.uuid)
    }
  }
  return logs
}
/**
 * Get all session logs for hydration via OAuth.
 * Used when teleporting sessions from the Sessions API.
 */
export async function getSessionLogsViaOAuth(
  sessionId: string,
  accessToken: string,
  orgUUID: string,
): Promise<Entry[] | null> {
  const url = `${getOauthConfig().BASE_API_URL}/v1/session_ingress/session/${sessionId}`
  logForDebugging(`[session-ingress] Fetching session logs from: ${url}`)
  return fetchSessionLogsFromUrl(sessionId, url, {
    ...getOAuthHeaders(accessToken),
    'x-organization-uuid': orgUUID,
  })
}
/**
 * Response shape from GET /v1/code/sessions/{id}/teleport-events.
 * WorkerEvent.payload IS the Entry (TranscriptMessage struct) — the CLI
 * writes it via AddWorkerEvent, the server stores it opaque, we read it
 * back here.
 */
type TeleportEventsResponse = {
  data: Array<{
    event_id: string
    event_type: string
    is_compaction: boolean
    // null for events the server couldn't surface (see getTeleportEvents for
    // the skip rationale); otherwise the transcript entry itself.
    payload: Entry | null
    created_at: string
  }>
  // Unset when there are no more pages — this IS the end-of-stream
  // signal (no separate has_more field).
  next_cursor?: string
}
/**
 * Get worker events (transcript) via the CCR v2 Sessions API. Replaces
 * getSessionLogsViaOAuth once session-ingress is retired.
 *
 * The server dispatches per-session: Spanner for v2-native sessions,
 * threadstore for pre-backfill session_* IDs. The cursor is opaque to us —
 * echo it back until next_cursor is unset.
 *
 * Paginated (500/page default, server max 1000). session-ingress's one-shot
 * 50k is gone; we loop.
 *
 * @returns all entries on success (possibly truncated at the page cap),
 *   null when the session is not found or a request fails.
 * @throws Error with a user-facing re-login message on 401.
 */
export async function getTeleportEvents(
  sessionId: string,
  accessToken: string,
  orgUUID: string,
): Promise<Entry[] | null> {
  const baseUrl = `${getOauthConfig().BASE_API_URL}/v1/code/sessions/${sessionId}/teleport-events`
  const headers = {
    ...getOAuthHeaders(accessToken),
    'x-organization-uuid': orgUUID,
  }
  logForDebugging(`[teleport] Fetching events from: ${baseUrl}`)
  const all: Entry[] = []
  let cursor: string | undefined
  let pages = 0
  // Infinite-loop guard: 1000/page × 100 pages = 100k events. Larger than
  // session-ingress's 50k one-shot. If we hit this, something's wrong
  // (server not advancing cursor) — bail rather than hang.
  const maxPages = 100
  while (pages < maxPages) {
    const params: Record<string, string | number> = { limit: 1000 }
    if (cursor !== undefined) {
      params.cursor = cursor
    }
    let response
    try {
      response = await axios.get<TeleportEventsResponse>(baseUrl, {
        headers,
        params,
        timeout: 20000,
        validateStatus: status => status < 500,
      })
    } catch (e) {
      // Network/5xx: no partial results are returned — caller falls back.
      const err = e as AxiosError
      logError(new Error(`Teleport events fetch failed: ${err.message}`))
      logForDiagnosticsNoPII('error', 'teleport_events_fetch_fail')
      return null
    }
    if (response.status === 404) {
      // 404 on page 0 is ambiguous during the migration window:
      // (a) Session genuinely not found (not in Spanner AND not in
      //     threadstore) — nothing to fetch.
      // (b) Route-level 404: endpoint not deployed yet, or session is
      //     a threadstore session not yet backfilled into Spanner.
      // We can't tell them apart from the response alone. Returning null
      // lets the caller fall back to session-ingress, which will correctly
      // return empty for case (a) and data for case (b). Once the backfill
      // is complete and session-ingress is gone, the fallback also returns
      // null → same "Failed to fetch session logs" error as today.
      //
      // 404 mid-pagination (pages > 0) means session was deleted between
      // pages — return what we have.
      logForDebugging(
        `[teleport] Session ${sessionId} not found (page ${pages})`,
      )
      logForDiagnosticsNoPII('warn', 'teleport_events_not_found')
      return pages === 0 ? null : all
    }
    if (response.status === 401) {
      logForDiagnosticsNoPII('error', 'teleport_events_bad_token')
      throw new Error(
        'Your session has expired. Please run /login to sign in again.',
      )
    }
    if (response.status !== 200) {
      logError(
        new Error(
          `Teleport events returned ${response.status}: ${jsonStringify(response.data)}`,
        ),
      )
      logForDiagnosticsNoPII('error', 'teleport_events_bad_status')
      return null
    }
    const { data, next_cursor } = response.data
    if (!Array.isArray(data)) {
      logError(
        new Error(
          `Teleport events invalid response shape: ${jsonStringify(response.data)}`,
        ),
      )
      logForDiagnosticsNoPII('error', 'teleport_events_invalid_shape')
      return null
    }
    // payload IS the Entry. null payload happens for threadstore non-generic
    // events (server skips them) or encryption failures — skip here too.
    for (const ev of data) {
      if (ev.payload !== null) {
        all.push(ev.payload)
      }
    }
    pages++
    // == null covers both `null` and `undefined` — the proto omits the
    // field at end-of-stream, but some serializers emit `null`. Strict
    // `=== undefined` would loop forever on `null` (cursor=null in query
    // params stringifies to "null", which the server rejects or echoes).
    if (next_cursor == null) {
      break
    }
    cursor = next_cursor
  }
  if (pages >= maxPages) {
    // Don't fail — return what we have. Better to teleport with a
    // truncated transcript than not at all.
    logError(
      new Error(`Teleport events hit page cap (${maxPages}) for ${sessionId}`),
    )
    logForDiagnosticsNoPII('warn', 'teleport_events_page_cap')
  }
  logForDebugging(
    `[teleport] Fetched ${all.length} events over ${pages} page(s) for ${sessionId}`,
  )
  return all
}
/**
 * Shared implementation for fetching session logs from a URL.
 *
 * @returns entries on 200, [] on 404 (no logs yet is a normal state), or
 *   null on malformed payloads, other statuses, or transport errors.
 * @throws Error with a user-facing re-login message on 401.
 *
 * BUGFIX: status handling now happens OUTSIDE the try/catch. Previously the
 * 401 "session has expired" Error was thrown inside the same try whose catch
 * logged it and returned null, so the re-login prompt was silently swallowed
 * and callers could never surface it. getTeleportEvents already throws its
 * 401 outside the request try — this brings the two into agreement.
 */
async function fetchSessionLogsFromUrl(
  sessionId: string,
  url: string,
  headers: Record<string, string>,
): Promise<Entry[] | null> {
  let response
  try {
    response = await axios.get(url, {
      headers,
      timeout: 20000,
      validateStatus: status => status < 500,
      // Optionally ask the server for only post-compaction entries.
      params: isEnvTruthy(process.env.CLAUDE_AFTER_LAST_COMPACT)
        ? { after_last_compact: true }
        : undefined,
    })
  } catch (error) {
    // Network errors / 5xx: log and signal "unavailable" with null.
    const axiosError = error as AxiosError<SessionIngressError>
    logError(new Error(`Error fetching session logs: ${axiosError.message}`))
    logForDiagnosticsNoPII('error', 'session_get_fail_status', {
      status: axiosError.status,
    })
    return null
  }
  if (response.status === 200) {
    const data = response.data
    // Validate the response structure
    if (!data || typeof data !== 'object' || !Array.isArray(data.loglines)) {
      logError(
        new Error(
          `Invalid session logs response format: ${jsonStringify(data)}`,
        ),
      )
      logForDiagnosticsNoPII('error', 'session_get_fail_invalid_response')
      return null
    }
    const logs = data.loglines as Entry[]
    logForDebugging(
      `Fetched ${logs.length} session logs for session ${sessionId}`,
    )
    return logs
  }
  if (response.status === 404) {
    // No logs yet is a normal state, not an error.
    logForDebugging(`No existing logs for session ${sessionId}`)
    logForDiagnosticsNoPII('warn', 'session_get_no_logs_for_session')
    return []
  }
  if (response.status === 401) {
    logForDebugging('Auth token expired or invalid')
    logForDiagnosticsNoPII('error', 'session_get_fail_bad_token')
    // Propagates to callers now that it is outside the try/catch.
    throw new Error(
      'Your session has expired. Please run /login to sign in again.',
    )
  }
  logForDebugging(
    `Failed to fetch session logs: ${response.status} ${response.statusText}`,
  )
  logForDiagnosticsNoPII('error', 'session_get_fail_status', {
    status: response.status,
  })
  return null
}
/**
 * Walk backward through entries to find the last one carrying a uuid.
 * Some entry types (SummaryMessage, TagMessage) don't have one.
 */
function findLastUuid(logs: Entry[] | null): UUID | undefined {
  if (!logs) {
    return undefined
  }
  for (let i = logs.length - 1; i >= 0; i--) {
    const candidate = logs[i]
    if (candidate && 'uuid' in candidate && candidate.uuid) {
      return candidate.uuid as UUID
    }
  }
  return undefined
}
/**
 * Clear cached state for one session: its sequential append wrapper and
 * its Last-Uuid chain head.
 */
export function clearSession(sessionId: string): void {
  sequentialAppendBySession.delete(sessionId)
  lastUuidMap.delete(sessionId)
}
/**
 * Clear all cached session state (all sessions).
 * Use this on /clear to free sub-agent session entries.
 */
export function clearAllSessions(): void {
  sequentialAppendBySession.clear()
  lastUuidMap.clear()
}
+38
View File
@@ -0,0 +1,38 @@
import axios from 'axios'
import { getOauthConfig } from '../../constants/oauth.js'
import { isClaudeAISubscriber } from '../../utils/auth.js'
import { logForDebugging } from '../../utils/debug.js'
import { getOAuthHeaders, prepareApiRequest } from '../../utils/teleport/api.js'
/** Quota snapshot returned by GET /v1/ultrareview/quota. */
export type UltrareviewQuotaResponse = {
  reviews_used: number
  reviews_limit: number
  reviews_remaining: number
  // Presumably true when further reviews draw on overage — confirm with API docs.
  is_overage: boolean
}
/**
 * Peek the ultrareview quota for display and nudge decisions. Consume
 * happens server-side at session creation. Resolves to null when the user
 * is not a subscriber or the endpoint errors.
 */
export async function fetchUltrareviewQuota(): Promise<UltrareviewQuotaResponse | null> {
  if (!isClaudeAISubscriber()) {
    return null
  }
  try {
    const { accessToken, orgUUID } = await prepareApiRequest()
    const url = `${getOauthConfig().BASE_API_URL}/v1/ultrareview/quota`
    const headers = {
      ...getOAuthHeaders(accessToken),
      'x-organization-uuid': orgUUID,
    }
    const { data } = await axios.get<UltrareviewQuotaResponse>(url, {
      headers,
      timeout: 5000,
    })
    return data
  } catch (error) {
    // Quota display is best-effort — never propagate failures.
    logForDebugging(`fetchUltrareviewQuota failed: ${error}`)
    return null
  }
}
+63
View File
@@ -0,0 +1,63 @@
import axios from 'axios'
import { getOauthConfig } from '../../constants/oauth.js'
import {
getClaudeAIOAuthTokens,
hasProfileScope,
isClaudeAISubscriber,
} from '../../utils/auth.js'
import { getAuthHeaders } from '../../utils/http.js'
import { getClaudeCodeUserAgent } from '../../utils/userAgent.js'
import { isOAuthTokenExpired } from '../oauth/client.js'
/** A single rate-limit window's usage snapshot. */
export type RateLimit = {
  utilization: number | null // a percentage from 0 to 100
  resets_at: string | null // ISO 8601 timestamp
}
/** Extra-usage (overage) state for the account. */
export type ExtraUsage = {
  is_enabled: boolean
  monthly_limit: number | null
  used_credits: number | null
  utilization: number | null
}
/** Shape of GET /api/oauth/usage; all windows are optional in the response. */
export type Utilization = {
  five_hour?: RateLimit | null
  seven_day?: RateLimit | null
  seven_day_oauth_apps?: RateLimit | null
  seven_day_opus?: RateLimit | null
  seven_day_sonnet?: RateLimit | null
  extra_usage?: ExtraUsage | null
}
/**
 * Fetch current rate-limit utilization from the OAuth usage endpoint.
 *
 * Returns {} when the user lacks a subscription or profile scope, and null
 * when the OAuth token is already expired (skipping the call avoids a
 * guaranteed 401). Throws when auth headers cannot be built or on HTTP error.
 */
export async function fetchUtilization(): Promise<Utilization | null> {
  if (!isClaudeAISubscriber() || !hasProfileScope()) {
    return {}
  }
  // Skip API call if OAuth token is expired to avoid 401 errors
  const tokens = getClaudeAIOAuthTokens()
  if (tokens && isOAuthTokenExpired(tokens.expiresAt)) {
    return null
  }
  const authResult = getAuthHeaders()
  if (authResult.error) {
    throw new Error(`Auth error: ${authResult.error}`)
  }
  const response = await axios.get<Utilization>(
    `${getOauthConfig().BASE_API_URL}/api/oauth/usage`,
    {
      headers: {
        'Content-Type': 'application/json',
        'User-Agent': getClaudeCodeUserAgent(),
        ...authResult.headers,
      },
      timeout: 5000, // 5 second timeout
    },
  )
  return response.data
}
+822
View File
@@ -0,0 +1,822 @@
import { feature } from 'bun:bundle'
import type Anthropic from '@anthropic-ai/sdk'
import {
APIConnectionError,
APIError,
APIUserAbortError,
} from '@anthropic-ai/sdk'
import type { QuerySource } from 'src/constants/querySource.js'
import type { SystemAPIErrorMessage } from 'src/types/message.js'
import { isAwsCredentialsProviderError } from 'src/utils/aws.js'
import { logForDebugging } from 'src/utils/debug.js'
import { logError } from 'src/utils/log.js'
import { createSystemAPIErrorMessage } from 'src/utils/messages.js'
import { getAPIProviderForStatsig } from 'src/utils/model/providers.js'
import {
clearApiKeyHelperCache,
clearAwsCredentialsCache,
clearGcpCredentialsCache,
getClaudeAIOAuthTokens,
handleOAuth401Error,
isClaudeAISubscriber,
isEnterpriseSubscriber,
} from '../../utils/auth.js'
import { isEnvTruthy } from '../../utils/envUtils.js'
import { errorMessage } from '../../utils/errors.js'
import {
type CooldownReason,
handleFastModeOverageRejection,
handleFastModeRejectedByAPI,
isFastModeCooldown,
isFastModeEnabled,
triggerFastModeCooldown,
} from '../../utils/fastMode.js'
import { isNonCustomOpusModel } from '../../utils/model/model.js'
import { disableKeepAlive } from '../../utils/proxy.js'
import { sleep } from '../../utils/sleep.js'
import type { ThinkingConfig } from '../../utils/thinking.js'
import { getFeatureValue_CACHED_MAY_BE_STALE } from '../analytics/growthbook.js'
import {
type AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
logEvent,
} from '../analytics/index.js'
import {
checkMockRateLimitError,
isMockRateLimitError,
} from '../rateLimitMocking.js'
import { REPEATED_529_ERROR_MESSAGE } from './errors.js'
import { extractConnectionErrorDetails } from './errorUtils.js'
// Factory for the error thrown when an abort signal cancels a retry sleep.
const abortError = () => new APIUserAbortError()
// Attempt budget when RetryOptions.maxRetries is not provided.
const DEFAULT_MAX_RETRIES = 10
// Lower bound for max_tokens after a context-overflow adjustment.
const FLOOR_OUTPUT_TOKENS = 3000
// Consecutive 529s tolerated before model fallback / hard failure.
const MAX_529_RETRIES = 3
// Base delay (ms) for retry backoff.
export const BASE_DELAY_MS = 500
// Foreground query sources where the user IS blocking on the result — these
// retry on 529. Everything else (summaries, titles, suggestions, classifiers)
// bails immediately: during a capacity cascade each retry is 3-10× gateway
// amplification, and the user never sees those fail anyway. New sources
// default to no-retry — add here only if the user is waiting on the result.
const FOREGROUND_529_RETRY_SOURCES = new Set<QuerySource>([
  'repl_main_thread',
  'repl_main_thread:outputStyle:custom',
  'repl_main_thread:outputStyle:Explanatory',
  'repl_main_thread:outputStyle:Learning',
  'sdk',
  'agent:custom',
  'agent:default',
  'agent:builtin',
  'compact',
  'hook_agent',
  'hook_prompt',
  'verification_agent',
  'side_question',
  // Security classifiers — must complete for auto-mode correctness.
  // yoloClassifier.ts uses 'auto_mode' (not 'yolo_classifier' — that's
  // type-only). bash_classifier is ant-only; feature-gate so the string
  // tree-shakes out of external builds (excluded-strings.txt).
  'auto_mode',
  ...(feature('BASH_CLASSIFIER') ? (['bash_classifier'] as const) : []),
])
/** Whether a 529 for this query source is worth retrying (user is waiting). */
function shouldRetry529(querySource: QuerySource | undefined): boolean {
  // undefined → retry (conservative for untagged call paths)
  if (querySource === undefined) {
    return true
  }
  return FOREGROUND_529_RETRY_SOURCES.has(querySource)
}
// CLAUDE_CODE_UNATTENDED_RETRY: for unattended sessions (ant-only). Retries 429/529
// indefinitely with higher backoff and periodic keep-alive yields so the host
// environment does not mark the session idle mid-wait.
// TODO(ANT-344): the keep-alive via SystemAPIErrorMessage yields is a stopgap
// until there's a dedicated keep-alive channel.
const PERSISTENT_MAX_BACKOFF_MS = 5 * 60 * 1000 // 5 min exponential-backoff ceiling
const PERSISTENT_RESET_CAP_MS = 6 * 60 * 60 * 1000 // 6 hr hard cap on any single wait
const HEARTBEAT_INTERVAL_MS = 30_000 // cadence for the keep-alive yields noted above
/** Whether ant-only unattended persistent retry is compiled in AND enabled via env. */
function isPersistentRetryEnabled(): boolean {
  if (!feature('UNATTENDED_RETRY')) {
    return false
  }
  return isEnvTruthy(process.env.CLAUDE_CODE_UNATTENDED_RETRY)
}
/** True for capacity-related failures worth waiting out: 529 overloaded or 429 rate limit. */
function isTransientCapacityError(error: unknown): boolean {
  if (is529Error(error)) {
    return true
  }
  return error instanceof APIError && error.status === 429
}
/**
 * Detects a dead keep-alive socket (ECONNRESET/EPIPE) so the caller can
 * disable connection pooling before reconnecting.
 */
function isStaleConnectionError(error: unknown): boolean {
  if (!(error instanceof APIConnectionError)) {
    return false
  }
  const code = extractConnectionErrorDetails(error)?.code
  return code === 'ECONNRESET' || code === 'EPIPE'
}
/** Mutable per-request state threaded through retry attempts. */
export interface RetryContext {
  /** Set after a max-tokens context-overflow 400 to shrink the next attempt. */
  maxTokensOverride?: number
  model: string
  /** Thinking budget config; bounds the minimum viable max_tokens on overflow. */
  thinkingConfig: ThinkingConfig
  /** Present only when fast mode is enabled; cleared mid-loop on fallback/cooldown. */
  fastMode?: boolean
}
/** Options for the withRetry generator. */
interface RetryOptions {
  maxRetries?: number
  model: string
  /** Model to switch to after repeated 529s (signalled via FallbackTriggeredError). */
  fallbackModel?: string
  thinkingConfig: ThinkingConfig
  fastMode?: boolean
  signal?: AbortSignal
  /** Tags the caller; non-foreground sources skip 529 retries entirely. */
  querySource?: QuerySource
  /**
   * Pre-seed the consecutive 529 counter. Used when this retry loop is a
   * non-streaming fallback after a streaming 529 — the streaming 529 should
   * count toward MAX_529_RETRIES so total 529s-before-fallback is consistent
   * regardless of which request mode hit the overload.
   */
  initialConsecutive529Errors?: number
}
/**
 * Wraps an error the retry loop will not (or can no longer) retry, carrying
 * the RetryContext in effect so callers can report model/thinking state.
 */
export class CannotRetryError extends Error {
  constructor(
    public readonly originalError: unknown,
    public readonly retryContext: RetryContext,
  ) {
    const message = errorMessage(originalError)
    super(message)
    // NOTE(review): name is 'RetryError' while the class is CannotRetryError.
    // Possibly intentional (telemetry/back-compat with an older class name),
    // but confirm — the sibling FallbackTriggeredError uses its own class name.
    this.name = 'RetryError'
    // Preserve the original stack trace if available
    if (originalError instanceof Error && originalError.stack) {
      this.stack = originalError.stack
    }
  }
}
/**
 * Signals that repeated 529s exhausted the budget for the primary model and
 * the caller should restart the request against `fallbackModel`.
 */
export class FallbackTriggeredError extends Error {
  public readonly originalModel: string
  public readonly fallbackModel: string

  constructor(originalModel: string, fallbackModel: string) {
    super(`Model fallback triggered: ${originalModel} -> ${fallbackModel}`)
    this.name = 'FallbackTriggeredError'
    this.originalModel = originalModel
    this.fallbackModel = fallbackModel
  }
}
export async function* withRetry<T>(
getClient: () => Promise<Anthropic>,
operation: (
client: Anthropic,
attempt: number,
context: RetryContext,
) => Promise<T>,
options: RetryOptions,
): AsyncGenerator<SystemAPIErrorMessage, T> {
const maxRetries = getMaxRetries(options)
const retryContext: RetryContext = {
model: options.model,
thinkingConfig: options.thinkingConfig,
...(isFastModeEnabled() && { fastMode: options.fastMode }),
}
let client: Anthropic | null = null
let consecutive529Errors = options.initialConsecutive529Errors ?? 0
let lastError: unknown
let persistentAttempt = 0
for (let attempt = 1; attempt <= maxRetries + 1; attempt++) {
if (options.signal?.aborted) {
throw new APIUserAbortError()
}
// Capture whether fast mode is active before this attempt
// (fallback may change the state mid-loop)
const wasFastModeActive = isFastModeEnabled()
? retryContext.fastMode && !isFastModeCooldown()
: false
try {
// Check for mock rate limits (used by /mock-limits command for Ant employees)
if (process.env.USER_TYPE === 'ant') {
const mockError = checkMockRateLimitError(
retryContext.model,
wasFastModeActive,
)
if (mockError) {
throw mockError
}
}
// Get a fresh client instance on first attempt or after authentication errors
// - 401 for first-party API authentication failures
// - 403 "OAuth token has been revoked" (another process refreshed the token)
// - Bedrock-specific auth errors (403 or CredentialsProviderError)
// - Vertex-specific auth errors (credential refresh failures, 401)
// - ECONNRESET/EPIPE: stale keep-alive socket; disable pooling and reconnect
const isStaleConnection = isStaleConnectionError(lastError)
if (
isStaleConnection &&
getFeatureValue_CACHED_MAY_BE_STALE(
'tengu_disable_keepalive_on_econnreset',
false,
)
) {
logForDebugging(
'Stale connection (ECONNRESET/EPIPE) — disabling keep-alive for retry',
)
disableKeepAlive()
}
if (
client === null ||
(lastError instanceof APIError && lastError.status === 401) ||
isOAuthTokenRevokedError(lastError) ||
isBedrockAuthError(lastError) ||
isVertexAuthError(lastError) ||
isStaleConnection
) {
// On 401 "token expired" or 403 "token revoked", force a token refresh
if (
(lastError instanceof APIError && lastError.status === 401) ||
isOAuthTokenRevokedError(lastError)
) {
const failedAccessToken = getClaudeAIOAuthTokens()?.accessToken
if (failedAccessToken) {
await handleOAuth401Error(failedAccessToken)
}
}
client = await getClient()
}
return await operation(client, attempt, retryContext)
} catch (error) {
lastError = error
logForDebugging(
`API error (attempt ${attempt}/${maxRetries + 1}): ${error instanceof APIError ? `${error.status} ${error.message}` : errorMessage(error)}`,
{ level: 'error' },
)
// Fast mode fallback: on 429/529, either wait and retry (short delays)
// or fall back to standard speed (long delays) to avoid cache thrashing.
// Skip in persistent mode: the short-retry path below loops with fast
// mode still active, so its `continue` never reaches the attempt clamp
// and the for-loop terminates. Persistent sessions want the chunked
// keep-alive path instead of fast-mode cache-preservation anyway.
if (
wasFastModeActive &&
!isPersistentRetryEnabled() &&
error instanceof APIError &&
(error.status === 429 || is529Error(error))
) {
// If the 429 is specifically because extra usage (overage) is not
// available, permanently disable fast mode with a specific message.
const overageReason = error.headers?.get(
'anthropic-ratelimit-unified-overage-disabled-reason',
)
if (overageReason !== null && overageReason !== undefined) {
handleFastModeOverageRejection(overageReason)
retryContext.fastMode = false
continue
}
const retryAfterMs = getRetryAfterMs(error)
if (retryAfterMs !== null && retryAfterMs < SHORT_RETRY_THRESHOLD_MS) {
// Short retry-after: wait and retry with fast mode still active
// to preserve prompt cache (same model name on retry).
await sleep(retryAfterMs, options.signal, { abortError })
continue
}
// Long or unknown retry-after: enter cooldown (switches to standard
// speed model), with a minimum floor to avoid flip-flopping.
const cooldownMs = Math.max(
retryAfterMs ?? DEFAULT_FAST_MODE_FALLBACK_HOLD_MS,
MIN_COOLDOWN_MS,
)
const cooldownReason: CooldownReason = is529Error(error)
? 'overloaded'
: 'rate_limit'
triggerFastModeCooldown(Date.now() + cooldownMs, cooldownReason)
if (isFastModeEnabled()) {
retryContext.fastMode = false
}
continue
}
// Fast mode fallback: if the API rejects the fast mode parameter
// (e.g., org doesn't have fast mode enabled), permanently disable fast
// mode and retry at standard speed.
if (wasFastModeActive && isFastModeNotEnabledError(error)) {
handleFastModeRejectedByAPI()
retryContext.fastMode = false
continue
}
// Non-foreground sources bail immediately on 529 — no retry amplification
// during capacity cascades. User never sees these fail.
if (is529Error(error) && !shouldRetry529(options.querySource)) {
logEvent('tengu_api_529_background_dropped', {
query_source:
options.querySource as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
})
throw new CannotRetryError(error, retryContext)
}
// Track consecutive 529 errors
if (
is529Error(error) &&
// If FALLBACK_FOR_ALL_PRIMARY_MODELS is not set, fall through only if the primary model is a non-custom Opus model.
// TODO: Revisit if the isNonCustomOpusModel check should still exist, or if isNonCustomOpusModel is a stale artifact of when Claude Code was hardcoded on Opus.
(process.env.FALLBACK_FOR_ALL_PRIMARY_MODELS ||
(!isClaudeAISubscriber() && isNonCustomOpusModel(options.model)))
) {
consecutive529Errors++
if (consecutive529Errors >= MAX_529_RETRIES) {
// Check if fallback model is specified
if (options.fallbackModel) {
logEvent('tengu_api_opus_fallback_triggered', {
original_model:
options.model as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
fallback_model:
options.fallbackModel as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
provider: getAPIProviderForStatsig(),
})
// Throw special error to indicate fallback was triggered
throw new FallbackTriggeredError(
options.model,
options.fallbackModel,
)
}
if (
process.env.USER_TYPE === 'external' &&
!process.env.IS_SANDBOX &&
!isPersistentRetryEnabled()
) {
logEvent('tengu_api_custom_529_overloaded_error', {})
throw new CannotRetryError(
new Error(REPEATED_529_ERROR_MESSAGE),
retryContext,
)
}
}
}
// Only retry if the error indicates we should
const persistent =
isPersistentRetryEnabled() && isTransientCapacityError(error)
if (attempt > maxRetries && !persistent) {
throw new CannotRetryError(error, retryContext)
}
// AWS/GCP errors aren't always APIError, but can be retried
const handledCloudAuthError =
handleAwsCredentialError(error) || handleGcpCredentialError(error)
if (
!handledCloudAuthError &&
(!(error instanceof APIError) || !shouldRetry(error))
) {
throw new CannotRetryError(error, retryContext)
}
// Handle max tokens context overflow errors by adjusting max_tokens for the next attempt
// NOTE: With extended-context-window beta, this 400 error should not occur.
// The API now returns 'model_context_window_exceeded' stop_reason instead.
// Keeping for backward compatibility.
if (error instanceof APIError) {
const overflowData = parseMaxTokensContextOverflowError(error)
if (overflowData) {
const { inputTokens, contextLimit } = overflowData
const safetyBuffer = 1000
const availableContext = Math.max(
0,
contextLimit - inputTokens - safetyBuffer,
)
if (availableContext < FLOOR_OUTPUT_TOKENS) {
logError(
new Error(
`availableContext ${availableContext} is less than FLOOR_OUTPUT_TOKENS ${FLOOR_OUTPUT_TOKENS}`,
),
)
throw error
}
// Ensure we have enough tokens for thinking + at least 1 output token
const minRequired =
(retryContext.thinkingConfig.type === 'enabled'
? retryContext.thinkingConfig.budgetTokens
: 0) + 1
const adjustedMaxTokens = Math.max(
FLOOR_OUTPUT_TOKENS,
availableContext,
minRequired,
)
retryContext.maxTokensOverride = adjustedMaxTokens
logEvent('tengu_max_tokens_context_overflow_adjustment', {
inputTokens,
contextLimit,
adjustedMaxTokens,
attempt,
})
continue
}
}
// For other errors, proceed with normal retry logic
// Get retry-after header if available
const retryAfter = getRetryAfter(error)
let delayMs: number
if (persistent && error instanceof APIError && error.status === 429) {
persistentAttempt++
// Window-based limits (e.g. 5hr Max/Pro) include a reset timestamp.
// Wait until reset rather than polling every 5 min uselessly.
const resetDelay = getRateLimitResetDelayMs(error)
delayMs =
resetDelay ??
Math.min(
getRetryDelay(
persistentAttempt,
retryAfter,
PERSISTENT_MAX_BACKOFF_MS,
),
PERSISTENT_RESET_CAP_MS,
)
} else if (persistent) {
persistentAttempt++
// Retry-After is a server directive and bypasses maxDelayMs inside
// getRetryDelay (intentional — honoring it is correct). Cap at the
// 6hr reset-cap here so a pathological header can't wait unbounded.
delayMs = Math.min(
getRetryDelay(
persistentAttempt,
retryAfter,
PERSISTENT_MAX_BACKOFF_MS,
),
PERSISTENT_RESET_CAP_MS,
)
} else {
delayMs = getRetryDelay(attempt, retryAfter)
}
// In persistent mode the for-loop `attempt` is clamped at maxRetries+1;
// use persistentAttempt for telemetry/yields so they show the true count.
const reportedAttempt = persistent ? persistentAttempt : attempt
logEvent('tengu_api_retry', {
attempt: reportedAttempt,
delayMs: delayMs,
error: (error as APIError)
.message as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
status: (error as APIError).status,
provider: getAPIProviderForStatsig(),
})
if (persistent) {
if (delayMs > 60_000) {
logEvent('tengu_api_persistent_retry_wait', {
status: (error as APIError).status,
delayMs,
attempt: reportedAttempt,
provider: getAPIProviderForStatsig(),
})
}
// Chunk long sleeps so the host sees periodic stdout activity and
// does not mark the session idle. Each yield surfaces as
// {type:'system', subtype:'api_retry'} on stdout via QueryEngine.
let remaining = delayMs
while (remaining > 0) {
if (options.signal?.aborted) throw new APIUserAbortError()
if (error instanceof APIError) {
yield createSystemAPIErrorMessage(
error,
remaining,
reportedAttempt,
maxRetries,
)
}
const chunk = Math.min(remaining, HEARTBEAT_INTERVAL_MS)
await sleep(chunk, options.signal, { abortError })
remaining -= chunk
}
// Clamp so the for-loop never terminates. Backoff uses the separate
// persistentAttempt counter which keeps growing to the 5-min cap.
if (attempt >= maxRetries) attempt = maxRetries
} else {
if (error instanceof APIError) {
yield createSystemAPIErrorMessage(error, delayMs, attempt, maxRetries)
}
await sleep(delayMs, options.signal, { abortError })
}
}
}
throw new CannotRetryError(lastError, retryContext)
}
/**
 * Extract the Retry-After header value from an error, if present.
 *
 * Handles both error shapes seen in practice: a plain-object `headers`
 * record, and a fetch-style `Headers` instance (Anthropic SDK errors).
 * Returns the raw header string, or null when absent.
 */
function getRetryAfter(error: unknown): string | null {
  const headers = (error as { headers?: unknown }).headers
  // Plain-object headers (some SDK / cloud provider error shapes).
  const fromRecord = (headers as Record<string, string> | undefined)?.[
    'retry-after'
  ]
  if (fromRecord) return fromRecord
  // Fetch-style Headers instance.
  // eslint-disable-next-line eslint-plugin-n/no-unsupported-features/node-builtins
  const fromHeaders = (headers as Headers | undefined)?.get?.('retry-after')
  return fromHeaders ?? null
}
export function getRetryDelay(
attempt: number,
retryAfterHeader?: string | null,
maxDelayMs = 32000,
): number {
if (retryAfterHeader) {
const seconds = parseInt(retryAfterHeader, 10)
if (!isNaN(seconds)) {
return seconds * 1000
}
}
const baseDelay = Math.min(
BASE_DELAY_MS * Math.pow(2, attempt - 1),
maxDelayMs,
)
const jitter = Math.random() * 0.25 * baseDelay
return baseDelay + jitter
}
/**
 * Parse the 400 "input length and `max_tokens` exceed context limit" error.
 *
 * Returns the token counts embedded in the message so the caller can shrink
 * max_tokens and retry, or undefined when the error is not this shape.
 */
export function parseMaxTokensContextOverflowError(error: APIError):
  | {
      inputTokens: number
      maxTokens: number
      contextLimit: number
    }
  | undefined {
  // Only a 400 with a message can be this error.
  if (error.status !== 400 || !error.message) {
    return undefined
  }
  if (
    !error.message.includes(
      'input length and `max_tokens` exceed context limit',
    )
  ) {
    return undefined
  }
  // Example format: "input length and `max_tokens` exceed context limit: 188059 + 20000 > 200000"
  const parsed = error.message.match(
    /input length and `max_tokens` exceed context limit: (\d+) \+ (\d+) > (\d+)/,
  )
  if (!parsed || parsed.length !== 4) {
    return undefined
  }
  const [, rawInput, rawMax, rawLimit] = parsed
  if (!rawInput || !rawMax || !rawLimit) {
    logError(
      new Error(
        'Unable to parse max_tokens from max_tokens exceed context limit error message',
      ),
    )
    return undefined
  }
  const inputTokens = Number.parseInt(rawInput, 10)
  const maxTokens = Number.parseInt(rawMax, 10)
  const contextLimit = Number.parseInt(rawLimit, 10)
  if ([inputTokens, maxTokens, contextLimit].some(Number.isNaN)) {
    return undefined
  }
  return { inputTokens, maxTokens, contextLimit }
}
// TODO: Replace with a response header check once the API adds a dedicated
// header for fast-mode rejection (e.g., x-fast-mode-rejected). Matching on
// the message text is fragile and breaks if the API wording changes.
function isFastModeNotEnabledError(error: unknown): boolean {
  if (!(error instanceof APIError) || error.status !== 400) {
    return false
  }
  return error.message?.includes('Fast mode is not enabled') ?? false
}
/**
 * Detect a 529 (overloaded) API error.
 *
 * The SDK sometimes drops the 529 status code during streaming, so we also
 * sniff the serialized error body for the overloaded_error marker.
 */
export function is529Error(error: unknown): boolean {
  if (!(error instanceof APIError)) {
    return false
  }
  if (error.status === 529) {
    return true
  }
  return error.message?.includes('"type":"overloaded_error"') ?? false
}
// A 403 whose message says the OAuth token was revoked; gets the same
// refresh-and-retry treatment as a 401 in shouldRetry.
function isOAuthTokenRevokedError(error: unknown): boolean {
  if (!(error instanceof APIError) || error.status !== 403) {
    return false
  }
  return error.message?.includes('OAuth token has been revoked') ?? false
}
// Bedrock-only: detect expired/invalid AWS credentials.
function isBedrockAuthError(error: unknown): boolean {
  if (!isEnvTruthy(process.env.CLAUDE_CODE_USE_BEDROCK)) {
    return false
  }
  // AWS libs reject without an API call if .aws holds a past Expiration value;
  // otherwise, API calls that receive expired tokens give a generic 403
  // "The security token included in the request is invalid".
  if (isAwsCredentialsProviderError(error)) {
    return true
  }
  return error instanceof APIError && error.status === 403
}
/**
 * Clear AWS auth caches if appropriate.
 * @returns true if action was taken.
 */
function handleAwsCredentialError(error: unknown): boolean {
  if (!isBedrockAuthError(error)) {
    return false
  }
  clearAwsCredentialsCache()
  return true
}
// google-auth-library throws plain Error (no typed name like AWS's
// CredentialsProviderError). Match common SDK-level credential-failure messages.
function isGoogleAuthLibraryCredentialError(error: unknown): boolean {
  if (!(error instanceof Error)) return false
  const message = error.message
  const knownFragments = [
    'Could not load the default credentials',
    'Could not refresh access token',
    'invalid_grant',
  ]
  return knownFragments.some(fragment => message.includes(fragment))
}
// Vertex-only: detect expired/invalid GCP credentials.
function isVertexAuthError(error: unknown): boolean {
  if (!isEnvTruthy(process.env.CLAUDE_CODE_USE_VERTEX)) {
    return false
  }
  // SDK-level: google-auth-library fails in prepareOptions() before the HTTP call.
  if (isGoogleAuthLibraryCredentialError(error)) {
    return true
  }
  // Server-side: Vertex returns 401 for expired/invalid tokens.
  return error instanceof APIError && error.status === 401
}
/**
 * Clear GCP auth caches if appropriate.
 * @returns true if action was taken.
 */
function handleGcpCredentialError(error: unknown): boolean {
  if (!isVertexAuthError(error)) {
    return false
  }
  clearGcpCredentialsCache()
  return true
}
/**
 * Decide whether an APIError is retryable.
 *
 * Guard order is load-bearing: mock errors are never retried; persistent-mode
 * and CCR overrides come before the server's x-should-retry directive;
 * message sniffing (streaming 529s, context-overflow 400s) comes next; status
 * code heuristics run last.
 *
 * Side effect: clears the API-key-helper cache on 401 so the next attempt
 * re-resolves credentials.
 */
function shouldRetry(error: APIError): boolean {
  // Never retry mock errors - they're from /mock-limits command for testing
  if (isMockRateLimitError(error)) {
    return false
  }
  // Persistent mode: 429/529 always retryable, bypass subscriber gates and
  // x-should-retry header.
  if (isPersistentRetryEnabled() && isTransientCapacityError(error)) {
    return true
  }
  // CCR mode: auth is via infrastructure-provided JWTs, so a 401/403 is a
  // transient blip (auth service flap, network hiccup) rather than bad
  // credentials. Bypass x-should-retry:false — the server assumes we'd retry
  // the same bad key, but our key is fine.
  if (
    isEnvTruthy(process.env.CLAUDE_CODE_REMOTE) &&
    (error.status === 401 || error.status === 403)
  ) {
    return true
  }
  // Check for overloaded errors first by examining the message content
  // The SDK sometimes fails to properly pass the 529 status code during streaming,
  // so we need to check the error message directly
  if (error.message?.includes('"type":"overloaded_error"')) {
    return true
  }
  // Check for max tokens context overflow errors that we can handle
  if (parseMaxTokensContextOverflowError(error)) {
    return true
  }
  // Note this is not a standard header.
  const shouldRetryHeader = error.headers?.get('x-should-retry')
  // If the server explicitly says whether or not to retry, obey.
  // For Max and Pro users, should-retry is true, but in several hours, so we shouldn't.
  // Enterprise users can retry because they typically use PAYG instead of rate limits.
  if (
    shouldRetryHeader === 'true' &&
    (!isClaudeAISubscriber() || isEnterpriseSubscriber())
  ) {
    return true
  }
  // Ants can ignore x-should-retry: false for 5xx server errors only.
  // For other status codes (401, 403, 400, 429, etc.), respect the header.
  if (shouldRetryHeader === 'false') {
    const is5xxError = error.status !== undefined && error.status >= 500
    if (!(process.env.USER_TYPE === 'ant' && is5xxError)) {
      return false
    }
  }
  // Connection-level SDK errors are retried unconditionally.
  if (error instanceof APIConnectionError) {
    return true
  }
  if (!error.status) return false
  // Retry on request timeouts.
  if (error.status === 408) return true
  // Retry on lock timeouts.
  if (error.status === 409) return true
  // Retry on rate limits, but not for ClaudeAI Subscription users
  // Enterprise users can retry because they typically use PAYG instead of rate limits
  if (error.status === 429) {
    return !isClaudeAISubscriber() || isEnterpriseSubscriber()
  }
  // Clear API key cache on 401 and allow retry.
  // OAuth token handling is done in the main retry loop via handleOAuth401Error.
  if (error.status === 401) {
    clearApiKeyHelperCache()
    return true
  }
  // Retry on 403 "token revoked" (same refresh logic as 401, see above)
  if (isOAuthTokenRevokedError(error)) {
    return true
  }
  // Retry internal errors.
  if (error.status && error.status >= 500) return true
  return false
}
/**
 * Maximum retry count for API requests.
 *
 * Honors the CLAUDE_CODE_MAX_RETRIES env override when it parses to a
 * number; otherwise falls back to DEFAULT_MAX_RETRIES.
 *
 * Fix: previously a non-numeric override returned NaN, and the retry loop's
 * `attempt > maxRetries` comparison is always false against NaN — retries
 * would never be bounded.
 */
export function getDefaultMaxRetries(): number {
  const override = process.env.CLAUDE_CODE_MAX_RETRIES
  if (override) {
    const parsed = parseInt(override, 10)
    // Ignore values that don't parse (e.g. "abc") instead of returning NaN.
    if (!Number.isNaN(parsed)) {
      return parsed
    }
  }
  return DEFAULT_MAX_RETRIES
}
// Per-call override wins; otherwise use the env/default retry count.
function getMaxRetries(options: RetryOptions): number {
  const { maxRetries } = options
  return maxRetries ?? getDefaultMaxRetries()
}
// NOTE(review): the consumers of these thresholds are outside this chunk —
// names suggest fast-mode fallback/cooldown tuning; confirm against usage.
const DEFAULT_FAST_MODE_FALLBACK_HOLD_MS = 30 * 60 * 1000 // 30 minutes
const SHORT_RETRY_THRESHOLD_MS = 20 * 1000 // 20 seconds
const MIN_COOLDOWN_MS = 10 * 60 * 1000 // 10 minutes
function getRetryAfterMs(error: APIError): number | null {
const retryAfter = getRetryAfter(error)
if (retryAfter) {
const seconds = parseInt(retryAfter, 10)
if (!isNaN(seconds)) {
return seconds * 1000
}
}
return null
}
// Delay until the unified rate-limit window resets, from the
// anthropic-ratelimit-unified-reset header (unix seconds). Returns null when
// the header is absent, unparseable, or already in the past; otherwise the
// wait in ms, capped at PERSISTENT_RESET_CAP_MS.
function getRateLimitResetDelayMs(error: APIError): number | null {
  const rawReset = error.headers?.get?.('anthropic-ratelimit-unified-reset')
  if (!rawReset) {
    return null
  }
  const resetEpochSec = Number(rawReset)
  if (!Number.isFinite(resetEpochSec)) {
    return null
  }
  const msUntilReset = resetEpochSec * 1000 - Date.now()
  return msUntilReset > 0 ? Math.min(msUntilReset, PERSISTENT_RESET_CAP_MS) : null
}