init claude-code
This commit is contained in:
@@ -0,0 +1,515 @@
|
||||
import { APIError } from '@anthropic-ai/sdk'
|
||||
import type { MessageParam } from '@anthropic-ai/sdk/resources/index.mjs'
|
||||
import isEqual from 'lodash-es/isEqual.js'
|
||||
import { getIsNonInteractiveSession } from '../bootstrap/state.js'
|
||||
import { isClaudeAISubscriber } from '../utils/auth.js'
|
||||
import { getModelBetas } from '../utils/betas.js'
|
||||
import { getGlobalConfig, saveGlobalConfig } from '../utils/config.js'
|
||||
import { logError } from '../utils/log.js'
|
||||
import { getSmallFastModel } from '../utils/model/model.js'
|
||||
import { isEssentialTrafficOnly } from '../utils/privacyLevel.js'
|
||||
import type { AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS } from './analytics/index.js'
|
||||
import { logEvent } from './analytics/index.js'
|
||||
import { getAPIMetadata } from './api/claude.js'
|
||||
import { getAnthropicClient } from './api/client.js'
|
||||
import {
|
||||
processRateLimitHeaders,
|
||||
shouldProcessRateLimits,
|
||||
} from './rateLimitMocking.js'
|
||||
|
||||
// Re-export message functions from centralized location
|
||||
export {
|
||||
getRateLimitErrorMessage,
|
||||
getRateLimitWarning,
|
||||
getUsingOverageText,
|
||||
} from './rateLimitMessages.js'
|
||||
|
||||
type QuotaStatus = 'allowed' | 'allowed_warning' | 'rejected'
|
||||
|
||||
type RateLimitType =
|
||||
| 'five_hour'
|
||||
| 'seven_day'
|
||||
| 'seven_day_opus'
|
||||
| 'seven_day_sonnet'
|
||||
| 'overage'
|
||||
|
||||
export type { RateLimitType }
|
||||
|
||||
type EarlyWarningThreshold = {
|
||||
utilization: number // 0-1 scale: trigger warning when usage >= this
|
||||
timePct: number // 0-1 scale: trigger warning when time elapsed <= this
|
||||
}
|
||||
|
||||
type EarlyWarningConfig = {
|
||||
rateLimitType: RateLimitType
|
||||
claimAbbrev: '5h' | '7d'
|
||||
windowSeconds: number
|
||||
thresholds: EarlyWarningThreshold[]
|
||||
}
|
||||
|
||||
// Early warning configurations in priority order (checked first to last)
|
||||
// Used as fallback when server doesn't send surpassed-threshold header
|
||||
// Warns users when they're consuming quota faster than the time window allows
|
||||
const EARLY_WARNING_CONFIGS: EarlyWarningConfig[] = [
|
||||
{
|
||||
rateLimitType: 'five_hour',
|
||||
claimAbbrev: '5h',
|
||||
windowSeconds: 5 * 60 * 60,
|
||||
thresholds: [{ utilization: 0.9, timePct: 0.72 }],
|
||||
},
|
||||
{
|
||||
rateLimitType: 'seven_day',
|
||||
claimAbbrev: '7d',
|
||||
windowSeconds: 7 * 24 * 60 * 60,
|
||||
thresholds: [
|
||||
{ utilization: 0.75, timePct: 0.6 },
|
||||
{ utilization: 0.5, timePct: 0.35 },
|
||||
{ utilization: 0.25, timePct: 0.15 },
|
||||
],
|
||||
},
|
||||
]
|
||||
|
||||
// Maps claim abbreviations to rate limit types for header-based detection
|
||||
const EARLY_WARNING_CLAIM_MAP: Record<string, RateLimitType> = {
|
||||
'5h': 'five_hour',
|
||||
'7d': 'seven_day',
|
||||
overage: 'overage',
|
||||
}
|
||||
|
||||
const RATE_LIMIT_DISPLAY_NAMES: Record<RateLimitType, string> = {
|
||||
five_hour: 'session limit',
|
||||
seven_day: 'weekly limit',
|
||||
seven_day_opus: 'Opus limit',
|
||||
seven_day_sonnet: 'Sonnet limit',
|
||||
overage: 'extra usage limit',
|
||||
}
|
||||
|
||||
export function getRateLimitDisplayName(type: RateLimitType): string {
|
||||
return RATE_LIMIT_DISPLAY_NAMES[type] || type
|
||||
}
|
||||
|
||||
/**
|
||||
* Calculate what fraction of a time window has elapsed.
|
||||
* Used for time-relative early warning fallback.
|
||||
* @param resetsAt - Unix epoch timestamp in seconds when the limit resets
|
||||
* @param windowSeconds - Duration of the window in seconds
|
||||
* @returns fraction (0-1) of the window that has elapsed
|
||||
*/
|
||||
function computeTimeProgress(resetsAt: number, windowSeconds: number): number {
|
||||
const nowSeconds = Date.now() / 1000
|
||||
const windowStart = resetsAt - windowSeconds
|
||||
const elapsed = nowSeconds - windowStart
|
||||
return Math.max(0, Math.min(1, elapsed / windowSeconds))
|
||||
}
|
||||
|
||||
// Reason why overage is disabled/rejected
|
||||
// These values come from the API's unified limiter
|
||||
export type OverageDisabledReason =
|
||||
| 'overage_not_provisioned' // Overage is not provisioned for this org or seat tier
|
||||
| 'org_level_disabled' // Organization doesn't have overage enabled
|
||||
| 'org_level_disabled_until' // Organization overage temporarily disabled
|
||||
| 'out_of_credits' // Organization has insufficient credits
|
||||
| 'seat_tier_level_disabled' // Seat tier doesn't have overage enabled
|
||||
| 'member_level_disabled' // Account specifically has overage disabled
|
||||
| 'seat_tier_zero_credit_limit' // Seat tier has a zero credit limit
|
||||
| 'group_zero_credit_limit' // Resolved group limit has a zero credit limit
|
||||
| 'member_zero_credit_limit' // Account has a zero credit limit
|
||||
| 'org_service_level_disabled' // Org service specifically has overage disabled
|
||||
| 'org_service_zero_credit_limit' // Org service has a zero credit limit
|
||||
| 'no_limits_configured' // No overage limits configured for account
|
||||
| 'unknown' // Unknown reason, should not happen
|
||||
|
||||
export type ClaudeAILimits = {
|
||||
status: QuotaStatus
|
||||
// unifiedRateLimitFallbackAvailable is currently used to warn users that set
|
||||
// their model to Opus whenever they are about to run out of quota. It does
|
||||
// not change the actual model that is used.
|
||||
unifiedRateLimitFallbackAvailable: boolean
|
||||
resetsAt?: number
|
||||
rateLimitType?: RateLimitType
|
||||
utilization?: number
|
||||
overageStatus?: QuotaStatus
|
||||
overageResetsAt?: number
|
||||
overageDisabledReason?: OverageDisabledReason
|
||||
isUsingOverage?: boolean
|
||||
surpassedThreshold?: number
|
||||
}
|
||||
|
||||
// Exported for testing only
|
||||
export let currentLimits: ClaudeAILimits = {
|
||||
status: 'allowed',
|
||||
unifiedRateLimitFallbackAvailable: false,
|
||||
isUsingOverage: false,
|
||||
}
|
||||
|
||||
/**
|
||||
* Raw per-window utilization from response headers, tracked on every API
|
||||
* response (unlike currentLimits.utilization which is only set when a warning
|
||||
* threshold fires). Exposed to statusline scripts via getRawUtilization().
|
||||
*/
|
||||
type RawWindowUtilization = {
|
||||
utilization: number // 0-1 fraction
|
||||
resets_at: number // unix epoch seconds
|
||||
}
|
||||
type RawUtilization = {
|
||||
five_hour?: RawWindowUtilization
|
||||
seven_day?: RawWindowUtilization
|
||||
}
|
||||
let rawUtilization: RawUtilization = {}
|
||||
|
||||
export function getRawUtilization(): RawUtilization {
|
||||
return rawUtilization
|
||||
}
|
||||
|
||||
function extractRawUtilization(headers: globalThis.Headers): RawUtilization {
|
||||
const result: RawUtilization = {}
|
||||
for (const [key, abbrev] of [
|
||||
['five_hour', '5h'],
|
||||
['seven_day', '7d'],
|
||||
] as const) {
|
||||
const util = headers.get(
|
||||
`anthropic-ratelimit-unified-${abbrev}-utilization`,
|
||||
)
|
||||
const reset = headers.get(`anthropic-ratelimit-unified-${abbrev}-reset`)
|
||||
if (util !== null && reset !== null) {
|
||||
result[key] = { utilization: Number(util), resets_at: Number(reset) }
|
||||
}
|
||||
}
|
||||
return result
|
||||
}
|
||||
|
||||
type StatusChangeListener = (limits: ClaudeAILimits) => void
|
||||
export const statusListeners: Set<StatusChangeListener> = new Set()
|
||||
|
||||
export function emitStatusChange(limits: ClaudeAILimits) {
|
||||
currentLimits = limits
|
||||
statusListeners.forEach(listener => listener(limits))
|
||||
const hoursTillReset = Math.round(
|
||||
(limits.resetsAt ? limits.resetsAt - Date.now() / 1000 : 0) / (60 * 60),
|
||||
)
|
||||
|
||||
logEvent('tengu_claudeai_limits_status_changed', {
|
||||
status:
|
||||
limits.status as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
|
||||
unifiedRateLimitFallbackAvailable: limits.unifiedRateLimitFallbackAvailable,
|
||||
hoursTillReset,
|
||||
})
|
||||
}
|
||||
|
||||
async function makeTestQuery() {
|
||||
const model = getSmallFastModel()
|
||||
const anthropic = await getAnthropicClient({
|
||||
maxRetries: 0,
|
||||
model,
|
||||
source: 'quota_check',
|
||||
})
|
||||
const messages: MessageParam[] = [{ role: 'user', content: 'quota' }]
|
||||
const betas = getModelBetas(model)
|
||||
// biome-ignore lint/plugin: quota check needs raw response access via asResponse()
|
||||
return anthropic.beta.messages
|
||||
.create({
|
||||
model,
|
||||
max_tokens: 1,
|
||||
messages,
|
||||
metadata: getAPIMetadata(),
|
||||
...(betas.length > 0 ? { betas } : {}),
|
||||
})
|
||||
.asResponse()
|
||||
}
|
||||
|
||||
export async function checkQuotaStatus(): Promise<void> {
|
||||
// Skip network requests if nonessential traffic is disabled
|
||||
if (isEssentialTrafficOnly()) {
|
||||
return
|
||||
}
|
||||
|
||||
// Check if we should process rate limits (real subscriber or mock testing)
|
||||
if (!shouldProcessRateLimits(isClaudeAISubscriber())) {
|
||||
return
|
||||
}
|
||||
|
||||
// In non-interactive mode (-p), the real query follows immediately and
|
||||
// extractQuotaStatusFromHeaders() will update limits from its response
|
||||
// headers (claude.ts), so skip this pre-check API call.
|
||||
if (getIsNonInteractiveSession()) {
|
||||
return
|
||||
}
|
||||
|
||||
try {
|
||||
// Make a minimal request to check quota
|
||||
const raw = await makeTestQuery()
|
||||
|
||||
// Update limits based on the response
|
||||
extractQuotaStatusFromHeaders(raw.headers)
|
||||
} catch (error) {
|
||||
if (error instanceof APIError) {
|
||||
extractQuotaStatusFromError(error)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Check if early warning should be triggered based on surpassed-threshold header.
|
||||
* Returns ClaudeAILimits if a threshold was surpassed, null otherwise.
|
||||
*/
|
||||
function getHeaderBasedEarlyWarning(
|
||||
headers: globalThis.Headers,
|
||||
unifiedRateLimitFallbackAvailable: boolean,
|
||||
): ClaudeAILimits | null {
|
||||
// Check each claim type for surpassed threshold header
|
||||
for (const [claimAbbrev, rateLimitType] of Object.entries(
|
||||
EARLY_WARNING_CLAIM_MAP,
|
||||
)) {
|
||||
const surpassedThreshold = headers.get(
|
||||
`anthropic-ratelimit-unified-${claimAbbrev}-surpassed-threshold`,
|
||||
)
|
||||
|
||||
// If threshold header is present, user has crossed a warning threshold
|
||||
if (surpassedThreshold !== null) {
|
||||
const utilizationHeader = headers.get(
|
||||
`anthropic-ratelimit-unified-${claimAbbrev}-utilization`,
|
||||
)
|
||||
const resetHeader = headers.get(
|
||||
`anthropic-ratelimit-unified-${claimAbbrev}-reset`,
|
||||
)
|
||||
|
||||
const utilization = utilizationHeader
|
||||
? Number(utilizationHeader)
|
||||
: undefined
|
||||
const resetsAt = resetHeader ? Number(resetHeader) : undefined
|
||||
|
||||
return {
|
||||
status: 'allowed_warning',
|
||||
resetsAt,
|
||||
rateLimitType: rateLimitType as RateLimitType,
|
||||
utilization,
|
||||
unifiedRateLimitFallbackAvailable,
|
||||
isUsingOverage: false,
|
||||
surpassedThreshold: Number(surpassedThreshold),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return null
|
||||
}
|
||||
|
||||
/**
|
||||
* Check if time-relative early warning should be triggered for a rate limit type.
|
||||
* Fallback when server doesn't send surpassed-threshold header.
|
||||
* Returns ClaudeAILimits if thresholds are exceeded, null otherwise.
|
||||
*/
|
||||
function getTimeRelativeEarlyWarning(
|
||||
headers: globalThis.Headers,
|
||||
config: EarlyWarningConfig,
|
||||
unifiedRateLimitFallbackAvailable: boolean,
|
||||
): ClaudeAILimits | null {
|
||||
const { rateLimitType, claimAbbrev, windowSeconds, thresholds } = config
|
||||
|
||||
const utilizationHeader = headers.get(
|
||||
`anthropic-ratelimit-unified-${claimAbbrev}-utilization`,
|
||||
)
|
||||
const resetHeader = headers.get(
|
||||
`anthropic-ratelimit-unified-${claimAbbrev}-reset`,
|
||||
)
|
||||
|
||||
if (utilizationHeader === null || resetHeader === null) {
|
||||
return null
|
||||
}
|
||||
|
||||
const utilization = Number(utilizationHeader)
|
||||
const resetsAt = Number(resetHeader)
|
||||
const timeProgress = computeTimeProgress(resetsAt, windowSeconds)
|
||||
|
||||
// Check if any threshold is exceeded: high usage early in the window
|
||||
const shouldWarn = thresholds.some(
|
||||
t => utilization >= t.utilization && timeProgress <= t.timePct,
|
||||
)
|
||||
|
||||
if (!shouldWarn) {
|
||||
return null
|
||||
}
|
||||
|
||||
return {
|
||||
status: 'allowed_warning',
|
||||
resetsAt,
|
||||
rateLimitType,
|
||||
utilization,
|
||||
unifiedRateLimitFallbackAvailable,
|
||||
isUsingOverage: false,
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Get early warning limits using header-based detection with time-relative fallback.
|
||||
* 1. First checks for surpassed-threshold header (new server-side approach)
|
||||
* 2. Falls back to time-relative thresholds (client-side calculation)
|
||||
*/
|
||||
function getEarlyWarningFromHeaders(
|
||||
headers: globalThis.Headers,
|
||||
unifiedRateLimitFallbackAvailable: boolean,
|
||||
): ClaudeAILimits | null {
|
||||
// Try header-based detection first (preferred when API sends the header)
|
||||
const headerBasedWarning = getHeaderBasedEarlyWarning(
|
||||
headers,
|
||||
unifiedRateLimitFallbackAvailable,
|
||||
)
|
||||
if (headerBasedWarning) {
|
||||
return headerBasedWarning
|
||||
}
|
||||
|
||||
// Fallback: Use time-relative thresholds (client-side calculation)
|
||||
// This catches users burning quota faster than sustainable
|
||||
for (const config of EARLY_WARNING_CONFIGS) {
|
||||
const timeRelativeWarning = getTimeRelativeEarlyWarning(
|
||||
headers,
|
||||
config,
|
||||
unifiedRateLimitFallbackAvailable,
|
||||
)
|
||||
if (timeRelativeWarning) {
|
||||
return timeRelativeWarning
|
||||
}
|
||||
}
|
||||
|
||||
return null
|
||||
}
|
||||
|
||||
function computeNewLimitsFromHeaders(
|
||||
headers: globalThis.Headers,
|
||||
): ClaudeAILimits {
|
||||
const status =
|
||||
(headers.get('anthropic-ratelimit-unified-status') as QuotaStatus) ||
|
||||
'allowed'
|
||||
const resetsAtHeader = headers.get('anthropic-ratelimit-unified-reset')
|
||||
const resetsAt = resetsAtHeader ? Number(resetsAtHeader) : undefined
|
||||
const unifiedRateLimitFallbackAvailable =
|
||||
headers.get('anthropic-ratelimit-unified-fallback') === 'available'
|
||||
|
||||
// Headers for rate limit type and overage support
|
||||
const rateLimitType = headers.get(
|
||||
'anthropic-ratelimit-unified-representative-claim',
|
||||
) as RateLimitType | null
|
||||
const overageStatus = headers.get(
|
||||
'anthropic-ratelimit-unified-overage-status',
|
||||
) as QuotaStatus | null
|
||||
const overageResetsAtHeader = headers.get(
|
||||
'anthropic-ratelimit-unified-overage-reset',
|
||||
)
|
||||
const overageResetsAt = overageResetsAtHeader
|
||||
? Number(overageResetsAtHeader)
|
||||
: undefined
|
||||
|
||||
// Reason why overage is disabled (spending cap or wallet empty)
|
||||
const overageDisabledReason = headers.get(
|
||||
'anthropic-ratelimit-unified-overage-disabled-reason',
|
||||
) as OverageDisabledReason | null
|
||||
|
||||
// Determine if we're using overage (standard limits rejected but overage allowed)
|
||||
const isUsingOverage =
|
||||
status === 'rejected' &&
|
||||
(overageStatus === 'allowed' || overageStatus === 'allowed_warning')
|
||||
|
||||
// Check for early warning based on surpassed-threshold header
|
||||
// If status is allowed/allowed_warning and we find a surpassed threshold, show warning
|
||||
let finalStatus: QuotaStatus = status
|
||||
if (status === 'allowed' || status === 'allowed_warning') {
|
||||
const earlyWarning = getEarlyWarningFromHeaders(
|
||||
headers,
|
||||
unifiedRateLimitFallbackAvailable,
|
||||
)
|
||||
if (earlyWarning) {
|
||||
return earlyWarning
|
||||
}
|
||||
// No early warning threshold surpassed
|
||||
finalStatus = 'allowed'
|
||||
}
|
||||
|
||||
return {
|
||||
status: finalStatus,
|
||||
resetsAt,
|
||||
unifiedRateLimitFallbackAvailable,
|
||||
...(rateLimitType && { rateLimitType }),
|
||||
...(overageStatus && { overageStatus }),
|
||||
...(overageResetsAt && { overageResetsAt }),
|
||||
...(overageDisabledReason && { overageDisabledReason }),
|
||||
isUsingOverage,
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Cache the extra usage disabled reason from API headers.
|
||||
*/
|
||||
function cacheExtraUsageDisabledReason(headers: globalThis.Headers): void {
|
||||
// A null reason means extra usage is enabled (no disabled reason header)
|
||||
const reason =
|
||||
headers.get('anthropic-ratelimit-unified-overage-disabled-reason') ?? null
|
||||
const cached = getGlobalConfig().cachedExtraUsageDisabledReason
|
||||
if (cached !== reason) {
|
||||
saveGlobalConfig(current => ({
|
||||
...current,
|
||||
cachedExtraUsageDisabledReason: reason,
|
||||
}))
|
||||
}
|
||||
}
|
||||
|
||||
export function extractQuotaStatusFromHeaders(
|
||||
headers: globalThis.Headers,
|
||||
): void {
|
||||
// Check if we need to process rate limits
|
||||
const isSubscriber = isClaudeAISubscriber()
|
||||
|
||||
if (!shouldProcessRateLimits(isSubscriber)) {
|
||||
// If we have any rate limit state, clear it
|
||||
rawUtilization = {}
|
||||
if (currentLimits.status !== 'allowed' || currentLimits.resetsAt) {
|
||||
const defaultLimits: ClaudeAILimits = {
|
||||
status: 'allowed',
|
||||
unifiedRateLimitFallbackAvailable: false,
|
||||
isUsingOverage: false,
|
||||
}
|
||||
emitStatusChange(defaultLimits)
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
// Process headers (applies mocks from /mock-limits command if active)
|
||||
const headersToUse = processRateLimitHeaders(headers)
|
||||
rawUtilization = extractRawUtilization(headersToUse)
|
||||
const newLimits = computeNewLimitsFromHeaders(headersToUse)
|
||||
|
||||
// Cache extra usage status (persists across sessions)
|
||||
cacheExtraUsageDisabledReason(headersToUse)
|
||||
|
||||
if (!isEqual(currentLimits, newLimits)) {
|
||||
emitStatusChange(newLimits)
|
||||
}
|
||||
}
|
||||
|
||||
export function extractQuotaStatusFromError(error: APIError): void {
|
||||
if (
|
||||
!shouldProcessRateLimits(isClaudeAISubscriber()) ||
|
||||
error.status !== 429
|
||||
) {
|
||||
return
|
||||
}
|
||||
|
||||
try {
|
||||
let newLimits = { ...currentLimits }
|
||||
if (error.headers) {
|
||||
// Process headers (applies mocks from /mock-limits command if active)
|
||||
const headersToUse = processRateLimitHeaders(error.headers)
|
||||
rawUtilization = extractRawUtilization(headersToUse)
|
||||
newLimits = computeNewLimitsFromHeaders(headersToUse)
|
||||
|
||||
// Cache extra usage status (persists across sessions)
|
||||
cacheExtraUsageDisabledReason(headersToUse)
|
||||
}
|
||||
// For errors, always set status to rejected even if headers are not present.
|
||||
newLimits.status = 'rejected'
|
||||
|
||||
if (!isEqual(currentLimits, newLimits)) {
|
||||
emitStatusChange(newLimits)
|
||||
}
|
||||
} catch (e) {
|
||||
logError(e as Error)
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user