init claude-code

This commit is contained in:
2026-04-01 17:32:37 +02:00
commit 73b208c009
1902 changed files with 513237 additions and 0 deletions
+153
View File
@@ -0,0 +1,153 @@
import { FILE_EDIT_TOOL_NAME } from 'src/tools/FileEditTool/constants.js'
import { FILE_READ_TOOL_NAME } from 'src/tools/FileReadTool/prompt.js'
import { FILE_WRITE_TOOL_NAME } from 'src/tools/FileWriteTool/prompt.js'
import { GLOB_TOOL_NAME } from 'src/tools/GlobTool/prompt.js'
import { GREP_TOOL_NAME } from 'src/tools/GrepTool/prompt.js'
import { NOTEBOOK_EDIT_TOOL_NAME } from 'src/tools/NotebookEditTool/constants.js'
import { WEB_FETCH_TOOL_NAME } from 'src/tools/WebFetchTool/prompt.js'
import { WEB_SEARCH_TOOL_NAME } from 'src/tools/WebSearchTool/prompt.js'
import { SHELL_TOOL_NAMES } from 'src/utils/shell/shellToolUtils.js'
import { isEnvTruthy } from '../../utils/envUtils.js'
// docs: https://docs.google.com/document/d/1oCT4evvWTh3P6z-kcfNQwWTCxAhkoFndSaNS9Gm40uw/edit?tab=t.0
// Default values for context management strategies
// Match client-side microcompact token values
const DEFAULT_MAX_INPUT_TOKENS = 180_000 // Typical warning threshold
const DEFAULT_TARGET_INPUT_TOKENS = 40_000 // Keep last 40k tokens like client-side
// Read-style tools passed as `clear_tool_inputs` in the clear-tool-results
// strategy built by getAPIContextManagement below.
const TOOLS_CLEARABLE_RESULTS = [
  ...SHELL_TOOL_NAMES,
  GLOB_TOOL_NAME,
  GREP_TOOL_NAME,
  FILE_READ_TOOL_NAME,
  WEB_FETCH_TOOL_NAME,
  WEB_SEARCH_TOOL_NAME,
]
// Mutating tools passed as `exclude_tools` in the clear-tool-uses strategy.
// NOTE(review): the name says "clearable", but these are wired as
// exclusions from clearing — confirm the intent matches the name.
const TOOLS_CLEARABLE_USES = [
  FILE_EDIT_TOOL_NAME,
  FILE_WRITE_TOOL_NAME,
  NOTEBOOK_EDIT_TOOL_NAME,
]
// Context management strategy types matching API documentation
export type ContextEditStrategy =
  | {
      // Server-side clearing of older tool_use/tool_result content.
      type: 'clear_tool_uses_20250919'
      // Edit activates once input tokens exceed `value`.
      trigger?: {
        type: 'input_tokens'
        value: number
      }
      // How many recent tool uses to keep untouched.
      keep?: {
        type: 'tool_uses'
        value: number
      }
      // Whether (or for which tool names) tool-call inputs are also cleared.
      clear_tool_inputs?: boolean | string[]
      // Tool names this strategy must not clear.
      exclude_tools?: string[]
      // Minimum input-token savings the edit should achieve when it fires.
      clear_at_least?: {
        type: 'input_tokens'
        value: number
      }
    }
  | {
      // Server-side clearing of thinking blocks from prior turns;
      // keep the newest N turns, or 'all' to preserve everything.
      type: 'clear_thinking_20251015'
      keep: { type: 'thinking_turns'; value: number } | 'all'
    }
// Context management configuration wrapper
export type ContextManagementConfig = {
  // Edit strategies sent to the API under `context_management`.
  edits: ContextEditStrategy[]
}
// API-based microcompact implementation that uses native context management

/**
 * Parse a token-count env override. Falls back to `fallback` when the
 * variable is unset or does not parse to a positive integer — letting a
 * NaN through would produce an invalid `value` in the request body.
 */
function parseTokenEnv(raw: string | undefined, fallback: number): number {
  if (!raw) {
    return fallback
  }
  const parsed = parseInt(raw, 10)
  return Number.isFinite(parsed) && parsed > 0 ? parsed : fallback
}

/**
 * Build the context-management config attached to API requests.
 *
 * @param options.hasThinking whether the conversation contains thinking blocks
 * @param options.isRedactThinkingActive redact-thinking mode (skip thinking edits)
 * @param options.clearAllThinking clear all but the last thinking turn
 * @returns the config, or undefined when no edit strategies apply
 */
export function getAPIContextManagement(options?: {
  hasThinking?: boolean
  isRedactThinkingActive?: boolean
  clearAllThinking?: boolean
}): ContextManagementConfig | undefined {
  const {
    hasThinking = false,
    isRedactThinkingActive = false,
    clearAllThinking = false,
  } = options ?? {}
  const strategies: ContextEditStrategy[] = []
  // Preserve thinking blocks in previous assistant turns. Skip when
  // redact-thinking is active — redacted blocks have no model-visible content.
  // When clearAllThinking is set (>1h idle = cache miss), keep only the last
  // thinking turn — the API schema requires value >= 1, and omitting the edit
  // falls back to the model-policy default (often "all"), which wouldn't clear.
  if (hasThinking && !isRedactThinkingActive) {
    strategies.push({
      type: 'clear_thinking_20251015',
      keep: clearAllThinking ? { type: 'thinking_turns', value: 1 } : 'all',
    })
  }
  // Tool clearing strategies are ant-only
  if (process.env.USER_TYPE !== 'ant') {
    return strategies.length > 0 ? { edits: strategies } : undefined
  }
  const useClearToolResults = isEnvTruthy(
    process.env.USE_API_CLEAR_TOOL_RESULTS,
  )
  const useClearToolUses = isEnvTruthy(process.env.USE_API_CLEAR_TOOL_USES)
  // If no tool clearing strategy is enabled, return early
  if (!useClearToolResults && !useClearToolUses) {
    return strategies.length > 0 ? { edits: strategies } : undefined
  }
  // Both tool-clearing strategies share identical trigger/keep thresholds;
  // parse the env overrides once instead of duplicating per branch.
  const triggerThreshold = parseTokenEnv(
    process.env.API_MAX_INPUT_TOKENS,
    DEFAULT_MAX_INPUT_TOKENS,
  )
  const keepTarget = parseTokenEnv(
    process.env.API_TARGET_INPUT_TOKENS,
    DEFAULT_TARGET_INPUT_TOKENS,
  )
  const trigger = { type: 'input_tokens' as const, value: triggerThreshold }
  const clearAtLeast = {
    type: 'input_tokens' as const,
    // Clear down to roughly the keep target.
    value: triggerThreshold - keepTarget,
  }
  if (useClearToolResults) {
    strategies.push({
      type: 'clear_tool_uses_20250919',
      trigger,
      clear_at_least: clearAtLeast,
      clear_tool_inputs: TOOLS_CLEARABLE_RESULTS,
    })
  }
  if (useClearToolUses) {
    strategies.push({
      type: 'clear_tool_uses_20250919',
      trigger,
      clear_at_least: clearAtLeast,
      exclude_tools: TOOLS_CLEARABLE_USES,
    })
  }
  return strategies.length > 0 ? { edits: strategies } : undefined
}
+351
View File
@@ -0,0 +1,351 @@
import { feature } from 'bun:bundle'
import { markPostCompaction } from 'src/bootstrap/state.js'
import { getSdkBetas } from '../../bootstrap/state.js'
import type { QuerySource } from '../../constants/querySource.js'
import type { ToolUseContext } from '../../Tool.js'
import type { Message } from '../../types/message.js'
import { getGlobalConfig } from '../../utils/config.js'
import { getContextWindowForModel } from '../../utils/context.js'
import { logForDebugging } from '../../utils/debug.js'
import { isEnvTruthy } from '../../utils/envUtils.js'
import { hasExactErrorMessage } from '../../utils/errors.js'
import type { CacheSafeParams } from '../../utils/forkedAgent.js'
import { logError } from '../../utils/log.js'
import { tokenCountWithEstimation } from '../../utils/tokens.js'
import { getFeatureValue_CACHED_MAY_BE_STALE } from '../analytics/growthbook.js'
import { getMaxOutputTokensForModel } from '../api/claude.js'
import { notifyCompaction } from '../api/promptCacheBreakDetection.js'
import { setLastSummarizedMessageId } from '../SessionMemory/sessionMemoryUtils.js'
import {
type CompactionResult,
compactConversation,
ERROR_MESSAGE_USER_ABORT,
type RecompactionInfo,
} from './compact.js'
import { runPostCompactCleanup } from './postCompactCleanup.js'
import { trySessionMemoryCompaction } from './sessionMemoryCompact.js'
// Reserve this many tokens for output during compaction
// Based on p99.99 of compact summary output being 17,387 tokens.
// Subtracted from the model context window in getEffectiveContextWindowSize.
const MAX_OUTPUT_TOKENS_FOR_SUMMARY = 20_000
/**
 * Context window available for conversation content: the model's window
 * minus the tokens reserved for the compaction summary output.
 *
 * Honors CLAUDE_CODE_AUTO_COMPACT_WINDOW as an upper bound on the window
 * (ignored when unset, non-numeric, or non-positive).
 */
export function getEffectiveContextWindowSize(model: string): number {
  const summaryReserve = Math.min(
    getMaxOutputTokensForModel(model),
    MAX_OUTPUT_TOKENS_FOR_SUMMARY,
  )
  let windowSize = getContextWindowForModel(model, getSdkBetas())
  const overrideRaw = process.env.CLAUDE_CODE_AUTO_COMPACT_WINDOW
  if (overrideRaw) {
    const override = parseInt(overrideRaw, 10)
    // parseInt yields an integer or NaN, so isFinite ≡ !isNaN here.
    if (Number.isFinite(override) && override > 0) {
      windowSize = Math.min(windowSize, override)
    }
  }
  return windowSize - summaryReserve
}
export type AutoCompactTrackingState = {
  // True once a compaction has already happened in this chain of turns
  // (feeds RecompactionInfo.isRecompactionInChain).
  compacted: boolean
  // Turns elapsed since the previous compaction.
  turnCounter: number
  // Unique ID per turn
  turnId: string
  // Consecutive autocompact failures. Reset on success.
  // Used as a circuit breaker to stop retrying when the context is
  // irrecoverably over the limit (e.g., prompt_too_long).
  consecutiveFailures?: number
}
// Headroom below the effective window at which autocompact triggers
// (see getAutoCompactThreshold).
export const AUTOCOMPACT_BUFFER_TOKENS = 13_000
// NOTE(review): the warning and error buffers are currently equal, so the
// two UI thresholds coincide — confirm whether that is intentional.
export const WARNING_THRESHOLD_BUFFER_TOKENS = 20_000
export const ERROR_THRESHOLD_BUFFER_TOKENS = 20_000
// Headroom used to compute the default blocking limit in
// calculateTokenWarningState.
export const MANUAL_COMPACT_BUFFER_TOKENS = 3_000
// Stop trying autocompact after this many consecutive failures.
// BQ 2026-03-10: 1,279 sessions had 50+ consecutive failures (up to 3,272)
// in a single session, wasting ~250K API calls/day globally.
const MAX_CONSECUTIVE_AUTOCOMPACT_FAILURES = 3
export function getAutoCompactThreshold(model: string): number {
const effectiveContextWindow = getEffectiveContextWindowSize(model)
const autocompactThreshold =
effectiveContextWindow - AUTOCOMPACT_BUFFER_TOKENS
// Override for easier testing of autocompact
const envPercent = process.env.CLAUDE_AUTOCOMPACT_PCT_OVERRIDE
if (envPercent) {
const parsed = parseFloat(envPercent)
if (!isNaN(parsed) && parsed > 0 && parsed <= 100) {
const percentageThreshold = Math.floor(
effectiveContextWindow * (parsed / 100),
)
return Math.min(percentageThreshold, autocompactThreshold)
}
}
return autocompactThreshold
}
export function calculateTokenWarningState(
tokenUsage: number,
model: string,
): {
percentLeft: number
isAboveWarningThreshold: boolean
isAboveErrorThreshold: boolean
isAboveAutoCompactThreshold: boolean
isAtBlockingLimit: boolean
} {
const autoCompactThreshold = getAutoCompactThreshold(model)
const threshold = isAutoCompactEnabled()
? autoCompactThreshold
: getEffectiveContextWindowSize(model)
const percentLeft = Math.max(
0,
Math.round(((threshold - tokenUsage) / threshold) * 100),
)
const warningThreshold = threshold - WARNING_THRESHOLD_BUFFER_TOKENS
const errorThreshold = threshold - ERROR_THRESHOLD_BUFFER_TOKENS
const isAboveWarningThreshold = tokenUsage >= warningThreshold
const isAboveErrorThreshold = tokenUsage >= errorThreshold
const isAboveAutoCompactThreshold =
isAutoCompactEnabled() && tokenUsage >= autoCompactThreshold
const actualContextWindow = getEffectiveContextWindowSize(model)
const defaultBlockingLimit =
actualContextWindow - MANUAL_COMPACT_BUFFER_TOKENS
// Allow override for testing
const blockingLimitOverride = process.env.CLAUDE_CODE_BLOCKING_LIMIT_OVERRIDE
const parsedOverride = blockingLimitOverride
? parseInt(blockingLimitOverride, 10)
: NaN
const blockingLimit =
!isNaN(parsedOverride) && parsedOverride > 0
? parsedOverride
: defaultBlockingLimit
const isAtBlockingLimit = tokenUsage >= blockingLimit
return {
percentLeft,
isAboveWarningThreshold,
isAboveErrorThreshold,
isAboveAutoCompactThreshold,
isAtBlockingLimit,
}
}
/**
 * Whether autocompact may run: both env kill-switches must be off AND the
 * user's settings toggle must be on.
 */
export function isAutoCompactEnabled(): boolean {
  // Hard kill-switch: disables compaction entirely.
  if (isEnvTruthy(process.env.DISABLE_COMPACT)) {
    return false
  }
  // Softer switch: disables only auto-compact (manual /compact still works).
  if (isEnvTruthy(process.env.DISABLE_AUTO_COMPACT)) {
    return false
  }
  // Finally, respect the user's settings toggle.
  return getGlobalConfig().autoCompactEnabled
}
/**
 * Decide whether autocompact should run before the next request.
 *
 * Returns false for recursive/forked query sources, when autocompact is
 * disabled, when a feature-gated alternative context-management mode owns
 * the problem (reactive-only or context collapse), or when the estimated
 * token count is below the autocompact threshold.
 *
 * @param messages conversation so far; token usage is estimated from these
 * @param model model whose context window defines the threshold
 * @param querySource origin of this query, used for recursion guards
 * @param snipTokensFreed tokens already freed by snip (see inline comment)
 */
export async function shouldAutoCompact(
  messages: Message[],
  model: string,
  querySource?: QuerySource,
  // Snip removes messages but the surviving assistant's usage still reflects
  // pre-snip context, so tokenCountWithEstimation can't see the savings.
  // Subtract the rough-delta that snip already computed.
  snipTokensFreed = 0,
): Promise<boolean> {
  // Recursion guards. session_memory and compact are forked agents that
  // would deadlock.
  if (querySource === 'session_memory' || querySource === 'compact') {
    return false
  }
  // marble_origami is the ctx-agent — if ITS context blows up and
  // autocompact fires, runPostCompactCleanup calls resetContextCollapse()
  // which destroys the MAIN thread's committed log (module-level state
  // shared across forks). Inside feature() so the string DCEs from
  // external builds (it's in excluded-strings.txt).
  if (feature('CONTEXT_COLLAPSE')) {
    if (querySource === 'marble_origami') {
      return false
    }
  }
  if (!isAutoCompactEnabled()) {
    return false
  }
  // Reactive-only mode: suppress proactive autocompact, let reactive compact
  // catch the API's prompt-too-long. feature() wrapper keeps the flag string
  // out of external builds (REACTIVE_COMPACT is ant-only).
  // Note: returning false here also means autoCompactIfNeeded never reaches
  // trySessionMemoryCompaction in the query loop — the /compact call site
  // still tries session memory first. Revisit if reactive-only graduates.
  if (feature('REACTIVE_COMPACT')) {
    if (getFeatureValue_CACHED_MAY_BE_STALE('tengu_cobalt_raccoon', false)) {
      return false
    }
  }
  // Context-collapse mode: same suppression. Collapse IS the context
  // management system when it's on — the 90% commit / 95% blocking-spawn
  // flow owns the headroom problem. Autocompact firing at effective-13k
  // (~93% of effective) sits right between collapse's commit-start (90%)
  // and blocking (95%), so it would race collapse and usually win, nuking
  // granular context that collapse was about to save. Gating here rather
  // than in isAutoCompactEnabled() keeps reactiveCompact alive as the 413
  // fallback (it consults isAutoCompactEnabled directly) and leaves
  // sessionMemory + manual /compact working.
  //
  // Consult isContextCollapseEnabled (not the raw gate) so the
  // CLAUDE_CONTEXT_COLLAPSE env override is honored here too. require()
  // inside the block breaks the init-time cycle (this file exports
  // getEffectiveContextWindowSize which collapse's index imports).
  if (feature('CONTEXT_COLLAPSE')) {
    /* eslint-disable @typescript-eslint/no-require-imports */
    const { isContextCollapseEnabled } =
      require('../contextCollapse/index.js') as typeof import('../contextCollapse/index.js')
    /* eslint-enable @typescript-eslint/no-require-imports */
    if (isContextCollapseEnabled()) {
      return false
    }
  }
  const tokenCount = tokenCountWithEstimation(messages) - snipTokensFreed
  const threshold = getAutoCompactThreshold(model)
  const effectiveWindow = getEffectiveContextWindowSize(model)
  logForDebugging(
    `autocompact: tokens=${tokenCount} threshold=${threshold} effectiveWindow=${effectiveWindow}${snipTokensFreed > 0 ? ` snipFreed=${snipTokensFreed}` : ''}`,
  )
  const { isAboveAutoCompactThreshold } = calculateTokenWarningState(
    tokenCount,
    model,
  )
  return isAboveAutoCompactThreshold
}
/**
 * Run autocompact when thresholds demand it: tries session-memory
 * compaction first (experiment), then falls back to full conversation
 * compaction via compactConversation.
 *
 * Failures increment a consecutive-failure counter that the caller threads
 * back in via `tracking`, acting as a circuit breaker (see
 * MAX_CONSECUTIVE_AUTOCOMPACT_FAILURES).
 *
 * @returns wasCompacted, plus the compaction result on success and the
 *   updated consecutiveFailures count where applicable
 */
export async function autoCompactIfNeeded(
  messages: Message[],
  toolUseContext: ToolUseContext,
  cacheSafeParams: CacheSafeParams,
  querySource?: QuerySource,
  tracking?: AutoCompactTrackingState,
  snipTokensFreed?: number,
): Promise<{
  wasCompacted: boolean
  compactionResult?: CompactionResult
  consecutiveFailures?: number
}> {
  // Global kill-switch — also checked inside isAutoCompactEnabled.
  if (isEnvTruthy(process.env.DISABLE_COMPACT)) {
    return { wasCompacted: false }
  }
  // Circuit breaker: stop retrying after N consecutive failures.
  // Without this, sessions where context is irrecoverably over the limit
  // hammer the API with doomed compaction attempts on every turn.
  if (
    tracking?.consecutiveFailures !== undefined &&
    tracking.consecutiveFailures >= MAX_CONSECUTIVE_AUTOCOMPACT_FAILURES
  ) {
    return { wasCompacted: false }
  }
  const model = toolUseContext.options.mainLoopModel
  const shouldCompact = await shouldAutoCompact(
    messages,
    model,
    querySource,
    snipTokensFreed,
  )
  if (!shouldCompact) {
    return { wasCompacted: false }
  }
  const recompactionInfo: RecompactionInfo = {
    isRecompactionInChain: tracking?.compacted === true,
    turnsSincePreviousCompact: tracking?.turnCounter ?? -1,
    previousCompactTurnId: tracking?.turnId,
    autoCompactThreshold: getAutoCompactThreshold(model),
    querySource,
  }
  // EXPERIMENT: Try session memory compaction first
  const sessionMemoryResult = await trySessionMemoryCompaction(
    messages,
    toolUseContext.agentId,
    recompactionInfo.autoCompactThreshold,
  )
  if (sessionMemoryResult) {
    // Reset lastSummarizedMessageId since session memory compaction prunes messages
    // and the old message UUID will no longer exist after the REPL replaces messages
    setLastSummarizedMessageId(undefined)
    runPostCompactCleanup(querySource)
    // Reset cache read baseline so the post-compact drop isn't flagged as a
    // break. compactConversation does this internally; SM-compact doesn't.
    // BQ 2026-03-01: missing this made 20% of tengu_prompt_cache_break events
    // false positives (systemPromptChanged=true, timeSinceLastAssistantMsg=-1).
    if (feature('PROMPT_CACHE_BREAK_DETECTION')) {
      notifyCompaction(querySource ?? 'compact', toolUseContext.agentId)
    }
    markPostCompaction()
    return {
      wasCompacted: true,
      compactionResult: sessionMemoryResult,
    }
  }
  try {
    const compactionResult = await compactConversation(
      messages,
      toolUseContext,
      cacheSafeParams,
      true, // Suppress user questions for autocompact
      undefined, // No custom instructions for autocompact
      true, // isAutoCompact
      recompactionInfo,
    )
    // Reset lastSummarizedMessageId since legacy compaction replaces all messages
    // and the old message UUID will no longer exist in the new messages array
    setLastSummarizedMessageId(undefined)
    runPostCompactCleanup(querySource)
    return {
      wasCompacted: true,
      compactionResult,
      // Reset failure count on success
      consecutiveFailures: 0,
    }
  } catch (error) {
    // User-initiated aborts are expected; anything else gets logged.
    if (!hasExactErrorMessage(error, ERROR_MESSAGE_USER_ABORT)) {
      logError(error)
    }
    // Increment consecutive failure count for circuit breaker.
    // The caller threads this through autoCompactTracking so the
    // next query loop iteration can skip futile retry attempts.
    const prevFailures = tracking?.consecutiveFailures ?? 0
    const nextFailures = prevFailures + 1
    if (nextFailures >= MAX_CONSECUTIVE_AUTOCOMPACT_FAILURES) {
      logForDebugging(
        `autocompact: circuit breaker tripped after ${nextFailures} consecutive failures — skipping future attempts this session`,
        { level: 'warn' },
      )
    }
    return { wasCompacted: false, consecutiveFailures: nextFailures }
  }
}
File diff suppressed because it is too large Load Diff
+16
View File
@@ -0,0 +1,16 @@
import { useSyncExternalStore } from 'react'
import { compactWarningStore } from './compactWarningState.js'
/**
 * React hook exposing whether the compact warning is currently suppressed.
 *
 * Kept in its own file so that compactWarningState.ts stays React-free:
 * microCompact.ts consumes the pure state functions, and importing React
 * into that module graph would drag it into the print-mode startup path.
 */
export function useCompactWarningSuppression(): boolean {
  const { subscribe, getState } = compactWarningStore
  return useSyncExternalStore(subscribe, getState)
}
+18
View File
@@ -0,0 +1,18 @@
import { createStore } from '../../state/store.js'
/**
 * Tracks whether the "context left until autocompact" warning should be
 * suppressed. Suppression starts right after a successful compaction, since
 * accurate token counts are unavailable until the next API response.
 */
export const compactWarningStore = createStore<boolean>(false)

// Single mutation point for the suppression flag.
function setSuppression(suppressed: boolean): void {
  compactWarningStore.setState(() => suppressed)
}

/** Suppress the compact warning. Call after successful compaction. */
export function suppressCompactWarning(): void {
  setSuppression(true)
}

/** Clear the compact warning suppression. Called at start of new compact attempt. */
export function clearCompactWarningSuppression(): void {
  setSuppression(false)
}
+63
View File
@@ -0,0 +1,63 @@
import type { Message } from '../../types/message.js'
/**
* Groups messages at API-round boundaries: one group per API round-trip.
* A boundary fires when a NEW assistant response begins (different
* message.id from the prior assistant). For well-formed conversations
* this is an API-safe split point — the API contract requires every
* tool_use to be resolved before the next assistant turn, so pairing
* validity falls out of the assistant-id boundary. For malformed inputs
* (dangling tool_use after resume/truncation) the fork's
* ensureToolResultPairing repairs the split at API time.
*
* Replaces the prior human-turn grouping (boundaries only at real user
* prompts) with finer-grained API-round grouping, allowing reactive
* compact to operate on single-prompt agentic sessions (SDK/CCR/eval
* callers) where the entire workload is one human turn.
*
* Extracted to its own file to break the compact.ts ↔ compactMessages.ts
* cycle (CC-1180) — the cycle shifted module-init order enough to surface
* a latent ws CJS/ESM resolution race in CI shard-2.
*/
export function groupMessagesByApiRound(messages: Message[]): Message[][] {
  const rounds: Message[][] = []
  let bucket: Message[] = []
  // id of the most recently seen assistant message. This is the sole
  // boundary gate: streaming chunks from the same API response share an
  // id, so a boundary only fires when a genuinely new response begins.
  // Interleaved tool_results between same-id chunks therefore stay in
  // the same group (e.g. [tu_A(id=X), result_A, tu_B(id=X)]).
  let previousAssistantId: string | undefined
  // Well-formed conversations resolve every tool_use before the next
  // assistant turn, so the id check alone is a sufficient boundary gate.
  // Malformed inputs (dangling tool_use after resume/truncation) are NOT
  // repaired here — the summarizer fork's ensureToolResultPairing fixes
  // any bad split at API time.
  for (const message of messages) {
    const startsNewRound =
      message.type === 'assistant' &&
      message.message.id !== previousAssistantId
    if (startsNewRound && bucket.length > 0) {
      rounds.push(bucket)
      bucket = []
    }
    bucket.push(message)
    if (message.type === 'assistant') {
      previousAssistantId = message.message.id
    }
  }
  if (bucket.length > 0) {
    rounds.push(bucket)
  }
  return rounds
}
+530
View File
@@ -0,0 +1,530 @@
import { feature } from 'bun:bundle'
import type { ToolResultBlockParam } from '@anthropic-ai/sdk/resources/index.mjs'
import type { QuerySource } from '../../constants/querySource.js'
import type { ToolUseContext } from '../../Tool.js'
import { FILE_EDIT_TOOL_NAME } from '../../tools/FileEditTool/constants.js'
import { FILE_READ_TOOL_NAME } from '../../tools/FileReadTool/prompt.js'
import { FILE_WRITE_TOOL_NAME } from '../../tools/FileWriteTool/prompt.js'
import { GLOB_TOOL_NAME } from '../../tools/GlobTool/prompt.js'
import { GREP_TOOL_NAME } from '../../tools/GrepTool/prompt.js'
import { WEB_FETCH_TOOL_NAME } from '../../tools/WebFetchTool/prompt.js'
import { WEB_SEARCH_TOOL_NAME } from '../../tools/WebSearchTool/prompt.js'
import type { Message } from '../../types/message.js'
import { logForDebugging } from '../../utils/debug.js'
import { getMainLoopModel } from '../../utils/model/model.js'
import { SHELL_TOOL_NAMES } from '../../utils/shell/shellToolUtils.js'
import { jsonStringify } from '../../utils/slowOperations.js'
import {
type AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
logEvent,
} from '../analytics/index.js'
import { notifyCacheDeletion } from '../api/promptCacheBreakDetection.js'
import { roughTokenCountEstimation } from '../tokenEstimation.js'
import {
clearCompactWarningSuppression,
suppressCompactWarning,
} from './compactWarningState.js'
import {
getTimeBasedMCConfig,
type TimeBasedMCConfig,
} from './timeBasedMCConfig.js'
// Inline from utils/toolResultStorage.ts — importing that file pulls in
// sessionStorage → utils/messages → services/api/errors, completing a
// circular-deps loop back through this file via promptCacheBreakDetection.
// Drift is caught by a test asserting equality with the source-of-truth.
// Placeholder text written in place of cleared tool-result content.
export const TIME_BASED_MC_CLEARED_MESSAGE = '[Old tool result content cleared]'
// Flat token charge for an image or document block, regardless of format.
const IMAGE_MAX_TOKEN_SIZE = 2000
// Only compact these tools
const COMPACTABLE_TOOLS = new Set<string>([
  FILE_READ_TOOL_NAME,
  ...SHELL_TOOL_NAMES,
  GREP_TOOL_NAME,
  GLOB_TOOL_NAME,
  WEB_SEARCH_TOOL_NAME,
  WEB_FETCH_TOOL_NAME,
  FILE_EDIT_TOOL_NAME,
  FILE_WRITE_TOOL_NAME,
])
// --- Cached microcompact state (ant-only, gated by feature('CACHED_MICROCOMPACT')) ---
// Lazy-initialized cached MC module and state to avoid importing in external builds.
// The imports and state live inside feature() checks for dead code elimination.
let cachedMCModule: typeof import('./cachedMicrocompact.js') | null = null
let cachedMCState: import('./cachedMicrocompact.js').CachedMCState | null = null
// cache_edits block queued for the next API request; handed over (and
// cleared) by consumePendingCacheEdits.
let pendingCacheEdits:
  | import('./cachedMicrocompact.js').CacheEditsBlock
  | null = null
// Dynamically imports the cached-microcompact module on first use and
// memoizes it in cachedMCModule for subsequent calls.
async function getCachedMCModule(): Promise<
  typeof import('./cachedMicrocompact.js')
> {
  if (!cachedMCModule) {
    cachedMCModule = await import('./cachedMicrocompact.js')
  }
  return cachedMCModule
}
// Returns the cached-MC state, creating it on first use. Throws when the
// module has not been loaded yet (getCachedMCModule must run first).
function ensureCachedMCState(): import('./cachedMicrocompact.js').CachedMCState {
  if (cachedMCState) {
    return cachedMCState
  }
  if (cachedMCModule) {
    cachedMCState = cachedMCModule.createCachedMCState()
    return cachedMCState
  }
  throw new Error(
    'cachedMCState not initialized — getCachedMCModule() must be called first',
  )
}
/**
 * Hand the queued cache_edits block to the API layer, at most once.
 * Returns null when nothing is queued; otherwise returns the block and
 * clears the pending state (caller must pin the edits after insertion).
 */
export function consumePendingCacheEdits():
  | import('./cachedMicrocompact.js').CacheEditsBlock
  | null {
  if (pendingCacheEdits === null) {
    return null
  }
  const consumed = pendingCacheEdits
  pendingCacheEdits = null
  return consumed
}
/**
 * All previously-pinned cache edits, which must be re-sent at their
 * original positions for cache hits. Empty before state initialization.
 */
export function getPinnedCacheEdits(): import('./cachedMicrocompact.js').PinnedCacheEdits[] {
  return cachedMCState?.pinnedEdits ?? []
}
/**
 * Pin a new cache_edits block to a specific user message position so it is
 * re-sent in subsequent API calls. No-op before state initialization.
 */
export function pinCacheEdits(
  userMessageIndex: number,
  block: import('./cachedMicrocompact.js').CacheEditsBlock,
): void {
  if (!cachedMCState) {
    return
  }
  cachedMCState.pinnedEdits.push({ userMessageIndex, block })
}
/**
 * Marks all registered tools as sent to the API.
 * Call after a successful API response. No-op before initialization.
 */
export function markToolsSentToAPIState(): void {
  if (!cachedMCState || !cachedMCModule) {
    return
  }
  cachedMCModule.markToolsSentToAPI(cachedMCState)
}
// Drops any queued cache edits and resets the cached-MC state (when it
// has been initialized).
export function resetMicrocompactState(): void {
  pendingCacheEdits = null
  if (cachedMCState && cachedMCModule) {
    cachedMCModule.resetCachedMCState(cachedMCState)
  }
}
// Rough token count for a single tool_result block: estimated from text
// content, with a flat charge for image/document blocks.
function calculateToolResultTokens(block: ToolResultBlockParam): number {
  const { content } = block
  if (!content) {
    return 0
  }
  if (typeof content === 'string') {
    return roughTokenCountEstimation(content)
  }
  // Array of TextBlockParam | ImageBlockParam | DocumentBlockParam
  let total = 0
  for (const item of content) {
    if (item.type === 'text') {
      total += roughTokenCountEstimation(item.text)
    } else if (item.type === 'image' || item.type === 'document') {
      // Images/documents are approximately 2000 tokens regardless of format
      total += IMAGE_MAX_TOKEN_SIZE
    }
  }
  return total
}
/**
 * Estimate token count for messages by extracting text content.
 * Used for rough token estimation when accurate API counts are unavailable.
 * The raw estimate is padded by 4/3 to stay conservative.
 */
export function estimateMessageTokens(messages: Message[]): number {
  let rawTokens = 0
  for (const message of messages) {
    if (message.type !== 'user' && message.type !== 'assistant') {
      continue
    }
    const content = message.message.content
    if (!Array.isArray(content)) {
      continue
    }
    for (const block of content) {
      switch (block.type) {
        case 'text':
          rawTokens += roughTokenCountEstimation(block.text)
          break
        case 'tool_result':
          rawTokens += calculateToolResultTokens(block)
          break
        case 'image':
        case 'document':
          rawTokens += IMAGE_MAX_TOKEN_SIZE
          break
        case 'thinking':
          // Match roughTokenCountEstimationForBlock: count only the thinking
          // text, not the JSON wrapper or signature (signature is metadata,
          // not model-tokenized content).
          rawTokens += roughTokenCountEstimation(block.thinking)
          break
        case 'redacted_thinking':
          rawTokens += roughTokenCountEstimation(block.data)
          break
        case 'tool_use':
          // Match roughTokenCountEstimationForBlock: count name + input,
          // not the JSON wrapper or id field.
          rawTokens += roughTokenCountEstimation(
            block.name + jsonStringify(block.input ?? {}),
          )
          break
        default:
          // server_tool_use, web_search_tool_result, etc.
          rawTokens += roughTokenCountEstimation(jsonStringify(block))
      }
    }
  }
  // Pad estimate by 4/3 to be conservative since we're approximating
  return Math.ceil(rawTokens * (4 / 3))
}
export type PendingCacheEdits = {
  // Only the automatic trigger exists today.
  trigger: 'auto'
  // tool_use IDs whose results were queued for deletion via cache edits.
  deletedToolIds: string[]
  // Baseline cumulative cache_deleted_input_tokens from the previous API response,
  // used to compute the per-operation delta (the API value is sticky/cumulative)
  baselineCacheDeletedTokens: number
}
// Result of a microcompact pass: the (possibly untouched) message list,
// plus optional bookkeeping for the API layer.
export type MicrocompactResult = {
  messages: Message[]
  compactionInfo?: {
    pendingCacheEdits?: PendingCacheEdits
  }
}
/**
 * Walk messages and collect tool_use IDs whose tool name is in
 * COMPACTABLE_TOOLS, in encounter order. Shared by both microcompact paths.
 */
function collectCompactableToolIds(messages: Message[]): string[] {
  const collected: string[] = []
  for (const message of messages) {
    if (message.type !== 'assistant') {
      continue
    }
    const content = message.message.content
    if (!Array.isArray(content)) {
      continue
    }
    for (const block of content) {
      if (block.type === 'tool_use' && COMPACTABLE_TOOLS.has(block.name)) {
        collected.push(block.id)
      }
    }
  }
  return collected
}
// Prefix-match because promptCategory.ts sets the querySource to
// 'repl_main_thread:outputStyle:<style>' when a non-default output style
// is active; the bare 'repl_main_thread' is only used for the default
// style. query.ts:350/1451 use the same startsWith pattern — the older
// cached-MC `=== 'repl_main_thread'` check was a latent bug that silently
// excluded users with a non-default output style from cached MC.
function isMainThreadSource(querySource: QuerySource | undefined): boolean {
  if (!querySource) {
    return true
  }
  return querySource.startsWith('repl_main_thread')
}
/**
 * Entry point for microcompact. Runs the time-based content-clear first
 * (short-circuits), then the cache-editing ("cached MC") path when its
 * feature gate, config, model, and main-thread checks all pass; otherwise
 * returns the messages unchanged and leaves context pressure to autocompact.
 */
export async function microcompactMessages(
  messages: Message[],
  toolUseContext?: ToolUseContext,
  querySource?: QuerySource,
): Promise<MicrocompactResult> {
  // Clear suppression flag at start of new microcompact attempt
  clearCompactWarningSuppression()
  // Time-based trigger runs first and short-circuits. If the gap since the
  // last assistant message exceeds the threshold, the server cache has expired
  // and the full prefix will be rewritten regardless — so content-clear old
  // tool results now, before the request, to shrink what gets rewritten.
  // Cached MC (cache-editing) is skipped when this fires: editing assumes a
  // warm cache, and we just established it's cold.
  const timeBasedResult = maybeTimeBasedMicrocompact(messages, querySource)
  if (timeBasedResult) {
    return timeBasedResult
  }
  // Only run cached MC for the main thread to prevent forked agents
  // (session_memory, prompt_suggestion, etc.) from registering their
  // tool_results in the global cachedMCState, which would cause the main
  // thread to try deleting tools that don't exist in its own conversation.
  if (feature('CACHED_MICROCOMPACT')) {
    const mod = await getCachedMCModule()
    const model = toolUseContext?.options.mainLoopModel ?? getMainLoopModel()
    if (
      mod.isCachedMicrocompactEnabled() &&
      mod.isModelSupportedForCacheEditing(model) &&
      isMainThreadSource(querySource)
    ) {
      return await cachedMicrocompactPath(messages, querySource)
    }
  }
  // Legacy microcompact path removed — tengu_cache_plum_violet is always true.
  // For contexts where cached microcompact is not available (external builds,
  // non-ant users, unsupported models, sub-agents), no compaction happens here;
  // autocompact handles context pressure instead.
  return { messages }
}
/**
 * Cached microcompact path - uses cache editing API to remove tool results
 * without invalidating the cached prefix.
 *
 * Key differences from regular microcompact:
 * - Does NOT modify local message content (cache_reference and cache_edits are added at API layer)
 * - Uses count-based trigger/keep thresholds from GrowthBook config
 * - Takes precedence over regular microcompact (no disk persistence)
 * - Tracks tool results and queues cache edits for the API layer
 *
 * @param messages conversation history; always returned unchanged — the
 *   actual deletion happens server-side via the queued cache_edits block
 * @param querySource origin of the query; forwarded to cache-break detection
 *   (falls back to 'repl_main_thread' when absent)
 * @returns the untouched messages plus, when edits were queued, compactionInfo
 *   describing the pending cache edits and a token baseline for delta math
 */
async function cachedMicrocompactPath(
  messages: Message[],
  querySource: QuerySource | undefined,
): Promise<MicrocompactResult> {
  const mod = await getCachedMCModule()
  const state = ensureCachedMCState()
  const config = mod.getCachedMCConfig()
  // Only tool results still present in the current conversation are eligible.
  const compactableToolIds = new Set(collectCompactableToolIds(messages))
  // Second pass (after collectCompactableToolIds above): register tool
  // results grouped by user message, skipping ids already registered.
  for (const message of messages) {
    if (message.type === 'user' && Array.isArray(message.message.content)) {
      const groupIds: string[] = []
      for (const block of message.message.content) {
        if (
          block.type === 'tool_result' &&
          compactableToolIds.has(block.tool_use_id) &&
          !state.registeredTools.has(block.tool_use_id)
        ) {
          mod.registerToolResult(state, block.tool_use_id)
          groupIds.push(block.tool_use_id)
        }
      }
      mod.registerToolMessage(state, groupIds)
    }
  }
  const toolsToDelete = mod.getToolResultsToDelete(state)
  if (toolsToDelete.length > 0) {
    // Create and queue the cache_edits block for the API layer
    const cacheEdits = mod.createCacheEditsBlock(state, toolsToDelete)
    if (cacheEdits) {
      pendingCacheEdits = cacheEdits
    }
    logForDebugging(
      `Cached MC deleting ${toolsToDelete.length} tool(s): ${toolsToDelete.join(', ')}`,
    )
    // Log the event
    logEvent('tengu_cached_microcompact', {
      toolsDeleted: toolsToDelete.length,
      deletedToolIds: toolsToDelete.join(
        ',',
      ) as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
      activeToolCount: state.toolOrder.length - state.deletedRefs.size,
      triggerType:
        'auto' as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
      threshold: config.triggerThreshold,
      keepRecent: config.keepRecent,
    })
    // Suppress warning after successful compaction
    suppressCompactWarning()
    // Notify cache break detection that cache reads will legitimately drop
    if (feature('PROMPT_CACHE_BREAK_DETECTION')) {
      // Pass the actual querySource — isMainThreadSource now prefix-matches
      // so output-style variants enter here, and getTrackingKey keys on the
      // full source string, not the 'repl_main_thread' prefix.
      notifyCacheDeletion(querySource ?? 'repl_main_thread')
    }
    // Return messages unchanged - cache_reference and cache_edits are added at API layer
    // Boundary message is deferred until after API response so we can use
    // actual cache_deleted_input_tokens from the API instead of client-side estimates
    // Capture the baseline cumulative cache_deleted_input_tokens from the last
    // assistant message so we can compute a per-operation delta after the API call
    const lastAsst = messages.findLast(m => m.type === 'assistant')
    const baseline =
      lastAsst?.type === 'assistant'
        ? ((
            lastAsst.message.usage as unknown as Record<
              string,
              number | undefined
            >
          )?.cache_deleted_input_tokens ?? 0)
        : 0
    return {
      messages,
      compactionInfo: {
        pendingCacheEdits: {
          trigger: 'auto',
          deletedToolIds: toolsToDelete,
          baselineCacheDeletedTokens: baseline,
        },
      },
    }
  }
  // No compaction needed, return messages unchanged
  return { messages }
}
/**
* Time-based microcompact: when the gap since the last main-loop assistant
* message exceeds the configured threshold, content-clear all but the most
* recent N compactable tool results.
*
* Returns null when the trigger doesn't fire (disabled, wrong source, gap
* under threshold, nothing to clear) — caller falls through to other paths.
*
* Unlike cached MC, this mutates message content directly. The cache is cold,
* so there's no cached prefix to preserve via cache_edits.
*/
/**
 * Check whether the time-based trigger should fire for this request.
 *
 * Fires only when all of the following hold:
 * - time-based MC is enabled in config;
 * - an explicit main-thread querySource was provided (isMainThreadSource
 *   treats undefined as main-thread for cached-MC backward-compat, but
 *   analysis-only callers like /context, /compact and analyzeContext invoke
 *   microcompactMessages without a source and must not trigger);
 * - a prior assistant message exists with a parseable timestamp;
 * - the elapsed gap meets the configured threshold.
 *
 * Extracted so other pre-request paths (e.g. snip force-apply) can consult
 * the same predicate without coupling to the tool-result clearing action.
 *
 * @returns the measured gap in minutes plus the active config when the
 *   trigger fires, null otherwise.
 */
export function evaluateTimeBasedTrigger(
  messages: Message[],
  querySource: QuerySource | undefined,
): { gapMinutes: number; config: TimeBasedMCConfig } | null {
  const cfg = getTimeBasedMCConfig()
  if (!cfg.enabled || !querySource || !isMainThreadSource(querySource)) {
    return null
  }
  const priorAssistant = messages.findLast(m => m.type === 'assistant')
  if (!priorAssistant) {
    return null
  }
  const elapsedMs = Date.now() - new Date(priorAssistant.timestamp).getTime()
  const elapsedMinutes = elapsedMs / 60_000
  // An unparseable timestamp yields NaN, which fails the isFinite check.
  if (
    !Number.isFinite(elapsedMinutes) ||
    elapsedMinutes < cfg.gapThresholdMinutes
  ) {
    return null
  }
  return { gapMinutes: elapsedMinutes, config: cfg }
}
/**
 * Time-based microcompact: when the gap since the last main-loop assistant
 * message exceeds the configured threshold, content-clear all but the most
 * recent N compactable tool results.
 *
 * Unlike cached MC, this mutates message content directly — after a long
 * idle gap the server cache is cold, so there is no cached prefix worth
 * preserving via cache_edits.
 *
 * @returns the rewritten messages, or null when the trigger doesn't fire or
 *   there is nothing to clear (caller falls through to other paths).
 */
function maybeTimeBasedMicrocompact(
  messages: Message[],
  querySource: QuerySource | undefined,
): MicrocompactResult | null {
  const fired = evaluateTimeBasedTrigger(messages, querySource)
  if (!fired) {
    return null
  }
  const { gapMinutes, config } = fired
  const candidateIds = collectCompactableToolIds(messages)
  // Always keep at least one result: slice(-0) would return the whole array
  // (clearing nothing), while clearing ALL results would leave the model
  // with zero working context. Neither degenerate case is sensible.
  const retained = new Set(candidateIds.slice(-Math.max(1, config.keepRecent)))
  const toClear = new Set(candidateIds.filter(id => !retained.has(id)))
  if (toClear.size === 0) {
    return null
  }
  let savedTokens = 0
  const rewritten: Message[] = messages.map(message => {
    if (message.type !== 'user' || !Array.isArray(message.message.content)) {
      return message
    }
    let changed = false
    const nextContent = message.message.content.map(block => {
      // Leave everything alone except not-yet-cleared tool_results that are
      // slated for clearing.
      if (
        block.type !== 'tool_result' ||
        !toClear.has(block.tool_use_id) ||
        block.content === TIME_BASED_MC_CLEARED_MESSAGE
      ) {
        return block
      }
      savedTokens += calculateToolResultTokens(block)
      changed = true
      return { ...block, content: TIME_BASED_MC_CLEARED_MESSAGE }
    })
    return changed
      ? { ...message, message: { ...message.message, content: nextContent } }
      : message
  })
  if (savedTokens === 0) {
    return null
  }
  logEvent('tengu_time_based_microcompact', {
    gapMinutes: Math.round(gapMinutes),
    gapThresholdMinutes: config.gapThresholdMinutes,
    toolsCleared: toClear.size,
    toolsKept: retained.size,
    keepRecent: config.keepRecent,
    tokensSaved: savedTokens,
  })
  logForDebugging(
    `[TIME-BASED MC] gap ${Math.round(gapMinutes)}min > ${config.gapThresholdMinutes}min, cleared ${toClear.size} tool results (~${savedTokens} tokens), kept last ${retained.size}`,
  )
  suppressCompactWarning()
  // Module-level cached-MC state still references tools registered on prior
  // turns. We just cleared some of those AND invalidated the server cache by
  // changing prompt content; running cached-MC next turn with stale state
  // would issue cache_edits for entries that no longer exist server-side.
  resetMicrocompactState()
  // The prompt content changed, so the next response's cache read will drop —
  // that's us, not a break. notifyCacheDeletion (rather than notifyCompaction)
  // is used because it's already imported here and gives the same
  // false-positive suppression; importing the second symbol tripped the
  // circular-deps check. Pass the real querySource: getTrackingKey keys on
  // the full source string (e.g. 'repl_main_thread:outputStyle:custom').
  if (feature('PROMPT_CACHE_BREAK_DETECTION') && querySource) {
    notifyCacheDeletion(querySource)
  }
  return { messages: rewritten }
}
+77
View File
@@ -0,0 +1,77 @@
import { feature } from 'bun:bundle'
import type { QuerySource } from '../../constants/querySource.js'
import { clearSystemPromptSections } from '../../constants/systemPromptSections.js'
import { getUserContext } from '../../context.js'
import { clearSpeculativeChecks } from '../../tools/BashTool/bashPermissions.js'
import { clearClassifierApprovals } from '../../utils/classifierApprovals.js'
import { resetGetMemoryFilesCache } from '../../utils/claudemd.js'
import { clearSessionMessagesCache } from '../../utils/sessionStorage.js'
import { clearBetaTracingState } from '../../utils/telemetry/betaSessionTracing.js'
import { resetMicrocompactState } from './microCompact.js'
/**
* Run cleanup of caches and tracking state after compaction.
* Call this after both auto-compact and manual /compact to free memory
* held by tracking structures that are invalidated by compaction.
*
* Note: We intentionally do NOT clear invoked skill content here.
* Skill content must survive across multiple compactions so that
* createSkillAttachmentIfNeeded() can include the full skill text
* in subsequent compaction attachments.
*
* querySource: pass the compacting query's source so we can skip
* resets that would clobber main-thread module-level state. Subagents
* (agent:*) run in the same process and share module-level state
* (context-collapse store, getMemoryFiles one-shot hook flag,
* getUserContext cache); resetting those when a SUBAGENT compacts
* would corrupt the MAIN thread's state. All compaction callers should
* pass querySource — undefined is only safe for callers that are
* genuinely main-thread-only (/compact, /clear).
*/
export function runPostCompactCleanup(querySource?: QuerySource): void {
  // Subagents (agent:*) run in the same process and share module-level
  // state with the main thread. Only reset main-thread module-level state
  // (context-collapse, memory file cache) for main-thread compacts.
  // Same startsWith pattern as isMainThread (index.ts:188).
  const isMainThreadCompact =
    querySource === undefined ||
    querySource.startsWith('repl_main_thread') ||
    querySource === 'sdk'
  // Microcompact tracking is reset unconditionally, for every compact source.
  resetMicrocompactState()
  if (feature('CONTEXT_COLLAPSE')) {
    // Nested (not &&-combined) if — presumably so the feature() check stays a
    // standalone guard the bundler can use for dead-code elimination of the
    // require; confirm before flattening.
    if (isMainThreadCompact) {
      /* eslint-disable @typescript-eslint/no-require-imports */
      ;(
        require('../contextCollapse/index.js') as typeof import('../contextCollapse/index.js')
      ).resetContextCollapse()
      /* eslint-enable @typescript-eslint/no-require-imports */
    }
  }
  if (isMainThreadCompact) {
    // getUserContext is a memoized outer layer wrapping getClaudeMds() →
    // getMemoryFiles(). If only the inner getMemoryFiles cache is cleared,
    // the next turn hits the getUserContext cache and never reaches
    // getMemoryFiles(), so the armed InstructionsLoaded hook never fires.
    // Manual /compact already clears this explicitly at its call sites;
    // auto-compact and reactive-compact did not — this centralizes the
    // clear so all compaction paths behave consistently.
    getUserContext.cache.clear?.()
    resetGetMemoryFilesCache('compact')
  }
  // Process-wide tracking caches cleared for every compact source.
  clearSystemPromptSections()
  clearClassifierApprovals()
  clearSpeculativeChecks()
  // Intentionally NOT calling resetSentSkillNames(): re-injecting the full
  // skill_listing (~4K tokens) post-compact is pure cache_creation. The
  // model still has SkillTool in schema, invoked_skills preserves used
  // skills, and dynamic additions are handled by skillChangeDetector /
  // cacheUtils resets. See compactConversation() for full rationale.
  clearBetaTracingState()
  if (feature('COMMIT_ATTRIBUTION')) {
    // Fire-and-forget: the async sweep's promise is deliberately discarded.
    void import('../../utils/attributionHooks.js').then(m =>
      m.sweepFileContentCache(),
    )
  }
  clearSessionMessagesCache()
}
+374
View File
@@ -0,0 +1,374 @@
import { feature } from 'bun:bundle'
import type { PartialCompactDirection } from '../../types/message.js'
// Dead code elimination: conditional import for proactive mode.
// feature() comes from 'bun:bundle' — presumably resolved at bundle time so
// the require() (and the proactive module) is stripped when both flags are
// off; confirm against the bundler config before changing this shape.
/* eslint-disable @typescript-eslint/no-require-imports */
const proactiveModule =
  feature('PROACTIVE') || feature('KAIROS')
    ? (require('../../proactive/index.js') as typeof import('../../proactive/index.js'))
    : null
/* eslint-enable @typescript-eslint/no-require-imports */
// Aggressive no-tools preamble. The cache-sharing fork path inherits the
// parent's full tool set (required for cache-key match), and on Sonnet 4.6+
// adaptive-thinking models the model sometimes attempts a tool call despite
// the weaker trailer instruction. With maxTurns: 1, a denied tool call means
// no text output → falls through to the streaming fallback (2.79% on 4.6 vs
// 0.01% on 4.5). Putting this FIRST and making it explicit about rejection
// consequences prevents the wasted turn.
// Prepended verbatim by getCompactPrompt() and getPartialCompactPrompt().
const NO_TOOLS_PREAMBLE = `CRITICAL: Respond with TEXT ONLY. Do NOT call any tools.
- Do NOT use Read, Bash, Grep, Glob, Edit, Write, or ANY other tool.
- You already have all the context you need in the conversation above.
- Tool calls will be REJECTED and will waste your only turn — you will fail the task.
- Your entire response must be plain text: an <analysis> block followed by a <summary> block.
`
// Two variants: BASE scopes to "the conversation", PARTIAL scopes to "the
// recent messages". The <analysis> block is a drafting scratchpad that
// formatCompactSummary() strips before the summary reaches context.
// Full-conversation variant — interpolated into BASE_COMPACT_PROMPT and
// PARTIAL_COMPACT_UP_TO_PROMPT.
const DETAILED_ANALYSIS_INSTRUCTION_BASE = `Before providing your final summary, wrap your analysis in <analysis> tags to organize your thoughts and ensure you've covered all necessary points. In your analysis process:
1. Chronologically analyze each message and section of the conversation. For each section thoroughly identify:
- The user's explicit requests and intents
- Your approach to addressing the user's requests
- Key decisions, technical concepts and code patterns
- Specific details like:
- file names
- full code snippets
- function signatures
- file edits
- Errors that you ran into and how you fixed them
- Pay special attention to specific user feedback that you received, especially if the user told you to do something differently.
2. Double-check for technical accuracy and completeness, addressing each required element thoroughly.`
// Recent-messages variant — interpolated into PARTIAL_COMPACT_PROMPT.
const DETAILED_ANALYSIS_INSTRUCTION_PARTIAL = `Before providing your final summary, wrap your analysis in <analysis> tags to organize your thoughts and ensure you've covered all necessary points. In your analysis process:
1. Analyze the recent messages chronologically. For each section thoroughly identify:
- The user's explicit requests and intents
- Your approach to addressing the user's requests
- Key decisions, technical concepts and code patterns
- Specific details like:
- file names
- full code snippets
- function signatures
- file edits
- Errors that you ran into and how you fixed them
- Pay special attention to specific user feedback that you received, especially if the user told you to do something differently.
2. Double-check for technical accuracy and completeness, addressing each required element thoroughly.`
// Full-conversation compaction prompt — the template used by getCompactPrompt().
const BASE_COMPACT_PROMPT = `Your task is to create a detailed summary of the conversation so far, paying close attention to the user's explicit requests and your previous actions.
This summary should be thorough in capturing technical details, code patterns, and architectural decisions that would be essential for continuing development work without losing context.
${DETAILED_ANALYSIS_INSTRUCTION_BASE}
Your summary should include the following sections:
1. Primary Request and Intent: Capture all of the user's explicit requests and intents in detail
2. Key Technical Concepts: List all important technical concepts, technologies, and frameworks discussed.
3. Files and Code Sections: Enumerate specific files and code sections examined, modified, or created. Pay special attention to the most recent messages and include full code snippets where applicable and include a summary of why this file read or edit is important.
4. Errors and fixes: List all errors that you ran into, and how you fixed them. Pay special attention to specific user feedback that you received, especially if the user told you to do something differently.
5. Problem Solving: Document problems solved and any ongoing troubleshooting efforts.
6. All user messages: List ALL user messages that are not tool results. These are critical for understanding the users' feedback and changing intent.
7. Pending Tasks: Outline any pending tasks that you have explicitly been asked to work on.
8. Current Work: Describe in detail precisely what was being worked on immediately before this summary request, paying special attention to the most recent messages from both user and assistant. Include file names and code snippets where applicable.
9. Optional Next Step: List the next step that you will take that is related to the most recent work you were doing. IMPORTANT: ensure that this step is DIRECTLY in line with the user's most recent explicit requests, and the task you were working on immediately before this summary request. If your last task was concluded, then only list next steps if they are explicitly in line with the users request. Do not start on tangential requests or really old requests that were already completed without confirming with the user first.
If there is a next step, include direct quotes from the most recent conversation showing exactly what task you were working on and where you left off. This should be verbatim to ensure there's no drift in task interpretation.
Here's an example of how your output should be structured:
<example>
<analysis>
[Your thought process, ensuring all points are covered thoroughly and accurately]
</analysis>
<summary>
1. Primary Request and Intent:
[Detailed description]
2. Key Technical Concepts:
- [Concept 1]
- [Concept 2]
- [...]
3. Files and Code Sections:
- [File Name 1]
- [Summary of why this file is important]
- [Summary of the changes made to this file, if any]
- [Important Code Snippet]
- [File Name 2]
- [Important Code Snippet]
- [...]
4. Errors and fixes:
- [Detailed description of error 1]:
- [How you fixed the error]
- [User feedback on the error if any]
- [...]
5. Problem Solving:
[Description of solved problems and ongoing troubleshooting]
6. All user messages:
- [Detailed non tool use user message]
- [...]
7. Pending Tasks:
- [Task 1]
- [Task 2]
- [...]
8. Current Work:
[Precise description of current work]
9. Optional Next Step:
[Optional Next step to take]
</summary>
</example>
Please provide your summary based on the conversation so far, following this structure and ensuring precision and thoroughness in your response.
There may be additional summarization instructions provided in the included context. If so, remember to follow these instructions when creating the above summary. Examples of instructions include:
<example>
## Compact Instructions
When summarizing the conversation focus on typescript code changes and also remember the mistakes you made and how you fixed them.
</example>
<example>
# Summary instructions
When you are using compact - please focus on test output and code changes. Include file reads verbatim.
</example>
`
// 'from' direction: earlier messages are retained verbatim; only the recent
// suffix is summarized. Template used by getPartialCompactPrompt().
const PARTIAL_COMPACT_PROMPT = `Your task is to create a detailed summary of the RECENT portion of the conversation — the messages that follow earlier retained context. The earlier messages are being kept intact and do NOT need to be summarized. Focus your summary on what was discussed, learned, and accomplished in the recent messages only.
${DETAILED_ANALYSIS_INSTRUCTION_PARTIAL}
Your summary should include the following sections:
1. Primary Request and Intent: Capture the user's explicit requests and intents from the recent messages
2. Key Technical Concepts: List important technical concepts, technologies, and frameworks discussed recently.
3. Files and Code Sections: Enumerate specific files and code sections examined, modified, or created. Include full code snippets where applicable and include a summary of why this file read or edit is important.
4. Errors and fixes: List errors encountered and how they were fixed.
5. Problem Solving: Document problems solved and any ongoing troubleshooting efforts.
6. All user messages: List ALL user messages from the recent portion that are not tool results.
7. Pending Tasks: Outline any pending tasks from the recent messages.
8. Current Work: Describe precisely what was being worked on immediately before this summary request.
9. Optional Next Step: List the next step related to the most recent work. Include direct quotes from the most recent conversation.
Here's an example of how your output should be structured:
<example>
<analysis>
[Your thought process, ensuring all points are covered thoroughly and accurately]
</analysis>
<summary>
1. Primary Request and Intent:
[Detailed description]
2. Key Technical Concepts:
- [Concept 1]
- [Concept 2]
3. Files and Code Sections:
- [File Name 1]
- [Summary of why this file is important]
- [Important Code Snippet]
4. Errors and fixes:
- [Error description]:
- [How you fixed it]
5. Problem Solving:
[Description]
6. All user messages:
- [Detailed non tool use user message]
7. Pending Tasks:
- [Task 1]
8. Current Work:
[Precise description of current work]
9. Optional Next Step:
[Optional Next step to take]
</summary>
</example>
Please provide your summary based on the RECENT messages only (after the retained earlier context), following this structure and ensuring precision and thoroughness in your response.
`
// 'up_to': model sees only the summarized prefix (cache hit). Summary will
// precede kept recent messages, hence "Context for Continuing Work" section.
// Template used by getPartialCompactPrompt() when direction === 'up_to'.
const PARTIAL_COMPACT_UP_TO_PROMPT = `Your task is to create a detailed summary of this conversation. This summary will be placed at the start of a continuing session; newer messages that build on this context will follow after your summary (you do not see them here). Summarize thoroughly so that someone reading only your summary and then the newer messages can fully understand what happened and continue the work.
${DETAILED_ANALYSIS_INSTRUCTION_BASE}
Your summary should include the following sections:
1. Primary Request and Intent: Capture the user's explicit requests and intents in detail
2. Key Technical Concepts: List important technical concepts, technologies, and frameworks discussed.
3. Files and Code Sections: Enumerate specific files and code sections examined, modified, or created. Include full code snippets where applicable and include a summary of why this file read or edit is important.
4. Errors and fixes: List errors encountered and how they were fixed.
5. Problem Solving: Document problems solved and any ongoing troubleshooting efforts.
6. All user messages: List ALL user messages that are not tool results.
7. Pending Tasks: Outline any pending tasks.
8. Work Completed: Describe what was accomplished by the end of this portion.
9. Context for Continuing Work: Summarize any context, decisions, or state that would be needed to understand and continue the work in subsequent messages.
Here's an example of how your output should be structured:
<example>
<analysis>
[Your thought process, ensuring all points are covered thoroughly and accurately]
</analysis>
<summary>
1. Primary Request and Intent:
[Detailed description]
2. Key Technical Concepts:
- [Concept 1]
- [Concept 2]
3. Files and Code Sections:
- [File Name 1]
- [Summary of why this file is important]
- [Important Code Snippet]
4. Errors and fixes:
- [Error description]:
- [How you fixed it]
5. Problem Solving:
[Description]
6. All user messages:
- [Detailed non tool use user message]
7. Pending Tasks:
- [Task 1]
8. Work Completed:
[Description of what was accomplished]
9. Context for Continuing Work:
[Key context, decisions, or state needed to continue the work]
</summary>
</example>
Please provide your summary following this structure, ensuring precision and thoroughness in your response.
`
// Appended as the final segment by both prompt builders, reinforcing the
// NO_TOOLS_PREAMBLE at the end of the prompt.
const NO_TOOLS_TRAILER =
  '\n\nREMINDER: Do NOT call any tools. Respond with plain text only — ' +
  'an <analysis> block followed by a <summary> block. ' +
  'Tool calls will be rejected and you will fail the task.'
/**
 * Build the partial-compact prompt: no-tools preamble, direction-specific
 * template, optional user instructions, then the no-tools trailer.
 *
 * @param customInstructions extra summarization guidance appended verbatim
 *   (ignored when absent or whitespace-only)
 * @param direction 'up_to' summarizes the prefix (summary will precede kept
 *   messages); 'from' (default) summarizes the recent suffix.
 */
export function getPartialCompactPrompt(
  customInstructions?: string,
  direction: PartialCompactDirection = 'from',
): string {
  const template =
    direction === 'up_to'
      ? PARTIAL_COMPACT_UP_TO_PROMPT
      : PARTIAL_COMPACT_PROMPT
  const extra =
    customInstructions && customInstructions.trim() !== ''
      ? `\n\nAdditional Instructions:\n${customInstructions}`
      : ''
  return NO_TOOLS_PREAMBLE + template + extra + NO_TOOLS_TRAILER
}
/**
 * Build the full-conversation compaction prompt: no-tools preamble, base
 * template, optional user instructions, then the no-tools trailer.
 *
 * @param customInstructions extra summarization guidance appended verbatim
 *   (ignored when absent or whitespace-only)
 */
export function getCompactPrompt(customInstructions?: string): string {
  const segments = [NO_TOOLS_PREAMBLE, BASE_COMPACT_PROMPT]
  if (customInstructions && customInstructions.trim() !== '') {
    segments.push(`\n\nAdditional Instructions:\n${customInstructions}`)
  }
  segments.push(NO_TOOLS_TRAILER)
  return segments.join('')
}
/**
 * Formats the compact summary by stripping the <analysis> drafting scratchpad
 * and replacing <summary> XML tags with a readable "Summary:" header.
 *
 * Uses a function-form replacement for the summary so that `$`-sequences in
 * the model's text (`$&`, `$'`, `$1`, ...) are inserted literally instead of
 * being expanded as String.replace() substitution patterns — the previous
 * string-form replacement corrupted summaries containing such sequences.
 *
 * @param summary The raw summary string potentially containing <analysis> and <summary> XML tags
 * @returns The formatted summary with analysis stripped and summary tags replaced by a header
 */
export function formatCompactSummary(summary: string): string {
  // Strip the analysis section — it's a drafting scratchpad that improves
  // summary quality but has no informational value once the summary exists.
  let formattedSummary = summary.replace(/<analysis>[\s\S]*?<\/analysis>/, '')
  // Extract and format the summary section. The replacement MUST be a
  // function: a plain replacement string would have "$&" / "$1" etc. inside
  // the model's summary interpreted as substitution patterns.
  const summaryMatch = formattedSummary.match(/<summary>([\s\S]*?)<\/summary>/)
  if (summaryMatch) {
    const content = summaryMatch[1] || ''
    formattedSummary = formattedSummary.replace(
      /<summary>[\s\S]*?<\/summary>/,
      () => `Summary:\n${content.trim()}`,
    )
  }
  // Clean up extra whitespace between sections
  formattedSummary = formattedSummary.replace(/\n\n+/g, '\n\n')
  return formattedSummary.trim()
}
export function getCompactUserSummaryMessage(
summary: string,
suppressFollowUpQuestions?: boolean,
transcriptPath?: string,
recentMessagesPreserved?: boolean,
): string {
const formattedSummary = formatCompactSummary(summary)
let baseSummary = `This session is being continued from a previous conversation that ran out of context. The summary below covers the earlier portion of the conversation.
${formattedSummary}`
if (transcriptPath) {
baseSummary += `\n\nIf you need specific details from before compaction (like exact code snippets, error messages, or content you generated), read the full transcript at: ${transcriptPath}`
}
if (recentMessagesPreserved) {
baseSummary += `\n\nRecent messages are preserved verbatim.`
}
if (suppressFollowUpQuestions) {
let continuation = `${baseSummary}
Continue the conversation from where it left off without asking the user any further questions. Resume directly — do not acknowledge the summary, do not recap what was happening, do not preface with "I'll continue" or similar. Pick up the last task as if the break never happened.`
if (
(feature('PROACTIVE') || feature('KAIROS')) &&
proactiveModule?.isProactiveActive()
) {
continuation += `
You are running in autonomous/proactive mode. This is NOT a first wake-up — you were already working autonomously before compaction. Continue your work loop: pick up where you left off based on the summary above. Do not greet the user or ask what to work on.`
}
return continuation
}
return baseSummary
}
+630
View File
@@ -0,0 +1,630 @@
/**
* EXPERIMENT: Session memory compaction
*/
import type { AgentId } from '../../types/ids.js'
import type { HookResultMessage, Message } from '../../types/message.js'
import { logForDebugging } from '../../utils/debug.js'
import { isEnvTruthy } from '../../utils/envUtils.js'
import { errorMessage } from '../../utils/errors.js'
import {
createCompactBoundaryMessage,
createUserMessage,
isCompactBoundaryMessage,
} from '../../utils/messages.js'
import { getMainLoopModel } from '../../utils/model/model.js'
import { getSessionMemoryPath } from '../../utils/permissions/filesystem.js'
import { processSessionStartHooks } from '../../utils/sessionStart.js'
import { getTranscriptPath } from '../../utils/sessionStorage.js'
import { tokenCountFromLastAPIResponse } from '../../utils/tokens.js'
import { extractDiscoveredToolNames } from '../../utils/toolSearch.js'
import {
getDynamicConfig_BLOCKS_ON_INIT,
getFeatureValue_CACHED_MAY_BE_STALE,
} from '../analytics/growthbook.js'
import { logEvent } from '../analytics/index.js'
import {
isSessionMemoryEmpty,
truncateSessionMemoryForCompact,
} from '../SessionMemory/prompts.js'
import {
getLastSummarizedMessageId,
getSessionMemoryContent,
waitForSessionMemoryExtraction,
} from '../SessionMemory/sessionMemoryUtils.js'
import {
annotateBoundaryWithPreservedSegment,
buildPostCompactMessages,
type CompactionResult,
createPlanAttachmentIfNeeded,
} from './compact.js'
import { estimateMessageTokens } from './microCompact.js'
import { getCompactUserSummaryMessage } from './prompt.js'
/**
 * Configuration for session memory compaction thresholds
 */
export type SessionMemoryCompactConfig = {
  /** Minimum tokens to preserve after compaction */
  minTokens: number
  /** Minimum number of messages with text blocks to keep */
  minTextBlockMessages: number
  /** Maximum tokens to preserve after compaction (hard cap) */
  maxTokens: number
}
// Default configuration values (exported for use in tests)
export const DEFAULT_SM_COMPACT_CONFIG: SessionMemoryCompactConfig = {
  minTokens: 10_000,
  minTextBlockMessages: 5,
  maxTokens: 40_000,
}
// Current configuration (starts with defaults). Mutable module-level state:
// replaced wholesale by setSessionMemoryCompactConfig / reset helper below.
let smCompactConfig: SessionMemoryCompactConfig = {
  ...DEFAULT_SM_COMPACT_CONFIG,
}
// Track whether config has been initialized from remote. One-shot latch,
// cleared only by resetSessionMemoryCompactConfig (test helper).
let configInitialized = false
/**
 * Merge partial overrides into the current session memory compact config.
 * Fields absent from the override keep their current values.
 */
export function setSessionMemoryCompactConfig(
  config: Partial<SessionMemoryCompactConfig>,
): void {
  smCompactConfig = Object.assign({}, smCompactConfig, config)
}
/**
 * Snapshot the current session memory compact configuration.
 * Returns a shallow copy so callers cannot mutate the module state.
 */
export function getSessionMemoryCompactConfig(): SessionMemoryCompactConfig {
  return Object.assign({}, smCompactConfig)
}
/**
 * Restore compiled-in defaults and mark the config as uninitialized so the
 * next init fetches remote config again (useful for testing).
 */
export function resetSessionMemoryCompactConfig(): void {
  configInitialized = false
  smCompactConfig = Object.assign({}, DEFAULT_SM_COMPACT_CONFIG)
}
/**
 * Initialize configuration from remote config (GrowthBook).
 * Only fetches once per session - subsequent calls return immediately.
 *
 * A remote field is applied only when it is a positive number, so a missing
 * or zeroed remote value can never clobber a sensible compiled-in default.
 */
async function initSessionMemoryCompactConfig(): Promise<void> {
  if (configInitialized) {
    return
  }
  // Latch before awaiting so concurrent callers don't double-fetch.
  configInitialized = true
  // Load config from GrowthBook, merging with defaults
  const remoteConfig = await getDynamicConfig_BLOCKS_ON_INIT<
    Partial<SessionMemoryCompactConfig>
  >('tengu_sm_compact_config', {})
  // Accept a remote override only when it is explicitly set to a positive
  // number; otherwise fall back to the default for that field. (Previously
  // this truthy-and-positive check was hand-repeated per field.)
  const positiveOr = (value: number | undefined, fallback: number): number =>
    value && value > 0 ? value : fallback
  setSessionMemoryCompactConfig({
    minTokens: positiveOr(
      remoteConfig.minTokens,
      DEFAULT_SM_COMPACT_CONFIG.minTokens,
    ),
    minTextBlockMessages: positiveOr(
      remoteConfig.minTextBlockMessages,
      DEFAULT_SM_COMPACT_CONFIG.minTextBlockMessages,
    ),
    maxTokens: positiveOr(
      remoteConfig.maxTokens,
      DEFAULT_SM_COMPACT_CONFIG.maxTokens,
    ),
  })
}
/**
 * Does this message carry any text content (as opposed to tool traffic)?
 *
 * Assistant messages: true when any content block is a text block.
 * User messages: true for a non-empty string body, or when any block of an
 * array body is a text block. All other message types return false.
 */
export function hasTextBlocks(message: Message): boolean {
  switch (message.type) {
    case 'assistant':
      return message.message.content.some(block => block.type === 'text')
    case 'user': {
      const body = message.message.content
      if (typeof body === 'string') {
        return body.length > 0
      }
      return Array.isArray(body) && body.some(block => block.type === 'text')
    }
    default:
      return false
  }
}
/**
 * Collect the tool_use_ids of every tool_result block in a user message.
 * Non-user messages and string-bodied messages yield an empty list.
 */
function getToolResultIds(message: Message): string[] {
  const body = message.type === 'user' ? message.message.content : null
  if (!Array.isArray(body)) {
    return []
  }
  return body.flatMap(block =>
    block.type === 'tool_result' ? [block.tool_use_id] : [],
  )
}
/**
 * True when an assistant message contains a tool_use block whose id is a
 * member of the given set.
 */
function hasToolUseWithIds(message: Message, toolUseIds: Set<string>): boolean {
  if (message.type !== 'assistant') {
    return false
  }
  const body = message.message.content
  if (!Array.isArray(body)) {
    return false
  }
  for (const block of body) {
    if (block.type === 'tool_use' && toolUseIds.has(block.id)) {
      return true
    }
  }
  return false
}
/**
 * Move a compaction start index backwards so the kept suffix of `messages`
 * satisfies two API invariants after normalizeMessagesForAPI merges streamed
 * assistant records by message.id:
 *
 * 1. Tool pairing: every tool_result block in the kept range must be
 *    preceded by the assistant message carrying the matching tool_use.
 *    Streaming can split one API turn into several Message records (same
 *    message.id, different uuids), so tool_results anywhere in the kept
 *    range — not just the first message — may reference tool_uses sitting
 *    just before the start index. Dropping those tool_uses produces an
 *    orphan tool_result and an API error.
 *
 * 2. Thinking continuity: an assistant record that shares a message.id with
 *    a kept assistant record (e.g. a thinking block emitted as its own
 *    record) must also be kept, or the merge step would silently lose it.
 *
 * Example (tool pair): records N..N+2 are assistant parts of one turn
 * (thinking, tool_use ORPHAN_ID, tool_use VALID_ID) followed by a user
 * record with both tool_results. A start index of N+2 must be lowered to N
 * so ORPHAN_ID's tool_use — and the shared-id thinking record — survive.
 *
 * @param messages Full message history
 * @param startIndex Proposed first index to keep
 * @returns startIndex, possibly lowered to restore the invariants;
 *   non-positive or out-of-range inputs are returned unchanged
 */
export function adjustIndexToPreserveAPIInvariants(
  messages: Message[],
  startIndex: number,
): number {
  if (startIndex <= 0 || startIndex >= messages.length) {
    return startIndex
  }
  let keepFrom = startIndex

  // ---- Phase 1: re-include assistant messages whose tool_use blocks are
  // referenced by tool_result blocks anywhere in the kept range. ----

  // tool_use_ids referenced by tool_result blocks in [startIndex, end)
  const referencedIds: string[] = []
  for (let i = startIndex; i < messages.length; i++) {
    const msg = messages[i]!
    if (msg.type === 'user' && Array.isArray(msg.message.content)) {
      for (const block of msg.message.content) {
        if (block.type === 'tool_result') {
          referencedIds.push(block.tool_use_id)
        }
      }
    }
  }

  if (referencedIds.length > 0) {
    // tool_use ids already present inside the kept range
    const presentIds = new Set<string>()
    for (let i = keepFrom; i < messages.length; i++) {
      const msg = messages[i]!
      if (msg.type === 'assistant' && Array.isArray(msg.message.content)) {
        for (const block of msg.message.content) {
          if (block.type === 'tool_use') {
            presentIds.add(block.id)
          }
        }
      }
    }
    // Only chase tool_uses that the kept range does not already contain
    const missingIds = new Set(referencedIds.filter(id => !presentIds.has(id)))
    // Walk backwards until every missing tool_use has been re-included
    for (let i = keepFrom - 1; i >= 0 && missingIds.size > 0; i--) {
      const msg = messages[i]!
      if (msg.type !== 'assistant' || !Array.isArray(msg.message.content)) {
        continue
      }
      const matched = msg.message.content.filter(
        block => block.type === 'tool_use' && missingIds.has(block.id),
      )
      if (matched.length > 0) {
        keepFrom = i
        for (const block of matched) {
          missingIds.delete(block.id)
        }
      }
    }
  }

  // ---- Phase 2: re-include earlier assistant records that share a
  // message.id with a kept assistant record, so thinking blocks can be
  // merged back in by normalizeMessagesForAPI. ----
  const keptMessageIds = new Set<string>()
  for (let i = keepFrom; i < messages.length; i++) {
    const msg = messages[i]!
    if (msg.type === 'assistant' && msg.message.id) {
      keptMessageIds.add(msg.message.id)
    }
  }
  for (let i = keepFrom - 1; i >= 0; i--) {
    const msg = messages[i]!
    if (
      msg.type === 'assistant' &&
      msg.message.id &&
      keptMessageIds.has(msg.message.id)
    ) {
      // Same API turn as a kept record — pull it back in
      keepFrom = i
    }
  }
  return keepFrom
}
/**
 * Calculate the starting index for messages to keep after compaction.
 *
 * Starts right after lastSummarizedIndex, then expands backwards until:
 * - at least config.minTokens estimated tokens are kept, AND
 * - at least config.minTextBlockMessages text-bearing messages are kept,
 * stopping early once config.maxTokens is reached.
 * The result is then adjusted so tool_use/tool_result pairs and shared
 * message.id assistant records are never split.
 *
 * @param messages Full message history
 * @param lastSummarizedIndex Index of the last message already summarized
 *   into session memory, or a negative value when unknown
 * @returns Index of the first message to keep
 */
export function calculateMessagesToKeepIndex(
  messages: Message[],
  lastSummarizedIndex: number,
): number {
  if (messages.length === 0) {
    return 0
  }
  const config = getSessionMemoryCompactConfig()

  // Begin right after the last summarized message. A negative index (not
  // found / no summarized id) means we start with nothing kept.
  let keepFrom =
    lastSummarizedIndex >= 0 ? lastSummarizedIndex + 1 : messages.length

  // Tally tokens and text-bearing messages over the initial kept range.
  let tokens = 0
  let textMessages = 0
  for (let i = keepFrom; i < messages.length; i++) {
    const msg = messages[i]!
    tokens += estimateMessageTokens([msg])
    if (hasTextBlocks(msg)) {
      textMessages++
    }
  }

  const meetsMinimums = () =>
    tokens >= config.minTokens && textMessages >= config.minTextBlockMessages

  // Already at the cap, or already satisfying both minimums: stop here.
  if (tokens >= config.maxTokens || meetsMinimums()) {
    return adjustIndexToPreserveAPIInvariants(messages, keepFrom)
  }

  // Expand backwards until both minimums are met or the max cap is hit.
  // Floor at the last compact boundary: the preserved-segment chain has a
  // disk discontinuity there (att[0]→summary shortcut from dedup-skip),
  // which would let the loader's tail→head walk bypass inner preserved
  // messages and then prune them. Reactive compact already slices at the
  // boundary via getMessagesAfterCompactBoundary; same invariant here.
  let floor = 0
  for (let i = messages.length - 1; i >= 0; i--) {
    if (isCompactBoundaryMessage(messages[i]!)) {
      floor = i + 1
      break
    }
  }

  for (let i = keepFrom - 1; i >= floor; i--) {
    const msg = messages[i]!
    tokens += estimateMessageTokens([msg])
    if (hasTextBlocks(msg)) {
      textMessages++
    }
    keepFrom = i
    if (tokens >= config.maxTokens || meetsMinimums()) {
      break
    }
  }

  // Never split tool pairs or shared-id assistant turns
  return adjustIndexToPreserveAPIInvariants(messages, keepFrom)
}
/**
 * Decide whether compaction should go through session memory.
 *
 * Env-var overrides win (enable first, then disable); otherwise BOTH the
 * tengu_session_memory and tengu_sm_compact flags must be on. Reads cached
 * gate values so this never blocks on Statsig initialization.
 */
export function shouldUseSessionMemoryCompaction(): boolean {
  // Explicit env toggles take precedence (used by eval runs and testing)
  if (isEnvTruthy(process.env.ENABLE_CLAUDE_CODE_SM_COMPACT)) {
    return true
  }
  if (isEnvTruthy(process.env.DISABLE_CLAUDE_CODE_SM_COMPACT)) {
    return false
  }
  const memoryEnabled = getFeatureValue_CACHED_MAY_BE_STALE(
    'tengu_session_memory',
    false,
  )
  const compactEnabled = getFeatureValue_CACHED_MAY_BE_STALE(
    'tengu_sm_compact',
    false,
  )
  const result = memoryEnabled && compactEnabled
  // Emit flag state for ant users only, keeping external logs quiet
  if (process.env.USER_TYPE === 'ant') {
    logEvent('tengu_sm_compact_flag_check', {
      tengu_session_memory: memoryEnabled,
      tengu_sm_compact: compactEnabled,
      should_use: result,
    })
  }
  return result
}
/**
 * Build a CompactionResult whose summary comes from session memory instead
 * of a compaction model call.
 *
 * @param messages Full pre-compaction message history
 * @param sessionMemory Raw session memory content to use as the summary
 * @param messagesToKeep Suffix of messages preserved verbatim
 * @param hookResults Results of the session-start hooks run for compact
 * @param transcriptPath Path embedded in the user-facing summary message
 * @param agentId Optional agent whose plan attachment is re-attached
 */
function createCompactionResultFromSessionMemory(
  messages: Message[],
  sessionMemory: string,
  messagesToKeep: Message[],
  hookResults: HookResultMessage[],
  transcriptPath: string,
  agentId?: AgentId,
): CompactionResult {
  const preCompactTokenCount = tokenCountFromLastAPIResponse(messages)
  const lastUuid = messages[messages.length - 1]?.uuid
  const boundaryMarker = createCompactBoundaryMessage(
    'auto',
    preCompactTokenCount ?? 0,
    lastUuid,
  )

  // Record which tools had already been discovered before compaction
  const discoveredTools = extractDiscoveredToolNames(messages)
  if (discoveredTools.size > 0) {
    boundaryMarker.compactMetadata.preCompactDiscoveredTools = [
      ...discoveredTools,
    ].sort()
  }

  // Cap oversized sections so session memory can't consume the entire
  // post-compact token budget
  const { truncatedContent, wasTruncated } =
    truncateSessionMemoryForCompact(sessionMemory)
  const truncationNote = wasTruncated
    ? `\n\nSome session memory sections were truncated for length. The full session memory can be viewed at: ${getSessionMemoryPath()}`
    : ''
  const summaryContent =
    getCompactUserSummaryMessage(truncatedContent, true, transcriptPath, true) +
    truncationNote

  const summaryMessages = [
    createUserMessage({
      content: summaryContent,
      isCompactSummary: true,
      isVisibleInTranscriptOnly: true,
    }),
  ]

  const planAttachment = createPlanAttachmentIfNeeded(agentId)
  const estimatedSummaryTokens = estimateMessageTokens(summaryMessages)
  return {
    boundaryMarker: annotateBoundaryWithPreservedSegment(
      boundaryMarker,
      summaryMessages[summaryMessages.length - 1]!.uuid,
      messagesToKeep,
    ),
    summaryMessages,
    attachments: planAttachment ? [planAttachment] : [],
    hookResults,
    messagesToKeep,
    preCompactTokenCount,
    // SM-compact has no compact-API-call, so postCompactTokenCount (kept
    // for event continuity) and truePostCompactTokenCount converge to the
    // same estimate of the summary messages.
    postCompactTokenCount: estimatedSummaryTokens,
    truePostCompactTokenCount: estimatedSummaryTokens,
  }
}
/**
 * Try to use session memory for compaction instead of traditional compaction.
 * Returns null if session memory compaction cannot be used, signalling the
 * caller to fall back to legacy compaction.
 *
 * Handles two scenarios:
 * 1. Normal case: lastSummarizedMessageId is set, keep only messages after that ID
 * 2. Resumed session: lastSummarizedMessageId is not set but session memory has content,
 *    keep all messages but use session memory as the summary
 *
 * @param messages Full message history being compacted
 * @param agentId Optional agent whose plan attachment should be re-attached
 * @param autoCompactThreshold When provided (autocompact path), return null
 *   if the post-compact token estimate still meets or exceeds this value
 * @returns A session-memory-backed CompactionResult, or null to fall back
 */
export async function trySessionMemoryCompaction(
  messages: Message[],
  agentId?: AgentId,
  autoCompactThreshold?: number,
): Promise<CompactionResult | null> {
  if (!shouldUseSessionMemoryCompaction()) {
    return null
  }
  // Initialize config from remote (only fetches once)
  await initSessionMemoryCompactConfig()
  // Wait for any in-progress session memory extraction to complete (with timeout)
  await waitForSessionMemoryExtraction()
  const lastSummarizedMessageId = getLastSummarizedMessageId()
  const sessionMemory = await getSessionMemoryContent()
  // No session memory file exists at all
  if (!sessionMemory) {
    logEvent('tengu_sm_compact_no_session_memory', {})
    return null
  }
  // Session memory exists but matches the template (no actual content extracted)
  // Fall back to legacy compact behavior
  if (await isSessionMemoryEmpty(sessionMemory)) {
    logEvent('tengu_sm_compact_empty_template', {})
    return null
  }
  try {
    let lastSummarizedIndex: number
    if (lastSummarizedMessageId) {
      // Normal case: we know exactly which messages have been summarized
      lastSummarizedIndex = messages.findIndex(
        msg => msg.uuid === lastSummarizedMessageId,
      )
      if (lastSummarizedIndex === -1) {
        // The summarized message ID doesn't exist in current messages
        // This can happen if messages were modified - fall back to legacy compact
        // since we can't determine the boundary between summarized and unsummarized messages
        logEvent('tengu_sm_compact_summarized_id_not_found', {})
        return null
      }
    } else {
      // Resumed session case: session memory has content but we don't know the boundary
      // Set lastSummarizedIndex to last message so startIndex becomes messages.length (no messages kept initially)
      lastSummarizedIndex = messages.length - 1
      logEvent('tengu_sm_compact_resumed_session', {})
    }
    // Calculate the starting index for messages to keep
    // This starts from lastSummarizedIndex, expands to meet minimums,
    // and adjusts to not split tool_use/tool_result pairs
    const startIndex = calculateMessagesToKeepIndex(
      messages,
      lastSummarizedIndex,
    )
    // Filter out old compact boundary messages from messagesToKeep.
    // After REPL pruning, old boundaries re-yielded from messagesToKeep would
    // trigger an unwanted second prune (isCompactBoundaryMessage returns true),
    // discarding the new boundary and summary.
    const messagesToKeep = messages
      .slice(startIndex)
      .filter(m => !isCompactBoundaryMessage(m))
    // Run session start hooks to restore CLAUDE.md and other context
    const hookResults = await processSessionStartHooks('compact', {
      model: getMainLoopModel(),
    })
    // Get transcript path for the summary message
    const transcriptPath = getTranscriptPath()
    const compactionResult = createCompactionResultFromSessionMemory(
      messages,
      sessionMemory,
      messagesToKeep,
      hookResults,
      transcriptPath,
      agentId,
    )
    // Re-estimate post-compact size over the fully assembled message list
    const postCompactMessages = buildPostCompactMessages(compactionResult)
    const postCompactTokenCount = estimateMessageTokens(postCompactMessages)
    // Only check threshold if one was provided (for autocompact)
    if (
      autoCompactThreshold !== undefined &&
      postCompactTokenCount >= autoCompactThreshold
    ) {
      logEvent('tengu_sm_compact_threshold_exceeded', {
        postCompactTokenCount,
        autoCompactThreshold,
      })
      return null
    }
    return {
      ...compactionResult,
      postCompactTokenCount,
      truePostCompactTokenCount: postCompactTokenCount,
    }
  } catch (error) {
    // Use logEvent instead of logError since errors here are expected
    // (e.g., file not found, path issues) and shouldn't go to error logs
    logEvent('tengu_sm_compact_error', {})
    if (process.env.USER_TYPE === 'ant') {
      logForDebugging(`Session memory compaction error: ${errorMessage(error)}`)
    }
    return null
  }
}
+43
View File
@@ -0,0 +1,43 @@
import { getFeatureValue_CACHED_MAY_BE_STALE } from '../analytics/growthbook.js'
/**
 * GrowthBook config for time-based microcompact.
 *
 * Triggers content-clearing microcompact when the gap since the last main-loop
 * assistant message exceeds a threshold — the server-side prompt cache has
 * almost certainly expired, so the full prefix will be rewritten anyway.
 * Clearing old tool results before the request shrinks what gets rewritten.
 *
 * Runs BEFORE the API call (in microcompactMessages, upstream of callModel)
 * so the shrunk prompt is what actually gets sent. Running after the first
 * miss would only help subsequent turns.
 *
 * Main thread only — subagents have short lifetimes where gap-based eviction
 * doesn't apply.
 */
export type TimeBasedMCConfig = {
  /** Master switch. When false, time-based microcompact is a no-op. */
  enabled: boolean
  /** Trigger when (now − last assistant timestamp) exceeds this many minutes.
   * 60 is the safe choice: the server's 1h cache TTL is guaranteed expired
   * for all users, so we never force a miss that wouldn't have happened. */
  gapThresholdMinutes: number
  /** Keep this many most-recent compactable tool results.
   * When set, takes priority over any default; older results are cleared. */
  keepRecent: number
}
// Fallback value passed to the GrowthBook lookup in getTimeBasedMCConfig.
// enabled: false keeps the feature off unless remotely configured.
const TIME_BASED_MC_CONFIG_DEFAULTS: TimeBasedMCConfig = {
  enabled: false,
  gapThresholdMinutes: 60,
  keepRecent: 5,
}
/**
 * Read the time-based microcompact config from GrowthBook (cached value).
 *
 * The read happens unconditionally here — rather than behind the caller's
 * other gates (querySource, messages.length) — so the GB exposure event
 * fires on every evaluation path.
 */
export function getTimeBasedMCConfig(): TimeBasedMCConfig {
  const config = getFeatureValue_CACHED_MAY_BE_STALE<TimeBasedMCConfig>(
    'tengu_slate_heron',
    TIME_BASED_MC_CONFIG_DEFAULTS,
  )
  return config
}