init claude-code
This commit is contained in:
@@ -0,0 +1,196 @@
|
||||
/**
|
||||
* Filter and sanitize installed-app data for inclusion in the `request_access`
|
||||
* tool description. Ported from Cowork's appNames.ts. Two
|
||||
* concerns: noise filtering (Spotlight returns every bundle on disk — XPC
|
||||
* helpers, daemons, input methods) and prompt-injection hardening (app names
|
||||
* are attacker-controlled; anyone can ship an app named anything).
|
||||
*
|
||||
* Residual risk: short benign-char adversarial names ("grant all") can't be
|
||||
* filtered programmatically. The tool description's structural framing
|
||||
* ("Available applications:") makes it clear these are app names, and the
|
||||
* downstream permission dialog requires explicit user approval — a bad name
|
||||
* can't auto-grant anything.
|
||||
*/
|
||||
|
||||
/**
 * Structural subset of an installed-app record that the filters in this file
 * read. Matches what `listInstalledApps` returns.
 */
type InstalledAppLike = Readonly<{
  /** Bundle identifier; compared against the always-keep set. */
  bundleId: string
  /** Human-readable name; sanitized before it reaches the tool description. */
  displayName: string
  /** Bundle location on disk; checked against the allowlisted roots. */
  path: string
}>
|
||||
|
||||
// ── Noise filtering ──────────────────────────────────────────────────────
|
||||
|
||||
/**
 * Roots an app bundle must live under to be listed. The approach is an
 * allowlist of known-good roots instead of a blocklist of /System/Library
 * subpaths (CoreServices, PrivateFrameworks, Input Methods): those are OS
 * plumbing, and new macOS releases keep adding more of them.
 *
 * ~/Applications is not in this list — it is resolved per call from the
 * `homeDir` argument, because HOME is not reliably known at module load in
 * every environment.
 */
const PATH_ALLOWLIST: ReadonlyArray<string> = ['/Applications/', '/System/Applications/']
|
||||
|
||||
/**
 * Display-name patterns that identify background services, even when the
 * bundle sits under an allowlisted root.
 *
 * Each keyword pattern ends in `(?:$|\s\()`: the keyword must be the last
 * thing in the name, or be followed directly by whitespace and `(`. So
 * "Slack Helper (GPU)" and "ABAssistantService" are blocked, while
 * "Service Desk" is kept (the keyword is followed by " D").
 *
 * The final pattern blocks names that start with a dot.
 */
const NAME_PATTERN_BLOCKLIST: ReadonlyArray<RegExp> = [
  /Helper(?:$|\s\()/,
  /Agent(?:$|\s\()/,
  /Service(?:$|\s\()/,
  /Uninstaller(?:$|\s\()/,
  /Updater(?:$|\s\()/,
  /^\./,
]
|
||||
|
||||
/**
 * Bundle IDs of apps commonly requested for CU automation. An installed app
 * whose bundle ID is in this set is ALWAYS listed: it skips the path check and
 * the count cap, because the model needs these exact names even on machines
 * with 200+ apps.
 *
 * Keyed by bundle ID rather than display name because bundle IDs are
 * locale-invariant. Every entry is a guaranteed token in the description, so
 * keep the list short — the stated budget is under 30 entries.
 * NOTE(review): the set currently holds 31 entries, one over that budget.
 */
const ALWAYS_KEEP_BUNDLE_IDS: ReadonlySet<string> = new Set<string>([
  // Web browsers
  'com.apple.Safari',
  'com.google.Chrome',
  'com.microsoft.edgemac',
  'org.mozilla.firefox',
  'company.thebrowser.Browser', // Arc

  // Chat, meetings, mail
  'com.tinyspeck.slackmacgap',
  'us.zoom.xos',
  'com.microsoft.teams2',
  'com.microsoft.teams',
  'com.apple.MobileSMS',
  'com.apple.mail',

  // Office suites
  'com.microsoft.Word',
  'com.microsoft.Excel',
  'com.microsoft.Powerpoint',
  'com.microsoft.Outlook',
  'com.apple.iWork.Pages',
  'com.apple.iWork.Numbers',
  'com.apple.iWork.Keynote',
  'com.google.GoogleDocs',

  // Notes and project management
  'notion.id',
  'com.apple.Notes',
  'md.obsidian',
  'com.linear',
  'com.figma.Desktop',

  // Developer tools
  'com.microsoft.VSCode',
  'com.apple.Terminal',
  'com.googlecode.iterm2',
  'com.github.GitHubDesktop',

  // System apps the model genuinely targets
  'com.apple.finder',
  'com.apple.iCal',
  'com.apple.systempreferences',
])
|
||||
|
||||
// ── Prompt-injection hardening ───────────────────────────────────────────
|
||||
|
||||
/**
 * Character allowlist for app names that may be attacker-installed.
 *
 * - `\p{L}\p{M}\p{N}` with the /u flag instead of `\w`: `\w` is ASCII-only and
 *   would drop names such as Bücher, 微信 or Préférences Système. `\p{M}`
 *   covers combining marks, so NFD-decomposed diacritics (ü → u + ◌̈) pass.
 * - A literal space instead of `\s`: `\s` also matches newlines, which would
 *   let "App\nIgnore previous…" through as a multi-line injection.
 * - Also permitted: `_ . & ' ( ) + -`. Double quotes, angle brackets,
 *   backticks, pipes and colons are all rejected.
 */
const APP_NAME_ALLOWED = /^[\p{L}\p{M}\p{N}_ .&'()+-]+$/u

/** Names longer than this (after trimming) are dropped. */
const APP_NAME_MAX_LEN = 40

/** Upper bound on how many non-trusted names are listed before truncating. */
const APP_NAME_MAX_COUNT = 50
|
||||
|
||||
/**
 * True when `path` lies under one of the allowlisted application roots, or
 * under `<homeDir>/Applications/` when a non-empty `homeDir` is supplied.
 *
 * @param path    Bundle path to classify.
 * @param homeDir The user's home directory, with or without a trailing slash;
 *                `undefined` or empty disables the ~/Applications check.
 */
function isUserFacingPath(path: string, homeDir: string | undefined): boolean {
  for (const root of PATH_ALLOWLIST) {
    if (path.startsWith(root)) return true
  }
  if (!homeDir) return false

  // Avoid a doubled slash when homeDir already ends with one.
  let userApps: string
  if (homeDir.endsWith('/')) {
    userApps = `${homeDir}Applications/`
  } else {
    userApps = `${homeDir}/Applications/`
  }
  return path.startsWith(userApps)
}
|
||||
|
||||
/** True when `name` matches any pattern in `NAME_PATTERN_BLOCKLIST`. */
function isNoisyName(name: string): boolean {
  for (const pattern of NAME_PATTERN_BLOCKLIST) {
    if (pattern.test(name)) return true
  }
  return false
}
|
||||
|
||||
/**
 * Shared sanitizer: trim, drop empty and over-long names, optionally apply the
 * character allowlist, dedupe, then sort with `localeCompare`.
 *
 * @param raw             Candidate display names.
 * @param applyCharFilter Pass `false` for names from trusted bundle IDs
 *                        (Apple/Google/MS — a localized "Réglages Système"
 *                        with unusual punctuation should not be dropped);
 *                        pass `true` for anything attacker-installable.
 * @returns A new sorted array of unique, trimmed names.
 */
function sanitizeCore(
  raw: readonly string[],
  applyCharFilter: boolean,
): string[] {
  // A Set keeps first-seen insertion order and ignores repeats.
  const unique = new Set<string>()
  for (const entry of raw) {
    const candidate = entry.trim()
    if (candidate.length === 0 || candidate.length > APP_NAME_MAX_LEN) continue
    if (applyCharFilter && !APP_NAME_ALLOWED.test(candidate)) continue
    unique.add(candidate)
  }
  return [...unique].sort((left, right) => left.localeCompare(right))
}
|
||||
|
||||
/**
 * Sanitize untrusted names with the character allowlist on, then cap the
 * result at `APP_NAME_MAX_COUNT` entries. When the cap is exceeded, one extra
 * trailing entry reports how many names were left out.
 */
function sanitizeAppNames(raw: readonly string[]): string[] {
  const names = sanitizeCore(raw, true)
  const overflow = names.length - APP_NAME_MAX_COUNT
  if (overflow <= 0) return names

  const shown = names.slice(0, APP_NAME_MAX_COUNT)
  shown.push(`… and ${overflow} more`)
  return shown
}
|
||||
|
||||
/**
 * Sanitize names belonging to always-keep bundle IDs: same trimming, length
 * cap, dedupe and sort as untrusted names, but no character allowlist and no
 * count cap.
 */
function sanitizeTrustedNames(raw: readonly string[]): string[] {
  const applyCharFilter = false
  return sanitizeCore(raw, applyCharFilter)
}
|
||||
|
||||
/**
 * Reduce the raw installed-app list to the display names that go into the tool
 * description.
 *
 * Apps whose bundle ID is in `ALWAYS_KEEP_BUNDLE_IDS` skip the path check, the
 * noisy-name check and the character allowlist (trusted vendors, not
 * attacker-installed); they are still length-capped, deduped and sorted. Every
 * other app must sit under a user-facing root, must not have a noisy name, and
 * goes through the full untrusted sanitizer.
 *
 * @param installed Raw installed-app records.
 * @param homeDir   Home directory used to resolve ~/Applications, if known.
 * @returns Always-keep names first, then the remaining names with any
 *          duplicate of an always-keep name removed.
 */
export function filterAppsForDescription(
  installed: readonly InstalledAppLike[],
  homeDir: string | undefined,
): string[] {
  const trustedNames: string[] = []
  const otherNames: string[] = []

  for (const { bundleId, displayName, path } of installed) {
    if (ALWAYS_KEEP_BUNDLE_IDS.has(bundleId)) {
      trustedNames.push(displayName)
      continue
    }
    if (isUserFacingPath(path, homeDir) && !isNoisyName(displayName)) {
      otherNames.push(displayName)
    }
  }

  const pinned = sanitizeTrustedNames(trustedNames)
  const pinnedLookup = new Set(pinned)
  const remainder = sanitizeAppNames(otherNames).filter(
    name => !pinnedLookup.has(name),
  )
  return pinned.concat(remainder)
}
|
||||
@@ -0,0 +1,86 @@
|
||||
import type { ToolUseContext } from '../../Tool.js'
|
||||
|
||||
import { logForDebugging } from '../debug.js'
|
||||
import { errorMessage } from '../errors.js'
|
||||
import { withResolvers } from '../withResolvers.js'
|
||||
import { isLockHeldLocally, releaseComputerUseLock } from './computerUseLock.js'
|
||||
import { unregisterEscHotkey } from './escHotkey.js'
|
||||
|
||||
// Upper bound on how long turn-end cleanup waits for the unhide call.
//
// cu.apps.unhide is NOT among the four @MainActor methods that drainRunLoop
// wraps with its 30s backstop, so without this cap a hang would wedge the
// abort paths — exactly where the user pressed Ctrl+C because something was
// already slow. Unhide should be near-instant; if it takes 5s something is
// wrong and moving on beats waiting. The Swift call keeps running in the
// background either way; we only stop blocking on it.
const UNHIDE_TIMEOUT_MS = 5_000
|
||||
|
||||
/**
 * Turn-end cleanup for the chicago MCP surface: auto-unhide apps that
 * `prepareForAction` hid, then release the file-based lock.
 *
 * Called from three sites: natural turn end (`stopHooks.ts`), abort during
 * streaming (`query.ts` aborted_streaming), abort during tool execution
 * (`query.ts` aborted_tools). All three reach this via dynamic import gated
 * on `feature('CHICAGO_MCP')`. `executor.js` (which pulls both native
 * modules) is dynamic-imported below so non-CU turns don't load native
 * modules just to no-op.
 *
 * No-ops cheaply on non-CU turns: both gate checks are zero-syscall.
 *
 * Every failure in the unhide phase (dynamic import, synchronous throw,
 * rejection) is logged and swallowed, so it can never skip the hidden-state
 * reset or the lock release that follow — a held lock blocks the next CU
 * session.
 *
 * @param ctx Accessors for app state plus the optional OS-notification hook.
 */
export async function cleanupComputerUseAfterTurn(
  ctx: Pick<
    ToolUseContext,
    'getAppState' | 'setAppState' | 'sendOSNotification'
  >,
): Promise<void> {
  const appState = ctx.getAppState()

  const hidden = appState.computerUseMcpState?.hiddenDuringTurn
  if (hidden && hidden.size > 0) {
    let timer: ReturnType<typeof setTimeout> | undefined
    try {
      const { unhideComputerUseApps } = await import('./executor.js')
      // The .catch stays attached to the call itself so a rejection that
      // arrives after the timeout has already won is still logged.
      const unhide = unhideComputerUseApps([...hidden]).catch(err =>
        logForDebugging(
          `[Computer Use MCP] auto-unhide failed: ${errorMessage(err)}`,
        ),
      )
      const timeout = withResolvers<void>()
      timer = setTimeout(timeout.resolve, UNHIDE_TIMEOUT_MS)
      await Promise.race([unhide, timeout.promise])
    } catch (err) {
      // Import failure or a synchronous throw from the native call.
      logForDebugging(
        `[Computer Use MCP] auto-unhide failed: ${errorMessage(err)}`,
      )
    } finally {
      clearTimeout(timer)
    }

    // Clear the record whether or not unhide finished; leave state untouched
    // (same object identity) when it is already clear.
    ctx.setAppState(prev =>
      prev.computerUseMcpState?.hiddenDuringTurn === undefined
        ? prev
        : {
            ...prev,
            computerUseMcpState: {
              ...prev.computerUseMcpState,
              hiddenDuringTurn: undefined,
            },
          },
    )
  }

  // Zero-syscall pre-check so non-CU turns don't touch disk. Release is still
  // idempotent (returns false if already released or owned by another session).
  if (!isLockHeldLocally()) return

  // Unregister before lock release so the pump-retain drops as soon as the
  // CU session ends. Idempotent — no-ops if registration failed at acquire.
  // Swallow throws so a NAPI unregister error never prevents lock release —
  // a held lock blocks the next CU session with "in use by another session".
  try {
    unregisterEscHotkey()
  } catch (err) {
    logForDebugging(
      `[Computer Use MCP] unregisterEscHotkey failed: ${errorMessage(err)}`,
    )
  }

  // Only notify when this call actually released the lock.
  if (await releaseComputerUseLock()) {
    ctx.sendOSNotification?.({
      message: 'Claude is done using your computer',
      notificationType: 'computer_use_exit',
    })
  }
}
|
||||
@@ -0,0 +1,61 @@
|
||||
import { normalizeNameForMCP } from '../../services/mcp/normalization.js'
|
||||
import { env } from '../env.js'
|
||||
|
||||
/** Normalized name that identifies the computer-use MCP server. */
export const COMPUTER_USE_MCP_SERVER_NAME = 'computer-use'

/**
 * Placeholder bundle ID handed to the frontmost gate. Claude Code runs in a
 * terminal and owns no window, so this value never equals a real
 * `NSWorkspace.frontmostApplication`. Consequently the package's "host is
 * frontmost" branch (mouse click-through exemption, keyboard safety-net) is
 * dead code here, and `prepareForAction`'s "exempt our own window" step is a
 * no-op as well — there is no window to exempt.
 */
export const CLI_HOST_BUNDLE_ID = 'com.anthropic.claude-code.cli-no-window'
|
||||
|
||||
/**
 * Maps an `env.terminal` value to a macOS bundle ID. Consulted only when
 * `__CFBundleIdentifier` is unset. Lists the macOS terminals that can be told
 * apart; Linux terminals (konsole, gnome-terminal, xterm) are left out on
 * purpose because `createCliExecutor` is darwin-guarded.
 */
const TERMINAL_BUNDLE_ID_FALLBACK: Readonly<Record<string, string>> = {
  'iTerm.app': 'com.googlecode.iterm2',
  Apple_Terminal: 'com.apple.Terminal',
  ghostty: 'com.mitchellh.ghostty',
  kitty: 'net.kovidgoyal.kitty',
  WarpTerminal: 'dev.warp.Warp-Stable',
  vscode: 'com.microsoft.VSCode',
}
|
||||
|
||||
/**
 * Bundle ID of the terminal emulator hosting this process, so that
 * `prepareDisplay` can exempt it from hiding and `captureExcluding` can keep
 * it out of screenshots.
 *
 * Resolution order:
 * 1. `__CFBundleIdentifier` — set by LaunchServices when a .app bundle spawns
 *    a process, and inherited by children. It is the exact bundle ID, so it
 *    also covers terminals the fallback table does not know. Under tmux/screen
 *    it reflects the terminal that started the SERVER, which may differ from
 *    the attached client; that is harmless here, since some terminal window is
 *    exempted and the screenshots exclude it regardless.
 * 2. `TERMINAL_BUNDLE_ID_FALLBACK`, keyed by `env.terminal`.
 *
 * @returns The bundle ID, or `null` when it cannot be determined (ssh, cleared
 *          env, unknown terminal). Callers must handle `null`.
 */
export function getTerminalBundleId(): string | null {
  const inherited = process.env.__CFBundleIdentifier
  if (inherited) {
    return inherited
  }
  const terminalKey = env.terminal ?? ''
  const mapped = TERMINAL_BUNDLE_ID_FALLBACK[terminalKey]
  return mapped ?? null
}
|
||||
|
||||
/**
 * Fixed capability flags for the macOS CLI. `hostBundleId` is intentionally
 * absent: `executor.ts` adds it per `ComputerExecutor.capabilities`.
 * `buildComputerUseTools` accepts this exact shape (no `hostBundleId`, no
 * `teachMode`).
 */
export const CLI_CU_CAPABILITIES = {
  // Literal types are pinned per field so they do not widen to `string`.
  screenshotFiltering: 'native' as const,
  platform: 'darwin' as const,
}
|
||||
|
||||
/**
 * True when `name`, after `normalizeNameForMCP`, equals
 * `COMPUTER_USE_MCP_SERVER_NAME`.
 */
export function isComputerUseMCPServer(name: string): boolean {
  const normalized = normalizeNameForMCP(name)
  return normalized === COMPUTER_USE_MCP_SERVER_NAME
}
|
||||
@@ -0,0 +1,215 @@
|
||||
import { mkdir, readFile, unlink, writeFile } from 'fs/promises'
|
||||
import { join } from 'path'
|
||||
import { getSessionId } from '../../bootstrap/state.js'
|
||||
import { registerCleanup } from '../../utils/cleanupRegistry.js'
|
||||
import { logForDebugging } from '../../utils/debug.js'
|
||||
import { getClaudeConfigHomeDir } from '../../utils/envUtils.js'
|
||||
import { jsonParse, jsonStringify } from '../../utils/slowOperations.js'
|
||||
import { getErrnoCode } from '../errors.js'
|
||||
|
||||
/** File name of the lock, created inside the Claude config home directory. */
const LOCK_FILENAME = 'computer-use.lock'

// Handle that removes the shutdown cleanup handler. Assigned when the lock is
// acquired and reset to undefined on release, so it also serves as this
// process's in-memory "lock held" flag.
let unregisterCleanup: (() => void) | undefined = undefined
|
||||
|
||||
/** JSON payload stored in the lock file. */
type ComputerUseLock = Readonly<{
  /** Session that owns the lock. */
  sessionId: string
  /** PID of the owning process; probed to detect stale locks. */
  pid: number
  /** `Date.now()` timestamp taken when the acquire attempt started. */
  acquiredAt: number
}>

/**
 * Result of an acquire attempt. `fresh` is true only when this call created
 * the lock file; false means the session already held it.
 */
export type AcquireResult =
  | Readonly<{ kind: 'acquired'; fresh: boolean }>
  | Readonly<{ kind: 'blocked'; by: string }>

/** Result of a non-acquiring lock probe. */
export type CheckResult =
  | Readonly<{ kind: 'free' }>
  | Readonly<{ kind: 'held_by_self' }>
  | Readonly<{ kind: 'blocked'; by: string }>

// Shared result objects for the two "acquired" outcomes.
const FRESH: AcquireResult = { kind: 'acquired', fresh: true }
const REENTRANT: AcquireResult = { kind: 'acquired', fresh: false }
|
||||
|
||||
/**
 * Runtime guard for parsed lock-file contents. Accepts any non-null object
 * with a string `sessionId` and a number `pid`; `acquiredAt` is not checked.
 */
function isComputerUseLock(value: unknown): value is ComputerUseLock {
  if (value === null || typeof value !== 'object') return false
  if (!('sessionId' in value) || typeof value.sessionId !== 'string') {
    return false
  }
  if (!('pid' in value) || typeof value.pid !== 'number') {
    return false
  }
  return true
}
|
||||
|
||||
/** Path of the lock file: `LOCK_FILENAME` inside the Claude config home directory. */
function getLockPath(): string {
  const configDir = getClaudeConfigHomeDir()
  return join(configDir, LOCK_FILENAME)
}
|
||||
|
||||
/**
 * Read and validate the lock file.
 *
 * @returns The parsed lock, or `undefined` when the file cannot be read,
 *          cannot be parsed, or does not have the expected shape.
 */
async function readLock(): Promise<ComputerUseLock | undefined> {
  let parsed: unknown
  try {
    parsed = jsonParse(await readFile(getLockPath(), 'utf8'))
  } catch {
    // Missing file, I/O error and invalid JSON all collapse to "no lock".
    return undefined
  }
  return isComputerUseLock(parsed) ? parsed : undefined
}
|
||||
|
||||
/**
 * Check whether a process is still running (signal 0 probe).
 *
 * - A PID that is not a positive integer is reported as not running without
 *   probing. Zero and negative values address process groups rather than a
 *   single process, so the probe could succeed for a PID that was never a
 *   real lock owner.
 * - `EPERM` from the probe means the process exists but we may not signal it,
 *   so it counts as running. Any other failure (e.g. `ESRCH`) counts as not
 *   running.
 *
 * Note: there is a small window for PID reuse — if the owning process exits
 * and an unrelated process is assigned the same PID, the check will return
 * true. This is extremely unlikely in practice.
 *
 * @param pid Process ID to probe.
 * @returns `true` if a process with that PID exists.
 */
function isProcessRunning(pid: number): boolean {
  if (!Number.isInteger(pid) || pid <= 0) return false
  try {
    process.kill(pid, 0)
    return true
  } catch (e: unknown) {
    return (
      typeof e === 'object' && e !== null && 'code' in e && e.code === 'EPERM'
    )
  }
}
|
||||
|
||||
/**
 * Create the lock file atomically using the exclusive-create flag (`wx`,
 * i.e. O_EXCL).
 *
 * @param lock Payload to serialize into the file.
 * @returns `true` if this call created the file, `false` if it already existed.
 * @throws Any error other than `EEXIST`.
 */
async function tryCreateExclusive(lock: ComputerUseLock): Promise<boolean> {
  let created = true
  try {
    await writeFile(getLockPath(), jsonStringify(lock), { flag: 'wx' })
  } catch (err: unknown) {
    if (getErrnoCode(err) !== 'EEXIST') throw err
    created = false
  }
  return created
}
|
||||
|
||||
/**
 * Install a shutdown handler that releases the lock, covering exits where
 * turn-end cleanup never runs (e.g. the user runs /exit while a tool call is
 * in progress). Any previously installed handler is removed first, so at most
 * one is registered at a time.
 */
function registerLockCleanup(): void {
  if (unregisterCleanup !== undefined) {
    unregisterCleanup()
  }
  const releaseOnShutdown = async (): Promise<void> => {
    await releaseComputerUseLock()
  }
  unregisterCleanup = registerCleanup(releaseOnShutdown)
}
|
||||
|
||||
/**
 * Inspect the lock without taking it. Used for `request_access` /
 * `list_granted_applications` under the package's `defersLockAcquire`
 * contract: those tools check but do not acquire, so the enter-notification
 * and overlay do not fire while the model is only asking for permission.
 *
 * Performs stale-PID recovery: a lock whose owner process is gone is unlinked
 * so it cannot block `request_access`. Never creates a lock — that is
 * `tryAcquireComputerUseLock`'s job.
 *
 * @returns `free`, `held_by_self`, or `blocked` with the holder's session ID.
 */
export async function checkComputerUseLock(): Promise<CheckResult> {
  const holder = await readLock()
  if (holder === undefined) return { kind: 'free' }

  const { sessionId: holderSession, pid: holderPid } = holder
  if (holderSession === getSessionId()) return { kind: 'held_by_self' }
  if (isProcessRunning(holderPid)) {
    return { kind: 'blocked', by: holderSession }
  }

  logForDebugging(
    `Recovering stale computer-use lock from session ${holderSession} (PID ${holderPid})`,
  )
  // Best-effort: a failed unlink is ignored and the lock still reports free.
  await unlink(getLockPath()).catch(() => {})
  return { kind: 'free' }
}
|
||||
|
||||
/**
 * In-memory check, no disk access: does THIS process believe it holds the
 * lock? True once a shutdown cleanup handler has been registered by a
 * successful acquire, and false again after `releaseComputerUseLock` has run.
 * Gates the per-turn release in `cleanup.ts` so non-CU turns don't touch disk.
 */
export function isLockHeldLocally(): boolean {
  const cleanupHandle = unregisterCleanup
  return cleanupHandle !== undefined
}
|
||||
|
||||
/** Exclusive-create the lock file; on success also register the shutdown cleanup. */
async function createLockAndRegister(lock: ComputerUseLock): Promise<boolean> {
  const created = await tryCreateExclusive(lock)
  if (created) registerLockCleanup()
  return created
}

/**
 * Remove whatever lock file is on disk and retry the exclusive create once.
 * If another session wins the retry, report it as the blocker.
 */
async function replaceLockFile(lock: ComputerUseLock): Promise<AcquireResult> {
  await unlink(getLockPath()).catch(() => {})
  if (await createLockAndRegister(lock)) return FRESH
  const winner = await readLock()
  return { kind: 'blocked', by: winner?.sessionId ?? 'unknown' }
}

/**
 * Try to acquire the computer-use lock for the current session.
 *
 * Outcomes:
 * - `{kind: 'acquired', fresh: true}` — first tool call of a CU turn; callers
 *   fire enter notifications on this.
 * - `{kind: 'acquired', fresh: false}` — re-entrant, this session already
 *   holds the lock.
 * - `{kind: 'blocked', by}` — another live session holds it.
 *
 * The create uses O_EXCL (open flag 'wx') as an atomic test-and-set: the OS
 * guarantees at most one process sees the create succeed. When the file
 * already exists, ownership and PID liveness are checked; an unreadable or
 * stale lock is unlinked and the exclusive create is retried once. If two
 * sessions race to recover the same stale lock, only one create succeeds and
 * the other reads the winner.
 */
export async function tryAcquireComputerUseLock(): Promise<AcquireResult> {
  const sessionId = getSessionId()
  const lock: ComputerUseLock = {
    sessionId,
    pid: process.pid,
    acquiredAt: Date.now(),
  }

  await mkdir(getClaudeConfigHomeDir(), { recursive: true })

  // Common case: no lock file yet.
  if (await createLockAndRegister(lock)) return FRESH

  const holder = await readLock()

  // Corrupt/unparseable file — no blocking ID to extract, so treat as stale.
  if (holder === undefined) return replaceLockFile(lock)

  // This session already owns it.
  if (holder.sessionId === sessionId) return REENTRANT

  // Owned by a different session whose process is alive.
  if (isProcessRunning(holder.pid)) {
    return { kind: 'blocked', by: holder.sessionId }
  }

  // Owner process is gone — recover the stale lock.
  logForDebugging(
    `Recovering stale computer-use lock from session ${holder.sessionId} (PID ${holder.pid})`,
  )
  return replaceLockFile(lock)
}
|
||||
|
||||
/**
 * Release the computer-use lock if the current session owns it.
 *
 * Always drops the shutdown cleanup handler first, so `isLockHeldLocally()`
 * is false afterwards whatever the outcome.
 *
 * @returns `true` only when this call unlinked the lock file (i.e. we held
 *          it) — callers fire exit notifications on this. Idempotent: later
 *          calls return `false`.
 */
export async function releaseComputerUseLock(): Promise<boolean> {
  unregisterCleanup?.()
  unregisterCleanup = undefined

  const onDisk = await readLock()
  const ownedByUs = onDisk !== undefined && onDisk.sessionId === getSessionId()
  if (!ownedByUs) return false

  let released = false
  try {
    await unlink(getLockPath())
    logForDebugging('Released computer-use lock')
    released = true
  } catch {
    // Unlink (or the log call) failed; report "not released".
    released = false
  }
  return released
}
|
||||
@@ -0,0 +1,79 @@
|
||||
import { logForDebugging } from '../debug.js'
|
||||
import { withResolvers } from '../withResolvers.js'
|
||||
import { requireComputerUseSwift } from './swiftLoader.js'
|
||||
|
||||
/**
|
||||
* Shared CFRunLoop pump. Swift's four `@MainActor` async methods
|
||||
* (captureExcluding, captureRegion, apps.listInstalled, resolvePrepareCapture)
|
||||
* and `@ant/computer-use-input`'s key()/keys() all dispatch to
|
||||
* DispatchQueue.main. Under libuv (Node/bun) that queue never drains — the
|
||||
* promises hang. Electron drains it via CFRunLoop so Cowork doesn't need this.
|
||||
*
|
||||
* One refcounted setInterval calls `_drainMainRunLoop` (RunLoop.main.run)
|
||||
* every 1ms while any main-queue-dependent call is pending. Multiple
|
||||
* concurrent drainRunLoop() calls share the single pump via retain/release.
|
||||
*/
|
||||
|
||||
// Handle of the shared drain interval; undefined while no pump is running.
let pump: ReturnType<typeof setInterval> | undefined = undefined

// Number of outstanding retains on the shared pump.
let pending = 0
|
||||
|
||||
/**
 * Interval callback: one drain of the main run loop. The native module is
 * passed in as the interval argument instead of being looked up on each tick.
 */
function drainTick(cu: ReturnType<typeof requireComputerUseSwift>): void {
  cu._drainMainRunLoop()
}
|
||||
|
||||
/**
 * Take one reference on the shared drain pump, starting the 1ms interval if
 * it is not already running.
 *
 * If starting the pump throws (e.g. the native module fails to load), the
 * refcount is rolled back before the error propagates. Otherwise the failed
 * retain would stay counted with no matching release, and a later pump could
 * never drop back to zero and stop.
 *
 * @throws Whatever `requireComputerUseSwift()` throws when the pump has to be
 *         started.
 */
function retain(): void {
  pending++
  if (pump !== undefined) return
  try {
    pump = setInterval(drainTick, 1, requireComputerUseSwift())
  } catch (err: unknown) {
    pending--
    throw err
  }
  logForDebugging('[drainRunLoop] pump started', { level: 'verbose' })
}
|
||||
|
||||
/**
 * Drop one reference on the shared drain pump. When the count reaches zero
 * (or below) while a pump is running, the interval is cleared and the count
 * is reset to zero.
 */
function release(): void {
  pending -= 1
  if (pending > 0 || pump === undefined) return

  clearInterval(pump)
  pump = undefined
  logForDebugging('[drainRunLoop] pump stopped', { level: 'verbose' })
  pending = 0
}
|
||||
|
||||
/** Backstop, in milliseconds, for a single `drainRunLoop` call. */
const TIMEOUT_MS = 30_000

/**
 * Timer callback that rejects with the timeout error. Takes the `reject`
 * function as its argument so it can be handed to `setTimeout` without
 * allocating a closure per call.
 */
function timeoutReject(reject: (e: Error) => void): void {
  const failure = new Error(`computer-use native call exceeded ${TIMEOUT_MS}ms`)
  reject(failure)
}
|
||||
|
||||
/**
 * Public aliases for holding a pump reference across a long-lived
 * registration (e.g. the CGEventTap Escape handler). Unlike `drainRunLoop(fn)`
 * there is no timeout: whoever calls `retainPump()` must later call
 * `releasePump()`. These share the refcount used by `drainRunLoop`, so nesting
 * is safe.
 */
export const retainPump: () => void = retain
export const releasePump: () => void = release
|
||||
|
||||
/**
 * Run `fn()` while the shared drain pump is active and resolve with its
 * result. Safe to nest — concurrent calls share one interval via the refcount.
 *
 * @param fn Starts the native call and returns its promise.
 * @returns Whatever `fn()` resolves to.
 * @throws The rejection from `fn()`, or a timeout error if `fn()` has not
 *         settled within `TIMEOUT_MS`.
 */
export async function drainRunLoop<T>(fn: () => Promise<T>): Promise<T> {
  retain()
  let deadline: ReturnType<typeof setTimeout> | undefined
  try {
    // fn() is invoked inside the try so that a synchronous throw (e.g. NAPI
    // argument validation) still reaches release() — otherwise the pump leaks.
    const nativeCall = fn()
    // If the timeout wins the race, the native promise is orphaned and a late
    // rejection would surface as an unhandledRejection. This no-op handler
    // absorbs it; the timeout error is the one callers see.
    nativeCall.catch(() => {})
    const expiry = withResolvers<never>()
    deadline = setTimeout(timeoutReject, TIMEOUT_MS, expiry.reject)
    const outcome = await Promise.race([nativeCall, expiry.promise])
    return outcome
  } finally {
    clearTimeout(deadline)
    release()
  }
}
|
||||
@@ -0,0 +1,54 @@
|
||||
import { logForDebugging } from '../debug.js'
|
||||
import { releasePump, retainPump } from './drainRunLoop.js'
|
||||
import { requireComputerUseSwift } from './swiftLoader.js'
|
||||
|
||||
/**
|
||||
* Global Escape → abort. Mirrors Cowork's `escAbort.ts` but without Electron:
|
||||
* CGEventTap via `@ant/computer-use-swift`. While registered, Escape is
|
||||
* consumed system-wide (PI defense — a prompt-injected action can't dismiss
|
||||
* a dialog with Escape).
|
||||
*
|
||||
* Lifecycle: register on fresh lock acquire (`wrapper.tsx` `acquireCuLock`),
|
||||
* unregister on lock release (`cleanup.ts`). The tap's CFRunLoopSource sits
|
||||
* in .defaultMode on CFRunLoopGetMain(), so we hold a drainRunLoop pump
|
||||
* retain for the registration's lifetime — same refcounted setInterval as
|
||||
* the `@MainActor` methods.
|
||||
*
|
||||
* `notifyExpectedEscape()` punches a hole for model-synthesized Escapes: the
|
||||
* executor's `key("escape")` calls it before posting the CGEvent. Swift
|
||||
* schedules a 100ms decay so a CGEvent that never reaches the tap callback
|
||||
* doesn't eat the next user ESC.
|
||||
*/
|
||||
|
||||
// True while the Escape tap is registered and this module holds a pump retain for it.
let registered: boolean = false
|
||||
|
||||
/**
 * Register the global Escape handler and take a pump retain for as long as it
 * stays registered. A call while already registered does nothing and returns
 * `true`.
 *
 * @param onEscape Callback handed to the native `registerEscape`.
 * @returns `true` if the handler is (now) registered; `false` if the native
 *          registration failed. Failure is non-fatal: CU keeps working, only
 *          without ESC abort.
 */
export function registerEscHotkey(onEscape: () => void): boolean {
  if (registered) return true

  const tapInstalled = requireComputerUseSwift().hotkey.registerEscape(onEscape)
  if (tapInstalled) {
    retainPump()
    registered = true
    logForDebugging('[cu-esc] registered')
    return true
  }

  // CGEvent.tapCreate failed — typically missing Accessibility permission.
  // Mirrors Cowork's escAbort.ts:81.
  logForDebugging('[cu-esc] registerEscape returned false', { level: 'warn' })
  return false
}
|
||||
|
||||
/**
 * Unregister the global Escape handler, if registered. The pump retain is
 * dropped and the registered flag cleared even when the native unregister
 * throws; the error still propagates to the caller.
 */
export function unregisterEscHotkey(): void {
  if (registered) {
    try {
      const swift = requireComputerUseSwift()
      swift.hotkey.unregister()
    } finally {
      releasePump()
      registered = false
      logForDebugging('[cu-esc] unregistered')
    }
  }
}
|
||||
|
||||
/**
 * Tell the native tap that the next Escape is synthesized by the model, so it
 * is not treated as a user abort. Does nothing while no handler is registered.
 */
export function notifyExpectedEscape(): void {
  if (registered) {
    requireComputerUseSwift().hotkey.notifyExpectedEscape()
  }
}
|
||||
@@ -0,0 +1,658 @@
|
||||
/**
|
||||
* CLI `ComputerExecutor` implementation. Wraps two native modules:
|
||||
* - `@ant/computer-use-input` (Rust/enigo) — mouse, keyboard, frontmost app
|
||||
* - `@ant/computer-use-swift` — SCContentFilter screenshots, NSWorkspace apps, TCC
|
||||
*
|
||||
* Contract: `packages/desktop/computer-use-mcp/src/executor.ts` in the apps
|
||||
* repo. The reference impl is Cowork's `apps/desktop/src/main/nest-only/
|
||||
* computer-use/executor.ts` — see notable deviations under "CLI deltas" below.
|
||||
*
|
||||
* ── CLI deltas from Cowork ─────────────────────────────────────────────────
|
||||
*
|
||||
* No `withClickThrough`. Cowork wraps every mouse op in
|
||||
* `BrowserWindow.setIgnoreMouseEvents(true)` so clicks fall through the
|
||||
* overlay. We're a terminal — no window — so the click-through bracket is
|
||||
* a no-op. The sentinel `CLI_HOST_BUNDLE_ID` never matches frontmost.
|
||||
*
|
||||
* Terminal as surrogate host. `getTerminalBundleId()` detects the emulator
|
||||
* we're running inside. It's passed as `hostBundleId` to `prepareDisplay`/
|
||||
* `resolvePrepareCapture` so the Swift side exempts it from hide AND skips
|
||||
* it in the activate z-order walk (so the terminal being frontmost doesn't
|
||||
* eat clicks meant for the target app). Also stripped from `allowedBundleIds`
|
||||
* via `withoutTerminal()` so screenshots don't capture it (Swift 0.2.1's
|
||||
* captureExcluding takes an allow-list despite the name — apps#30355).
|
||||
* `capabilities.hostBundleId` stays as the sentinel — the package's
|
||||
* frontmost gate uses that, and the terminal being frontmost is fine.
|
||||
*
|
||||
* Clipboard via `pbcopy`/`pbpaste`. No Electron `clipboard` module.
|
||||
*/
|
||||
|
||||
import type {
|
||||
ComputerExecutor,
|
||||
DisplayGeometry,
|
||||
FrontmostApp,
|
||||
InstalledApp,
|
||||
ResolvePrepareCaptureResult,
|
||||
RunningApp,
|
||||
ScreenshotResult,
|
||||
} from '@ant/computer-use-mcp'
|
||||
|
||||
import { API_RESIZE_PARAMS, targetImageSize } from '@ant/computer-use-mcp'
|
||||
import { logForDebugging } from '../debug.js'
|
||||
import { errorMessage } from '../errors.js'
|
||||
import { execFileNoThrow } from '../execFileNoThrow.js'
|
||||
import { sleep } from '../sleep.js'
|
||||
import {
|
||||
CLI_CU_CAPABILITIES,
|
||||
CLI_HOST_BUNDLE_ID,
|
||||
getTerminalBundleId,
|
||||
} from './common.js'
|
||||
import { drainRunLoop } from './drainRunLoop.js'
|
||||
import { notifyExpectedEscape } from './escHotkey.js'
|
||||
import { requireComputerUseInput } from './inputLoader.js'
|
||||
import { requireComputerUseSwift } from './swiftLoader.js'
|
||||
|
||||
// ── Helpers ───────────────────────────────────────────────────────────────────
|
||||
|
||||
// JPEG quality setting for screenshots — presumably on a 0–1 scale; confirm
// against the capture API where it is used.
const SCREENSHOT_JPEG_QUALITY: number = 0.75
|
||||
|
||||
/**
 * Convert logical display dimensions to the image size sent to the API:
 * logical → physical (multiply by the scale factor and round) → API target
 * via `targetImageSize`. See `targetImageSize` + COORDINATES.md.
 *
 * @param logicalW    Logical width.
 * @param logicalH    Logical height.
 * @param scaleFactor Logical-to-physical multiplier.
 * @returns `[width, height]` as computed by `targetImageSize`.
 */
function computeTargetDims(
  logicalW: number,
  logicalH: number,
  scaleFactor: number,
): [number, number] {
  const toPhysical = (logical: number): number =>
    Math.round(logical * scaleFactor)
  return targetImageSize(
    toPhysical(logicalW),
    toPhysical(logicalH),
    API_RESIZE_PARAMS,
  )
}
|
||||
|
||||
/**
 * Read the clipboard by running `pbpaste`.
 *
 * @returns The command's stdout.
 * @throws Error if `pbpaste` exits with a non-zero code.
 */
async function readClipboardViaPbpaste(): Promise<string> {
  const result = await execFileNoThrow('pbpaste', [], { useCwd: false })
  if (result.code === 0) {
    return result.stdout
  }
  throw new Error(`pbpaste exited with code ${result.code}`)
}
|
||||
|
||||
/**
 * Write `text` to the clipboard by piping it to `pbcopy` on stdin.
 *
 * @param text Content to place on the clipboard.
 * @throws Error if `pbcopy` exits with a non-zero code.
 */
async function writeClipboardViaPbcopy(text: string): Promise<void> {
  const result = await execFileNoThrow('pbcopy', [], {
    input: text,
    useCwd: false,
  })
  if (result.code === 0) {
    return
  }
  throw new Error(`pbcopy exited with code ${result.code}`)
}
|
||||
|
||||
/** The loaded `@ant/computer-use-input` module, as returned by its loader. */
type Input = ReturnType<typeof requireComputerUseInput>
|
||||
|
||||
/**
 * True when the key sequence is exactly one key and that key is "escape" or
 * "esc", compared case-insensitively. Used to hole-punch the CGEventTap abort
 * for model-synthesized Escape — enigo accepts both spellings, so the tap
 * must too.
 */
function isBareEscape(parts: readonly string[]): boolean {
  if (parts.length !== 1) return false
  const key = parts[0]!.toLowerCase()
  return ['escape', 'esc'].includes(key)
}
|
||||
|
||||
/**
 * Pause after an instant move — an input→HID→AppKit→NSEvent round-trip before
 * the caller reads `NSEvent.mouseLocation` or dispatches a click.
 */
const MOVE_SETTLE_MS = 50

/**
 * Instant move to (x, y), then wait `MOVE_SETTLE_MS`. Used for click, scroll,
 * and drag-from; `animatedMove` is reserved for drag-to only. The intermediate
 * animation frames were triggering hover states and, on the decomposed
 * mouseDown/moveMouse path, emitting stray `.leftMouseDragged` events
 * (toolCalls.ts handleScroll's mouse_full workaround).
 */
async function moveAndSettle(
  input: Input,
  x: number,
  y: number,
): Promise<void> {
  // NOTE(review): the trailing `false` presumably disables input-side
  // animation — confirm against the moveMouse signature.
  await input.moveMouse(x, y, false)
  await sleep(MOVE_SETTLE_MS)
}
|
||||
|
||||
/**
 * Release every key in `pressed`, last pressed first, emptying the array.
 * A failed release is ignored so it can never mask the error that got us here.
 *
 * The array is drained with pop() on each iteration instead of iterating a
 * fixed length: if a drainRunLoop-orphaned press lambda finishes an in-flight
 * input.key() after the caller's finally invoked us, the late push is still
 * picked up by a later pop(). The orphaned flag only stops that lambda at its
 * NEXT check, not at the await it is currently parked on.
 */
async function releasePressed(input: Input, pressed: string[]): Promise<void> {
  for (
    let keyName = pressed.pop();
    keyName !== undefined;
    keyName = pressed.pop()
  ) {
    try {
      await input.key(keyName, 'release')
    } catch {
      // Best-effort: keep releasing the remaining keys.
    }
  }
}
|
||||
|
||||
/**
 * Hold `mods` down while `fn()` runs, then release them.
 *
 * Only modifiers whose press call completed are recorded, so a throw partway
 * through the press phase releases exactly what went down — no stuck
 * modifiers. The finally covers both a press-phase throw and a throw from
 * `fn()`.
 *
 * Caller must already be inside drainRunLoop() — key() dispatches to the main
 * queue and needs the pump to resolve.
 *
 * @returns Whatever `fn()` resolves to.
 */
async function withModifiers<T>(
  input: Input,
  mods: string[],
  fn: () => Promise<T>,
): Promise<T> {
  const held: string[] = []
  try {
    for (const modifier of mods) {
      await input.key(modifier, 'press')
      held.push(modifier)
    }
    const outcome = await fn()
    return outcome
  } finally {
    await releasePressed(input, held)
  }
}
|
||||
|
||||
/**
 * Port of Cowork's `typeViaClipboard`: type `text` by pasting it.
 *
 *  1. Save the user's clipboard (a failed read is logged; we continue without
 *     being able to restore).
 *  2. Write our text.
 *  3. Read it back and compare — clipboard writes can silently fail, and
 *     pressing Cmd+V on a mismatch would paste junk, so we throw instead.
 *  4. Cmd+V via keys().
 *  5. Sleep 100ms — battle-tested threshold for the paste-effect vs
 *     clipboard-restore race. Restoring too soon means the target app pastes
 *     the RESTORED content.
 *  6. Restore in a `finally`, so a throw in steps 2-5 never leaves the user's
 *     clipboard clobbered. A failed restore is logged, not thrown.
 *
 * @throws Error when the write fails or does not round-trip, or keys() throws.
 */
async function typeViaClipboard(input: Input, text: string): Promise<void> {
  let previous: string | undefined
  try {
    previous = await readClipboardViaPbpaste()
  } catch {
    logForDebugging(
      '[computer-use] pbpaste before paste failed; proceeding without restore',
    )
  }

  // Puts the saved clipboard back; no-op when the initial read failed.
  const restorePrevious = async (): Promise<void> => {
    if (typeof previous !== 'string') return
    try {
      await writeClipboardViaPbcopy(previous)
    } catch {
      logForDebugging('[computer-use] clipboard restore after paste failed')
    }
  }

  try {
    await writeClipboardViaPbcopy(text)
    const roundTripped = await readClipboardViaPbpaste()
    if (roundTripped !== text) {
      throw new Error('Clipboard write did not round-trip.')
    }
    await input.keys(['command', 'v'])
    await sleep(100)
  } finally {
    await restorePrevious()
  }
}
|
||||
|
||||
/**
 * Port of Cowork's `animateMouseMovement` + `animatedMove`: glide the cursor to
 * the target with an ease-out-cubic curve at 60fps. Duration is proportional to
 * distance at 2000 px/sec, capped at 0.5s.
 *
 * Paths through this function:
 *  - animation disabled → `moveAndSettle`;
 *  - already within 1px of the target → return without moving;
 *  - computed duration under 30ms (~2 frames) → `moveAndSettle`;
 *  - otherwise → one rounded move per frame, sleeping a frame interval between
 *    frames, then a final `MOVE_SETTLE_MS` pause.
 *
 * Called only from `drag` for the press→to motion — target apps may watch for
 * `.leftMouseDragged` specifically (not just "button down + position changed")
 * and the slow motion gives them time to process intermediate positions
 * (scrollbars, window resizes).
 */
async function animatedMove(
  input: Input,
  targetX: number,
  targetY: number,
  mouseAnimationEnabled: boolean,
): Promise<void> {
  if (!mouseAnimationEnabled) {
    return moveAndSettle(input, targetX, targetY)
  }

  const origin = await input.mouseLocation()
  const spanX = targetX - origin.x
  const spanY = targetY - origin.y
  const distance = Math.hypot(spanX, spanY)
  if (distance < 1) return

  const durationSec = Math.min(distance / 2000, 0.5)
  if (durationSec < 0.03) {
    return moveAndSettle(input, targetX, targetY)
  }

  const framesPerSecond = 60
  const frameMs = 1000 / framesPerSecond
  const frameCount = Math.floor(durationSec * framesPerSecond)

  let frame = 0
  while (frame < frameCount) {
    frame += 1
    const progress = frame / frameCount
    const eased = 1 - (1 - progress) ** 3
    await input.moveMouse(
      Math.round(origin.x + spanX * eased),
      Math.round(origin.y + spanY * eased),
      false,
    )
    if (frame !== frameCount) {
      await sleep(frameMs)
    }
  }

  // The last frame has no trailing frame sleep; this settle pause gives the
  // same HID round-trip before the caller's mouseButton reads
  // NSEvent.mouseLocation.
  await sleep(MOVE_SETTLE_MS)
}
|
||||
|
||||
// ── Factory ───────────────────────────────────────────────────────────────
|
||||
|
||||
export function createCliExecutor(opts: {
|
||||
getMouseAnimationEnabled: () => boolean
|
||||
getHideBeforeActionEnabled: () => boolean
|
||||
}): ComputerExecutor {
|
||||
// Refuse to build an executor anywhere but macOS.
if (process.platform !== 'darwin') {
  throw new Error(
    `createCliExecutor called on ${process.platform}. Computer control is macOS-only.`,
  )
}

// The Swift module is loaded once, up front — every executor method needs it.
// The input module is NOT loaded here: methods call requireComputerUseInput()
// on first mouse/keyboard use (it caches internally), so screenshot-only flows
// never pull the enigo .node.
const cu = requireComputerUseSwift()

const getMouseAnimationEnabled = opts.getMouseAnimationEnabled
const getHideBeforeActionEnabled = opts.getHideBeforeActionEnabled

// The detected terminal stands in as the host app; without one we fall back
// to the CLI sentinel bundle id.
const terminalBundleId = getTerminalBundleId()
const surrogateHost = terminalBundleId ?? CLI_HOST_BUNDLE_ID

// Swift 0.2.1's captureExcluding/captureRegion take an ALLOW list despite the
// name (apps#30355 — complement computed Swift-side against running apps).
// The terminal isn't in the user's grants so it's naturally excluded, but if
// the package ever passes it through we strip it here so the terminal never
// photobombs a screenshot. Always returns a fresh array.
const withoutTerminal = (allowed: readonly string[]): string[] => {
  if (terminalBundleId === null) {
    return [...allowed]
  }
  return allowed.filter(id => id !== terminalBundleId)
}

if (terminalBundleId) {
  logForDebugging(
    `[computer-use] terminal ${terminalBundleId} → surrogate host (hide-exempt, activate-skip, screenshot-excluded)`,
  )
} else {
  logForDebugging(
    '[computer-use] terminal not detected; falling back to sentinel host',
  )
}
|
||||
|
||||
return {
|
||||
capabilities: {
|
||||
...CLI_CU_CAPABILITIES,
|
||||
hostBundleId: CLI_HOST_BUNDLE_ID,
|
||||
},
|
||||
|
||||
// ── Pre-action sequence (hide + defocus) ────────────────────────────
|
||||
|
||||
/**
 * Pre-action hide/defocus step. Returns `result.hidden` from Swift's
 * `prepareDisplay`, or `[]` when the hide-before-action gate is off or the
 * call throws.
 *
 * prepareDisplay isn't @MainActor (plain Task{}), but its .hide() calls
 * trigger window-manager events that queue on CFRunLoop. Without the pump
 * those pile up during Swift's ~1s of usleeps and flush all at once when the
 * next pumped call runs — visible window flashing. Electron drains CFRunLoop
 * continuously so Cowork doesn't see this. Worst-case 100ms + 5×200ms
 * safety-net ≈ 1.1s, well under the 30s drainRunLoop ceiling.
 *
 * Failures are logged and swallowed ("continue with action execution even if
 * switching fails") — the frontmost gate in toolCalls.ts catches any actual
 * unsafe state.
 */
async prepareForAction(
  allowlistBundleIds: string[],
  displayId?: number,
): Promise<string[]> {
  const hideEnabled = getHideBeforeActionEnabled()
  if (!hideEnabled) {
    return []
  }

  const hideAndActivate = async (): Promise<string[]> => {
    try {
      const outcome = await cu.apps.prepareDisplay(
        allowlistBundleIds,
        surrogateHost,
        displayId,
      )
      if (outcome.activated) {
        logForDebugging(
          `[computer-use] prepareForAction: activated ${outcome.activated}`,
        )
      }
      return outcome.hidden
    } catch (err) {
      logForDebugging(
        `[computer-use] prepareForAction failed; continuing to action: ${errorMessage(err)}`,
        { level: 'warn' },
      )
      return []
    }
  }

  return drainRunLoop(hideAndActivate)
},
|
||||
|
||||
/**
 * Ask the Swift module which apps a hide pass would affect. The surrogate
 * host is appended to the caller's allowlist before the query.
 * NOTE(review): presumably the returned apps are those NOT in the list —
 * confirm against the Swift `previewHideSet` contract.
 */
async previewHideSet(
  allowlistBundleIds: string[],
  displayId?: number,
): Promise<Array<{ bundleId: string; displayName: string }>> {
  const exemptIds = allowlistBundleIds.concat(surrogateHost)
  return cu.apps.previewHideSet(exemptIds, displayId)
},
|
||||
|
||||
// ── Display ──────────────────────────────────────────────────────────
|
||||
|
||||
/** Geometry for `displayId` as reported by the Swift module's `display.getSize`. */
async getDisplaySize(displayId?: number): Promise<DisplayGeometry> {
  const geometry = cu.display.getSize(displayId)
  return geometry
},
|
||||
|
||||
/** Geometry of every display, straight from the Swift module's `display.listAll`. */
async listDisplays(): Promise<DisplayGeometry[]> {
  const displays = cu.display.listAll()
  return displays
},
|
||||
|
||||
/**
 * For each bundle id, the display ids reported by the Swift module's
 * `apps.findWindowDisplays` (passed through unchanged).
 */
async findWindowDisplays(
  bundleIds: string[],
): Promise<Array<{ bundleId: string; displayIds: number[] }>> {
  const placements = cu.apps.findWindowDisplays(bundleIds)
  return placements
},
|
||||
|
||||
/**
 * Combined resolve + prepare + capture call into the Swift module.
 *
 * Target image dimensions are derived from the geometry of
 * `preferredDisplayId`. The allowlist is passed with the terminal stripped,
 * and the surrogate host is supplied as the host app. Runs inside
 * drainRunLoop.
 */
async resolvePrepareCapture(opts: {
  allowedBundleIds: string[]
  preferredDisplayId?: number
  autoResolve: boolean
  doHide?: boolean
}): Promise<ResolvePrepareCaptureResult> {
  const geometry = cu.display.getSize(opts.preferredDisplayId)
  const [targetW, targetH] = computeTargetDims(
    geometry.width,
    geometry.height,
    geometry.scaleFactor,
  )
  const capture = () =>
    cu.resolvePrepareCapture(
      withoutTerminal(opts.allowedBundleIds),
      surrogateHost,
      SCREENSHOT_JPEG_QUALITY,
      targetW,
      targetH,
      opts.preferredDisplayId,
      opts.autoResolve,
      opts.doHide,
    )
  return drainRunLoop(capture)
},
|
||||
|
||||
/**
 * Capture the display, restricted to the allowed apps (terminal stripped).
 *
 * The image is pre-sized to `targetImageSize` output so the API transcoder's
 * early-return fires — no server-side resize, `scaleCoord` stays coherent.
 * See packages/desktop/computer-use-mcp/COORDINATES.md. Runs inside
 * drainRunLoop.
 */
async screenshot(opts: {
  allowedBundleIds: string[]
  displayId?: number
}): Promise<ScreenshotResult> {
  const geometry = cu.display.getSize(opts.displayId)
  const [targetW, targetH] = computeTargetDims(
    geometry.width,
    geometry.height,
    geometry.scaleFactor,
  )
  const capture = () =>
    cu.screenshot.captureExcluding(
      withoutTerminal(opts.allowedBundleIds),
      SCREENSHOT_JPEG_QUALITY,
      targetW,
      targetH,
      opts.displayId,
    )
  return drainRunLoop(capture)
},
|
||||
|
||||
/**
 * Capture a sub-region of the display. `regionLogical` is forwarded to Swift's
 * `captureRegion` as-is; the output size is the region's w/h scaled by the
 * display's scale factor and passed through `computeTargetDims`. The
 * allowlist has the terminal stripped. Runs inside drainRunLoop.
 */
async zoom(
  regionLogical: { x: number; y: number; w: number; h: number },
  allowedBundleIds: string[],
  displayId?: number,
): Promise<{ base64: string; width: number; height: number }> {
  const { scaleFactor } = cu.display.getSize(displayId)
  const [outW, outH] = computeTargetDims(
    regionLogical.w,
    regionLogical.h,
    scaleFactor,
  )
  const capture = () =>
    cu.screenshot.captureRegion(
      withoutTerminal(allowedBundleIds),
      regionLogical.x,
      regionLogical.y,
      regionLogical.w,
      regionLogical.h,
      outW,
      outH,
      SCREENSHOT_JPEG_QUALITY,
      displayId,
    )
  return drainRunLoop(capture)
},
|
||||
|
||||
// ── Keyboard ─────────────────────────────────────────────────────────
|
||||
|
||||
/**
 * Press an xdotool-style chord such as "ctrl+shift+a", `repeat` times
 * (default 1). The sequence is split on '+' with empty segments dropped, and
 * the resulting names go to keys() as one call per repetition.
 *
 * keys() dispatches to DispatchQueue.main — drainRunLoop pumps CFRunLoop so it
 * resolves. Rust's error-path cleanup (enigo_wrap.rs) releases modifiers on
 * each invocation, so a mid-loop throw leaves nothing stuck. Repetitions are
 * spaced 8ms apart — 125Hz USB polling cadence.
 */
async key(keySequence: string, repeat?: number): Promise<void> {
  const input = requireComputerUseInput()
  const parts = keySequence.split('+').filter(segment => segment !== '')
  // Bare-only: the CGEventTap checks event.flags.isEmpty so ctrl+escape
  // etc. pass through without aborting.
  const isEsc = isBareEscape(parts)
  const repetitions = repeat ?? 1

  const pressOnce = async (): Promise<void> => {
    if (isEsc) {
      notifyExpectedEscape()
    }
    await input.keys(parts)
  }

  await drainRunLoop(async () => {
    for (let done = 0; done < repetitions; done += 1) {
      if (done > 0) {
        await sleep(8)
      }
      await pressOnce()
    }
  })
},
|
||||
|
||||
/**
 * Press every key in `keyNames`, hold for `durationMs`, then release.
 *
 * Press and release each run inside their own drainRunLoop; the hold sleep
 * sits between them, outside the pump, so durationMs isn't bounded by
 * drainRunLoop's 30s timeout. `pressed` records only presses that landed, so
 * a mid-press throw still releases everything that actually went down.
 *
 * `orphaned` guards a timeout-orphan race: if the press-phase drainRunLoop
 * times out while the esc-hotkey pump-retain keeps the pump running, the
 * abandoned press lambda would otherwise keep pressing keys after cleanup
 * started. Setting the flag in `finally` stops it at its next loop iteration.
 */
async holdKey(keyNames: string[], durationMs: number): Promise<void> {
  const input = requireComputerUseInput()
  const pressed: string[] = []
  let orphaned = false

  const pressAll = async (): Promise<void> => {
    for (const name of keyNames) {
      if (orphaned) return
      // Bare Escape: notify the CGEventTap so it doesn't fire the abort
      // callback for a model-synthesized press. Same as key().
      if (isBareEscape([name])) {
        notifyExpectedEscape()
      }
      await input.key(name, 'press')
      pressed.push(name)
    }
  }

  try {
    await drainRunLoop(pressAll)
    await sleep(durationMs)
  } finally {
    orphaned = true
    await drainRunLoop(() => releasePressed(input, pressed))
  }
},
|
||||
|
||||
/**
 * Type `text`, either by pasting through the clipboard or via `typeText`.
 *
 * The clipboard path runs inside drainRunLoop because its
 * keys(['command','v']) call needs the pump. For the direct path,
 * `toolCalls.ts` handles the grapheme loop + 8ms sleeps and calls this once
 * per grapheme; typeText doesn't dispatch to the main queue.
 */
async type(text: string, opts: { viaClipboard: boolean }): Promise<void> {
  const input = requireComputerUseInput()
  if (!opts.viaClipboard) {
    await input.typeText(text)
    return
  }
  await drainRunLoop(() => typeViaClipboard(input, text))
},
|
||||
|
||||
readClipboard: readClipboardViaPbpaste,
|
||||
|
||||
writeClipboard: writeClipboardViaPbcopy,
|
||||
|
||||
// ── Mouse ────────────────────────────────────────────────────────────
|
||||
|
||||
/** Instant move to (x, y) followed by the standard settle pause. */
async moveMouse(x: number, y: number): Promise<void> {
  const input = requireComputerUseInput()
  await moveAndSettle(input, x, y)
},
|
||||
|
||||
/**
 * Move to (x, y), settle, then click `button` `count` times.
 *
 * With modifiers, the click is bracketed by press/release via withModifiers
 * (same pattern as Cowork) and runs inside drainRunLoop, because key() needs
 * the pump; the modifier-less click does not. AppKit computes
 * NSEvent.clickCount from timing + position proximity, so double/triple click
 * work without setting the CGEvent clickState field.
 */
async click(
  x: number,
  y: number,
  button: 'left' | 'right' | 'middle',
  count: 1 | 2 | 3,
  modifiers?: string[],
): Promise<void> {
  const input = requireComputerUseInput()
  await moveAndSettle(input, x, y)

  const doClick = () => input.mouseButton(button, 'click', count)

  if (!modifiers || modifiers.length === 0) {
    await doClick()
    return
  }
  await drainRunLoop(() => withModifiers(input, modifiers, doClick))
},
|
||||
|
||||
/** Press (and hold) the left mouse button at the current cursor position. */
async mouseDown(): Promise<void> {
  const input = requireComputerUseInput()
  await input.mouseButton('left', 'press')
},
|
||||
|
||||
/** Release the left mouse button. */
async mouseUp(): Promise<void> {
  const input = requireComputerUseInput()
  await input.mouseButton('left', 'release')
},
|
||||
|
||||
/** Current cursor location as reported by the input module's `mouseLocation`. */
async getCursorPosition(): Promise<{ x: number; y: number }> {
  const input = requireComputerUseInput()
  return input.mouseLocation()
},
|
||||
|
||||
/**
 * Left-button drag ending at `to`.
 *
 * `from === undefined` → drag from the current cursor position (training's
 * left_click_drag with start_coordinate omitted); otherwise the cursor is
 * first moved to `from` and allowed to settle.
 *
 * Everything after the button press runs inside try/finally, so the button is
 * ALWAYS released even if the post-press pause or the move throws — otherwise
 * the user's left button is stuck-pressed until they physically click.
 *
 * @param from Optional start point.
 * @param to   End point; reached via `animatedMove`, honouring the mouse
 *             animation sub-gate read at call time.
 */
async drag(
  from: { x: number; y: number } | undefined,
  to: { x: number; y: number },
): Promise<void> {
  const input = requireComputerUseInput()
  if (from !== undefined) {
    await moveAndSettle(input, from.x, from.y)
  }
  await input.mouseButton('left', 'press')
  try {
    // 50ms after press: enigo's move_mouse reads NSEvent.pressedMouseButtons
    // to decide .leftMouseDragged vs .mouseMoved; the synthetic leftMouseDown
    // needs a HID-tap round-trip to show up there.
    await sleep(MOVE_SETTLE_MS)
    await animatedMove(input, to.x, to.y, getMouseAnimationEnabled())
  } finally {
    await input.mouseButton('left', 'release')
  }
},
|
||||
|
||||
/**
 * Move to (x, y), settle, then scroll each axis with a non-zero delta.
 * Vertical goes first — it's the common axis, and a horizontal failure
 * shouldn't lose the vertical scroll.
 */
async scroll(x: number, y: number, dx: number, dy: number): Promise<void> {
  const input = requireComputerUseInput()
  await moveAndSettle(input, x, y)

  const verticalRequested = dy !== 0
  const horizontalRequested = dx !== 0
  if (verticalRequested) {
    await input.mouseScroll(dy, 'vertical')
  }
  if (horizontalRequested) {
    await input.mouseScroll(dx, 'horizontal')
  }
},
|
||||
|
||||
// ── App management ───────────────────────────────────────────────────
|
||||
|
||||
/**
 * Frontmost app per the input module's `getFrontmostAppInfo`, or `null` when
 * no info is returned or its bundle id is empty/missing.
 */
async getFrontmostApp(): Promise<FrontmostApp | null> {
  const info = requireComputerUseInput().getFrontmostAppInfo()
  if (info?.bundleId) {
    return { bundleId: info.bundleId, displayName: info.appName }
  }
  return null
},
|
||||
|
||||
/** App at (x, y) as reported by the Swift module's `apps.appUnderPoint`. */
async appUnderPoint(
  x: number,
  y: number,
): Promise<{ bundleId: string; displayName: string } | null> {
  const hit = cu.apps.appUnderPoint(x, y)
  return hit
},
|
||||
|
||||
/**
 * Installed apps from the Swift module, enumerated inside drainRunLoop.
 *
 * `ComputerUseInstalledApp` is `{bundleId, displayName, path}`. `InstalledApp`
 * adds optional `iconDataUrl`, which is left unpopulated here; the approval
 * dialog fetches icons lazily via getAppIcon().
 */
async listInstalledApps(): Promise<InstalledApp[]> {
  const enumerate = () => cu.apps.listInstalled()
  return drainRunLoop(enumerate)
},
|
||||
|
||||
/**
 * Icon data URL for the app bundle at `path`. A null/undefined result from
 * the Swift module's `iconDataUrl` is normalised to `undefined`.
 */
async getAppIcon(path: string): Promise<string | undefined> {
  const icon = cu.apps.iconDataUrl(path)
  return icon ?? undefined
},
|
||||
|
||||
/** Running apps, straight from the Swift module's `apps.listRunning`. */
async listRunningApps(): Promise<RunningApp[]> {
  const running = cu.apps.listRunning()
  return running
},
|
||||
|
||||
/** Open the app identified by `bundleId` via the Swift module's `apps.open`. */
async openApp(bundleId: string): Promise<void> {
  const opening = cu.apps.open(bundleId)
  await opening
},
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Unhide the given apps through the Swift module. An empty list is a no-op
 * and does not load the module.
 *
 * Module-level export (not on the executor object) — called at turn-end from
 * `stopHooks.ts` / `query.ts`, outside the executor lifecycle. Fire-and-forget
 * at the call site; the caller `.catch()`es.
 */
export async function unhideComputerUseApps(
  bundleIds: readonly string[],
): Promise<void> {
  if (bundleIds.length > 0) {
    const swift = requireComputerUseSwift()
    // Copy: the parameter is readonly, the native call gets a mutable array.
    await swift.apps.unhide([...bundleIds])
  }
}
|
||||
@@ -0,0 +1,72 @@
|
||||
import type { CoordinateMode, CuSubGates } from '@ant/computer-use-mcp/types'
|
||||
|
||||
import { getDynamicConfig_CACHED_MAY_BE_STALE } from '../../services/analytics/growthbook.js'
|
||||
import { getSubscriptionType } from '../auth.js'
|
||||
import { isEnvTruthy } from '../envUtils.js'
|
||||
|
||||
/**
 * Full shape of the remote config read by `readConfig`: every `CuSubGates`
 * flag plus the master `enabled` switch and the coordinate mode.
 */
type ChicagoConfig = CuSubGates & {
|
||||
enabled: boolean
|
||||
coordinateMode: CoordinateMode
|
||||
}
|
||||
|
||||
/**
 * Fallback values: passed to the dynamic-config lookup as its default and
 * spread underneath the result in `readConfig`, so any field the remote
 * config omits takes the value below. The feature itself defaults to off.
 */
const DEFAULTS: ChicagoConfig = {
|
||||
enabled: false,
|
||||
pixelValidation: false,
|
||||
clipboardPasteMultiline: true,
|
||||
mouseAnimation: true,
|
||||
hideBeforeAction: true,
|
||||
autoTargetDisplay: true,
|
||||
clipboardGuard: true,
|
||||
coordinateMode: 'pixels',
|
||||
}
|
||||
|
||||
/**
 * Read the `tengu_malort_pedway` dynamic config and layer it over DEFAULTS.
 *
 * The merge matters: the generic on getDynamicConfig is a type assertion, not
 * a validator, so a partial JSON ({"enabled": true} alone) would otherwise
 * surface undefined fields. Spreading over DEFAULTS lets it inherit the rest.
 */
function readConfig(): ChicagoConfig {
  const remote = getDynamicConfig_CACHED_MAY_BE_STALE<Partial<ChicagoConfig>>(
    'tengu_malort_pedway',
    DEFAULTS,
  )
  return { ...DEFAULTS, ...remote }
}
|
||||
|
||||
/**
 * Subscription gate: Max/Pro only for external rollout.
 *
 * `USER_TYPE === 'ant'` bypasses the tier check (the subscription is not even
 * looked up) so dogfooding continues regardless of tier — not all ants are
 * max/pro, and per CLAUDE.md:281, USER_TYPE !== 'ant' branches get zero
 * antfooding.
 */
function hasRequiredSubscription(): boolean {
  if (process.env.USER_TYPE !== 'ant') {
    const tier = getSubscriptionType()
    return tier === 'max' || tier === 'pro'
  }
  return true
}
|
||||
|
||||
/**
 * Master on/off decision for computer use.
 *
 * Off for ants whose shell inherited monorepo dev config: MONOREPO_ROOT_DIR is
 * exported by config/local/zsh/zshrc, which laptop-setup.sh wires into
 * ~/.zshrc — its presence is the cheap proxy for "has monorepo access".
 * Override: ALLOW_ANT_COMPUTER_USE_MCP=1.
 *
 * Otherwise requires both the subscription gate and the remote `enabled` flag.
 */
export function getChicagoEnabled(): boolean {
  const isAnt = process.env.USER_TYPE === 'ant'
  const suppressedForMonorepoAnt =
    isAnt &&
    process.env.MONOREPO_ROOT_DIR &&
    !isEnvTruthy(process.env.ALLOW_ANT_COMPUTER_USE_MCP)
  if (suppressedForMonorepoAnt) {
    return false
  }
  if (!hasRequiredSubscription()) {
    return false
  }
  return readConfig().enabled
}
|
||||
|
||||
/**
 * Current sub-gate flags: a fresh config read with `enabled` and
 * `coordinateMode` stripped off, leaving only the `CuSubGates` fields.
 */
export function getChicagoSubGates(): CuSubGates {
  const config = readConfig()
  const { enabled: _enabled, coordinateMode: _mode, ...gatesOnly } = config
  return gatesOnly
}
|
||||
|
||||
// Cache for getChicagoCoordinateMode; stays undefined until the first read.
let frozenCoordinateMode: CoordinateMode | undefined

/**
 * Coordinate mode, frozen at first read for the rest of the process.
 *
 * setup.ts builds tool descriptions and executor.ts scales coordinates off the
 * same value. A live read here would let a mid-session GB flip tell the model
 * "pixels" while transforming clicks as normalized.
 */
export function getChicagoCoordinateMode(): CoordinateMode {
  if (frozenCoordinateMode == null) {
    frozenCoordinateMode = readConfig().coordinateMode
  }
  return frozenCoordinateMode
}
|
||||
@@ -0,0 +1,69 @@
|
||||
import type {
|
||||
ComputerUseHostAdapter,
|
||||
Logger,
|
||||
} from '@ant/computer-use-mcp/types'
|
||||
import { format } from 'util'
|
||||
import { logForDebugging } from '../debug.js'
|
||||
import { COMPUTER_USE_MCP_SERVER_NAME } from './common.js'
|
||||
import { createCliExecutor } from './executor.js'
|
||||
import { getChicagoEnabled, getChicagoSubGates } from './gates.js'
|
||||
import { requireComputerUseSwift } from './swiftLoader.js'
|
||||
|
||||
/**
 * `Logger` implementation that routes every message to `logForDebugging`.
 * The message and args are combined with util.format (printf-style). Levels
 * map one-to-one, except `silly`, which is logged at 'debug'.
 */
class DebugLogger implements Logger {
  silly(message: string, ...args: unknown[]): void {
    const line = format(message, ...args)
    logForDebugging(line, { level: 'debug' })
  }

  debug(message: string, ...args: unknown[]): void {
    const line = format(message, ...args)
    logForDebugging(line, { level: 'debug' })
  }

  info(message: string, ...args: unknown[]): void {
    const line = format(message, ...args)
    logForDebugging(line, { level: 'info' })
  }

  warn(message: string, ...args: unknown[]): void {
    const line = format(message, ...args)
    logForDebugging(line, { level: 'warn' })
  }

  error(message: string, ...args: unknown[]): void {
    const line = format(message, ...args)
    logForDebugging(line, { level: 'error' })
  }
}
|
||||
|
||||
// Process-lifetime cache for getComputerUseHostAdapter.
let cached: ComputerUseHostAdapter | undefined

/**
 * Process-lifetime singleton host adapter, built on first call and reused
 * afterwards.
 *
 * Building it runs the executor factory, which throws if it cannot be
 * constructed — there is no degraded mode, and a failed build leaves nothing
 * cached, so the next call retries.
 */
export function getComputerUseHostAdapter(): ComputerUseHostAdapter {
  if (cached !== undefined) {
    return cached
  }

  const adapter: ComputerUseHostAdapter = {
    serverName: COMPUTER_USE_MCP_SERVER_NAME,
    logger: new DebugLogger(),
    executor: createCliExecutor({
      getMouseAnimationEnabled: () => getChicagoSubGates().mouseAnimation,
      getHideBeforeActionEnabled: () => getChicagoSubGates().hideBeforeAction,
    }),
    // Both TCC checks always run; the per-permission results are reported
    // only when at least one is missing.
    ensureOsPermissions: async () => {
      const { tcc } = requireComputerUseSwift()
      const accessibility = tcc.checkAccessibility()
      const screenRecording = tcc.checkScreenRecording()
      if (accessibility && screenRecording) {
        return { granted: true }
      }
      return { granted: false, accessibility, screenRecording }
    },
    isDisabled: () => !getChicagoEnabled(),
    getSubGates: getChicagoSubGates,
    // cleanup.ts always unhides at turn end — no user preference to disable it.
    getAutoUnhideEnabled: () => true,

    // Pixel-validation JPEG decode+crop. MUST be synchronous (the package
    // does `patch1.equals(patch2)` directly on the return value). Cowork uses
    // Electron's `nativeImage` (sync); our `image-processor-napi` is
    // sharp-compatible and async-only. Returning null → validation skipped,
    // click proceeds — the designed fallback per `PixelCompareResult.skipped`.
    // The sub-gate defaults to false anyway.
    cropRawPatch: () => null,
  }

  cached = adapter
  return adapter
}
|
||||
@@ -0,0 +1,30 @@
|
||||
import type {
|
||||
ComputerUseInput,
|
||||
ComputerUseInputAPI,
|
||||
} from '@ant/computer-use-input'
|
||||
|
||||
// Narrowed module, memoised after the first successful load.
let cached: ComputerUseInputAPI | undefined

/**
 * Load `@ant/computer-use-input` on first use and return its API.
 *
 * Package's js/index.js reads COMPUTER_USE_INPUT_NODE_PATH (baked by
 * build-with-plugins.ts on darwin targets, unset otherwise — falls through to
 * the node_modules prebuilds/ path).
 *
 * The package exports a discriminated union on `isSupported`; it is narrowed
 * here once so callers get the bare `ComputerUseInputAPI` without re-checking.
 *
 * key()/keys() dispatch enigo work onto DispatchQueue.main via
 * dispatch2::run_on_main, then block a tokio worker on a channel. Under
 * Electron (CFRunLoop drains the main queue) this works; under libuv
 * (Node/bun) the main queue never drains and the promise hangs. The executor
 * calls these inside drainRunLoop().
 *
 * @throws Error when the package reports `isSupported` as false.
 */
export function requireComputerUseInput(): ComputerUseInputAPI {
  if (cached === undefined) {
    // eslint-disable-next-line @typescript-eslint/no-require-imports
    const loaded = require('@ant/computer-use-input') as ComputerUseInput
    if (!loaded.isSupported) {
      throw new Error('@ant/computer-use-input is not supported on this platform')
    }
    cached = loaded
  }
  return cached
}
|
||||
@@ -0,0 +1,106 @@
|
||||
import {
|
||||
buildComputerUseTools,
|
||||
createComputerUseMcpServer,
|
||||
} from '@ant/computer-use-mcp'
|
||||
import { StdioServerTransport } from '@modelcontextprotocol/sdk/server/stdio.js'
|
||||
import { ListToolsRequestSchema } from '@modelcontextprotocol/sdk/types.js'
|
||||
import { homedir } from 'os'
|
||||
|
||||
import { shutdownDatadog } from '../../services/analytics/datadog.js'
|
||||
import { shutdown1PEventLogging } from '../../services/analytics/firstPartyEventLogger.js'
|
||||
import { initializeAnalyticsSink } from '../../services/analytics/sink.js'
|
||||
import { enableConfigs } from '../config.js'
|
||||
import { logForDebugging } from '../debug.js'
|
||||
import { filterAppsForDescription } from './appNames.js'
|
||||
import { getChicagoCoordinateMode } from './gates.js'
|
||||
import { getComputerUseHostAdapter } from './hostAdapter.js'
|
||||
|
||||
// Upper bound on how long we wait for app enumeration.
const APP_ENUM_TIMEOUT_MS = 1000

/**
 * Enumerate installed apps, racing the enumeration against a
 * `APP_ENUM_TIMEOUT_MS` timer, and return the filtered names.
 *
 * Fails soft — if Spotlight is slow or claude-swift throws, this returns
 * `undefined` and the tool description just omits the list. Resolution
 * happens at call time regardless; the model just doesn't get hints.
 */
async function tryGetInstalledAppNames(): Promise<string[] | undefined> {
  const adapter = getComputerUseHostAdapter()
  const enumP = adapter.executor.listInstalledApps()

  let timer: ReturnType<typeof setTimeout> | undefined
  const timeoutP = new Promise<undefined>(resolve => {
    timer = setTimeout(resolve, APP_ENUM_TIMEOUT_MS, undefined)
  })

  let installed: Awaited<typeof enumP> | undefined
  try {
    installed = await Promise.race([enumP, timeoutP])
  } catch {
    installed = undefined
  } finally {
    clearTimeout(timer)
  }

  if (installed) {
    return filterAppsForDescription(installed, homedir())
  }

  // The enumeration continues in the background — swallow late rejections.
  void enumP.catch(() => {})
  logForDebugging(
    `[Computer Use MCP] app enumeration exceeded ${APP_ENUM_TIMEOUT_MS}ms or failed; tool description omits list`,
  )
  return undefined
}
|
||||
|
||||
/**
 * Construct the in-process server.
 *
 * The package's `createComputerUseMcpServer` supplies the Server object + stub
 * CallTool handler. Its ListTools handler is then REPLACED with one that
 * includes installed-app names in the `request_access` description (the
 * package's factory doesn't take `installedAppNames`, and Cowork builds its
 * own tool array in serverDef.ts for the same reason). The tool array is built
 * once; the disabled check runs on every ListTools request and yields an
 * empty list while disabled.
 *
 * Async so the 1s app-enumeration timeout doesn't block startup — called from
 * an `await import()` in `client.ts` on first CU connection, not `main.tsx`.
 *
 * Real dispatch still goes through `wrapper.tsx`'s `.call()` override; this
 * server exists only to answer ListTools.
 */
export async function createComputerUseMcpServerForCli(): Promise<
  ReturnType<typeof createComputerUseMcpServer>
> {
  const adapter = getComputerUseHostAdapter()
  const coordinateMode = getChicagoCoordinateMode()
  const server = createComputerUseMcpServer(adapter, coordinateMode)

  const installedAppNames = await tryGetInstalledAppNames()
  const { capabilities } = adapter.executor
  const tools = buildComputerUseTools(
    capabilities,
    coordinateMode,
    installedAppNames,
  )

  server.setRequestHandler(ListToolsRequestSchema, async () => {
    if (adapter.isDisabled()) {
      return { tools: [] }
    }
    return { tools }
  })

  return server
}
|
||||
|
||||
/**
 * Subprocess entrypoint for `--computer-use-mcp`. Mirror of
 * `runClaudeInChromeMcpServer` — stdio transport, exit on stdin close,
 * flush analytics before exit.
 *
 * Shutdown runs at most once (guarded by `exiting`). The process always
 * exits, even when an analytics flush rejects: without that guarantee a
 * failed flush would skip `process.exit`, and because `exiting` is already
 * set, later stdin events could not retry.
 */
export async function runComputerUseMcpServer(): Promise<void> {
  enableConfigs()
  initializeAnalyticsSink()

  const server = await createComputerUseMcpServerForCli()
  const transport = new StdioServerTransport()

  let exiting = false
  const shutdownAndExit = async (): Promise<void> => {
    if (exiting) return
    exiting = true
    try {
      await Promise.all([shutdown1PEventLogging(), shutdownDatadog()])
    } catch {
      // Best-effort flush: losing analytics must not block the exit.
      logForDebugging(
        '[Computer Use MCP] analytics flush failed during shutdown',
        { level: 'warn' },
      )
    } finally {
      // eslint-disable-next-line custom-rules/no-process-exit
      process.exit(0)
    }
  }
  process.stdin.on('end', () => void shutdownAndExit())
  process.stdin.on('error', () => void shutdownAndExit())

  logForDebugging('[Computer Use MCP] Starting MCP server')
  await server.connect(transport)
  logForDebugging('[Computer Use MCP] MCP server started')
}
|
||||
@@ -0,0 +1,53 @@
|
||||
import { buildComputerUseTools } from '@ant/computer-use-mcp'
|
||||
import { join } from 'path'
|
||||
import { fileURLToPath } from 'url'
|
||||
import { buildMcpToolName } from '../../services/mcp/mcpStringUtils.js'
|
||||
import type { ScopedMcpServerConfig } from '../../services/mcp/types.js'
|
||||
|
||||
import { isInBundledMode } from '../bundledMode.js'
|
||||
import { CLI_CU_CAPABILITIES, COMPUTER_USE_MCP_SERVER_NAME } from './common.js'
|
||||
import { getChicagoCoordinateMode } from './gates.js'
|
||||
|
||||
/**
 * Produces the dynamic MCP server config and the list of allowed tool names
 * for computer use. Mirror of `setupClaudeInChrome`.
 *
 * The `mcp__computer-use__*` tools go into `allowedTools` so they bypass the
 * normal permission prompt — the package's `request_access` handles approval
 * for the whole session.
 *
 * The MCP layer isn't ceremony: the API backend detects `mcp__computer-use__*`
 * tool names and emits a CU availability hint into the system prompt
 * (COMPUTER_USE_MCP_AVAILABILITY_HINT in the anthropic repo). Built-in tools
 * with different names wouldn't trigger it. Cowork uses the same names for the
 * same reason (apps/desktop/src/main/local-agent-mode/systemPrompt.ts:314).
 *
 * @returns `mcpConfig` keyed by the computer-use server name, plus the fully
 *   qualified MCP tool names to pre-allow.
 */
export function setupComputerUseMCP(): {
  mcpConfig: Record<string, ScopedMcpServerConfig>
  allowedTools: string[]
} {
  const toolDefs = buildComputerUseTools(
    CLI_CU_CAPABILITIES,
    getChicagoCoordinateMode(),
  )
  const allowedTools = toolDefs.map(tool =>
    buildMcpToolName(COMPUTER_USE_MCP_SERVER_NAME, tool.name),
  )

  // command/args are never spawned — client.ts intercepts by name and uses
  // the in-process server. The config just needs to exist with type 'stdio'
  // to hit the right branch. Mirrors Chrome's setup.
  let args: string[]
  if (isInBundledMode()) {
    args = ['--computer-use-mcp']
  } else {
    // Resolves `cli.js` next to this module's own file.
    const cliEntry = join(fileURLToPath(import.meta.url), '..', 'cli.js')
    args = [cliEntry, '--computer-use-mcp']
  }

  const serverConfig = {
    type: 'stdio',
    command: process.execPath,
    args,
    scope: 'dynamic',
  } as const

  return {
    mcpConfig: { [COMPUTER_USE_MCP_SERVER_NAME]: serverConfig },
    allowedTools,
  }
}
|
||||
@@ -0,0 +1,23 @@
|
||||
import type { ComputerUseAPI } from '@ant/computer-use-swift'
|
||||
|
||||
let cached: ComputerUseAPI | undefined
|
||||
|
||||
/**
 * Loads the `@ant/computer-use-swift` native module, caching it after the
 * first load so later calls reuse the same instance.
 *
 * The package's js/index.js reads COMPUTER_USE_SWIFT_NODE_PATH (baked by
 * build-with-plugins.ts on darwin targets, unset otherwise — it then falls
 * through to the node_modules prebuilds/ path).
 *
 * The four @MainActor methods (captureExcluding, captureRegion,
 * apps.listInstalled, resolvePrepareCapture) dispatch to DispatchQueue.main
 * and will hang under libuv unless CFRunLoop is pumped — call sites wrap
 * these in drainRunLoop().
 *
 * @returns The loaded (and cached) native API.
 * @throws Error when called on any platform other than macOS.
 */
export function requireComputerUseSwift(): ComputerUseAPI {
  const isMac = process.platform === 'darwin'
  if (!isMac) {
    throw new Error('@ant/computer-use-swift is macOS-only')
  }

  // Lazy load: only touch the native module the first time it is needed.
  if (cached == null) {
    // eslint-disable-next-line @typescript-eslint/no-require-imports
    cached = require('@ant/computer-use-swift') as ComputerUseAPI
  }
  return cached
}
|
||||
|
||||
export type { ComputerUseAPI }
|
||||
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
Reference in New Issue
Block a user