leaked-claude-code

This commit is contained in:
idbzoomh 2026-03-31 12:57:31 +00:00 committed by GitHub
parent ba3fd7d050
commit a7a57bee00
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
57 changed files with 26355 additions and 0 deletions

View file

@ -0,0 +1,87 @@
import axios from 'axios'
import { getOauthConfig } from '../constants/oauth.js'
import type { SDKMessage } from '../entrypoints/agentSdkTypes.js'
import { logForDebugging } from '../utils/debug.js'
import { getOAuthHeaders, prepareApiRequest } from '../utils/teleport/api.js'
/** Default `limit` used by fetchLatestEvents and fetchOlderEvents. */
export const HISTORY_PAGE_SIZE = 100
/** One page of session history, as returned by the fetch helpers in this module. */
export type HistoryPage = {
/** Chronological order within the page. */
events: SDKMessage[]
/** Oldest event ID in this page → before_id cursor for next-older page. */
firstId: string | null
/** true = older events exist. */
hasMore: boolean
}
/** Response body shape requested from the events endpoint; fetchPage maps it onto HistoryPage. */
type SessionEventsResponse = {
/** Events in the page; copied to HistoryPage.events. */
data: SDKMessage[]
/** Copied to HistoryPage.hasMore. */
has_more: boolean
/** Copied to HistoryPage.firstId. */
first_id: string | null
/** Not read by fetchPage. */
last_id: string | null
}
/** Request context built by createHistoryAuthCtx and passed to every page fetch. */
export type HistoryAuthCtx = {
/** Full URL of the events endpoint for one session. */
baseUrl: string
/** Headers sent with each page request. */
headers: Record<string, string>
}
/**
 * Prepare auth + headers + base URL once, reuse across pages.
 *
 * @param sessionId - ID of the session whose events will be paged. The value
 *   is percent-encoded before it is placed in the URL path, so characters such
 *   as `/`, `?` or `#` stay inside the session segment instead of changing
 *   which endpoint is addressed.
 * @returns The events endpoint URL and the headers to send on every request.
 */
export async function createHistoryAuthCtx(
  sessionId: string,
): Promise<HistoryAuthCtx> {
  const { accessToken, orgUUID } = await prepareApiRequest()
  // IDs made only of letters, digits, `_` and `-` are unchanged by the encoding.
  const sessionSegment = encodeURIComponent(sessionId)
  return {
    baseUrl: `${getOauthConfig().BASE_API_URL}/v1/sessions/${sessionSegment}/events`,
    headers: {
      ...getOAuthHeaders(accessToken),
      'anthropic-beta': 'ccr-byoc-2025-07-29',
      'x-organization-uuid': orgUUID,
    },
  }
}
/**
 * Fetch one page of session events and normalise it into a HistoryPage.
 *
 * Never rejects: transport failures and non-200 responses are logged via
 * logForDebugging and reported as `null`.
 *
 * @param ctx - Endpoint URL and headers from createHistoryAuthCtx.
 * @param params - Query-string parameters for this page.
 * @param label - Prefix for debug log lines (the calling helper's name).
 * @returns The page, or `null` when the request failed or the status was not 200.
 */
async function fetchPage(
  ctx: HistoryAuthCtx,
  params: Record<string, string | number | boolean>,
  label: string,
): Promise<HistoryPage | null> {
  let resp: { status: number; data: unknown }
  try {
    resp = await axios.get<SessionEventsResponse>(ctx.baseUrl, {
      headers: ctx.headers,
      params,
      timeout: 15000,
      // Accept every status so HTTP errors are handled below, not thrown.
      validateStatus: () => true,
    })
  } catch (err: unknown) {
    // Network error / timeout: keep the reason so failures are diagnosable.
    const reason = err instanceof Error ? err.message : String(err)
    logForDebugging(`[${label}] HTTP error: ${reason}`)
    return null
  }
  if (resp.status !== 200) {
    logForDebugging(`[${label}] HTTP ${resp.status}`)
    return null
  }
  // The body is untrusted at runtime: a 200 may carry null, an empty string,
  // or some other non-object. Treat anything unusable as an empty page.
  const body = resp.data
  const payload: Partial<SessionEventsResponse> =
    body !== null && typeof body === 'object'
      ? (body as Partial<SessionEventsResponse>)
      : {}
  return {
    events: Array.isArray(payload.data) ? payload.data : [],
    firstId: typeof payload.first_id === 'string' ? payload.first_id : null,
    hasMore: payload.has_more === true,
  }
}
/**
 * Fetch the newest page of events.
 *
 * Sends `anchor_to_latest: true` together with `limit`; the page comes back
 * in chronological order and `hasMore` tells the caller that older events
 * exist.
 */
export async function fetchLatestEvents(
  ctx: HistoryAuthCtx,
  limit = HISTORY_PAGE_SIZE,
): Promise<HistoryPage | null> {
  const query = { limit, anchor_to_latest: true }
  return fetchPage(ctx, query, 'fetchLatestEvents')
}
/**
 * Fetch the page of events that immediately precedes the `beforeId` cursor.
 * The cursor is sent as the `before_id` query parameter alongside `limit`.
 */
export async function fetchOlderEvents(
  ctx: HistoryAuthCtx,
  beforeId: string,
  limit = HISTORY_PAGE_SIZE,
): Promise<HistoryPage | null> {
  const query = { limit, before_id: beforeId }
  return fetchPage(ctx, query, 'fetchOlderEvents')
}

539
bridge/bridgeApi.ts Normal file
View file

@ -0,0 +1,539 @@
import axios from 'axios'
import { debugBody, extractErrorDetail } from './debugUtils.js'
import {
BRIDGE_LOGIN_INSTRUCTION,
type BridgeApiClient,
type BridgeConfig,
type PermissionResponseEvent,
type WorkResponse,
} from './types.js'
/** Dependencies injected into createBridgeApiClient. */
type BridgeApiDeps = {
/** Origin prepended to every `/v1/...` request path. */
baseUrl: string
/** Returns the current OAuth access token; a falsy result makes OAuth-authenticated calls throw BRIDGE_LOGIN_INSTRUCTION. */
getAccessToken: () => string | undefined
/** Sent as the `x-environment-runner-version` request header. */
runnerVersion: string
/** Optional sink for debug log lines; when omitted the lines are dropped. */
onDebug?: (msg: string) => void
/**
* Called on 401 to attempt OAuth token refresh. Returns true if refreshed,
* in which case the request is retried once. Injected because
* handleOAuth401Error from utils/auth.ts transitively pulls in config.ts →
* file.ts → permissions/filesystem.ts → sessionStorage.ts → commands.ts
* (~1300 modules). Daemon callers using env-var tokens omit this — their
* tokens don't refresh, so 401 goes straight to BridgeFatalError.
*/
onAuth401?: (staleAccessToken: string) => Promise<boolean>
/**
* Returns the trusted device token to send as X-Trusted-Device-Token on
* bridge API calls. Bridge sessions have SecurityTier=ELEVATED on the
* server (CCR v2); when the server's enforcement flag is on,
* ConnectBridgeWorker requires a trusted device at JWT-issuance.
* Optional — when absent or returning undefined, the header is omitted
* and the server falls through to its flag-off/no-op path. The CLI-side
* gate is tengu_sessions_elevated_auth_enforcement (see trustedDevice.ts).
*/
getTrustedDeviceToken?: () => string | undefined
}
/** Value sent as the `anthropic-beta` header on every bridge API request. */
const BETA_HEADER = 'environments-2025-11-01'
/**
 * Allowlist for server-provided IDs that end up in URL path segments:
 * one or more ASCII letters, digits, underscores or hyphens.
 */
const SAFE_ID_PATTERN = /^[a-zA-Z0-9_-]+$/
/**
 * Check that a server-provided ID can be interpolated into a URL path.
 *
 * Rejecting everything outside the allowlist blocks path traversal
 * (e.g. `../../admin`) and injection through slashes, dots or other
 * special characters.
 *
 * @param id - The ID to check.
 * @param label - Name of the ID, used in the error message.
 * @returns `id` unchanged when it is safe.
 * @throws Error when `id` is empty or contains a character outside the allowlist.
 */
export function validateBridgeId(id: string, label: string): string {
  const isSafe = Boolean(id) && SAFE_ID_PATTERN.test(id)
  if (isSafe) {
    return id
  }
  throw new Error(`Invalid ${label}: contains unsafe characters`)
}
/**
* Fatal bridge errors that should not be retried (e.g. auth failures).
* Carries the HTTP status and, when available, the server's error type so
* callers can branch on them without parsing the message.
*/
export class BridgeFatalError extends Error {
/** HTTP status code associated with the failure. */
readonly status: number
/** Server-provided error type, e.g. "environment_expired". */
readonly errorType: string | undefined
/**
* @param message - Human-readable description of the failure.
* @param status - HTTP status code to expose as `status`.
* @param errorType - Optional error type to expose as `errorType`.
*/
constructor(message: string, status: number, errorType?: string) {
super(message)
// Override the default "Error" name so logs identify the error class.
this.name = 'BridgeFatalError'
this.status = status
this.errorType = errorType
}
}
/**
* Build a BridgeApiClient bound to the given dependencies.
*
* Two authentication styles are used by the returned methods:
* - registerBridgeEnvironment, stopWork, deregisterEnvironment,
*   archiveSession and reconnectSession send the OAuth access token from
*   `deps.getAccessToken()` and go through withOAuthRetry (one refresh +
*   retry on 401).
* - pollForWork, acknowledgeWork, heartbeatWork and
*   sendPermissionResponseEvent send the caller-supplied secret/session
*   token as the bearer credential and are not retried on 401.
*
* Every request uses `validateStatus: status < 500`, so 4xx responses are
* returned and converted to errors by handleErrorStatus, while 5xx responses
* and network failures reject from axios directly.
*/
export function createBridgeApiClient(deps: BridgeApiDeps): BridgeApiClient {
/** Forward a debug line to the optional deps.onDebug sink. */
function debug(msg: string): void {
deps.onDebug?.(msg)
}
// Streak of consecutive empty polls; used only to throttle the "no work"
// debug line in pollForWork.
let consecutiveEmptyPolls = 0
const EMPTY_POLL_LOG_INTERVAL = 100
/** Build the request headers for the given bearer credential. */
function getHeaders(accessToken: string): Record<string, string> {
const headers: Record<string, string> = {
Authorization: `Bearer ${accessToken}`,
'Content-Type': 'application/json',
'anthropic-version': '2023-06-01',
'anthropic-beta': BETA_HEADER,
'x-environment-runner-version': deps.runnerVersion,
}
// Only attach the trusted-device header when a token is available.
const deviceToken = deps.getTrustedDeviceToken?.()
if (deviceToken) {
headers['X-Trusted-Device-Token'] = deviceToken
}
return headers
}
/** Return the current OAuth access token or throw the login instruction. */
function resolveAuth(): string {
const accessToken = deps.getAccessToken()
if (!accessToken) {
throw new Error(BRIDGE_LOGIN_INSTRUCTION)
}
return accessToken
}
/**
* Execute an OAuth-authenticated request with a single retry on 401.
* On 401, attempts token refresh via handleOAuth401Error (same pattern as
* withRetry.ts for v1/messages). If refresh succeeds, retries the request
* once with the new token. If refresh fails or the retry also returns 401,
* the 401 response is returned for handleErrorStatus to throw BridgeFatalError.
*/
async function withOAuthRetry<T>(
fn: (accessToken: string) => Promise<{ status: number; data: T }>,
context: string,
): Promise<{ status: number; data: T }> {
const accessToken = resolveAuth()
const response = await fn(accessToken)
if (response.status !== 401) {
return response
}
if (!deps.onAuth401) {
debug(`[bridge:api] ${context}: 401 received, no refresh handler`)
return response
}
// Attempt token refresh — matches the pattern in withRetry.ts
debug(`[bridge:api] ${context}: 401 received, attempting token refresh`)
const refreshed = await deps.onAuth401(accessToken)
if (refreshed) {
debug(`[bridge:api] ${context}: Token refreshed, retrying request`)
// Re-read the token: the refresh handler is expected to have updated
// whatever deps.getAccessToken() reads from.
const newToken = resolveAuth()
const retryResponse = await fn(newToken)
if (retryResponse.status !== 401) {
return retryResponse
}
debug(`[bridge:api] ${context}: Retry after refresh also got 401`)
} else {
debug(`[bridge:api] ${context}: Token refresh failed`)
}
// Refresh failed — return 401 for handleErrorStatus to throw.
// Note: this is the FIRST 401 response, not the retry's.
return response
}
return {
/**
* Register (or re-register) this machine as a bridge environment.
* Returns the response body containing environment_id and
* environment_secret.
*/
async registerBridgeEnvironment(
config: BridgeConfig,
): Promise<{ environment_id: string; environment_secret: string }> {
debug(
`[bridge:api] POST /v1/environments/bridge bridgeId=${config.bridgeId}`,
)
const response = await withOAuthRetry(
(token: string) =>
axios.post<{
environment_id: string
environment_secret: string
}>(
`${deps.baseUrl}/v1/environments/bridge`,
{
machine_name: config.machineName,
directory: config.dir,
branch: config.branch,
git_repo_url: config.gitRepoUrl,
// Advertise session capacity so claude.ai/code can show
// "2/4 sessions" badges and only block the picker when
// actually at capacity. Backends that don't yet accept
// this field will silently ignore it.
max_sessions: config.maxSessions,
// worker_type lets claude.ai filter environments by origin
// (e.g. assistant picker only shows assistant-mode workers).
// Desktop cowork app sends "cowork"; we send a distinct value.
metadata: { worker_type: config.workerType },
// Idempotent re-registration: if we have a backend-issued
// environment_id from a prior session (--session-id resume),
// send it back so the backend reattaches instead of creating
// a new env. The backend may still hand back a fresh ID if
// the old one expired — callers must compare the response.
...(config.reuseEnvironmentId && {
environment_id: config.reuseEnvironmentId,
}),
},
{
headers: getHeaders(token),
timeout: 15_000,
validateStatus: status => status < 500,
},
),
'Registration',
)
handleErrorStatus(response.status, response.data, 'Registration')
debug(
`[bridge:api] POST /v1/environments/bridge -> ${response.status} environment_id=${response.data.environment_id}`,
)
// The logged request body below does not include the optional
// environment_id that may have been sent for re-registration.
debug(
`[bridge:api] >>> ${debugBody({ machine_name: config.machineName, directory: config.dir, branch: config.branch, git_repo_url: config.gitRepoUrl, max_sessions: config.maxSessions, metadata: { worker_type: config.workerType } })}`,
)
debug(`[bridge:api] <<< ${debugBody(response.data)}`)
return response.data
},
/**
* Poll the environment's work queue once, authenticating with the
* environment secret. Returns the work item, or null when the response
* body is empty.
*/
async pollForWork(
environmentId: string,
environmentSecret: string,
signal?: AbortSignal,
reclaimOlderThanMs?: number,
): Promise<WorkResponse | null> {
validateBridgeId(environmentId, 'environmentId')
// Save and reset so errors break the "consecutive empty" streak.
// Restored below when the response is truly empty.
const prevEmptyPolls = consecutiveEmptyPolls
consecutiveEmptyPolls = 0
const response = await axios.get<WorkResponse | null>(
`${deps.baseUrl}/v1/environments/${environmentId}/work/poll`,
{
headers: getHeaders(environmentSecret),
params:
reclaimOlderThanMs !== undefined
? { reclaim_older_than_ms: reclaimOlderThanMs }
: undefined,
timeout: 10_000,
signal,
validateStatus: status => status < 500,
},
)
handleErrorStatus(response.status, response.data, 'Poll')
// Empty body or null = no work available
if (!response.data) {
consecutiveEmptyPolls = prevEmptyPolls + 1
// Log the first empty poll of a streak, then every
// EMPTY_POLL_LOG_INTERVAL-th one, to keep the debug log readable.
if (
consecutiveEmptyPolls === 1 ||
consecutiveEmptyPolls % EMPTY_POLL_LOG_INTERVAL === 0
) {
debug(
`[bridge:api] GET .../work/poll -> ${response.status} (no work, ${consecutiveEmptyPolls} consecutive empty polls)`,
)
}
return null
}
debug(
`[bridge:api] GET .../work/poll -> ${response.status} workId=${response.data.id} type=${response.data.data?.type}${response.data.data?.id ? ` sessionId=${response.data.data.id}` : ''}`,
)
debug(`[bridge:api] <<< ${debugBody(response.data)}`)
return response.data
},
/** Acknowledge a work item, authenticating with the session token. */
async acknowledgeWork(
environmentId: string,
workId: string,
sessionToken: string,
): Promise<void> {
validateBridgeId(environmentId, 'environmentId')
validateBridgeId(workId, 'workId')
debug(`[bridge:api] POST .../work/${workId}/ack`)
const response = await axios.post(
`${deps.baseUrl}/v1/environments/${environmentId}/work/${workId}/ack`,
{},
{
headers: getHeaders(sessionToken),
timeout: 10_000,
validateStatus: s => s < 500,
},
)
handleErrorStatus(response.status, response.data, 'Acknowledge')
debug(`[bridge:api] POST .../work/${workId}/ack -> ${response.status}`)
},
/** Ask the server to stop a work item; `force` is forwarded in the body. */
async stopWork(
environmentId: string,
workId: string,
force: boolean,
): Promise<void> {
validateBridgeId(environmentId, 'environmentId')
validateBridgeId(workId, 'workId')
debug(`[bridge:api] POST .../work/${workId}/stop force=${force}`)
const response = await withOAuthRetry(
(token: string) =>
axios.post(
`${deps.baseUrl}/v1/environments/${environmentId}/work/${workId}/stop`,
{ force },
{
headers: getHeaders(token),
timeout: 10_000,
validateStatus: s => s < 500,
},
),
'StopWork',
)
handleErrorStatus(response.status, response.data, 'StopWork')
debug(`[bridge:api] POST .../work/${workId}/stop -> ${response.status}`)
},
/** Delete the bridge environment registration. */
async deregisterEnvironment(environmentId: string): Promise<void> {
validateBridgeId(environmentId, 'environmentId')
debug(`[bridge:api] DELETE /v1/environments/bridge/${environmentId}`)
const response = await withOAuthRetry(
(token: string) =>
axios.delete(
`${deps.baseUrl}/v1/environments/bridge/${environmentId}`,
{
headers: getHeaders(token),
timeout: 10_000,
validateStatus: s => s < 500,
},
),
'Deregister',
)
handleErrorStatus(response.status, response.data, 'Deregister')
debug(
`[bridge:api] DELETE /v1/environments/bridge/${environmentId} -> ${response.status}`,
)
},
/** Archive a session; a 409 response is treated as success. */
async archiveSession(sessionId: string): Promise<void> {
validateBridgeId(sessionId, 'sessionId')
debug(`[bridge:api] POST /v1/sessions/${sessionId}/archive`)
const response = await withOAuthRetry(
(token: string) =>
axios.post(
`${deps.baseUrl}/v1/sessions/${sessionId}/archive`,
{},
{
headers: getHeaders(token),
timeout: 10_000,
validateStatus: s => s < 500,
},
),
'ArchiveSession',
)
// 409 = already archived (idempotent, not an error)
if (response.status === 409) {
debug(
`[bridge:api] POST /v1/sessions/${sessionId}/archive -> 409 (already archived)`,
)
return
}
handleErrorStatus(response.status, response.data, 'ArchiveSession')
debug(
`[bridge:api] POST /v1/sessions/${sessionId}/archive -> ${response.status}`,
)
},
/** Request a reconnect of `sessionId` on the given environment. */
async reconnectSession(
environmentId: string,
sessionId: string,
): Promise<void> {
validateBridgeId(environmentId, 'environmentId')
validateBridgeId(sessionId, 'sessionId')
debug(
`[bridge:api] POST /v1/environments/${environmentId}/bridge/reconnect session_id=${sessionId}`,
)
const response = await withOAuthRetry(
(token: string) =>
axios.post(
`${deps.baseUrl}/v1/environments/${environmentId}/bridge/reconnect`,
{ session_id: sessionId },
{
headers: getHeaders(token),
timeout: 10_000,
validateStatus: s => s < 500,
},
),
'ReconnectSession',
)
handleErrorStatus(response.status, response.data, 'ReconnectSession')
debug(`[bridge:api] POST .../bridge/reconnect -> ${response.status}`)
},
/**
* Send a heartbeat for a work item, authenticating with the session
* token. Returns the response body (lease_extended, state, ...).
*/
async heartbeatWork(
environmentId: string,
workId: string,
sessionToken: string,
): Promise<{ lease_extended: boolean; state: string }> {
validateBridgeId(environmentId, 'environmentId')
validateBridgeId(workId, 'workId')
debug(`[bridge:api] POST .../work/${workId}/heartbeat`)
const response = await axios.post<{
lease_extended: boolean
state: string
last_heartbeat: string
ttl_seconds: number
}>(
`${deps.baseUrl}/v1/environments/${environmentId}/work/${workId}/heartbeat`,
{},
{
headers: getHeaders(sessionToken),
timeout: 10_000,
validateStatus: s => s < 500,
},
)
handleErrorStatus(response.status, response.data, 'Heartbeat')
debug(
`[bridge:api] POST .../work/${workId}/heartbeat -> ${response.status} lease_extended=${response.data.lease_extended} state=${response.data.state}`,
)
return response.data
},
/**
* Post a single permission-response event to the session's events
* endpoint, authenticating with the session token.
*/
async sendPermissionResponseEvent(
sessionId: string,
event: PermissionResponseEvent,
sessionToken: string,
): Promise<void> {
validateBridgeId(sessionId, 'sessionId')
debug(
`[bridge:api] POST /v1/sessions/${sessionId}/events type=${event.type}`,
)
const response = await axios.post(
`${deps.baseUrl}/v1/sessions/${sessionId}/events`,
{ events: [event] },
{
headers: getHeaders(sessionToken),
timeout: 10_000,
validateStatus: s => s < 500,
},
)
handleErrorStatus(
response.status,
response.data,
'SendPermissionResponseEvent',
)
debug(
`[bridge:api] POST /v1/sessions/${sessionId}/events -> ${response.status}`,
)
debug(`[bridge:api] >>> ${debugBody({ events: [event] })}`)
debug(`[bridge:api] <<< ${debugBody(response.data)}`)
},
}
}
/**
 * Convert a non-success HTTP status into a thrown error.
 *
 * Any 2xx status is treated as success, so e.g. a `201 Created` answer to a
 * POST is not misreported as a failure.
 *
 * @param status - HTTP status code of the response.
 * @param data - Response body; used to extract an error detail and error type.
 * @param context - Operation name used as the prefix of error messages.
 * @throws BridgeFatalError for 401, 403, 404 and 410 (not retryable).
 * @throws Error for 429 and every other non-2xx status.
 */
function handleErrorStatus(
  status: number,
  data: unknown,
  context: string,
): void {
  if (status >= 200 && status < 300) {
    return
  }
  const detail = extractErrorDetail(data)
  const errorType = extractErrorTypeFromData(data)
  switch (status) {
    case 401:
      throw new BridgeFatalError(
        `${context}: Authentication failed (401)${detail ? `: ${detail}` : ''}. ${BRIDGE_LOGIN_INSTRUCTION}`,
        401,
        errorType,
      )
    case 403:
      // An "expired"-style error type gets the restart hint instead of the
      // generic permissions message.
      throw new BridgeFatalError(
        isExpiredErrorType(errorType)
          ? 'Remote Control session has expired. Please restart with `claude remote-control` or /remote-control.'
          : `${context}: Access denied (403)${detail ? `: ${detail}` : ''}. Check your organization permissions.`,
        403,
        errorType,
      )
    case 404:
      throw new BridgeFatalError(
        detail ??
          `${context}: Not found (404). Remote Control may not be available for this organization.`,
        404,
        errorType,
      )
    case 410:
      // 410 without a server error type is reported as environment_expired.
      throw new BridgeFatalError(
        detail ??
          'Remote Control session has expired. Please restart with `claude remote-control` or /remote-control.',
        410,
        errorType ?? 'environment_expired',
      )
    case 429:
      throw new Error(`${context}: Rate limited (429). Polling too frequently.`)
    default:
      throw new Error(
        `${context}: Failed with status ${status}${detail ? `: ${detail}` : ''}`,
      )
  }
}
/**
 * Tell whether an error type string denotes a session/environment expiry.
 *
 * @param errorType - Error type to inspect; may be undefined.
 * @returns true when the string contains "expired" or "lifetime"; false for
 *   an undefined or empty value.
 */
export function isExpiredErrorType(errorType: string | undefined): boolean {
  return errorType
    ? errorType.includes('expired') || errorType.includes('lifetime')
    : false
}
/**
 * Decide whether a BridgeFatalError is a 403 permission error that can be
 * hidden from the user.
 *
 * These are 403s for scopes like 'external_poll_sessions', or for operations
 * like StopWork that fail because the user's role lacks
 * 'environments:manage'. They don't affect core functionality and shouldn't
 * be shown to users.
 *
 * @returns true when the status is 403 and the message names one of those scopes.
 */
export function isSuppressible403(err: BridgeFatalError): boolean {
  if (err.status === 403) {
    const { message } = err
    return (
      message.includes('external_poll_sessions') ||
      message.includes('environments:manage')
    )
  }
  return false
}
/**
 * Read `data.error.type` from an untyped response body.
 *
 * @param data - Response body of unknown shape.
 * @returns The type string, or undefined when `data` or `data.error` is not a
 *   non-null object, or when `type` is missing or not a string.
 */
function extractErrorTypeFromData(data: unknown): string | undefined {
  if (!data || typeof data !== 'object' || !('error' in data)) {
    return undefined
  }
  const inner = data.error
  if (!inner || typeof inner !== 'object' || !('type' in inner)) {
    return undefined
  }
  const kind = inner.type
  return typeof kind === 'string' ? kind : undefined
}

48
bridge/bridgeConfig.ts Normal file
View file

@ -0,0 +1,48 @@
/**
* Shared bridge auth/URL resolution. Consolidates the ant-only
* CLAUDE_BRIDGE_* dev overrides that were previously copy-pasted across
* a dozen files — inboundAttachments, BriefTool/upload, bridgeMain,
* initReplBridge, remoteBridgeCore, daemon workers, /rename,
* /remote-control.
*
* Two layers: *Override() returns the ant-only env var (or undefined);
* the non-Override versions fall through to the real OAuth store/config.
* Callers that compose with a different auth source (e.g. daemon workers
* using IPC auth) use the Override getters directly.
*/
import { getOauthConfig } from '../constants/oauth.js'
import { getClaudeAIOAuthTokens } from '../utils/auth.js'
/**
 * Ant-only dev override for the bridge OAuth token.
 *
 * @returns CLAUDE_BRIDGE_OAUTH_TOKEN when USER_TYPE is 'ant' and the variable
 *   is set to a non-empty value; otherwise undefined.
 */
export function getBridgeTokenOverride(): string | undefined {
  return process.env.USER_TYPE === 'ant'
    ? process.env.CLAUDE_BRIDGE_OAUTH_TOKEN || undefined
    : undefined
}
/**
 * Ant-only dev override for the bridge base URL.
 *
 * @returns CLAUDE_BRIDGE_BASE_URL when USER_TYPE is 'ant' and the variable is
 *   set to a non-empty value; otherwise undefined.
 */
export function getBridgeBaseUrlOverride(): string | undefined {
  return process.env.USER_TYPE === 'ant'
    ? process.env.CLAUDE_BRIDGE_BASE_URL || undefined
    : undefined
}
/**
 * Access token for bridge API calls. The dev override wins; otherwise the
 * token comes from getClaudeAIOAuthTokens(). Undefined means "not logged in".
 */
export function getBridgeAccessToken(): string | undefined {
  const override = getBridgeTokenOverride()
  if (override !== undefined) {
    return override
  }
  return getClaudeAIOAuthTokens()?.accessToken
}
/**
 * Base URL for bridge API calls. The dev override wins; otherwise the
 * BASE_API_URL from the OAuth config is used. Always returns a URL.
 */
export function getBridgeBaseUrl(): string {
  const override = getBridgeBaseUrlOverride()
  return override !== undefined ? override : getOauthConfig().BASE_API_URL
}

135
bridge/bridgeDebug.ts Normal file
View file

@ -0,0 +1,135 @@
import { logForDebugging } from '../utils/debug.js'
import { BridgeFatalError } from './bridgeApi.js'
import type { BridgeApiClient } from './types.js'
/**
* Ant-only fault injection for manually testing bridge recovery paths.
*
* Real failure modes this targets (BQ 2026-03-12, 7-day window):
*   poll 404 not_found_error    — 147K sessions/week, dead onEnvironmentLost gate
*   ws_closed 1002/1006         — 22K sessions/week, zombie poll after close
*   register transient failure  — residual: network blips during doReconnect
*
* Usage: /bridge-kick <subcommand> from the REPL while Remote Control is
* connected, then tail debug.log to watch the recovery machinery react.
*
* Module-level state is intentional here: one bridge per REPL process, the
* /bridge-kick slash command has no other way to reach into initBridgeCore's
* closures, and teardown clears the slot.
*/
/** One-shot fault to inject on the next matching api call. */
type BridgeFault = {
/** Name of the BridgeApiClient method whose call should fail. */
method:
| 'pollForWork'
| 'registerBridgeEnvironment'
| 'reconnectSession'
| 'heartbeatWork'
/** Fatal errors go through handleErrorStatus → BridgeFatalError. Transient
* errors surface as plain axios rejections (5xx / network). Recovery code
* distinguishes the two: fatal → teardown, transient → retry/backoff. */
kind: 'fatal' | 'transient'
/** Status used for the injected error: set as BridgeFatalError.status for
* fatal faults, only embedded in the message for transient ones. */
status: number
/** Optional error type passed to BridgeFatalError for fatal faults. */
errorType?: string
/** Remaining injections. Decremented on consume; removed at 0. */
count: number
}
/** Hooks a running bridge registers so debug tooling can reach into it. */
export type BridgeDebugHandle = {
/** Invoke the transport's permanent-close handler directly. Tests the
* ws_closed → reconnectEnvironmentWithSession escalation (#22148). */
fireClose: (code: number) => void
/** Call reconnectEnvironmentWithSession() — same as SIGUSR2 but
* reachable from the slash command. */
forceReconnect: () => void
/** Queue a fault for the next N calls to the named api method. */
injectFault: (fault: BridgeFault) => void
/** Abort the at-capacity sleep so an injected poll fault lands
* immediately instead of up to 10min later. */
wakePollLoop: () => void
/** env/session IDs for the debug.log grep. */
describe: () => string
}
/** Currently registered debug handle, or null when none is registered. */
let debugHandle: BridgeDebugHandle | null = null
/** Pending faults, in the order they were queued. */
const faultQueue: BridgeFault[] = []
/** Store `h` as the active debug handle, replacing any previous one. */
export function registerBridgeDebugHandle(h: BridgeDebugHandle): void {
  debugHandle = h
}
/** Drop the active debug handle and discard every queued fault. */
export function clearBridgeDebugHandle(): void {
  debugHandle = null
  // Empty the array in place: other functions hold a reference to it.
  faultQueue.splice(0, faultQueue.length)
}
/** Return the active debug handle, or null when none is registered. */
export function getBridgeDebugHandle(): BridgeDebugHandle | null {
  return debugHandle
}
/**
 * Queue a fault for later injection into the wrapped api client.
 *
 * A copy of `fault` is queued, so the caller's object is not mutated when the
 * queue decrements `count`. A fault whose `count` is not positive is ignored
 * (and logged): it has no remaining injections.
 *
 * @param fault - Description of the failure to inject and how many times.
 */
export function injectBridgeFault(fault: BridgeFault): void {
  // `!(x > 0)` also rejects NaN, which would otherwise never reach zero.
  if (!(fault.count > 0)) {
    logForDebugging(
      `[bridge:debug] Ignored fault with non-positive count: ${fault.method} ${fault.kind}/${fault.status} ×${fault.count}`,
    )
    return
  }
  faultQueue.push({ ...fault })
  logForDebugging(
    `[bridge:debug] Queued fault: ${fault.method} ${fault.kind}/${fault.status}${fault.errorType ? `/${fault.errorType}` : ''} ×${fault.count}`,
  )
}
/**
 * Decorate a BridgeApiClient so that pollForWork, registerBridgeEnvironment,
 * reconnectSession and heartbeatWork consult the fault queue before calling
 * through. When a fault for the method is queued, the call rejects with the
 * described error and the real client is not invoked. All other members are
 * copied from the real client unchanged.
 *
 * Only called when USER_TYPE === 'ant' — zero overhead in external builds.
 */
export function wrapApiForFaultInjection(
  api: BridgeApiClient,
): BridgeApiClient {
  /** Take one injection from the first queued fault for `method`, if any. */
  function consume(method: BridgeFault['method']): BridgeFault | null {
    for (let i = 0; i < faultQueue.length; i++) {
      const queued = faultQueue[i]!
      if (queued.method !== method) continue
      queued.count -= 1
      // Exhausted faults leave the queue.
      if (queued.count <= 0) faultQueue.splice(i, 1)
      return queued
    }
    return null
  }
  /** Log the injection, then throw the error the fault describes. */
  function throwFault(fault: BridgeFault, context: string): never {
    logForDebugging(
      `[bridge:debug] Injecting ${fault.kind} fault into ${context}: status=${fault.status} errorType=${fault.errorType ?? 'none'}`,
    )
    if (fault.kind !== 'fatal') {
      // Transient: mimic an axios rejection (5xx / network). No .status on
      // the error itself — that's how the catch blocks distinguish.
      throw new Error(`[injected transient] ${context} ${fault.status}`)
    }
    throw new BridgeFatalError(
      `[injected] ${context} ${fault.status}`,
      fault.status,
      fault.errorType,
    )
  }
  /** Throw the queued fault for `method`, or return normally if none. */
  function injectIfQueued(method: BridgeFault['method'], context: string): void {
    const pending = consume(method)
    if (pending !== null) throwFault(pending, context)
  }
  return {
    ...api,
    async pollForWork(envId, secret, signal, reclaimMs) {
      injectIfQueued('pollForWork', 'Poll')
      return api.pollForWork(envId, secret, signal, reclaimMs)
    },
    async registerBridgeEnvironment(config) {
      injectIfQueued('registerBridgeEnvironment', 'Registration')
      return api.registerBridgeEnvironment(config)
    },
    async reconnectSession(envId, sessionId) {
      injectIfQueued('reconnectSession', 'ReconnectSession')
      return api.reconnectSession(envId, sessionId)
    },
    async heartbeatWork(envId, workId, token) {
      injectIfQueued('heartbeatWork', 'Heartbeat')
      return api.heartbeatWork(envId, workId, token)
    },
  }
}

202
bridge/bridgeEnabled.ts Normal file
View file

@ -0,0 +1,202 @@
import { feature } from 'bun:bundle'
import {
checkGate_CACHED_OR_BLOCKING,
getDynamicConfig_CACHED_MAY_BE_STALE,
getFeatureValue_CACHED_MAY_BE_STALE,
} from '../services/analytics/growthbook.js'
// Namespace import breaks the bridgeEnabled → auth → config → bridgeEnabled
// cycle — authModule.foo is a live binding, so by the time the helpers below
// call it, auth.js is fully loaded. Previously used require() for the same
// deferral, but require() hits a CJS cache that diverges from the ESM
// namespace after mock.module() (daemon/auth.test.ts), breaking spyOn.
import * as authModule from '../utils/auth.js'
import { isEnvTruthy } from '../utils/envUtils.js'
import { lt } from '../utils/semver.js'
/**
* Runtime check for bridge mode entitlement.
*
* Remote Control requires a claude.ai subscription (the bridge auths to CCR
* with the claude.ai OAuth token). isClaudeAISubscriber() excludes
* Bedrock/Vertex/Foundry, apiKeyHelper/gateway deployments, env-var API keys,
* and Console API logins — none of which have the OAuth token CCR needs.
* See github.com/deshaw/anthropic-issues/issues/24.
*
* The `feature('BRIDGE_MODE')` guard ensures the GrowthBook string literal
* is only referenced when bridge mode is enabled at build time.
*
* @returns true only when BRIDGE_MODE is built in, the user is a claude.ai
*   subscriber, and the cached `tengu_ccr_bridge` value is on (default false).
*/
export function isBridgeEnabled(): boolean {
// Positive ternary pattern — see docs/feature-gating.md.
// Negative pattern (if (!feature(...)) return) does not eliminate
// inline string literals from external builds.
return feature('BRIDGE_MODE')
? isClaudeAISubscriber() &&
getFeatureValue_CACHED_MAY_BE_STALE('tengu_ccr_bridge', false)
: false
}
/**
* Blocking entitlement check for Remote Control.
*
* Returns cached `true` immediately (fast path). If the disk cache says
* `false` or is missing, awaits GrowthBook init and fetches the fresh
* server value (slow path, max ~5s), then writes it to disk.
*
* Use at entitlement gates where a stale `false` would unfairly block access.
* For user-facing error paths, prefer `getBridgeDisabledReason()` which gives
* a specific diagnostic. For render-body UI visibility checks, use
* `isBridgeEnabled()` instead.
*
* @returns false without consulting the gate when BRIDGE_MODE is not built in
*   or the user is not a claude.ai subscriber; otherwise the gate's value.
*/
export async function isBridgeEnabledBlocking(): Promise<boolean> {
// Positive ternary keeps the gate name out of builds without BRIDGE_MODE.
return feature('BRIDGE_MODE')
? isClaudeAISubscriber() &&
(await checkGate_CACHED_OR_BLOCKING('tengu_ccr_bridge'))
: false
}
/**
* Diagnostic message for why Remote Control is unavailable, or null if
* it's enabled. Call this instead of a bare `isBridgeEnabledBlocking()`
* check when you need to show the user an actionable error.
*
* The GrowthBook gate targets on organizationUUID, which comes from
* config.oauthAccount — populated by /api/oauth/profile during login.
* That endpoint requires the user:profile scope. Tokens without it
* (setup-token, CLAUDE_CODE_OAUTH_TOKEN env var, or pre-scope-expansion
* logins) leave oauthAccount unpopulated, so the gate falls back to
* false and users see a dead-end "not enabled" message with no hint
* that re-login would fix it. See CC-1165 / gh-33105.
*
* Checks run in order and the first failing one determines the message:
* subscription, profile scope, organization UUID, then the GrowthBook gate.
*
* @returns A user-facing explanation, or null when Remote Control is enabled.
*/
export async function getBridgeDisabledReason(): Promise<string | null> {
if (feature('BRIDGE_MODE')) {
if (!isClaudeAISubscriber()) {
return 'Remote Control requires a claude.ai subscription. Run `claude auth login` to sign in with your claude.ai account.'
}
if (!hasProfileScope()) {
return 'Remote Control requires a full-scope login token. Long-lived tokens (from `claude setup-token` or CLAUDE_CODE_OAUTH_TOKEN) are limited to inference-only for security reasons. Run `claude auth login` to use Remote Control.'
}
if (!getOauthAccountInfo()?.organizationUuid) {
return 'Unable to determine your organization for Remote Control eligibility. Run `claude auth login` to refresh your account information.'
}
// Only reached once the local preconditions hold, so a false gate here
// means the account itself is not enabled.
if (!(await checkGate_CACHED_OR_BLOCKING('tengu_ccr_bridge'))) {
return 'Remote Control is not yet enabled for your account.'
}
return null
}
// BRIDGE_MODE not compiled into this build.
return 'Remote Control is not available in this build.'
}
// Why the try/catch: main.tsx:5698 calls isBridgeEnabled() while defining the
// Commander program, before enableConfigs() runs. At that point
// isClaudeAISubscriber() → getGlobalConfig() throws "Config accessed before
// allowed". Pre-config, no OAuth token can exist anyway — false is correct.
// getFeatureValue_CACHED_MAY_BE_STALE swallows the same error at
// growthbook.ts:775-780.
function isClaudeAISubscriber(): boolean {
  let subscribed = false
  try {
    subscribed = authModule.isClaudeAISubscriber()
  } catch {
    // Any failure is reported as "not a subscriber".
  }
  return subscribed
}
/**
 * Exception-safe wrapper around authModule.hasProfileScope().
 * @returns The wrapped call's result, or false when it throws.
 */
function hasProfileScope(): boolean {
  let scoped = false
  try {
    scoped = authModule.hasProfileScope()
  } catch {
    // Any failure is reported as "no profile scope".
  }
  return scoped
}
/**
 * Exception-safe wrapper around authModule.getOauthAccountInfo().
 * @returns The wrapped call's result, or undefined when it throws.
 */
function getOauthAccountInfo(): ReturnType<
  typeof authModule.getOauthAccountInfo
> {
  let account: ReturnType<typeof authModule.getOauthAccountInfo> = undefined
  try {
    account = authModule.getOauthAccountInfo()
  } catch {
    // Any failure is reported as "no account info".
  }
  return account
}
/**
* Runtime check for the env-less (v2) REPL bridge path.
* Returns true when the GrowthBook flag `tengu_bridge_repl_v2` is enabled.
*
* This gates which implementation initReplBridge uses — NOT whether bridge
* is available at all (see isBridgeEnabled above). Daemon/print paths stay
* on the env-based implementation regardless of this gate.
*
* @returns The cached flag value (default false); always false when
*   BRIDGE_MODE is not built in.
*/
export function isEnvLessBridgeEnabled(): boolean {
// Positive ternary keeps the flag name out of builds without BRIDGE_MODE.
return feature('BRIDGE_MODE')
? getFeatureValue_CACHED_MAY_BE_STALE('tengu_bridge_repl_v2', false)
: false
}
/**
* Kill-switch for the `cse_*` → `session_*` client-side retag shim.
*
* The shim exists because compat/convert.go:27 validates TagSession and the
* claude.ai frontend routes on `session_*`, while v2 worker endpoints hand out
* `cse_*`. Once the server tags by environment_kind and the frontend accepts
* `cse_*` directly, flip this to false to make toCompatSessionId a no-op.
* Defaults to true — the shim stays active until explicitly disabled.
*
* @returns The cached flag value (default true); also true when BRIDGE_MODE
*   is not built in.
*/
export function isCseShimEnabled(): boolean {
// Unlike the other gates in this file, the fallback branch is `true`.
return feature('BRIDGE_MODE')
? getFeatureValue_CACHED_MAY_BE_STALE(
'tengu_bridge_repl_v2_cse_shim_enabled',
true,
)
: true
}
/**
* Returns an error message if the current CLI version is below the
* minimum required for the v1 (env-based) Remote Control path, or null if the
* version is fine. The v2 (env-less) path uses checkEnvLessBridgeMinVersion()
* in envLessBridgeConfig.ts instead — the two implementations have independent
* version floors.
*
* Uses cached (non-blocking) GrowthBook config. If GrowthBook hasn't
* loaded yet, the default '0.0.0' means the check passes — a safe fallback.
*
* @returns A user-facing upgrade message, or null when the version is
*   acceptable or BRIDGE_MODE is not built in.
*/
export function checkBridgeMinVersion(): string | null {
// Positive pattern — see docs/feature-gating.md.
// Negative pattern (if (!feature(...)) return) does not eliminate
// inline string literals from external builds.
if (feature('BRIDGE_MODE')) {
const config = getDynamicConfig_CACHED_MAY_BE_STALE<{
minVersion: string
}>('tengu_bridge_min_version', { minVersion: '0.0.0' })
// An empty/missing minVersion disables the check.
if (config.minVersion && lt(MACRO.VERSION, config.minVersion)) {
return `Your version of Claude Code (${MACRO.VERSION}) is too old for Remote Control.\nVersion ${config.minVersion} or higher is required. Run \`claude update\` to update.`
}
}
return null
}
/**
* Default for remoteControlAtStartup when the user hasn't explicitly set it.
* When the CCR_AUTO_CONNECT build flag is present (ant-only) and the
* tengu_cobalt_harbor GrowthBook gate is on, all sessions connect to CCR by
* default — the user can still opt out by setting remoteControlAtStartup=false
* in config (explicit settings always win over this default).
*
* Defined here rather than in config.ts to avoid a direct
* config.ts → growthbook.ts import cycle (growthbook.ts → user.ts → config.ts).
*
* @returns The cached gate value (default false); always false when
*   CCR_AUTO_CONNECT is not built in.
*/
export function getCcrAutoConnectDefault(): boolean {
// Positive ternary keeps the gate name out of builds without the flag.
return feature('CCR_AUTO_CONNECT')
? getFeatureValue_CACHED_MAY_BE_STALE('tengu_cobalt_harbor', false)
: false
}
/**
* Opt-in CCR mirror mode — every local session spawns an outbound-only
* Remote Control session that receives forwarded events. Separate from
* getCcrAutoConnectDefault (bidirectional Remote Control). Env var wins for
* local opt-in; GrowthBook controls rollout.
*
* @returns true when CCR_MIRROR is built in and either
*   CLAUDE_CODE_CCR_MIRROR is truthy or the cached `tengu_ccr_mirror` value
*   is on (default false).
*/
export function isCcrMirrorEnabled(): boolean {
// The env var is checked first, so a truthy value skips the flag lookup.
return feature('CCR_MIRROR')
? isEnvTruthy(process.env.CLAUDE_CODE_CCR_MIRROR) ||
getFeatureValue_CACHED_MAY_BE_STALE('tengu_ccr_mirror', false)
: false
}

2999
bridge/bridgeMain.ts Normal file

File diff suppressed because it is too large Load diff

461
bridge/bridgeMessaging.ts Normal file
View file

@ -0,0 +1,461 @@
/**
* Shared transport-layer helpers for bridge message handling.
*
* Extracted from replBridge.ts so both the env-based core (initBridgeCore)
* and the env-less core (initEnvLessBridgeCore) can use the same ingress
* parsing, control-request handling, and echo-dedup machinery.
*
* Everything here is pure — no closure over bridge-specific state. All
* collaborators (transport, sessionId, UUID sets, callbacks) are passed
* as params.
*/
import { randomUUID } from 'crypto'
import type { SDKMessage } from '../entrypoints/agentSdkTypes.js'
import type {
SDKControlRequest,
SDKControlResponse,
} from '../entrypoints/sdk/controlTypes.js'
import type { SDKResultSuccess } from '../entrypoints/sdk/coreTypes.js'
import { logEvent } from '../services/analytics/index.js'
import { EMPTY_USAGE } from '../services/api/emptyUsage.js'
import type { Message } from '../types/message.js'
import { normalizeControlMessageKeys } from '../utils/controlMessageCompat.js'
import { logForDebugging } from '../utils/debug.js'
import { stripDisplayTagsAllowEmpty } from '../utils/displayTags.js'
import { errorMessage } from '../utils/errors.js'
import type { PermissionMode } from '../utils/permissions/PermissionMode.js'
import { jsonParse } from '../utils/slowOperations.js'
import type { ReplBridgeTransport } from './replBridgeTransport.js'
// ─── Type guards ─────────────────────────────────────────────────────────────
/**
 * Type predicate for parsed WebSocket messages.
 *
 * SDKMessage is a discriminated union on `type`, so this only verifies that
 * the value is a non-null object carrying a string `type`; callers narrow
 * further through the union.
 *
 * @param value - Arbitrary parsed payload.
 * @returns true when `value` is an object with a string `type` property.
 */
export function isSDKMessage(value: unknown): value is SDKMessage {
  if (typeof value !== 'object' || value === null) return false
  if (!('type' in value)) return false
  return typeof value.type === 'string'
}
/**
 * Type predicate for control_response messages from the server.
 *
 * Besides the `type` discriminant, `response` must be a non-null object.
 * Consumers read fields off `response`, so accepting `null` or a primitive
 * here would make the predicate claim a shape the value does not have.
 *
 * @param value - Arbitrary parsed payload.
 * @returns true when `value` is `{ type: 'control_response', response: {...} }`.
 */
export function isSDKControlResponse(
  value: unknown,
): value is SDKControlResponse {
  if (typeof value !== 'object' || value === null) return false
  if (!('type' in value) || value.type !== 'control_response') return false
  if (!('response' in value)) return false
  return typeof value.response === 'object' && value.response !== null
}
/**
 * Type predicate for control_request messages from the server.
 *
 * Requires the `type` discriminant, a `request_id` key, and a `request` that
 * is a non-null object. The ingress router reads `request.subtype` right
 * after this guard passes, so a `null` or primitive `request` must be
 * rejected here rather than surfacing as a TypeError later.
 *
 * @param value - Arbitrary parsed payload.
 * @returns true when `value` has the control_request envelope shape.
 */
export function isSDKControlRequest(
  value: unknown,
): value is SDKControlRequest {
  if (typeof value !== 'object' || value === null) return false
  if (!('type' in value) || value.type !== 'control_request') return false
  if (!('request_id' in value) || !('request' in value)) return false
  return typeof value.request === 'object' && value.request !== null
}
/**
 * True for message types that should be forwarded to the bridge transport.
 * The server only wants user/assistant turns and slash-command system events;
 * everything else (tool_result, progress, etc.) is internal REPL chatter.
 *
 * @param m - Message from the local transcript.
 * @returns true when the message should be forwarded.
 */
export function isEligibleBridgeMessage(m: Message): boolean {
  switch (m.type) {
    case 'user':
    case 'assistant':
      // Virtual messages (REPL inner calls) are display-only — bridge/SDK
      // consumers see the REPL tool_use/result which summarizes the work.
      return !m.isVirtual
    case 'system':
      return m.subtype === 'local_command'
    default:
      return false
  }
}
/**
 * Extract title-worthy text from a Message for onUserMessage. Returns
 * undefined for messages that shouldn't title the session: non-user, meta
 * (nudges), tool results, compact summaries, non-human origins (task
 * notifications, channel messages), or pure display-tag content
 * (<ide_opened_file>, <session-start-hook>, etc.).
 *
 * Synthetic interrupts ([Request interrupted by user]) are NOT filtered here —
 * isSyntheticMessage lives in messages.ts (heavy import, pulls command
 * registry). The initialMessages path in initReplBridge checks it; the
 * writeMessages path reaching an interrupt as the *first* message is
 * implausible (an interrupt implies a prior prompt already flowed through).
 *
 * @param m - Candidate message.
 * @returns The display-tag-stripped text, or undefined when nothing usable remains.
 */
export function extractTitleText(m: Message): string | undefined {
  if (m.type !== 'user') return undefined
  if (m.isMeta || m.toolUseResult || m.isCompactSummary) return undefined
  if (m.origin && m.origin.kind !== 'human') return undefined

  const content = m.message.content
  let raw: string | undefined
  if (typeof content === 'string') {
    raw = content
  } else {
    // Only the first text block is considered; later blocks are ignored.
    for (const block of content) {
      if (block.type !== 'text') continue
      raw = block.text
      break
    }
  }
  if (!raw) return undefined

  const stripped = stripDisplayTagsAllowEmpty(raw)
  return stripped ? stripped : undefined
}
// ─── Ingress routing ─────────────────────────────────────────────────────────
/**
 * Parse an ingress WebSocket message and route it to the appropriate handler.
 * Ignores messages whose UUID is in recentPostedUUIDs (echoes of what we sent)
 * or in recentInboundUUIDs (re-deliveries we've already forwarded — e.g.
 * server replayed history after a transport swap lost the seq-num cursor).
 *
 * Never throws: parse failures and synchronous handler errors are logged via
 * the surrounding try/catch, and a rejected promise from an async
 * onInboundMessage is caught and logged instead of becoming an unhandled
 * rejection.
 *
 * @param data - Raw JSON text of one ingress frame.
 * @param recentPostedUUIDs - UUIDs of messages this side recently sent.
 * @param recentInboundUUIDs - UUIDs of inbound user messages already forwarded;
 *   updated in place when a new user message is accepted.
 * @param onInboundMessage - Receives accepted `user` messages; may be async.
 * @param onPermissionResponse - Receives control_response payloads.
 * @param onControlRequest - Receives control_request payloads.
 */
export function handleIngressMessage(
  data: string,
  recentPostedUUIDs: BoundedUUIDSet,
  recentInboundUUIDs: BoundedUUIDSet,
  onInboundMessage: ((msg: SDKMessage) => void | Promise<void>) | undefined,
  onPermissionResponse?: ((response: SDKControlResponse) => void) | undefined,
  onControlRequest?: ((request: SDKControlRequest) => void) | undefined,
): void {
  try {
    const parsed: unknown = normalizeControlMessageKeys(jsonParse(data))

    // control_response is not an SDKMessage — check before the type guard
    if (isSDKControlResponse(parsed)) {
      logForDebugging('[bridge:repl] Ingress message type=control_response')
      onPermissionResponse?.(parsed)
      return
    }

    // control_request from the server (initialize, set_model, can_use_tool).
    // Must respond promptly or the server kills the WS (~10-14s timeout).
    if (isSDKControlRequest(parsed)) {
      logForDebugging(
        `[bridge:repl] Inbound control_request subtype=${parsed.request.subtype}`,
      )
      onControlRequest?.(parsed)
      return
    }

    if (!isSDKMessage(parsed)) return

    // Check for UUID to detect echoes of our own messages
    const uuid =
      'uuid' in parsed && typeof parsed.uuid === 'string'
        ? parsed.uuid
        : undefined

    if (uuid && recentPostedUUIDs.has(uuid)) {
      logForDebugging(
        `[bridge:repl] Ignoring echo: type=${parsed.type} uuid=${uuid}`,
      )
      return
    }

    // Defensive dedup: drop inbound prompts we've already forwarded. The
    // SSE seq-num carryover (lastTransportSequenceNum) is the primary fix
    // for history-replay; this catches edge cases where that negotiation
    // fails (server ignores from_sequence_num, transport died before
    // receiving any frames, etc).
    if (uuid && recentInboundUUIDs.has(uuid)) {
      logForDebugging(
        `[bridge:repl] Ignoring re-delivered inbound: type=${parsed.type} uuid=${uuid}`,
      )
      return
    }

    logForDebugging(
      `[bridge:repl] Ingress message type=${parsed.type}${uuid ? ` uuid=${uuid}` : ''}`,
    )

    if (parsed.type === 'user') {
      if (uuid) recentInboundUUIDs.add(uuid)
      logEvent('tengu_bridge_message_received', {
        is_repl: true,
      })
      // Fire-and-forget — handler may be async (attachment resolution).
      // The handler is still invoked synchronously; Promise.resolve only
      // wraps its result so an async rejection is logged rather than left
      // unhandled (the outer try/catch cannot see it).
      void Promise.resolve(onInboundMessage?.(parsed)).catch((err: unknown) => {
        logForDebugging(
          `[bridge:repl] Inbound message handler failed: ${errorMessage(err)}`,
        )
      })
    } else {
      logForDebugging(
        `[bridge:repl] Ignoring non-user inbound message: type=${parsed.type}`,
      )
    }
  } catch (err) {
    logForDebugging(
      `[bridge:repl] Failed to parse ingress message: ${errorMessage(err)}`,
    )
  }
}
// ─── Server-initiated control requests ───────────────────────────────────────
/**
* Collaborators handed to handleServerControlRequest. Every callback is
* optional; a missing callback is simply not invoked, except
* onSetPermissionMode, whose absence produces an error reply.
*/
export type ServerControlRequestHandlers = {
/** Transport the control_response is written to. When null the request is logged and dropped without a reply. */
transport: ReplBridgeTransport | null
/** Attached as `session_id` to every outgoing control_response event. */
sessionId: string
/**
* When true, all mutable requests (interrupt, set_model, set_permission_mode,
* set_max_thinking_tokens) reply with an error instead of false-success.
* initialize still replies success — the server kills the connection otherwise.
* Used by the outbound-only bridge mode and the SDK's /bridge subpath so claude.ai sees a
* proper error instead of "action succeeded but nothing happened locally".
*/
outboundOnly?: boolean
/** Invoked for `interrupt` requests. */
onInterrupt?: () => void
/** Invoked for `set_model` requests with the requested model. */
onSetModel?: (model: string | undefined) => void
/** Invoked for `set_max_thinking_tokens` requests with the requested limit. */
onSetMaxThinkingTokens?: (maxTokens: number | null) => void
/**
* Invoked for `set_permission_mode` requests. The returned verdict decides
* whether the reply is a success or an error carrying `error`.
*/
onSetPermissionMode?: (
mode: PermissionMode,
) => { ok: true } | { ok: false; error: string }
}
/** Error text sent in the control_response for any non-initialize request while outboundOnly is set. */
const OUTBOUND_ONLY_ERROR =
'This session is outbound-only. Enable Remote Control locally to allow inbound control.'
/**
 * Respond to inbound control_request messages from the server. The server
 * sends these for session lifecycle events (initialize, set_model) and
 * for turn-level coordination (interrupt, set_max_thinking_tokens). If we
 * don't respond, the server hangs and kills the WS after ~10-14s.
 *
 * Previously a closure inside initBridgeCore's onWorkReceived; now takes
 * collaborators as params so both cores can use it.
 *
 * @param request - The control_request to answer.
 * @param handlers - Transport, session id, mode flag, and per-subtype callbacks.
 */
export function handleServerControlRequest(
  request: SDKControlRequest,
  handlers: ServerControlRequestHandlers,
): void {
  const {
    transport,
    sessionId,
    outboundOnly,
    onInterrupt,
    onSetModel,
    onSetMaxThinkingTokens,
    onSetPermissionMode,
  } = handlers

  if (!transport) {
    logForDebugging(
      '[bridge:repl] Cannot respond to control_request: transport not configured',
    )
    return
  }

  const requestId = request.request_id

  // Plain success reply with no payload.
  const succeed = (): SDKControlResponse => ({
    type: 'control_response',
    response: {
      subtype: 'success',
      request_id: requestId,
    },
  })
  // Error reply carrying a human-readable reason.
  const fail = (error: string): SDKControlResponse => ({
    type: 'control_response',
    response: {
      subtype: 'error',
      request_id: requestId,
      error,
    },
  })
  // Stamp the session id onto the reply and write it without awaiting.
  const send = (reply: SDKControlResponse): void => {
    const event = { ...reply, session_id: sessionId }
    void transport.write(event)
  }

  // Outbound-only: reply error for mutable requests so claude.ai doesn't show
  // false success. initialize must still succeed (server kills the connection
  // if it doesn't).
  if (outboundOnly && request.request.subtype !== 'initialize') {
    send(fail(OUTBOUND_ONLY_ERROR))
    logForDebugging(
      `[bridge:repl] Rejected ${request.request.subtype} (outbound-only) request_id=${requestId}`,
    )
    return
  }

  let response: SDKControlResponse
  switch (request.request.subtype) {
    case 'initialize':
      // Respond with minimal capabilities — the REPL handles
      // commands, models, and account info itself.
      response = {
        type: 'control_response',
        response: {
          subtype: 'success',
          request_id: requestId,
          response: {
            commands: [],
            output_style: 'normal',
            available_output_styles: ['normal'],
            models: [],
            account: {},
            pid: process.pid,
          },
        },
      }
      break

    case 'set_model':
      onSetModel?.(request.request.model)
      response = succeed()
      break

    case 'set_max_thinking_tokens':
      onSetMaxThinkingTokens?.(request.request.max_thinking_tokens)
      response = succeed()
      break

    case 'set_permission_mode': {
      // The callback returns a policy verdict so we can send an error
      // control_response without importing isAutoModeGateEnabled /
      // isBypassPermissionsModeDisabled here (bootstrap-isolation). If no
      // callback is registered (daemon context, which doesn't wire this —
      // see daemonBridge.ts), return an error verdict rather than a silent
      // false-success: the mode is never actually applied in that context,
      // so success would lie to the client.
      const verdict = onSetPermissionMode?.(request.request.mode) ?? {
        ok: false,
        error:
          'set_permission_mode is not supported in this context (onSetPermissionMode callback not registered)',
      }
      response = verdict.ok ? succeed() : fail(verdict.error)
      break
    }

    case 'interrupt':
      onInterrupt?.()
      response = succeed()
      break

    default:
      // Unknown subtype — respond with error so the server doesn't
      // hang waiting for a reply that never comes.
      response = fail(
        `REPL bridge does not handle control_request subtype: ${request.request.subtype}`,
      )
  }

  send(response)
  logForDebugging(
    `[bridge:repl] Sent control_response for ${request.request.subtype} request_id=${requestId} result=${response.response.subtype}`,
  )
}
// ─── Result message (for session archival on teardown) ───────────────────────
/**
 * Build a minimal `SDKResultSuccess` message for session archival.
 * The server needs this event before a WS close to trigger archival.
 *
 * All counters and durations are zero, `usage` is a shallow copy of
 * EMPTY_USAGE, and each call gets a fresh random `uuid`.
 *
 * @param sessionId - Session the result is attributed to.
 * @returns The placeholder success result.
 */
export function makeResultMessage(sessionId: string): SDKResultSuccess {
  const archivalResult: SDKResultSuccess = {
    type: 'result',
    subtype: 'success',
    duration_ms: 0,
    duration_api_ms: 0,
    is_error: false,
    num_turns: 0,
    result: '',
    stop_reason: null,
    total_cost_usd: 0,
    usage: { ...EMPTY_USAGE },
    modelUsage: {},
    permission_denials: [],
    session_id: sessionId,
    uuid: randomUUID(),
  }
  return archivalResult
}
// ─── BoundedUUIDSet (echo-dedup ring buffer) ─────────────────────────────────
/**
 * FIFO-bounded set backed by a circular buffer. Evicts the oldest entry
 * when capacity is reached, keeping memory usage constant at O(capacity).
 *
 * Messages are added in chronological order, so evicted entries are always
 * the oldest. The caller relies on external ordering (the hook's
 * lastWrittenIndexRef) as the primary dedup — this set is a secondary
 * safety net for echo filtering and race-condition dedup.
 */
export class BoundedUUIDSet {
  private readonly capacity: number
  /** Slots in insertion order; `undefined` marks a slot never written or cleared. */
  private readonly ring: (string | undefined)[]
  /** Membership index mirroring the occupied ring slots. */
  private readonly set = new Set<string>()
  /** Next ring slot to overwrite. */
  private writeIdx = 0

  /**
   * @param capacity - Maximum number of UUIDs retained; must be a positive integer.
   * @throws RangeError when `capacity` is not a positive integer. A capacity
   *   of 0 would otherwise turn the write index into NaN (`1 % 0`) and break
   *   eviction, leaving stale entries in the set indefinitely.
   */
  constructor(capacity: number) {
    if (!Number.isInteger(capacity) || capacity <= 0) {
      throw new RangeError(
        `BoundedUUIDSet capacity must be a positive integer, got ${capacity}`,
      )
    }
    this.capacity = capacity
    this.ring = new Array<string | undefined>(capacity)
  }

  /** Insert `uuid`, evicting the oldest entry when full. No-op if already present. */
  add(uuid: string): void {
    if (this.set.has(uuid)) return
    // Evict the entry at the current write position (if occupied)
    const evicted = this.ring[this.writeIdx]
    if (evicted !== undefined) {
      this.set.delete(evicted)
    }
    this.ring[this.writeIdx] = uuid
    this.set.add(uuid)
    this.writeIdx = (this.writeIdx + 1) % this.capacity
  }

  /** @returns true when `uuid` is currently retained. */
  has(uuid: string): boolean {
    return this.set.has(uuid)
  }

  /** Drop every entry and reset the write position. */
  clear(): void {
    this.set.clear()
    this.ring.fill(undefined)
    this.writeIdx = 0
  }
}

View file

@ -0,0 +1,43 @@
import type { PermissionUpdate } from '../utils/permissions/PermissionUpdateSchema.js'
/**
* Shape that isBridgePermissionResponse checks a parsed control_response
* payload against. Only `behavior` is required.
*/
type BridgePermissionResponse = {
/** Verdict; the only field the type predicate validates. */
behavior: 'allow' | 'deny'
// NOTE(review): presumably a replacement tool input supplied with the verdict — confirm with the producer.
updatedInput?: Record<string, unknown>
// NOTE(review): presumably permission rule changes to apply with the verdict — confirm with the producer.
updatedPermissions?: PermissionUpdate[]
// NOTE(review): presumably human-readable text accompanying the verdict — confirm with the producer.
message?: string
}
/**
* Callback surface for exchanging permission requests and responses over the
* bridge. Only the signatures are declared here; the implementation lives
* elsewhere.
*/
type BridgePermissionCallbacks = {
// NOTE(review): presumably emits a permission prompt for one tool use,
// identified by requestId — confirm against the implementation.
sendRequest(
requestId: string,
toolName: string,
input: Record<string, unknown>,
toolUseId: string,
description: string,
permissionSuggestions?: PermissionUpdate[],
blockedPath?: string,
): void
// NOTE(review): presumably sends a verdict for the given requestId — confirm against the implementation.
sendResponse(requestId: string, response: BridgePermissionResponse): void
/** Cancel a pending control_request so the web app can dismiss its prompt. */
cancelRequest(requestId: string): void
/** Register a handler for the response to `requestId`; the returned function unsubscribes it. */
onResponse(
requestId: string,
handler: (response: BridgePermissionResponse) => void,
): () => void // returns unsubscribe
}
/**
 * Type predicate for validating a parsed control_response payload as a
 * BridgePermissionResponse. Checks the required `behavior` discriminant
 * rather than using an unsafe `as` cast.
 *
 * @param value - Arbitrary parsed payload.
 * @returns true when `value` is an object whose `behavior` is 'allow' or 'deny'.
 */
function isBridgePermissionResponse(
  value: unknown,
): value is BridgePermissionResponse {
  if (typeof value !== 'object' || value === null) return false
  if (!('behavior' in value)) return false
  const { behavior } = value
  return behavior === 'allow' || behavior === 'deny'
}
export { isBridgePermissionResponse }
export type { BridgePermissionCallbacks, BridgePermissionResponse }

210
bridge/bridgePointer.ts Normal file
View file

@ -0,0 +1,210 @@
import { mkdir, readFile, stat, unlink, writeFile } from 'fs/promises'
import { dirname, join } from 'path'
import { z } from 'zod/v4'
import { logForDebugging } from '../utils/debug.js'
import { isENOENT } from '../utils/errors.js'
import { getWorktreePathsPortable } from '../utils/getWorktreePathsPortable.js'
import { lazySchema } from '../utils/lazySchema.js'
import {
getProjectsDir,
sanitizePath,
} from '../utils/sessionStoragePortable.js'
import { jsonParse, jsonStringify } from '../utils/slowOperations.js'
/**
* Upper bound on worktree fanout. git worktree list is naturally bounded
* (50 is a LOT), but this caps the parallel stat() burst and guards against
* pathological setups. Above this, --continue falls back to current-dir-only.
*
* The cap is compared against the full worktree list, before the current
* directory is filtered out of the candidates.
*/
const MAX_WORKTREE_FANOUT = 50
/**
* Crash-recovery pointer for Remote Control sessions.
*
* Written immediately after a bridge session is created, periodically
* refreshed during the session, and cleared on clean shutdown. If the
* process dies unclean (crash, kill -9, terminal closed), the pointer
* persists. On next startup, `claude remote-control` detects it and offers
* to resume via the --session-id flow from #20460.
*
* Staleness is checked against the file's mtime (not an embedded timestamp)
* so that a periodic re-write with the same content serves as a refresh —
* this matches the backend's rolling BRIDGE_LAST_POLL_TTL (4h) semantics. A
* bridge that's been polling for 5+ hours and then crashes still has a
* fresh pointer as long as the refresh ran within the window.
*
* Scoped per working directory (alongside transcript JSONL files) so two
* concurrent bridges in different repos don't clobber each other.
*
* The constant below is that staleness window: 4 hours, in milliseconds.
*/
export const BRIDGE_POINTER_TTL_MS = 4 * 60 * 60 * 1000
/**
* Schema for the JSON stored in the pointer file: the session and environment
* ids plus which kind of bridge wrote it. Wrapped in lazySchema and obtained
* by calling BridgePointerSchema() — presumably so the zod object is only
* built on first use; confirm in lazySchema.
*/
const BridgePointerSchema = lazySchema(() =>
z.object({
sessionId: z.string(),
environmentId: z.string(),
source: z.enum(['standalone', 'repl']),
}),
)
/** Validated pointer contents, inferred from BridgePointerSchema. */
export type BridgePointer = z.infer<ReturnType<typeof BridgePointerSchema>>
/**
 * Resolve the pointer file location for a working directory.
 *
 * @param dir - Working directory the pointer is scoped to.
 * @returns `<projects dir>/<sanitized dir>/bridge-pointer.json`.
 */
export function getBridgePointerPath(dir: string): string {
  const projectFolder = join(getProjectsDir(), sanitizePath(dir))
  return join(projectFolder, 'bridge-pointer.json')
}
/**
 * Write the pointer. Also used to refresh mtime during long sessions —
 * calling with the same IDs is a cheap no-content-change write that bumps
 * the staleness clock. Best-effort — a crash-recovery file must never
 * itself cause a crash. Logs and swallows on error.
 *
 * @param dir - Working directory the pointer is scoped to.
 * @param pointer - Session/environment ids and source to persist.
 */
export async function writeBridgePointer(
  dir: string,
  pointer: BridgePointer,
): Promise<void> {
  const pointerPath = getBridgePointerPath(dir)
  try {
    // Create the parent directory first; it may not exist yet.
    const parentDir = dirname(pointerPath)
    await mkdir(parentDir, { recursive: true })
    const serialized = jsonStringify(pointer)
    await writeFile(pointerPath, serialized, 'utf8')
    logForDebugging(`[bridge:pointer] wrote ${pointerPath}`)
  } catch (err: unknown) {
    logForDebugging(`[bridge:pointer] write failed: ${err}`, { level: 'warn' })
  }
}
/**
 * Read the pointer and its age (ms since last write). Operates directly
 * and handles errors — no existence check (CLAUDE.md TOCTOU rule). Returns
 * null on any failure: missing file, corrupted JSON, schema mismatch, or
 * stale (mtime > 4h ago). Stale/invalid pointers are deleted so they don't
 * keep re-prompting after the backend has already GC'd the env.
 *
 * @param dir - Working directory the pointer is scoped to.
 * @returns The validated pointer plus `ageMs`, or null.
 */
export async function readBridgePointer(
  dir: string,
): Promise<(BridgePointer & { ageMs: number }) | null> {
  const pointerPath = getBridgePointerPath(dir)

  let contents: string
  let lastWriteMs: number
  try {
    // stat for mtime (staleness anchor), then read. Two syscalls, but both
    // are needed — mtime IS the data we return, not a TOCTOU guard.
    const info = await stat(pointerPath)
    lastWriteMs = info.mtimeMs
    contents = await readFile(pointerPath, 'utf8')
  } catch {
    return null
  }

  const validation = BridgePointerSchema().safeParse(safeJsonParse(contents))
  if (!validation.success) {
    logForDebugging(`[bridge:pointer] invalid schema, clearing: ${pointerPath}`)
    await clearBridgePointer(dir)
    return null
  }

  // Clamp at zero so an mtime in the future never yields a negative age.
  const ageMs = Math.max(0, Date.now() - lastWriteMs)
  if (ageMs > BRIDGE_POINTER_TTL_MS) {
    logForDebugging(`[bridge:pointer] stale (>4h mtime), clearing: ${pointerPath}`)
    await clearBridgePointer(dir)
    return null
  }

  return { ...validation.data, ageMs }
}
/**
 * Worktree-aware read for `--continue`. The REPL bridge writes its pointer
 * to `getOriginalCwd()` which EnterWorktreeTool/activeWorktreeSession can
 * mutate to a worktree path — but `claude remote-control --continue` runs
 * with `resolve('.')` = shell CWD. This fans out across git worktree
 * siblings to find the freshest pointer, matching /resume's semantics.
 *
 * Fast path: checks `dir` first. Only shells out to `git worktree list` if
 * that misses — the common case (pointer in launch dir) is one stat, zero
 * exec. Fanout reads run in parallel; capped at MAX_WORKTREE_FANOUT.
 *
 * Returns the pointer AND the dir it was found in, so the caller can clear
 * the right file on resume failure.
 *
 * @param dir - Directory `--continue` was invoked from.
 * @returns The freshest pointer and its directory, or null when none is found.
 */
export async function readBridgePointerAcrossWorktrees(
  dir: string,
): Promise<{ pointer: BridgePointer & { ageMs: number }; dir: string } | null> {
  type Hit = { pointer: BridgePointer & { ageMs: number }; dir: string }

  // Fast path: current dir. Covers standalone bridge (always matches) and
  // REPL bridge when no worktree mutation happened.
  const local = await readBridgePointer(dir)
  if (local) return { pointer: local, dir }

  // Fanout: scan worktree siblings. getWorktreePathsPortable has a 5s
  // timeout and returns [] on any error (not a git repo, git not installed).
  const worktrees = await getWorktreePathsPortable(dir)
  const worktreeCount = worktrees.length
  if (worktreeCount <= 1) return null
  if (worktreeCount > MAX_WORKTREE_FANOUT) {
    logForDebugging(
      `[bridge:pointer] ${worktreeCount} worktrees exceeds fanout cap ${MAX_WORKTREE_FANOUT}, skipping`,
    )
    return null
  }

  // Dedupe against `dir` so we don't re-stat it. sanitizePath normalizes
  // case/separators so worktree-list output matches our fast-path key even
  // on Windows where git may emit C:/ vs stored c:/.
  const ownKey = sanitizePath(dir)
  const siblings = worktrees.filter(wt => sanitizePath(wt) !== ownKey)

  // Parallel stat+read. Each readBridgePointer is a stat() that ENOENTs
  // for worktrees with no pointer (cheap) plus a ~100-byte read for the
  // rare ones that have one. Promise.all → latency ≈ slowest single stat.
  const hits = await Promise.all(
    siblings.map(async (wt): Promise<Hit | null> => {
      const pointer = await readBridgePointer(wt)
      return pointer ? { pointer, dir: wt } : null
    }),
  )

  // Pick freshest (lowest ageMs); on a tie the earlier worktree wins. The
  // pointer stores environmentId so resume reconnects to the right env
  // regardless of which worktree --continue was invoked from.
  const freshest = hits.reduce<Hit | null>((best, hit) => {
    if (!hit) return best
    return !best || hit.pointer.ageMs < best.pointer.ageMs ? hit : best
  }, null)

  if (freshest) {
    logForDebugging(
      `[bridge:pointer] fanout found pointer in worktree ${freshest.dir} (ageMs=${freshest.pointer.ageMs})`,
    )
  }
  return freshest
}
/**
 * Delete the pointer. Idempotent — ENOENT is expected when the process
 * shut down clean previously. Any other unlink failure is logged at warn
 * level and swallowed.
 *
 * @param dir - Working directory the pointer is scoped to.
 */
export async function clearBridgePointer(dir: string): Promise<void> {
  const pointerPath = getBridgePointerPath(dir)
  try {
    await unlink(pointerPath)
    logForDebugging(`[bridge:pointer] cleared ${pointerPath}`)
  } catch (err: unknown) {
    // A missing file means there is nothing to clear.
    if (isENOENT(err)) return
    logForDebugging(`[bridge:pointer] clear failed: ${err}`, {
      level: 'warn',
    })
  }
}
/**
 * Parse JSON without throwing.
 *
 * @param raw - JSON text.
 * @returns The parsed value, or null when parsing throws.
 */
function safeJsonParse(raw: string): unknown {
  let parsed: unknown = null
  try {
    parsed = jsonParse(raw)
  } catch {
    // Leave `parsed` as null; the caller's schema check rejects it.
  }
  return parsed
}

163
bridge/bridgeStatusUtil.ts Normal file
View file

@ -0,0 +1,163 @@
import {
getClaudeAiBaseUrl,
getRemoteSessionUrl,
} from '../constants/product.js'
import { stringWidth } from '../ink/stringWidth.js'
import { formatDuration, truncateToWidth } from '../utils/format.js'
import { getGraphemeSegmenter } from '../utils/intl.js'
/** Bridge status state machine states. */
export type StatusState =
| 'idle'
| 'attached'
| 'titled'
| 'reconnecting'
| 'failed'
/** How long a tool activity line stays visible after last tool_start (ms). 30_000 ms = 30 seconds. */
export const TOOL_DISPLAY_EXPIRY_MS = 30_000
/** Interval for the shimmer animation tick (ms). */
export const SHIMMER_INTERVAL_MS = 150
/**
 * Format the current local time as a zero-padded 24-hour clock string.
 *
 * @returns `HH:MM:SS` for the moment of the call.
 */
export function timestamp(): string {
  const now = new Date()
  return [now.getHours(), now.getMinutes(), now.getSeconds()]
    .map(part => String(part).padStart(2, '0'))
    .join(':')
}
export { formatDuration, truncateToWidth as truncatePrompt }
/**
 * Abbreviate a tool activity summary for the trail display.
 *
 * @param summary - Full activity summary.
 * @returns The summary truncated via truncateToWidth with a width of 30.
 */
export function abbreviateActivity(summary: string): string {
  const maxWidth = 30
  return truncateToWidth(summary, maxWidth)
}
/**
 * Build the connect URL shown when the bridge is idle.
 *
 * @param environmentId - Appended verbatim as the `bridge` query value.
 * @param ingressUrl - Optional ingress URL forwarded to getClaudeAiBaseUrl.
 * @returns `<base>/code?bridge=<environmentId>`.
 */
export function buildBridgeConnectUrl(
  environmentId: string,
  ingressUrl?: string,
): string {
  const query = `?bridge=${environmentId}`
  return `${getClaudeAiBaseUrl(undefined, ingressUrl)}/code${query}`
}
/**
 * Build the session URL shown when a session is attached. Delegates to
 * getRemoteSessionUrl for the cse_ -> session_ prefix translation, then appends
 * the v1-specific ?bridge={environmentId} query.
 *
 * @param sessionId - Session to link to.
 * @param environmentId - Appended verbatim as the `bridge` query value.
 * @param ingressUrl - Optional ingress URL forwarded to getRemoteSessionUrl.
 * @returns The session URL with the bridge query appended.
 */
export function buildBridgeSessionUrl(
  sessionId: string,
  environmentId: string,
  ingressUrl?: string,
): string {
  const sessionUrl = getRemoteSessionUrl(sessionId, ingressUrl)
  return `${sessionUrl}?bridge=${environmentId}`
}
/**
 * Compute the glimmer index for a reverse-sweep shimmer animation.
 *
 * One cycle lasts `messageWidth + 20` ticks. The index starts at
 * `messageWidth + 10` and drops by one per tick, so it also spends time
 * outside the text on either side.
 *
 * @param tick - Animation tick counter.
 * @param messageWidth - Visual width of the text being animated.
 * @returns Column index of the glimmer centre for this tick.
 */
export function computeGlimmerIndex(
  tick: number,
  messageWidth: number,
): number {
  const period = messageWidth + 20
  const phase = tick % period
  return messageWidth + 10 - phase
}
/**
 * Split text into three segments by visual column position for shimmer rendering.
 *
 * Uses grapheme segmentation and `stringWidth` so the split is correct for
 * multi-byte characters, emoji, and CJK glyphs.
 *
 * Returns `{ before, shimmer, after }` strings. Both renderers (chalk in
 * bridgeUI.ts and React/Ink in bridge.tsx) apply their own coloring to
 * these segments.
 *
 * @param text - Text to split.
 * @param glimmerIndex - Centre column of the shimmer; the shimmer spans the
 *   columns from `glimmerIndex - 1` through `glimmerIndex + 1`.
 * @returns The three segments; when the shimmer is offscreen all text is in `before`.
 */
export function computeShimmerSegments(
  text: string,
  glimmerIndex: number,
): { before: string; shimmer: string; after: string } {
  const totalWidth = stringWidth(text)
  const shimmerStart = glimmerIndex - 1
  const shimmerEnd = glimmerIndex + 1

  // When shimmer is offscreen, return all text as "before"
  const offscreen = shimmerStart >= totalWidth || shimmerEnd < 0
  if (offscreen) {
    return { before: text, shimmer: '', after: '' }
  }

  // Split into at most 3 segments by visual column position
  const firstShimmerCol = Math.max(0, shimmerStart)
  const parts = { before: '', shimmer: '', after: '' }
  let column = 0
  for (const { segment } of getGraphemeSegmenter().segment(text)) {
    const width = stringWidth(segment)
    const bucket =
      column + width <= firstShimmerCol
        ? 'before'
        : column > shimmerEnd
          ? 'after'
          : 'shimmer'
    parts[bucket] += segment
    column += width
  }
  return parts
}
/** Computed bridge status label and color from connection state. */
export type BridgeStatusInfo = {
  label:
    | 'Remote Control failed'
    | 'Remote Control reconnecting'
    | 'Remote Control active'
    | 'Remote Control connecting\u2026'
  color: 'error' | 'warning' | 'success'
}

/**
 * Derive a status label and color from the bridge connection state.
 *
 * Precedence, highest first: a non-empty `error`, then `reconnecting`, then
 * an active session or live connection; anything else reads as connecting.
 *
 * @returns The label/color pair for the first matching condition.
 */
export function getBridgeStatus(state: {
  error: string | undefined
  connected: boolean
  sessionActive: boolean
  reconnecting: boolean
}): BridgeStatusInfo {
  const { error, connected, sessionActive, reconnecting } = state
  if (error) {
    return { label: 'Remote Control failed', color: 'error' }
  }
  if (reconnecting) {
    return { label: 'Remote Control reconnecting', color: 'warning' }
  }
  const isLive = sessionActive || connected
  return isLive
    ? { label: 'Remote Control active', color: 'success' }
    : { label: 'Remote Control connecting\u2026', color: 'warning' }
}
/**
 * Footer text shown when bridge is idle (Ready state).
 *
 * @param url - Connect URL to display.
 * @returns The idle footer sentence ending with `url`.
 */
export function buildIdleFooterText(url: string): string {
  const lead = 'Code everywhere with the Claude app or '
  return `${lead}${url}`
}
/**
 * Footer text shown when a session is active (Connected state).
 *
 * @param url - Session URL to display.
 * @returns The active footer sentence ending with `url`.
 */
export function buildActiveFooterText(url: string): string {
  const lead = 'Continue coding in the Claude app or '
  return `${lead}${url}`
}
/** Footer text shown when the bridge has failed. A fixed string: unlike the idle and active footers it embeds no URL. */
export const FAILED_FOOTER_TEXT = 'Something went wrong, please try again'
/**
 * Wrap text in an OSC 8 terminal hyperlink. Zero visual width for layout purposes.
 * strip-ansi (used by stringWidth) correctly strips these sequences, so
 * countVisualLines in bridgeUI.ts remains accurate.
 *
 * @param text - Visible link text.
 * @param url - Link target, inserted verbatim into the escape sequence.
 * @returns `text` surrounded by the opening and closing OSC 8 sequences.
 */
export function wrapWithOsc8Link(text: string, url: string): string {
  const open = `\x1b]8;;${url}\x07`
  const close = `\x1b]8;;\x07`
  return `${open}${text}${close}`
}

530
bridge/bridgeUI.ts Normal file
View file

@ -0,0 +1,530 @@
import chalk from 'chalk'
import { toString as qrToString } from 'qrcode'
import {
BRIDGE_FAILED_INDICATOR,
BRIDGE_READY_INDICATOR,
BRIDGE_SPINNER_FRAMES,
} from '../constants/figures.js'
import { stringWidth } from '../ink/stringWidth.js'
import { logForDebugging } from '../utils/debug.js'
import {
buildActiveFooterText,
buildBridgeConnectUrl,
buildBridgeSessionUrl,
buildIdleFooterText,
FAILED_FOOTER_TEXT,
formatDuration,
type StatusState,
TOOL_DISPLAY_EXPIRY_MS,
timestamp,
truncatePrompt,
wrapWithOsc8Link,
} from './bridgeStatusUtil.js'
import type {
BridgeConfig,
BridgeLogger,
SessionActivity,
SpawnMode,
} from './types.js'
/**
* Options passed to qrcode's toString by generateQr: UTF-8 text output at
* error-correction level 'L', with `small` enabled.
* NOTE(review): 'L' and `small` presumably keep the rendered code compact for
* the terminal — confirm against the qrcode docs.
*/
const QR_OPTIONS = {
type: 'utf8' as const,
errorCorrectionLevel: 'L' as const,
small: true,
}
/**
 * Generate a QR code and return its lines.
 *
 * @param url - Text to encode.
 * @returns The rendered QR code split on newlines, with empty lines dropped.
 */
async function generateQr(url: string): Promise<string[]> {
  const rendered = await qrToString(url, QR_OPTIONS)
  const rows: string[] = []
  for (const row of rendered.split('\n')) {
    if (row.length > 0) rows.push(row)
  }
  return rows
}
export function createBridgeLogger(options: {
verbose: boolean
write?: (s: string) => void
}): BridgeLogger {
const write = options.write ?? ((s: string) => process.stdout.write(s))
const verbose = options.verbose
// Track how many status lines are currently displayed at the bottom
let statusLineCount = 0
// Status state machine
let currentState: StatusState = 'idle'
let currentStateText = 'Ready'
let repoName = ''
let branch = ''
let debugLogPath = ''
// Connect URL (built in printBanner with correct base for staging/prod)
let connectUrl = ''
let cachedIngressUrl = ''
let cachedEnvironmentId = ''
let activeSessionUrl: string | null = null
// QR code lines for the current URL
let qrLines: string[] = []
let qrVisible = false
// Tool activity for the second status line
let lastToolSummary: string | null = null
let lastToolTime = 0
// Session count indicator (shown when multi-session mode is enabled)
let sessionActive = 0
let sessionMax = 1
// Spawn mode shown in the session-count line + gates the `w` hint
let spawnModeDisplay: 'same-dir' | 'worktree' | null = null
let spawnMode: SpawnMode = 'single-session'
// Per-session display info for the multi-session bullet list (keyed by compat sessionId)
const sessionDisplayInfo = new Map<
string,
{ title?: string; url: string; activity?: SessionActivity }
>()
// Connecting spinner state
let connectingTimer: ReturnType<typeof setInterval> | null = null
let connectingTick = 0
/**
 * Count how many visual terminal rows a string occupies, accounting for
 * line wrapping. Each `\n` is one row, and content wider than the terminal
 * wraps to additional rows.
 *
 * @param text - Text about to be (or already) written to the terminal.
 * @returns Number of rows, using 80 columns when the width is unavailable.
 */
function countVisualLines(text: string): number {
  // eslint-disable-next-line custom-rules/prefer-use-terminal-size
  const cols = process.stdout.columns || 80 // non-React CLI context
  const rows = text.split('\n').reduce((total, logical) => {
    // Empty segment between consecutive \n — counts as 1 row
    if (logical.length === 0) return total + 1
    const wrapped = Math.ceil(stringWidth(logical) / cols)
    return total + Math.max(1, wrapped)
  }, 0)
  // The trailing \n in "line\n" produces an empty last element — don't count it
  // because the cursor sits at the start of the next line, not a new visual row.
  return text.endsWith('\n') ? rows - 1 : rows
}
/**
 * Write a status line and track its visual line count.
 *
 * @param text - Status text; its row count is added to statusLineCount.
 */
function writeStatus(text: string): void {
  write(text)
  const addedRows = countVisualLines(text)
  statusLineCount += addedRows
}
/**
 * Clear any currently displayed status lines. Does nothing when no status
 * rows are tracked; otherwise resets the tracked count to zero.
 */
function clearStatusLines(): void {
  const rows = statusLineCount
  if (rows <= 0) return
  logForDebugging(`[bridge:ui] clearStatusLines count=${rows}`)
  // Move cursor up to the start of the status block, then erase everything below
  const cursorUp = `\x1b[${rows}A` // cursor up N lines
  const eraseBelow = '\x1b[J' // erase from cursor to end of screen
  write(cursorUp)
  write(eraseBelow)
  statusLineCount = 0
}
/**
* Print a permanent log line: clears any displayed status lines first, then
* writes `line` as given (no newline is appended here).
* NOTE(review): this function does not redraw the status block itself —
* presumably callers re-render afterwards; confirm at the call sites.
*/
function printLog(line: string): void {
clearStatusLines()
write(line)
}
/**
 * Regenerate the QR code with the given URL. Runs in the background: on
 * success the new lines are stored and the status block is re-rendered; on
 * failure the error is logged and the previous lines are kept.
 *
 * @param url - URL to encode.
 */
function regenerateQr(url: string): void {
  const applyLines = (lines: string[]): void => {
    qrLines = lines
    renderStatusLine()
  }
  const reportFailure = (e: unknown): void => {
    logForDebugging(`QR code generation failed: ${e}`, { level: 'error' })
  }
  generateQr(url).then(applyLines).catch(reportFailure)
}
/**
 * Render the connecting spinner line (shown before first updateIdleStatus).
 * Replaces any existing status lines with the current spinner frame,
 * "Connecting", and the repo/branch names when they are set.
 */
function renderConnectingLine(): void {
  clearStatusLines()

  const frameIndex = connectingTick % BRIDGE_SPINNER_FRAMES.length
  const frame = BRIDGE_SPINNER_FRAMES[frameIndex]!

  // Each non-empty name is prefixed with a dimmed middle-dot separator.
  const suffix = [repoName, branch]
    .filter(name => name)
    .map(name => chalk.dim(' \u00b7 ') + chalk.dim(name))
    .join('')

  writeStatus(
    `${chalk.yellow(frame)} ${chalk.yellow('Connecting')}${suffix}\n`,
  )
}
/** Begin animating the connecting spinner. The first updateIdleStatus() stops it. */
function startConnecting(): void {
  // Drop any previous interval so only one animation timer is ever active.
  stopConnecting()
  renderConnectingLine()
  const advanceFrame = (): void => {
    connectingTick++
    renderConnectingLine()
  }
  connectingTimer = setInterval(advanceFrame, 150)
}
/** Cancel the connecting-spinner interval, if one is running. */
function stopConnecting(): void {
  if (!connectingTimer) return
  clearInterval(connectingTimer)
  connectingTimer = null
}
/**
 * Repaint the status block for the current state: optional QR code, the
 * indicator/state line, capacity or mode details, and the footer.
 */
function renderStatusLine(): void {
  // Reconnecting/failed displays are painted by updateReconnectingStatus /
  // updateFailedStatus. Bail out before clearing so callers like toggleQr
  // and setSpawnModeDisplay don't blank the display during these states.
  if (currentState === 'reconnecting' || currentState === 'failed') return

  clearStatusLines()
  const isIdle = currentState === 'idle'
  // Idle is drawn green; every other state rendered here is cyan.
  const accent = isIdle ? chalk.green : chalk.cyan

  // QR code above the status line
  if (qrVisible) {
    for (const qrRow of qrLines) {
      writeStatus(`${chalk.dim(qrRow)}\n`)
    }
  }

  const stateText = accent(currentStateText)

  // Repo/branch suffix. In worktree mode each session gets its own branch,
  // so showing the bridge's branch would be misleading.
  const separator = chalk.dim(' \u00b7 ')
  let suffix = repoName ? separator + chalk.dim(repoName) : ''
  if (branch && spawnMode !== 'worktree') {
    suffix += separator + chalk.dim(branch)
  }

  if (process.env.USER_TYPE === 'ant' && debugLogPath) {
    writeStatus(
      `${chalk.yellow('[ANT-ONLY] Logs:')} ${chalk.dim(debugLogPath)}\n`,
    )
  }
  writeStatus(`${accent(BRIDGE_READY_INDICATOR)} ${stateText}${suffix}\n`)

  const modeHint =
    spawnMode === 'worktree'
      ? 'New sessions will be created in an isolated worktree'
      : 'New sessions will be created in the current directory'
  const capacityText = `Capacity: ${sessionActive}/${sessionMax} \u00b7 ${modeHint}`

  if (sessionMax > 1) {
    // Multi-session mode: capacity line followed by one row per session.
    writeStatus(` ${chalk.dim(capacityText)}\n`)
    for (const info of sessionDisplayInfo.values()) {
      const titleText = info.title
        ? truncatePrompt(info.title, 35)
        : chalk.dim('Attached')
      const linkedTitle = wrapWithOsc8Link(titleText, info.url)
      // Result/error activities are not shown next to the title.
      const act = info.activity
      let activityText = ''
      if (act && act.type !== 'result' && act.type !== 'error') {
        activityText = chalk.dim(` ${truncatePrompt(act.summary, 40)}`)
      }
      writeStatus(` ${linkedTitle}${activityText}\n`)
    }
  } else if (sessionMax === 1) {
    // Single slot (or true single-session mode): one mode line...
    const modeText =
      spawnMode === 'single-session'
        ? 'Single session \u00b7 exits when complete'
        : capacityText
    writeStatus(` ${chalk.dim(modeText)}\n`)
    // ...plus the most recent tool activity while it is still fresh.
    if (
      !isIdle &&
      lastToolSummary &&
      Date.now() - lastToolTime < TOOL_DISPLAY_EXPIRY_MS
    ) {
      writeStatus(` ${chalk.dim(truncatePrompt(lastToolSummary, 60))}\n`)
    }
  }

  // Footer, preceded by a blank separator line.
  const url = activeSessionUrl ?? connectUrl
  if (!url) return
  writeStatus('\n')
  const footerText = isIdle
    ? buildIdleFooterText(url)
    : buildActiveFooterText(url)
  const qrHint = chalk.dim.italic(
    qrVisible ? 'space to hide QR code' : 'space to show QR code',
  )
  const toggleHint = spawnModeDisplay
    ? chalk.dim.italic(' \u00b7 w to toggle spawn mode')
    : ''
  writeStatus(`${chalk.dim(footerText)}\n`)
  writeStatus(`${qrHint}${toggleHint}\n`)
}
return {
  /** Cache connection details, print the banner (more detail when verbose) and start the connecting spinner. */
  printBanner(config: BridgeConfig, environmentId: string): void {
    cachedIngressUrl = config.sessionIngressUrl
    cachedEnvironmentId = environmentId
    connectUrl = buildBridgeConnectUrl(environmentId, cachedIngressUrl)
    regenerateQr(connectUrl)
    if (verbose) {
      write(chalk.dim(`Remote Control`) + ` v${MACRO.VERSION}\n`)
      if (config.spawnMode !== 'single-session') {
        write(chalk.dim(`Spawn mode: `) + `${config.spawnMode}\n`)
        write(
          chalk.dim(`Max concurrent sessions: `) + `${config.maxSessions}\n`,
        )
      }
      write(chalk.dim(`Environment ID: `) + `${environmentId}\n`)
    }
    if (config.sandbox) {
      write(chalk.dim(`Sandbox: `) + `${chalk.green('Enabled')}\n`)
    }
    write('\n')
    // Start connecting spinner — first updateIdleStatus() will stop it
    startConnecting()
  },

  /** Verbose only: log that a session started, with a truncated prompt. */
  logSessionStart(sessionId: string, prompt: string): void {
    if (!verbose) return
    const short = truncatePrompt(prompt, 80)
    const stamp = chalk.dim(`[${timestamp()}]`)
    printLog(
      `${stamp} Session started: ${chalk.white(`"${short}"`)} (${chalk.dim(sessionId)})\n`,
    )
  },

  /** Log a completed session together with its duration. */
  logSessionComplete(sessionId: string, durationMs: number): void {
    const stamp = chalk.dim(`[${timestamp()}]`)
    printLog(
      `${stamp} Session ${chalk.green('completed')} (${formatDuration(durationMs)}) ${chalk.dim(sessionId)}\n`,
    )
  },

  /** Log a failed session together with its error text. */
  logSessionFailed(sessionId: string, error: string): void {
    const stamp = chalk.dim(`[${timestamp()}]`)
    printLog(
      `${stamp} Session ${chalk.red('failed')}: ${error} ${chalk.dim(sessionId)}\n`,
    )
  },

  /** Log a timestamped status message. */
  logStatus(message: string): void {
    const stamp = chalk.dim(`[${timestamp()}]`)
    printLog(`${stamp} ${message}\n`)
  },

  /** Log a dimmed, timestamped message when verbose output is enabled. */
  logVerbose(message: string): void {
    if (!verbose) return
    printLog(chalk.dim(`[${timestamp()}] ${message}`) + '\n')
  },

  /** Log a timestamped error message in red. */
  logError(message: string): void {
    printLog(chalk.red(`[${timestamp()}] Error: ${message}`) + '\n')
  },

  /** Log a successful reconnect and how long the bridge was disconnected. */
  logReconnected(disconnectedMs: number): void {
    const stamp = chalk.dim(`[${timestamp()}]`)
    printLog(
      `${stamp} ${chalk.green('Reconnected')} after ${formatDuration(disconnectedMs)}\n`,
    )
  },

  /** Remember the repo and branch names shown in the status suffix. */
  setRepoInfo(repo: string, branchName: string): void {
    repoName = repo
    branch = branchName
  },

  /** Remember the debug log path rendered by renderStatusLine. */
  setDebugLogPath(path: string): void {
    debugLogPath = path
  },

  /** Switch to the idle "Ready" display and point the QR back at the connect URL. */
  updateIdleStatus(): void {
    stopConnecting()
    currentState = 'idle'
    currentStateText = 'Ready'
    lastToolSummary = null
    lastToolTime = 0
    activeSessionUrl = null
    regenerateQr(connectUrl)
    renderStatusLine()
  },

  /** Switch to the "Connected" display for an attached session. */
  setAttached(sessionId: string): void {
    stopConnecting()
    currentState = 'attached'
    currentStateText = 'Connected'
    lastToolSummary = null
    lastToolTime = 0
    // Multi-session: keep footer/QR on the environment connect URL so users
    // can spawn more sessions. Per-session links are in the bullet list.
    const singleSlot = sessionMax <= 1
    if (singleSlot) {
      activeSessionUrl = buildBridgeSessionUrl(
        sessionId,
        cachedEnvironmentId,
        cachedIngressUrl,
      )
      regenerateQr(activeSessionUrl)
    }
    renderStatusLine()
  },

  /** Paint the "Reconnecting" spinner line (below the QR code when visible). */
  updateReconnectingStatus(delayStr: string, elapsedStr: string): void {
    stopConnecting()
    clearStatusLines()
    currentState = 'reconnecting'
    // QR code above the status line
    if (qrVisible) {
      for (const qrRow of qrLines) {
        writeStatus(`${chalk.dim(qrRow)}\n`)
      }
    }
    const spinner =
      BRIDGE_SPINNER_FRAMES[connectingTick % BRIDGE_SPINNER_FRAMES.length]!
    connectingTick++
    const parts = [
      chalk.yellow(spinner),
      chalk.yellow('Reconnecting'),
      chalk.dim('\u00b7'),
      chalk.dim(`retrying in ${delayStr}`),
      chalk.dim('\u00b7'),
      chalk.dim(`disconnected ${elapsedStr}`),
    ]
    writeStatus(`${parts.join(' ')}\n`)
  },

  /** Paint the failure display: headline, footer text and, when given, the error. */
  updateFailedStatus(error: string): void {
    stopConnecting()
    clearStatusLines()
    currentState = 'failed'
    const separator = chalk.dim(' \u00b7 ')
    const repoPart = repoName ? separator + chalk.dim(repoName) : ''
    const branchPart = branch ? separator + chalk.dim(branch) : ''
    writeStatus(
      `${chalk.red(BRIDGE_FAILED_INDICATOR)} ${chalk.red('Remote Control Failed')}${repoPart}${branchPart}\n`,
    )
    writeStatus(`${chalk.dim(FAILED_FOOTER_TEXT)}\n`)
    if (error) {
      writeStatus(`${chalk.red(error)}\n`)
    }
  },

  /** Record the latest tool activity (tool_start only) and repaint. */
  updateSessionStatus(
    _sessionId: string,
    _elapsed: string,
    activity: SessionActivity,
    _trail: string[],
  ): void {
    // Cache tool activity for the second status line
    const isToolStart = activity.type === 'tool_start'
    if (isToolStart) {
      lastToolSummary = activity.summary
      lastToolTime = Date.now()
    }
    renderStatusLine()
  },

  /** Stop the spinner and erase the status block. */
  clearStatus(): void {
    stopConnecting()
    clearStatusLines()
  },

  /** Flip QR visibility and repaint. */
  toggleQr(): void {
    qrVisible = !qrVisible
    renderStatusLine()
  },

  /** Store new capacity numbers and spawn mode; no repaint happens here. */
  updateSessionCount(active: number, max: number, mode: SpawnMode): void {
    const unchanged =
      sessionActive === active && sessionMax === max && spawnMode === mode
    if (unchanged) return
    sessionActive = active
    sessionMax = max
    spawnMode = mode
    // Don't re-render here — the status ticker calls renderStatusLine
    // on its own cadence, and the next tick will pick up the new values.
  },

  /** Store the spawn mode shown in the footer toggle hint; no repaint happens here. */
  setSpawnModeDisplay(mode: 'same-dir' | 'worktree' | null): void {
    if (spawnModeDisplay === mode) return
    spawnModeDisplay = mode
    // Also sync the #21118-added spawnMode so the next render shows correct
    // mode hint + branch visibility. Don't render here — matches
    // updateSessionCount: called before printBanner (initial setup) and
    // again from the `w` handler (which follows with refreshDisplay).
    if (mode) spawnMode = mode
  },

  /** Register a session (and its URL) for the per-session list. */
  addSession(sessionId: string, url: string): void {
    sessionDisplayInfo.set(sessionId, { url })
  },

  /** Attach the latest activity to a registered session; unknown ids are ignored. */
  updateSessionActivity(sessionId: string, activity: SessionActivity): void {
    const entry = sessionDisplayInfo.get(sessionId)
    if (!entry) return
    entry.activity = activity
  },

  /** Store a session title and repaint, unless the reconnecting/failed display is active. */
  setSessionTitle(sessionId: string, title: string): void {
    const entry = sessionDisplayInfo.get(sessionId)
    if (!entry) return
    entry.title = title
    // While reconnecting/failed the spinner/error display owns the screen:
    // leave currentState alone and do not repaint.
    if (currentState === 'reconnecting' || currentState === 'failed') return
    if (sessionMax === 1) {
      // Single-session: show title in the main status line too.
      currentState = 'titled'
      currentStateText = truncatePrompt(title, 40)
    }
    renderStatusLine()
  },

  /** Forget a session; no repaint happens here. */
  removeSession(sessionId: string): void {
    sessionDisplayInfo.delete(sessionId)
  },

  /** Repaint the status block unless the reconnecting/failed display is active. */
  refreshDisplay(): void {
    // renderStatusLine returns without drawing in those states; the guard
    // here makes the skip explicit at the call site.
    const displayOwnedElsewhere =
      currentState === 'reconnecting' || currentState === 'failed'
    if (displayOwnedElsewhere) return
    renderStatusLine()
  },
}
}

56
bridge/capacityWake.ts Normal file
View file

@ -0,0 +1,56 @@
/**
* Shared capacity-wake primitive for bridge poll loops.
*
* Both replBridge.ts and bridgeMain.ts need to sleep while "at capacity"
* but wake early when either (a) the outer loop signal aborts (shutdown),
* or (b) capacity frees up (session done / transport lost). This module
* encapsulates the mutable wake-controller + two-signal merger that both
* poll loops previously duplicated byte-for-byte.
*/
/** A merged abort signal plus a function that detaches its source listeners. */
export type CapacitySignal = { signal: AbortSignal; cleanup: () => void }
export type CapacityWake = {
  /**
   * Create a signal that aborts when either the outer loop signal or the
   * capacity-wake controller fires. Returns the merged signal and a cleanup
   * function that removes listeners when the sleep resolves normally
   * (without abort). Calling cleanup after an abort is a harmless no-op.
   */
  signal(): CapacitySignal
  /**
   * Abort the current at-capacity sleep and arm a fresh controller so the
   * poll loop immediately re-checks for new work.
   */
  wake(): void
}
/**
 * Build the capacity-wake primitive for a poll loop.
 *
 * @param outerSignal Loop-lifetime signal; aborting it aborts every merged
 *   signal handed out by `signal()`.
 * @returns `signal()` to obtain a merged signal for one sleep, and `wake()`
 *   to cut the current sleep short.
 */
export function createCapacityWake(outerSignal: AbortSignal): CapacityWake {
  let wakeController = new AbortController()

  function wake(): void {
    wakeController.abort()
    // Re-arm immediately so the next signal() call starts un-aborted.
    wakeController = new AbortController()
  }

  function signal(): CapacitySignal {
    const merged = new AbortController()
    if (outerSignal.aborted || wakeController.signal.aborted) {
      merged.abort()
      return { signal: merged.signal, cleanup: () => {} }
    }
    // Capture this sleep's wake signal: wakeController is reassigned by wake().
    const capSig = wakeController.signal

    function cleanup(): void {
      outerSignal.removeEventListener('abort', abort)
      capSig.removeEventListener('abort', abort)
    }

    function abort(): void {
      // `once` only removes the listener on the source that fired. Detach
      // from the other source too; otherwise every wake() leaves a listener
      // behind on the long-lived outer signal.
      cleanup()
      merged.abort()
    }

    outerSignal.addEventListener('abort', abort, { once: true })
    capSig.addEventListener('abort', abort, { once: true })
    return { signal: merged.signal, cleanup }
  }

  return { signal, wake }
}

168
bridge/codeSessionApi.ts Normal file
View file

@ -0,0 +1,168 @@
/**
* Thin HTTP wrappers for the CCR v2 code-session API.
*
* Separate file from remoteBridgeCore.ts so the SDK /bridge subpath can
* export createCodeSession + fetchRemoteCredentials without bundling the
* heavy CLI tree (analytics, transport, etc.). Callers supply explicit
* accessToken + baseUrl — no implicit auth or config reads.
*/
import axios from 'axios'
import { logForDebugging } from '../utils/debug.js'
import { errorMessage } from '../utils/errors.js'
import { jsonStringify } from '../utils/slowOperations.js'
import { extractErrorDetail } from './debugUtils.js'
const ANTHROPIC_VERSION = '2023-06-01'
/** Build the JSON request headers carrying the OAuth bearer token. */
function oauthHeaders(accessToken: string): Record<string, string> {
  const headers: Record<string, string> = {}
  headers.Authorization = `Bearer ${accessToken}`
  headers['Content-Type'] = 'application/json'
  headers['anthropic-version'] = ANTHROPIC_VERSION
  return headers
}
/**
 * Create a code session via POST {baseUrl}/v1/code/sessions.
 *
 * @returns The new session id (must start with `cse_`), or null when the
 *   request throws, the status is not 200/201, or the body carries no such
 *   id. Failures are logged with logForDebugging and never thrown.
 */
export async function createCodeSession(
  baseUrl: string,
  accessToken: string,
  title: string,
  timeoutMs: number,
  tags?: string[],
): Promise<string | null> {
  // bridge: {} is the positive signal for the oneof runner — omitting it
  // (or sending environment_id: "") now 400s. BridgeRunner is an empty
  // message today; it's a placeholder for future bridge-specific options.
  const payload = { title, bridge: {}, ...(tags?.length ? { tags } : {}) }
  let response
  try {
    response = await axios.post(`${baseUrl}/v1/code/sessions`, payload, {
      headers: oauthHeaders(accessToken),
      timeout: timeoutMs,
      // 4xx responses resolve so their error body can be logged below.
      validateStatus: s => s < 500,
    })
  } catch (err: unknown) {
    logForDebugging(
      `[code-session] Session create request failed: ${errorMessage(err)}`,
    )
    return null
  }

  const created = response.status === 200 || response.status === 201
  if (!created) {
    const detail = extractErrorDetail(response.data)
    logForDebugging(
      `[code-session] Session create failed ${response.status}${detail ? `: ${detail}` : ''}`,
    )
    return null
  }

  // Walk data.session.id defensively: the body is untyped.
  const data: unknown = response.data
  const session =
    data && typeof data === 'object' && 'session' in data
      ? data.session
      : undefined
  const id =
    session && typeof session === 'object' && 'id' in session
      ? session.id
      : undefined
  if (typeof id !== 'string' || !id.startsWith('cse_')) {
    logForDebugging(
      `[code-session] No session.id (cse_*) in response: ${jsonStringify(data).slice(0, 200)}`,
    )
    return null
  }
  return id
}
/**
 * Credentials from POST /bridge. JWT is opaque — do not decode.
 * Each /bridge call bumps worker_epoch server-side (it IS the register).
 */
export type RemoteCredentials = {
// Opaque worker token; passed through as received.
worker_jwt: string
api_base_url: string
// NOTE(review): unit is not visible here — presumably seconds; confirm.
expires_in: number
// Always a number here: fetchRemoteCredentials converts string epochs.
worker_epoch: number
}
/**
 * Fetch worker credentials via POST {baseUrl}/v1/code/sessions/{id}/bridge.
 *
 * @param trustedDeviceToken Optional; sent as `X-Trusted-Device-Token`.
 * @returns The validated credentials, or null when the request throws, the
 *   status is not 200, or the body is malformed. Failures are logged with
 *   logForDebugging and never thrown.
 */
export async function fetchRemoteCredentials(
  sessionId: string,
  baseUrl: string,
  accessToken: string,
  timeoutMs: number,
  trustedDeviceToken?: string,
): Promise<RemoteCredentials | null> {
  const url = `${baseUrl}/v1/code/sessions/${sessionId}/bridge`
  const headers = oauthHeaders(accessToken)
  if (trustedDeviceToken) {
    headers['X-Trusted-Device-Token'] = trustedDeviceToken
  }
  let response
  try {
    response = await axios.post(
      url,
      {},
      {
        headers,
        timeout: timeoutMs,
        // 4xx responses resolve so their error body can be logged below.
        validateStatus: s => s < 500,
      },
    )
  } catch (err: unknown) {
    logForDebugging(
      `[code-session] /bridge request failed: ${errorMessage(err)}`,
    )
    return null
  }
  if (response.status !== 200) {
    const detail = extractErrorDetail(response.data)
    logForDebugging(
      `[code-session] /bridge failed ${response.status}${detail ? `: ${detail}` : ''}`,
    )
    return null
  }
  const data: unknown = response.data
  if (
    data === null ||
    typeof data !== 'object' ||
    !('worker_jwt' in data) ||
    typeof data.worker_jwt !== 'string' ||
    !('expires_in' in data) ||
    typeof data.expires_in !== 'number' ||
    !('api_base_url' in data) ||
    typeof data.api_base_url !== 'string' ||
    !('worker_epoch' in data)
  ) {
    logForDebugging(
      `[code-session] /bridge response malformed (need worker_jwt, expires_in, api_base_url, worker_epoch): ${jsonStringify(data).slice(0, 200)}`,
    )
    return null
  }
  // protojson serializes int64 as a string to avoid JS precision loss;
  // Go may also return a number depending on encoder settings.
  const rawEpoch = data.worker_epoch
  let epoch: unknown = rawEpoch
  if (typeof rawEpoch === 'string') {
    // Number('') and Number('  ') are 0, which would silently pass as
    // epoch 0 — treat a blank string as invalid instead.
    epoch = rawEpoch.trim() === '' ? Number.NaN : Number(rawEpoch)
  }
  // isSafeInteger also rejects NaN and ±Infinity.
  if (typeof epoch !== 'number' || !Number.isSafeInteger(epoch)) {
    logForDebugging(
      `[code-session] /bridge worker_epoch invalid: ${jsonStringify(rawEpoch)}`,
    )
    return null
  }
  return {
    worker_jwt: data.worker_jwt,
    api_base_url: data.api_base_url,
    expires_in: data.expires_in,
    worker_epoch: epoch,
  }
}

384
bridge/createSession.ts Normal file
View file

@ -0,0 +1,384 @@
import type { SDKMessage } from '../entrypoints/agentSdkTypes.js'
import { logForDebugging } from '../utils/debug.js'
import { errorMessage } from '../utils/errors.js'
import { extractErrorDetail } from './debugUtils.js'
import { toCompatSessionId } from './sessionIdCompat.js'
// Git repository entry placed in session_context.sources.
type GitSource = {
type: 'git_repository'
url: string
// Branch name or default branch; omitted when neither is known.
revision?: string
}
// Git outcome entry placed in session_context.outcomes.
type GitOutcome = {
type: 'git_repository'
git_info: { type: 'github'; repo: string; branches: string[] }
}
// Events must be wrapped in { type: 'event', data: <sdk_message> } for the
// POST /v1/sessions endpoint (discriminated union format).
type SessionEvent = {
type: 'event'
data: SDKMessage
}
/**
 * Create a session on a bridge environment via POST /v1/sessions.
 *
 * Used by both `claude remote-control` (empty session so the user has
 * somewhere to type immediately) and `/remote-control` (session
 * pre-populated with conversation history).
 *
 * @returns The session ID on success, or null when credentials are missing,
 *   the request throws, the status is not 200/201, or the body has no string
 *   `id` (all non-fatal and logged with logForDebugging).
 */
export async function createBridgeSession({
  environmentId,
  title,
  events,
  gitRepoUrl,
  branch,
  signal,
  baseUrl: baseUrlOverride,
  getAccessToken,
  permissionMode,
}: {
  environmentId: string
  title?: string
  events: SessionEvent[]
  gitRepoUrl: string | null
  branch: string
  signal: AbortSignal
  baseUrl?: string
  getAccessToken?: () => string | undefined
  permissionMode?: string
}): Promise<string | null> {
  const { getClaudeAIOAuthTokens } = await import('../utils/auth.js')
  const { getOrganizationUUID } = await import('../services/oauth/client.js')
  const { getOauthConfig } = await import('../constants/oauth.js')
  const { getOAuthHeaders } = await import('../utils/teleport/api.js')
  const { parseGitHubRepository, parseGitRemote } = await import(
    '../utils/detectRepository.js'
  )
  const { getDefaultBranch } = await import('../utils/git.js')
  const { getMainLoopModel } = await import('../utils/model/model.js')
  const { default: axios } = await import('axios')

  // An explicit token getter wins; otherwise use the stored OAuth tokens.
  const accessToken =
    getAccessToken?.() ?? getClaudeAIOAuthTokens()?.accessToken
  if (!accessToken) {
    logForDebugging('[bridge] No access token for session creation')
    return null
  }
  const orgUUID = await getOrganizationUUID()
  if (!orgUUID) {
    logForDebugging('[bridge] No org UUID for session creation')
    return null
  }

  // Build git source and outcome context
  let gitSource: GitSource | null = null
  let gitOutcome: GitOutcome | null = null
  if (gitRepoUrl) {
    // Preferred: a full remote parse (host/owner/name). Fallback: bare
    // owner/repo format, which is assumed to live on github.com.
    let repo: { host: string; owner: string; name: string } | null = null
    const remote = parseGitRemote(gitRepoUrl)
    if (remote) {
      repo = { host: remote.host, owner: remote.owner, name: remote.name }
    } else {
      const ownerRepo = parseGitHubRepository(gitRepoUrl)
      if (ownerRepo) {
        const [owner, name] = ownerRepo.split('/')
        if (owner && name) {
          repo = { host: 'github.com', owner, name }
        }
      }
    }
    if (repo) {
      // The default branch is only looked up when no branch was supplied.
      const revision = branch || (await getDefaultBranch()) || undefined
      gitSource = {
        type: 'git_repository',
        url: `https://${repo.host}/${repo.owner}/${repo.name}`,
        revision,
      }
      gitOutcome = {
        type: 'git_repository',
        git_info: {
          type: 'github',
          repo: `${repo.owner}/${repo.name}`,
          branches: [`claude/${branch || 'task'}`],
        },
      }
    }
  }

  const requestBody = {
    ...(title !== undefined && { title }),
    events,
    session_context: {
      sources: gitSource ? [gitSource] : [],
      outcomes: gitOutcome ? [gitOutcome] : [],
      model: getMainLoopModel(),
    },
    environment_id: environmentId,
    source: 'remote-control',
    ...(permissionMode && { permission_mode: permissionMode }),
  }
  const headers = {
    ...getOAuthHeaders(accessToken),
    'anthropic-beta': 'ccr-byoc-2025-07-29',
    'x-organization-uuid': orgUUID,
  }
  const apiBase = baseUrlOverride ?? getOauthConfig().BASE_API_URL

  let response
  try {
    response = await axios.post(`${apiBase}/v1/sessions`, requestBody, {
      headers,
      signal,
      // 4xx responses resolve so their error body can be logged below.
      validateStatus: s => s < 500,
    })
  } catch (err: unknown) {
    logForDebugging(
      `[bridge] Session creation request failed: ${errorMessage(err)}`,
    )
    return null
  }

  if (response.status !== 200 && response.status !== 201) {
    const detail = extractErrorDetail(response.data)
    logForDebugging(
      `[bridge] Session creation failed with status ${response.status}${detail ? `: ${detail}` : ''}`,
    )
    return null
  }

  const sessionData: unknown = response.data
  const id =
    sessionData && typeof sessionData === 'object' && 'id' in sessionData
      ? sessionData.id
      : undefined
  if (typeof id !== 'string') {
    logForDebugging('[bridge] No session ID in response')
    return null
  }
  return id
}
/**
 * Fetch a bridge session via GET /v1/sessions/{id}.
 *
 * Returns the session's environment_id (for `--session-id` resume) and title.
 * Uses the same org-scoped headers as create/archive — the environments-level
 * client in bridgeApi.ts uses a different beta header and no org UUID, which
 * makes the Sessions API return 404.
 *
 * @returns The response body on HTTP 200; null when credentials are missing,
 *   the request throws, or any other status comes back.
 */
export async function getBridgeSession(
  sessionId: string,
  opts?: { baseUrl?: string; getAccessToken?: () => string | undefined },
): Promise<{ environment_id?: string; title?: string } | null> {
  const { getClaudeAIOAuthTokens } = await import('../utils/auth.js')
  const { getOrganizationUUID } = await import('../services/oauth/client.js')
  const { getOauthConfig } = await import('../constants/oauth.js')
  const { getOAuthHeaders } = await import('../utils/teleport/api.js')
  const { default: axios } = await import('axios')

  const accessToken =
    opts?.getAccessToken?.() ?? getClaudeAIOAuthTokens()?.accessToken
  if (!accessToken) {
    logForDebugging('[bridge] No access token for session fetch')
    return null
  }
  const orgUUID = await getOrganizationUUID()
  if (!orgUUID) {
    logForDebugging('[bridge] No org UUID for session fetch')
    return null
  }

  const headers = {
    ...getOAuthHeaders(accessToken),
    'anthropic-beta': 'ccr-byoc-2025-07-29',
    'x-organization-uuid': orgUUID,
  }
  const apiBase = opts?.baseUrl ?? getOauthConfig().BASE_API_URL
  const endpoint = `${apiBase}/v1/sessions/${sessionId}`
  logForDebugging(`[bridge] Fetching session ${sessionId}`)

  let response
  try {
    response = await axios.get<{ environment_id?: string; title?: string }>(
      endpoint,
      { headers, timeout: 10_000, validateStatus: s => s < 500 },
    )
  } catch (err: unknown) {
    logForDebugging(
      `[bridge] Session fetch request failed: ${errorMessage(err)}`,
    )
    return null
  }

  if (response.status === 200) {
    return response.data
  }
  const detail = extractErrorDetail(response.data)
  logForDebugging(
    `[bridge] Session fetch failed with status ${response.status}${detail ? `: ${detail}` : ''}`,
  )
  return null
}
/**
 * Archive a bridge session via POST /v1/sessions/{id}/archive.
 *
 * The CCR server never auto-archives sessions — archival is always an
 * explicit client action. Both `claude remote-control` (standalone bridge)
 * and the always-on `/remote-control` REPL bridge call this during shutdown
 * to archive any sessions that are still alive.
 *
 * The archive endpoint accepts sessions in any status (running, idle,
 * requires_action, pending) and returns 409 if already archived, making it
 * safe to call even if the server-side runner already archived the session.
 *
 * Callers must handle errors — this function has no try/catch; 5xx,
 * timeouts, and network errors throw. Archival is best-effort during
 * cleanup; call sites wrap with .catch(). Missing credentials and non-200
 * statuses below 500 are only logged.
 */
export async function archiveBridgeSession(
  sessionId: string,
  opts?: {
    baseUrl?: string
    getAccessToken?: () => string | undefined
    timeoutMs?: number
  },
): Promise<void> {
  const { getClaudeAIOAuthTokens } = await import('../utils/auth.js')
  const { getOrganizationUUID } = await import('../services/oauth/client.js')
  const { getOauthConfig } = await import('../constants/oauth.js')
  const { getOAuthHeaders } = await import('../utils/teleport/api.js')
  const { default: axios } = await import('axios')

  const accessToken =
    opts?.getAccessToken?.() ?? getClaudeAIOAuthTokens()?.accessToken
  if (!accessToken) {
    logForDebugging('[bridge] No access token for session archive')
    return
  }
  const orgUUID = await getOrganizationUUID()
  if (!orgUUID) {
    logForDebugging('[bridge] No org UUID for session archive')
    return
  }

  const headers = {
    ...getOAuthHeaders(accessToken),
    'anthropic-beta': 'ccr-byoc-2025-07-29',
    'x-organization-uuid': orgUUID,
  }
  const apiBase = opts?.baseUrl ?? getOauthConfig().BASE_API_URL
  const endpoint = `${apiBase}/v1/sessions/${sessionId}/archive`
  logForDebugging(`[bridge] Archiving session ${sessionId}`)

  // Deliberately not wrapped in try/catch: see the contract above.
  const requestConfig = {
    headers,
    timeout: opts?.timeoutMs ?? 10_000,
    validateStatus: (s: number) => s < 500,
  }
  const response = await axios.post(endpoint, {}, requestConfig)

  if (response.status !== 200) {
    const detail = extractErrorDetail(response.data)
    logForDebugging(
      `[bridge] Session archive failed with status ${response.status}${detail ? `: ${detail}` : ''}`,
    )
    return
  }
  logForDebugging(`[bridge] Session ${sessionId} archived successfully`)
}
/**
 * Update the title of a bridge session via PATCH /v1/sessions/{id}.
 *
 * Called when the user renames a session via /rename while a bridge
 * connection is active, so the title stays in sync on claude.ai/code.
 *
 * Errors are swallowed — title sync is best-effort. Missing credentials,
 * request failures and non-200 statuses are only logged.
 */
export async function updateBridgeSessionTitle(
  sessionId: string,
  title: string,
  opts?: { baseUrl?: string; getAccessToken?: () => string | undefined },
): Promise<void> {
  const { getClaudeAIOAuthTokens } = await import('../utils/auth.js')
  const { getOrganizationUUID } = await import('../services/oauth/client.js')
  const { getOauthConfig } = await import('../constants/oauth.js')
  const { getOAuthHeaders } = await import('../utils/teleport/api.js')
  const { default: axios } = await import('axios')
  const accessToken =
    opts?.getAccessToken?.() ?? getClaudeAIOAuthTokens()?.accessToken
  if (!accessToken) {
    logForDebugging('[bridge] No access token for session title update')
    return
  }
  const orgUUID = await getOrganizationUUID()
  if (!orgUUID) {
    logForDebugging('[bridge] No org UUID for session title update')
    return
  }
  const headers = {
    ...getOAuthHeaders(accessToken),
    'anthropic-beta': 'ccr-byoc-2025-07-29',
    'x-organization-uuid': orgUUID,
  }
  // Compat gateway only accepts session_* (compat/convert.go:27). v2 callers
  // pass raw cse_*; retag here so all callers can pass whatever they hold.
  // Idempotent for v1's session_* and bridgeMain's pre-converted compatSessionId.
  const compatId = toCompatSessionId(sessionId)
  const url = `${opts?.baseUrl ?? getOauthConfig().BASE_API_URL}/v1/sessions/${compatId}`
  // Separate id and title so the two values stay distinguishable in the log.
  logForDebugging(`[bridge] Updating session title: ${compatId} -> ${title}`)
  try {
    const response = await axios.patch(
      url,
      { title },
      { headers, timeout: 10_000, validateStatus: s => s < 500 },
    )
    if (response.status === 200) {
      logForDebugging(`[bridge] Session title updated successfully`)
    } else {
      const detail = extractErrorDetail(response.data)
      logForDebugging(
        `[bridge] Session title update failed with status ${response.status}${detail ? `: ${detail}` : ''}`,
      )
    }
  } catch (err: unknown) {
    logForDebugging(
      `[bridge] Session title update request failed: ${errorMessage(err)}`,
    )
  }
}

141
bridge/debugUtils.ts Normal file
View file

@ -0,0 +1,141 @@
import {
type AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
logEvent,
} from '../services/analytics/index.js'
import { logForDebugging } from '../utils/debug.js'
import { errorMessage } from '../utils/errors.js'
import { jsonStringify } from '../utils/slowOperations.js'
// Maximum characters debugTruncate/debugBody keep before truncating.
const DEBUG_MSG_LIMIT = 2000
// JSON field names whose string values are masked by redactSecrets.
const SECRET_FIELD_NAMES = [
  'session_ingress_token',
  'environment_secret',
  'access_token',
  'secret',
  'token',
]
// Matches `"<field>": "<value>"`. The value part consumes JSON escape pairs
// (e.g. \" and \\) so an escaped quote inside a secret does not end the match
// early and leave the remainder of the secret unredacted.
const SECRET_PATTERN = new RegExp(
  `"(${SECRET_FIELD_NAMES.join('|')})"\\s*:\\s*"((?:[^"\\\\]|\\\\.)*)"`,
  'g',
)
// Values shorter than this are replaced entirely rather than partially shown.
const REDACT_MIN_LENGTH = 16
/**
 * Mask the values of known secret fields inside a JSON-like string.
 *
 * Short values become `[REDACTED]`; longer ones keep their first 8 and last
 * 4 characters around `...`. Whitespace around the colon is dropped in the
 * rewritten pair; everything else in `s` is returned unchanged.
 */
export function redactSecrets(s: string): string {
  return s.replace(SECRET_PATTERN, (_match, field: string, value: string) => {
    if (value.length < REDACT_MIN_LENGTH) {
      return `"${field}":"[REDACTED]"`
    }
    const redacted = `${value.slice(0, 8)}...${value.slice(-4)}`
    return `"${field}":"${redacted}"`
  })
}
/**
 * Prepare a string for debug logging: newlines become a literal `\n` and
 * output longer than DEBUG_MSG_LIMIT is cut, with the full length appended.
 */
export function debugTruncate(s: string): string {
  const singleLine = s.replace(/\n/g, '\\n')
  const overLimit = singleLine.length > DEBUG_MSG_LIMIT
  return overLimit
    ? singleLine.slice(0, DEBUG_MSG_LIMIT) + `... (${singleLine.length} chars)`
    : singleLine
}
/**
 * Render a JSON-serializable value for debug logging. Strings are used as-is,
 * other values are serialized; secrets are redacted before the result is cut
 * to DEBUG_MSG_LIMIT characters (with the full length appended).
 */
export function debugBody(data: unknown): string {
  const serialized = typeof data === 'string' ? data : jsonStringify(data)
  const safe = redactSecrets(serialized)
  const overLimit = safe.length > DEBUG_MSG_LIMIT
  return overLimit
    ? safe.slice(0, DEBUG_MSG_LIMIT) + `... (${safe.length} chars)`
    : safe
}
/**
 * Describe an axios error (or any error) for logging.
 *
 * Starts from errorMessage(err). When the error carries an object response
 * body with a string `message` — or, failing that, a string `error.message`
 * — that non-empty text is appended after ": ".
 */
export function describeAxiosError(err: unknown): string {
  const msg = errorMessage(err)
  if (!err || typeof err !== 'object' || !('response' in err)) {
    return msg
  }
  const body = (err as { response?: { data?: unknown } }).response?.data
  if (!body || typeof body !== 'object') {
    return msg
  }
  const fields = body as Record<string, unknown>
  let detail: unknown
  if (typeof fields.message === 'string') {
    // A top-level message wins over a nested error.message.
    detail = fields.message
  } else {
    const nested = fields.error
    if (
      typeof nested === 'object' &&
      nested &&
      'message' in nested &&
      typeof (nested as Record<string, unknown>).message === 'string'
    ) {
      detail = (nested as Record<string, unknown>).message
    }
  }
  return detail ? `${msg}: ${detail}` : msg
}
/**
 * Read the HTTP status code off an axios-style error.
 *
 * @returns `err.response.status` when it is a number; undefined for anything
 *   else (non-objects, errors without a response, non-numeric status).
 */
export function extractHttpStatus(err: unknown): number | undefined {
  if (typeof err !== 'object' || err === null || !('response' in err)) {
    return undefined
  }
  const response: unknown = err.response
  if (!response) {
    return undefined
  }
  const status = (response as { status?: unknown }).status
  return typeof status === 'number' ? status : undefined
}
/**
 * Pull a human-readable message out of an API error response body.
 *
 * @returns `data.message` when it is a string; otherwise `data.error.message`
 *   when that is a string; otherwise undefined.
 */
export function extractErrorDetail(data: unknown): string | undefined {
  if (typeof data !== 'object' || data === null) {
    return undefined
  }
  const topLevel = 'message' in data ? data.message : undefined
  if (typeof topLevel === 'string') {
    return topLevel
  }
  const nested = 'error' in data ? data.error : undefined
  if (typeof nested !== 'object' || nested === null) {
    return undefined
  }
  const nestedMessage = 'message' in nested ? nested.message : undefined
  return typeof nestedMessage === 'string' ? nestedMessage : undefined
}
/**
 * Record that bridge init was skipped: optionally write a debug message,
 * then emit the `tengu_bridge_repl_skipped` analytics event. Centralizes the
 * event name and the AnalyticsMetadata cast so call sites don't each repeat
 * the boilerplate.
 *
 * @param reason Skip reason sent with the event.
 * @param debugMsg Debug log text; nothing is logged when empty or omitted.
 * @param v2 Included in the event payload only when defined.
 */
export function logBridgeSkip(
  reason: string,
  debugMsg?: string,
  v2?: boolean,
): void {
  if (debugMsg) {
    logForDebugging(debugMsg)
  }
  const payload = {
    reason:
      reason as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
    ...(v2 !== undefined && { v2 }),
  }
  logEvent('tengu_bridge_repl_skipped', payload)
}

View file

@ -0,0 +1,165 @@
import { z } from 'zod/v4'
import { getFeatureValue_DEPRECATED } from '../services/analytics/growthbook.js'
import { lazySchema } from '../utils/lazySchema.js'
import { lt } from '../utils/semver.js'
import { isEnvLessBridgeEnabled } from './bridgeEnabled.js'
/**
 * Tunable parameters for the env-less (v2) bridge path: init retry backoff,
 * HTTP timeouts, dedup buffer size, heartbeat cadence, token refresh timing,
 * and version gating. Fields ending in `_ms` are durations in milliseconds.
 */
export type EnvLessBridgeConfig = {
// withRetry — init-phase backoff (createSession, POST /bridge, recovery /bridge)
init_retry_max_attempts: number
init_retry_base_delay_ms: number
init_retry_jitter_fraction: number
init_retry_max_delay_ms: number
// axios timeout for POST /sessions, POST /bridge, POST /archive
http_timeout_ms: number
// BoundedUUIDSet ring size (echo + re-delivery dedup)
uuid_dedup_buffer_size: number
// CCRClient worker heartbeat cadence. Server TTL is 60s — 20s gives 3× margin.
heartbeat_interval_ms: number
// ±fraction of interval — per-beat jitter to spread fleet load.
heartbeat_jitter_fraction: number
// Fire proactive JWT refresh this long before expires_in. Larger buffer =
// more frequent refresh (refresh cadence ≈ expires_in - buffer).
token_refresh_buffer_ms: number
// Archive POST timeout in teardown(). Distinct from http_timeout_ms because
// gracefulShutdown races runCleanupFunctions() against a 2s cap — a 10s
// axios timeout on a slow/stalled archive burns the whole budget on a
// request that forceExit will kill anyway.
teardown_archive_timeout_ms: number
// Deadline for onConnect after transport.connect(). If neither onConnect
// nor onClose fires before this, emit tengu_bridge_repl_connect_timeout
// — the only telemetry for the ~1% of sessions that emit `started` then
// go silent (no error, no event, just nothing).
connect_timeout_ms: number
// Semver floor for the env-less bridge path. Separate from the v1
// tengu_bridge_min_version config so a v2-specific bug can force upgrades
// without blocking v1 (env-based) clients, and vice versa.
min_version: string
// When true, tell users their claude.ai app may be too old to see v2
// sessions — lets us roll the v2 bridge before the app ships the new
// session-list query.
should_show_app_upgrade_message: boolean
}
/**
 * Built-in defaults for the env-less bridge config. Passed to the feature
 * getter as the fallback value and returned wholesale by
 * getEnvLessBridgeConfig() when the fetched value fails schema validation.
 * Each value mirrors the corresponding `.default(...)` in
 * envLessBridgeConfigSchema.
 */
export const DEFAULT_ENV_LESS_BRIDGE_CONFIG: EnvLessBridgeConfig = {
init_retry_max_attempts: 3,
init_retry_base_delay_ms: 500,
init_retry_jitter_fraction: 0.25,
init_retry_max_delay_ms: 4000,
http_timeout_ms: 10_000,
uuid_dedup_buffer_size: 2000,
heartbeat_interval_ms: 20_000,
heartbeat_jitter_fraction: 0.1,
token_refresh_buffer_ms: 300_000,
teardown_archive_timeout_ms: 1500,
connect_timeout_ms: 15_000,
// '0.0.0' acts as "no floor": checkEnvLessBridgeMinVersion only rejects
// versions that compare lower than min_version.
min_version: '0.0.0',
should_show_app_upgrade_message: false,
}
// Validation schema for the remote env-less bridge config. Every field has a
// default, so a partial object parses; any out-of-range field makes the
// parse fail as a whole.
// Floors reject the whole object on violation (fall back to DEFAULT) rather
// than partially trusting — same defense-in-depth as pollConfig.ts.
// NOTE(review): lazySchema presumably defers building the zod object until
// the first call — confirm in utils/lazySchema.
const envLessBridgeConfigSchema = lazySchema(() =>
z.object({
init_retry_max_attempts: z.number().int().min(1).max(10).default(3),
init_retry_base_delay_ms: z.number().int().min(100).default(500),
init_retry_jitter_fraction: z.number().min(0).max(1).default(0.25),
init_retry_max_delay_ms: z.number().int().min(500).default(4000),
http_timeout_ms: z.number().int().min(2000).default(10_000),
uuid_dedup_buffer_size: z.number().int().min(100).max(50_000).default(2000),
// Server TTL is 60s. Floor 5s prevents thrash; cap 30s keeps ≥2× margin.
heartbeat_interval_ms: z
.number()
.int()
.min(5000)
.max(30_000)
.default(20_000),
// ±fraction per beat. Cap 0.5: at max interval (30s) × 1.5 = 45s worst case,
// still under the 60s TTL.
heartbeat_jitter_fraction: z.number().min(0).max(0.5).default(0.1),
// Floor 30s prevents tight-looping. Cap 30min rejects buffer-vs-delay
// semantic inversion: ops entering expires_in-5min (the *delay until
// refresh*) instead of 5min (the *buffer before expiry*) yields
// delayMs = expires_in - buffer ≈ 5min instead of ≈4h. Both are positive
// durations so .min() alone can't distinguish; .max() catches the
// inverted value since buffer ≥ 30min is nonsensical for a multi-hour JWT.
token_refresh_buffer_ms: z
.number()
.int()
.min(30_000)
.max(1_800_000)
.default(300_000),
// Cap 2000 keeps this under gracefulShutdown's 2s cleanup race — a higher
// timeout just lies to axios since forceExit kills the socket regardless.
teardown_archive_timeout_ms: z
.number()
.int()
.min(500)
.max(2000)
.default(1500),
// Observed p99 connect is ~2-3s; 15s is ~5× headroom. Floor 5s bounds
// false-positive rate under transient slowness; cap 60s bounds how long
// a truly-stalled session stays dark.
connect_timeout_ms: z.number().int().min(5_000).max(60_000).default(15_000),
// Accept only strings that lt() can compare without throwing; the
// comparison result itself is discarded.
// NOTE(review): presumably lt() throws on strings that are not valid
// semver — confirm in utils/semver.
min_version: z
.string()
.refine(v => {
try {
lt(v, '0.0.0')
return true
} catch {
return false
}
})
.default('0.0.0'),
should_show_app_upgrade_message: z.boolean().default(false),
}),
)
/**
 * Fetch the env-less bridge timing config from GrowthBook. Read once per
 * initEnvLessBridgeCore call — config is fixed for the lifetime of a bridge
 * session.
 *
 * Uses the blocking getter (not _CACHED_MAY_BE_STALE) because /remote-control
 * runs well after GrowthBook init — initializeGrowthBook() resolves instantly,
 * so there's no startup penalty, and we get the fresh in-memory remoteEval
 * value instead of the stale-on-first-read disk cache. The _DEPRECATED suffix
 * warns against startup-path usage, which this isn't.
 *
 * @returns The validated remote config, or DEFAULT_ENV_LESS_BRIDGE_CONFIG
 *   when the fetched value does not pass envLessBridgeConfigSchema.
 */
export async function getEnvLessBridgeConfig(): Promise<EnvLessBridgeConfig> {
  const remoteValue = await getFeatureValue_DEPRECATED<unknown>(
    'tengu_bridge_repl_v2_config',
    DEFAULT_ENV_LESS_BRIDGE_CONFIG,
  )
  const result = envLessBridgeConfigSchema().safeParse(remoteValue)
  // Any validation failure discards the whole remote object.
  if (!result.success) return DEFAULT_ENV_LESS_BRIDGE_CONFIG
  return result.data
}
/**
 * Check the running CLI version against the env-less (v2) bridge floor.
 *
 * v2 analogue of checkBridgeMinVersion() — reads from
 * tengu_bridge_repl_v2_config instead of tengu_bridge_min_version so the two
 * implementations can enforce independent floors.
 *
 * @returns A user-facing error message when MACRO.VERSION is lower than the
 *   configured `min_version`, otherwise `null`.
 */
export async function checkEnvLessBridgeMinVersion(): Promise<string | null> {
  const cfg = await getEnvLessBridgeConfig()
  // An empty min_version means "no floor"; lt() is not consulted in that case.
  const belowFloor =
    Boolean(cfg.min_version) && lt(MACRO.VERSION, cfg.min_version)
  if (!belowFloor) return null
  return `Your version of Claude Code (${MACRO.VERSION}) is too old for Remote Control.\nVersion ${cfg.min_version} or higher is required. Run \`claude update\` to update.`
}
/**
 * Decide whether to nudge users toward upgrading their claude.ai app when a
 * Remote Control session starts.
 *
 * True only when the v2 (env-less) bridge is active AND the
 * should_show_app_upgrade_message config bit is set — lets us roll the v2
 * bridge before the app ships the new session-list query.
 *
 * @returns `false` without fetching config when the env-less bridge is off;
 *   otherwise the configured flag.
 */
export async function shouldShowAppUpgradeMessage(): Promise<boolean> {
  if (isEnvLessBridgeEnabled()) {
    const { should_show_app_upgrade_message: showNudge } =
      await getEnvLessBridgeConfig()
    return showNudge
  }
  return false
}

71
bridge/flushGate.ts Normal file
View file

@ -0,0 +1,71 @@
/**
 * Gate that buffers outgoing messages while an initial flush is in flight.
 *
 * When a bridge session starts, historical messages are flushed to the
 * server via a single HTTP POST. Messages produced during that flush must
 * be held back so they do not reach the server interleaved with the
 * historical ones.
 *
 * Lifecycle:
 *   start()      → enqueue() returns true, items are queued
 *   end()        → returns queued items for draining, enqueue() returns false
 *   drop()       → discards queued items (permanent transport close)
 *   deactivate() → clears the active flag without dropping items
 *                  (transport replacement — the new transport will drain)
 */
export class FlushGate<T> {
  private _active = false
  private _pending: T[] = []

  /** True while a flush is in progress and enqueue() is buffering. */
  get active(): boolean {
    return this._active
  }

  /** Number of items currently buffered. */
  get pendingCount(): number {
    return this._pending.length
  }

  /** Mark the flush as in progress; enqueue() starts buffering. */
  start(): void {
    this._active = true
  }

  /**
   * Finish the flush and hand back everything buffered so far.
   * The caller is responsible for sending the returned items.
   */
  end(): T[] {
    const drained = this._pending
    // Swap in a fresh buffer so the returned array is detached from the gate.
    this._pending = []
    this._active = false
    return drained
  }

  /**
   * Buffer `items` when a flush is active.
   *
   * @returns `true` if the items were queued; `false` if no flush is active
   *   (the caller should send them directly).
   */
  enqueue(...items: T[]): boolean {
    if (this._active) {
      this._pending.push(...items)
      return true
    }
    return false
  }

  /**
   * Throw away everything buffered (permanent transport close).
   *
   * @returns The number of items discarded.
   */
  drop(): number {
    const dropped = this._pending.length
    this._pending = []
    this._active = false
    return dropped
  }

  /**
   * Clear the active flag but keep buffered items. Used when the transport
   * is replaced (onWorkReceived) — the new transport's flush will drain the
   * pending items.
   */
  deactivate(): void {
    this._active = false
  }
}

View file

@ -0,0 +1,175 @@
/**
* Resolve file_uuid attachments on inbound bridge user messages.
*
* Web composer uploads via cookie-authed /api/{org}/upload, sends file_uuid
* alongside the message. Here we fetch each via GET /api/oauth/files/{uuid}/content
* (oauth-authed, same store), write to ~/.claude/uploads/{sessionId}/, and
* return @path refs to prepend. Claude's Read tool takes it from there.
*
* Best-effort: any failure (no token, network, non-2xx, disk) logs debug and
* skips that attachment. The message still reaches Claude, just without @path.
*/
import type { ContentBlockParam } from '@anthropic-ai/sdk/resources/messages.mjs'
import axios from 'axios'
import { randomUUID } from 'crypto'
import { mkdir, writeFile } from 'fs/promises'
import { basename, join } from 'path'
import { z } from 'zod/v4'
import { getSessionId } from '../bootstrap/state.js'
import { logForDebugging } from '../utils/debug.js'
import { getClaudeConfigHomeDir } from '../utils/envUtils.js'
import { lazySchema } from '../utils/lazySchema.js'
import { getBridgeAccessToken, getBridgeBaseUrl } from './bridgeConfig.js'
// Timeout in milliseconds passed to axios for each attachment download.
const DOWNLOAD_TIMEOUT_MS = 30_000
/** Write a debug line tagged with this module's `[bridge:inbound-attach]` prefix. */
function debug(msg: string): void {
  const line = `[bridge:inbound-attach] ${msg}`
  logForDebugging(line)
}
// Shape of one entry in an inbound message's `file_attachments` array:
// the uploaded file's UUID plus the file name reported by the sender.
const attachmentSchema = lazySchema(() =>
z.object({
file_uuid: z.string(),
file_name: z.string(),
}),
)
// Schema for the whole `file_attachments` value; a single invalid entry
// fails the parse of the entire array.
const attachmentsArraySchema = lazySchema(() => z.array(attachmentSchema()))
/** A validated attachment record (`file_uuid` + `file_name`). */
export type InboundAttachment = z.infer<ReturnType<typeof attachmentSchema>>
/**
 * Read `file_attachments` from a loosely-typed inbound message.
 *
 * @param msg - Inbound message of unknown shape.
 * @returns The validated attachments, or an empty array when `msg` is not an
 *   object, has no `file_attachments` field, or the field fails validation.
 */
export function extractInboundAttachments(msg: unknown): InboundAttachment[] {
  if (typeof msg === 'object' && msg !== null && 'file_attachments' in msg) {
    const result = attachmentsArraySchema().safeParse(msg.file_attachments)
    if (result.success) return result.data
  }
  return []
}
/**
 * Reduce an untrusted file name to a safe single path segment.
 *
 * Drops any directory components, then replaces every character outside
 * `[a-zA-Z0-9._-]` with `_`. file_name arrives over the network (web
 * composer), so it is treated as untrusted even though the composer
 * controls it.
 *
 * @param name - File name as received.
 * @returns The sanitized name, or `'attachment'` if nothing is left.
 */
function sanitizeFileName(name: string): string {
  const leaf = basename(name)
  const cleaned = leaf.replace(/[^a-zA-Z0-9._-]/g, '_')
  return cleaned.length > 0 ? cleaned : 'attachment'
}
/** Per-session uploads directory: `<claude config home>/uploads/<sessionId>`. */
function uploadsDir(): string {
  const configHome = getClaudeConfigHomeDir()
  const sessionId = getSessionId()
  return join(configHome, 'uploads', sessionId)
}
/**
 * Fetch one attachment and write it into the session uploads directory.
 *
 * Best-effort: a missing token, a thrown request, a non-200 status, or a
 * disk error is logged via debug() and reported as `undefined`; nothing is
 * thrown to the caller.
 *
 * @param att - Attachment record (`file_uuid`, `file_name`) to download.
 * @returns The path of the written file on success, `undefined` on any
 *   failure.
 */
async function resolveOne(att: InboundAttachment): Promise<string | undefined> {
  const token = getBridgeAccessToken()
  if (!token) {
    debug('skip: no oauth token')
    return undefined
  }

  let data: Buffer
  try {
    // getOauthConfig() (via getBridgeBaseUrl) throws on a non-allowlisted
    // CLAUDE_CODE_CUSTOM_OAUTH_URL — keep it inside the try so a bad
    // FedStart URL degrades to "no @path" instead of crashing print.ts's
    // reader loop (which has no catch around the await).
    const url = `${getBridgeBaseUrl()}/api/oauth/files/${encodeURIComponent(att.file_uuid)}/content`
    const response = await axios.get(url, {
      headers: { Authorization: `Bearer ${token}` },
      responseType: 'arraybuffer',
      timeout: DOWNLOAD_TIMEOUT_MS,
      // Never reject on HTTP status; the status is checked explicitly below.
      validateStatus: () => true,
    })
    if (response.status !== 200) {
      debug(`fetch ${att.file_uuid} failed: status=${response.status}`)
      return undefined
    }
    data = Buffer.from(response.data)
  } catch (e) {
    debug(`fetch ${att.file_uuid} threw: ${e}`)
    return undefined
  }

  // uuid-prefix makes collisions impossible across messages and within one
  // (same filename, different files). 8 chars is enough — this isn't security.
  const safeName = sanitizeFileName(att.file_name)
  const prefix = (
    att.file_uuid.slice(0, 8) || randomUUID().slice(0, 8)
  ).replace(/[^a-zA-Z0-9_-]/g, '_')
  const dir = uploadsDir()
  const outPath = join(dir, `${prefix}-${safeName}`)

  try {
    await mkdir(dir, { recursive: true })
    await writeFile(outPath, data)
  } catch (e) {
    debug(`write ${outPath} failed: ${e}`)
    return undefined
  }

  // Separator between uuid and path keeps the log line readable.
  debug(`resolved ${att.file_uuid} → ${outPath} (${data.length} bytes)`)
  return outPath
}
/**
 * Resolve every attachment on an inbound message into a prefix string of
 * quoted `@"path"` refs, each followed by a single space.
 *
 * @param attachments - Attachments to download; all are resolved in parallel.
 * @returns The prefix string, or `''` when the list is empty or no
 *   attachment could be resolved.
 */
export async function resolveInboundAttachments(
  attachments: InboundAttachment[],
): Promise<string> {
  if (attachments.length === 0) return ''
  debug(`resolving ${attachments.length} attachment(s)`)

  const settled = await Promise.all(attachments.map(resolveOne))

  // Quoted form — extractAtMentionedFiles truncates unquoted @refs at the
  // first space, which breaks any home dir with spaces (/Users/John Smith/).
  let refs = ''
  for (const path of settled) {
    if (path !== undefined) refs += `@"${path}" `
  }
  return refs
}
/**
 * Prepend @path refs to message content, whichever form it is in.
 *
 * For block content the LAST text block is targeted — processUserInputBase
 * reads inputString from processedBlocks[processedBlocks.length - 1], so
 * putting refs in block[0] means they're silently ignored for
 * [text, image] content.
 *
 * @param content - Plain string or array of content blocks.
 * @param prefix - Ref string to prepend; an empty prefix is a no-op.
 * @returns `content` itself when `prefix` is empty; otherwise a new string
 *   or a new array (the input array is not mutated).
 */
export function prependPathRefs(
  content: string | Array<ContentBlockParam>,
  prefix: string,
): string | Array<ContentBlockParam> {
  if (!prefix) return content
  if (typeof content === 'string') return prefix + content

  // Walk backwards to find the last text block and rewrite it in a copy.
  for (let idx = content.length - 1; idx >= 0; idx--) {
    const block = content[idx]!
    if (block.type !== 'text') continue
    const before = content.slice(0, idx)
    const after = content.slice(idx + 1)
    return [...before, { ...block, text: prefix + block.text }, ...after]
  }

  // No text block — append one at the end so it's last.
  return [...content, { type: 'text', text: prefix.trimEnd() }]
}
/**
 * Convenience wrapper: extract attachments from `msg`, resolve them, and
 * prepend the resulting refs to `content`.
 *
 * No-op when the message yields no attachments (fast path — no network,
 * returns the same `content` reference).
 *
 * @param msg - Inbound message of unknown shape.
 * @param content - Message content to prefix.
 * @returns `content`, possibly with @path refs prepended.
 */
export async function resolveAndPrepend(
  msg: unknown,
  content: string | Array<ContentBlockParam>,
): Promise<string | Array<ContentBlockParam>> {
  const found = extractInboundAttachments(msg)
  if (found.length > 0) {
    const refs = await resolveInboundAttachments(found)
    return prependPathRefs(content, refs)
  }
  return content
}

80
bridge/inboundMessages.ts Normal file
View file

@ -0,0 +1,80 @@
import type {
Base64ImageSource,
ContentBlockParam,
ImageBlockParam,
} from '@anthropic-ai/sdk/resources/messages.mjs'
import type { UUID } from 'crypto'
import type { SDKMessage } from '../entrypoints/agentSdkTypes.js'
import { detectImageFormatFromBase64 } from '../utils/imageResizer.js'
/**
 * Pull the content and UUID out of an inbound bridge user message so it can
 * be enqueued. Handles both string content and ContentBlockParam[] (e.g.
 * messages containing images).
 *
 * Array content is passed through normalizeImageBlocks, which repairs image
 * blocks from bridge clients that send camelCase `mediaType` instead of
 * snake_case `media_type` (mobile-apps#5825).
 *
 * @param msg - Inbound SDK message.
 * @returns The extracted fields, or `undefined` when the message should be
 *   skipped (non-user type, missing/empty content).
 */
export function extractInboundMessageFields(
  msg: SDKMessage,
):
  | { content: string | Array<ContentBlockParam>; uuid: UUID | undefined }
  | undefined {
  if (msg.type !== 'user') return undefined

  const content = msg.message?.content
  if (!content) return undefined

  // Only a string-valued uuid is surfaced; anything else becomes undefined.
  let uuid: UUID | undefined
  if ('uuid' in msg && typeof msg.uuid === 'string') {
    uuid = msg.uuid as UUID
  }

  if (Array.isArray(content)) {
    if (content.length === 0) return undefined
    return { content: normalizeImageBlocks(content), uuid }
  }
  return { content, uuid }
}
/**
 * Repair base64 image blocks that lack `media_type`. iOS/web clients may
 * send `mediaType` (camelCase) instead of `media_type` (snake_case), or omit
 * the field entirely. Without normalization, the bad block poisons the
 * session — every subsequent API call fails with
 * "media_type: Field required".
 *
 * A non-empty string `mediaType` is reused; otherwise the format is
 * detected from the base64 data.
 *
 * @param blocks - Content blocks as received.
 * @returns The original array reference when nothing needs fixing (zero
 *   allocation on the happy path); otherwise a new array in which each
 *   malformed block is replaced by a repaired copy.
 */
export function normalizeImageBlocks(
  blocks: Array<ContentBlockParam>,
): Array<ContentBlockParam> {
  const needsRepair = blocks.some(isMalformedBase64Image)
  if (!needsRepair) return blocks

  return blocks.map(block => {
    if (isMalformedBase64Image(block)) {
      const loose = block.source as unknown as Record<string, unknown>
      const camel = loose.mediaType
      const mediaType =
        typeof camel === 'string' && camel
          ? camel
          : detectImageFormatFromBase64(block.source.data)
      // Rebuild `source` from scratch so the stray camelCase key is dropped.
      const source = {
        type: 'base64' as const,
        media_type: mediaType as Base64ImageSource['media_type'],
        data: block.source.data,
      }
      return { ...block, source }
    }
    return block
  })
}
/**
 * Type guard: true for an image block whose source is base64 but whose
 * `media_type` is missing or otherwise falsy.
 */
function isMalformedBase64Image(
  block: ContentBlockParam,
): block is ImageBlockParam & { source: Base64ImageSource } {
  if (block.type === 'image' && block.source?.type === 'base64') {
    const loose = block.source as unknown as Record<string, unknown>
    return !loose.media_type
  }
  return false
}

569
bridge/initReplBridge.ts Normal file
View file

@ -0,0 +1,569 @@
/**
* REPL-specific wrapper around initBridgeCore. Owns the parts that read
* bootstrap state — gates, cwd, session ID, git context, OAuth, title
* derivation — then delegates to the bootstrap-free core.
*
* Split out of replBridge.ts because the sessionStorage import
* (getCurrentSessionTitle) transitively pulls in src/commands.ts — the
* entire slash command + React component tree (~1300 modules). Keeping
* initBridgeCore in a file that doesn't touch sessionStorage lets
* daemonBridge.ts import the core without bloating the Agent SDK bundle.
*
* Called via dynamic import by useReplBridge (auto-start) and print.ts
* (SDK -p mode via query.enableRemoteControl).
*/
import { feature } from 'bun:bundle'
import { hostname } from 'os'
import { getOriginalCwd, getSessionId } from '../bootstrap/state.js'
import type { SDKMessage } from '../entrypoints/agentSdkTypes.js'
import type { SDKControlResponse } from '../entrypoints/sdk/controlTypes.js'
import { getFeatureValue_CACHED_WITH_REFRESH } from '../services/analytics/growthbook.js'
import { getOrganizationUUID } from '../services/oauth/client.js'
import {
isPolicyAllowed,
waitForPolicyLimitsToLoad,
} from '../services/policyLimits/index.js'
import type { Message } from '../types/message.js'
import {
checkAndRefreshOAuthTokenIfNeeded,
getClaudeAIOAuthTokens,
handleOAuth401Error,
} from '../utils/auth.js'
import { getGlobalConfig, saveGlobalConfig } from '../utils/config.js'
import { logForDebugging } from '../utils/debug.js'
import { stripDisplayTagsAllowEmpty } from '../utils/displayTags.js'
import { errorMessage } from '../utils/errors.js'
import { getBranch, getRemoteUrl } from '../utils/git.js'
import { toSDKMessages } from '../utils/messages/mappers.js'
import {
getContentText,
getMessagesAfterCompactBoundary,
isSyntheticMessage,
} from '../utils/messages.js'
import type { PermissionMode } from '../utils/permissions/PermissionMode.js'
import { getCurrentSessionTitle } from '../utils/sessionStorage.js'
import {
extractConversationText,
generateSessionTitle,
} from '../utils/sessionTitle.js'
import { generateShortWordSlug } from '../utils/words.js'
import {
getBridgeAccessToken,
getBridgeBaseUrl,
getBridgeTokenOverride,
} from './bridgeConfig.js'
import {
checkBridgeMinVersion,
isBridgeEnabledBlocking,
isCseShimEnabled,
isEnvLessBridgeEnabled,
} from './bridgeEnabled.js'
import {
archiveBridgeSession,
createBridgeSession,
updateBridgeSessionTitle,
} from './createSession.js'
import { logBridgeSkip } from './debugUtils.js'
import { checkEnvLessBridgeMinVersion } from './envLessBridgeConfig.js'
import { getPollIntervalConfig } from './pollConfig.js'
import type { BridgeState, ReplBridgeHandle } from './replBridge.js'
import { initBridgeCore } from './replBridge.js'
import { setCseShimGate } from './sessionIdCompat.js'
import type { BridgeWorkerType } from './types.js'
/**
 * Options accepted by initReplBridge. Every field is optional; the callbacks
 * are handed through to the bridge core unchanged.
 */
export type InitBridgeOptions = {
// Handlers for inbound traffic and control requests.
// NOTE(review): exact invocation conditions live in the bridge core — confirm there.
onInboundMessage?: (msg: SDKMessage) => void | Promise<void>
onPermissionResponse?: (response: SDKControlResponse) => void
onInterrupt?: () => void
onSetModel?: (model: string | undefined) => void
onSetMaxThinkingTokens?: (maxTokens: number | null) => void
onSetPermissionMode?: (
mode: PermissionMode,
) => { ok: true } | { ok: false; error: string }
// Notified of bridge state transitions. initReplBridge itself reports
// 'failed' with a short hint as `detail` (e.g. '/login') when it bails out.
onStateChange?: (state: BridgeState, detail?: string) => void
// Conversation so far; used to derive the initial session title and passed
// on to the bridge core.
initialMessages?: Message[]
// Explicit session name from `/remote-control <name>`. When set, overrides
// the title derived from the conversation or /rename.
initialName?: string
// Fresh view of the full conversation at call time. Used by onUserMessage's
// count-3 derivation to call generateSessionTitle over the full conversation.
// Optional — print.ts's SDK enableRemoteControl path has no REPL message
// array; count-3 falls back to the single message text when absent.
getMessages?: () => Message[]
// UUIDs already flushed in a prior bridge session. Messages with these
// UUIDs are excluded from the initial flush to avoid poisoning the
// server (duplicate UUIDs across sessions cause the WS to be killed).
// Mutated in place — newly flushed UUIDs are added after each flush.
previouslyFlushedUUIDs?: Set<string>
/** See BridgeCoreParams.perpetual. */
perpetual?: boolean
/**
* When true, the bridge only forwards events outbound (no SSE inbound
* stream). Used by CCR mirror mode — local sessions visible on claude.ai
* without enabling inbound control.
*/
outboundOnly?: boolean
// Passed through to the bridge core as-is.
// NOTE(review): presumably session tags — confirm semantics in the core.
tags?: string[]
}
export async function initReplBridge(
options?: InitBridgeOptions,
): Promise<ReplBridgeHandle | null> {
const {
onInboundMessage,
onPermissionResponse,
onInterrupt,
onSetModel,
onSetMaxThinkingTokens,
onSetPermissionMode,
onStateChange,
initialMessages,
getMessages,
previouslyFlushedUUIDs,
initialName,
perpetual,
outboundOnly,
tags,
} = options ?? {}
// Wire the cse_ shim kill switch so toCompatSessionId respects the
// GrowthBook gate. Daemon/SDK paths skip this — shim defaults to active.
setCseShimGate(isCseShimEnabled)
// 1. Runtime gate
if (!(await isBridgeEnabledBlocking())) {
logBridgeSkip('not_enabled', '[bridge:repl] Skipping: bridge not enabled')
return null
}
// 1b. Minimum version check — deferred to after the v1/v2 branch below,
// since each implementation has its own floor (tengu_bridge_min_version
// for v1, tengu_bridge_repl_v2_config.min_version for v2).
// 2. Check OAuth — must be signed in with claude.ai. Runs before the
// policy check so console-auth users get the actionable "/login" hint
// instead of a misleading policy error from a stale/wrong-org cache.
if (!getBridgeAccessToken()) {
logBridgeSkip('no_oauth', '[bridge:repl] Skipping: no OAuth tokens')
onStateChange?.('failed', '/login')
return null
}
// 3. Check organization policy — remote control may be disabled
await waitForPolicyLimitsToLoad()
if (!isPolicyAllowed('allow_remote_control')) {
logBridgeSkip(
'policy_denied',
'[bridge:repl] Skipping: allow_remote_control policy not allowed',
)
onStateChange?.('failed', "disabled by your organization's policy")
return null
}
// When CLAUDE_BRIDGE_OAUTH_TOKEN is set (ant-only local dev), the bridge
// uses that token directly via getBridgeAccessToken() — keychain state is
// irrelevant. Skip 2b/2c to preserve that decoupling: an expired keychain
// token shouldn't block a bridge connection that doesn't use it.
if (!getBridgeTokenOverride()) {
// 2a. Cross-process backoff. If N prior processes already saw this exact
// dead token (matched by expiresAt), skip silently — no event, no refresh
// attempt. The count threshold tolerates transient refresh failures (auth
// server 5xx, lockfile errors per auth.ts:1437/1444/1485): each process
// independently retries until 3 consecutive failures prove the token dead.
// Mirrors useReplBridge's MAX_CONSECUTIVE_INIT_FAILURES for in-process.
// The expiresAt key is content-addressed: /login → new token → new expiresAt
// → this stops matching without any explicit clear.
const cfg = getGlobalConfig()
if (
cfg.bridgeOauthDeadExpiresAt != null &&
(cfg.bridgeOauthDeadFailCount ?? 0) >= 3 &&
getClaudeAIOAuthTokens()?.expiresAt === cfg.bridgeOauthDeadExpiresAt
) {
logForDebugging(
`[bridge:repl] Skipping: cross-process backoff (dead token seen ${cfg.bridgeOauthDeadFailCount} times)`,
)
return null
}
// 2b. Proactively refresh if expired. Mirrors bridgeMain.ts:2096 — the REPL
// bridge fires at useEffect mount BEFORE any v1/messages call, making this
// usually the first OAuth request of the session. Without this, ~9% of
// registrations hit the server with a >8h-expired token → 401 → withOAuthRetry
// recovers, but the server logs a 401 we can avoid. VPN egress IPs observed
// at 30:1 401:200 when many unrelated users cluster at the 8h TTL boundary.
//
// Fresh-token cost: one memoized read + one Date.now() comparison (~µs).
// checkAndRefreshOAuthTokenIfNeeded clears its own cache in every path that
// touches the keychain (refresh success, lockfile race, throw), so no
// explicit clearOAuthTokenCache() here — that would force a blocking
// keychain spawn on the 91%+ fresh-token path.
await checkAndRefreshOAuthTokenIfNeeded()
// 2c. Skip if token is still expired post-refresh-attempt. Env-var / FD
// tokens (auth.ts:894-917) have expiresAt=null → never trip this. But a
// keychain token whose refresh token is dead (password change, org left,
// token GC'd) has expiresAt<now AND refresh just failed — the client would
// otherwise loop 401 forever: withOAuthRetry → handleOAuth401Error →
// refresh fails again → retry with same stale token → 401 again.
// Datadog 2026-03-08: single IPs generating 2,879 such 401s/day. Skip the
// guaranteed-fail API call; useReplBridge surfaces the failure.
//
// Intentionally NOT using isOAuthTokenExpired here — that has a 5-minute
// proactive-refresh buffer, which is the right heuristic for "should
// refresh soon" but wrong for "provably unusable". A token with 3min left
// + transient refresh endpoint blip (5xx/timeout/wifi-reconnect) would
// falsely trip a buffered check; the still-valid token would connect fine.
// Check actual expiry instead: past-expiry AND refresh-failed → truly dead.
const tokens = getClaudeAIOAuthTokens()
if (tokens && tokens.expiresAt !== null && tokens.expiresAt <= Date.now()) {
logBridgeSkip(
'oauth_expired_unrefreshable',
'[bridge:repl] Skipping: OAuth token expired and refresh failed (re-login required)',
)
onStateChange?.('failed', '/login')
// Persist for the next process. Increments failCount when re-discovering
// the same dead token (matched by expiresAt); resets to 1 for a different
// token. Once count reaches 3, step 2a's early-return fires and this path
// is never reached again — writes are capped at 3 per dead token.
// Local const captures the narrowed type (closure loses !==null narrowing).
const deadExpiresAt = tokens.expiresAt
saveGlobalConfig(c => ({
...c,
bridgeOauthDeadExpiresAt: deadExpiresAt,
bridgeOauthDeadFailCount:
c.bridgeOauthDeadExpiresAt === deadExpiresAt
? (c.bridgeOauthDeadFailCount ?? 0) + 1
: 1,
}))
return null
}
}
// 4. Compute baseUrl — needed by both v1 (env-based) and v2 (env-less)
// paths. Hoisted above the v2 gate so both can use it.
const baseUrl = getBridgeBaseUrl()
// 5. Derive session title. Precedence: explicit initialName → /rename
// (session storage) → last meaningful user message → generated slug.
// Cosmetic only (claude.ai session list); the model never sees it.
// Two flags: `hasExplicitTitle` (initialName or /rename — never auto-
// overwrite) vs. `hasTitle` (any title, including auto-derived — blocks
// the count-1 re-derivation but not count-3). The onUserMessage callback
// (wired to both v1 and v2 below) derives from the 1st prompt and again
// from the 3rd so mobile/web show a title that reflects more context.
// The slug fallback (e.g. "remote-control-graceful-unicorn") makes
// auto-started sessions distinguishable in the claude.ai list before the
// first prompt.
let title = `remote-control-${generateShortWordSlug()}`
let hasTitle = false
let hasExplicitTitle = false
if (initialName) {
title = initialName
hasTitle = true
hasExplicitTitle = true
} else {
const sessionId = getSessionId()
const customTitle = sessionId
? getCurrentSessionTitle(sessionId)
: undefined
if (customTitle) {
title = customTitle
hasTitle = true
hasExplicitTitle = true
} else if (initialMessages && initialMessages.length > 0) {
// Find the last user message that has meaningful content. Skip meta
// (nudges), tool results, compact summaries ("This session is being
// continued…"), non-human origins (task notifications, channel pushes),
// and synthetic interrupts ([Request interrupted by user]) — none are
// human-authored. Same filter as extractTitleText + isSyntheticMessage.
for (let i = initialMessages.length - 1; i >= 0; i--) {
const msg = initialMessages[i]!
if (
msg.type !== 'user' ||
msg.isMeta ||
msg.toolUseResult ||
msg.isCompactSummary ||
(msg.origin && msg.origin.kind !== 'human') ||
isSyntheticMessage(msg)
)
continue
const rawContent = getContentText(msg.message.content)
if (!rawContent) continue
const derived = deriveTitle(rawContent)
if (!derived) continue
title = derived
hasTitle = true
break
}
}
}
// Shared by both v1 and v2 — fires on every title-worthy user message until
// it returns true. At count 1: deriveTitle placeholder immediately, then
// generateSessionTitle (Haiku, sentence-case) fire-and-forget upgrade. At
// count 3: re-generate over the full conversation. Skips entirely if the
// title is explicit (/remote-control <name> or /rename) — re-checks
// sessionStorage at call time so /rename between messages isn't clobbered.
// Skips count 1 if initialMessages already derived (that title is fresh);
// still refreshes at count 3. v2 passes cse_*; updateBridgeSessionTitle
// retags internally.
let userMessageCount = 0
let lastBridgeSessionId: string | undefined
let genSeq = 0
const patch = (
derived: string,
bridgeSessionId: string,
atCount: number,
): void => {
hasTitle = true
title = derived
logForDebugging(
`[bridge:repl] derived title from message ${atCount}: ${derived}`,
)
void updateBridgeSessionTitle(bridgeSessionId, derived, {
baseUrl,
getAccessToken: getBridgeAccessToken,
}).catch(() => {})
}
// Fire-and-forget Haiku generation with post-await guards. Re-checks /rename
// (sessionStorage), v1 env-lost (lastBridgeSessionId), and same-session
// out-of-order resolution (genSeq — count-1's Haiku resolving after count-3
// would clobber the richer title). generateSessionTitle never rejects.
const generateAndPatch = (input: string, bridgeSessionId: string): void => {
const gen = ++genSeq
const atCount = userMessageCount
void generateSessionTitle(input, AbortSignal.timeout(15_000)).then(
generated => {
if (
generated &&
gen === genSeq &&
lastBridgeSessionId === bridgeSessionId &&
!getCurrentSessionTitle(getSessionId())
) {
patch(generated, bridgeSessionId, atCount)
}
},
)
}
const onUserMessage = (text: string, bridgeSessionId: string): boolean => {
if (hasExplicitTitle || getCurrentSessionTitle(getSessionId())) {
return true
}
// v1 env-lost re-creates the session with a new ID. Reset the count so
// the new session gets its own count-3 derivation; hasTitle stays true
// (new session was created via getCurrentTitle(), which reads the count-1
// title from this closure), so count-1 of the fresh cycle correctly skips.
if (
lastBridgeSessionId !== undefined &&
lastBridgeSessionId !== bridgeSessionId
) {
userMessageCount = 0
}
lastBridgeSessionId = bridgeSessionId
userMessageCount++
if (userMessageCount === 1 && !hasTitle) {
const placeholder = deriveTitle(text)
if (placeholder) patch(placeholder, bridgeSessionId, userMessageCount)
generateAndPatch(text, bridgeSessionId)
} else if (userMessageCount === 3) {
const msgs = getMessages?.()
const input = msgs
? extractConversationText(getMessagesAfterCompactBoundary(msgs))
: text
generateAndPatch(input, bridgeSessionId)
}
// Also re-latches if v1 env-lost resets the transport's done flag past 3.
return userMessageCount >= 3
}
const initialHistoryCap = getFeatureValue_CACHED_WITH_REFRESH(
'tengu_bridge_initial_history_cap',
200,
5 * 60 * 1000,
)
// Fetch orgUUID before the v1/v2 branch — both paths need it. v1 for
// environment registration; v2 for archive (which lives at the compat
// /v1/sessions/{id}/archive, not /v1/code/sessions). Without it, v2
// archive 404s and sessions stay alive in CCR after /exit.
const orgUUID = await getOrganizationUUID()
if (!orgUUID) {
logBridgeSkip('no_org_uuid', '[bridge:repl] Skipping: no org UUID')
onStateChange?.('failed', '/login')
return null
}
// ── GrowthBook gate: env-less bridge ──────────────────────────────────
// When enabled, skips the Environments API layer entirely (no register/
// poll/ack/heartbeat) and connects directly via POST /bridge → worker_jwt.
// See server PR #292605 (renamed in #293280). REPL-only — daemon/print stay
// on env-based.
//
// NAMING: "env-less" is distinct from "CCR v2" (the /worker/* transport).
// The env-based path below can ALSO use CCR v2 via CLAUDE_CODE_USE_CCR_V2.
// tengu_bridge_repl_v2 gates env-less (no poll loop), not transport version.
//
// perpetual (assistant-mode session continuity via bridge-pointer.json) is
// env-coupled and not yet implemented here — fall back to env-based when set
// so KAIROS users don't silently lose cross-restart continuity.
if (isEnvLessBridgeEnabled() && !perpetual) {
const versionError = await checkEnvLessBridgeMinVersion()
if (versionError) {
logBridgeSkip(
'version_too_old',
`[bridge:repl] Skipping: ${versionError}`,
true,
)
onStateChange?.('failed', 'run `claude update` to upgrade')
return null
}
logForDebugging(
'[bridge:repl] Using env-less bridge path (tengu_bridge_repl_v2)',
)
const { initEnvLessBridgeCore } = await import('./remoteBridgeCore.js')
return initEnvLessBridgeCore({
baseUrl,
orgUUID,
title,
getAccessToken: getBridgeAccessToken,
onAuth401: handleOAuth401Error,
toSDKMessages,
initialHistoryCap,
initialMessages,
// v2 always creates a fresh server session (new cse_* id), so
// previouslyFlushedUUIDs is not passed — there's no cross-session
// UUID collision risk, and the ref persists across enable→disable→
// re-enable cycles which would cause the new session to receive zero
// history (all UUIDs already in the set from the prior enable).
// v1 handles this by calling previouslyFlushedUUIDs.clear() on fresh
// session creation (replBridge.ts:768); v2 skips the param entirely.
onInboundMessage,
onUserMessage,
onPermissionResponse,
onInterrupt,
onSetModel,
onSetMaxThinkingTokens,
onSetPermissionMode,
onStateChange,
outboundOnly,
tags,
})
}
// ── v1 path: env-based (register/poll/ack/heartbeat) ──────────────────
const versionError = checkBridgeMinVersion()
if (versionError) {
logBridgeSkip('version_too_old', `[bridge:repl] Skipping: ${versionError}`)
onStateChange?.('failed', 'run `claude update` to upgrade')
return null
}
// Gather git context — this is the bootstrap-read boundary.
// Everything from here down is passed explicitly to bridgeCore.
const branch = await getBranch()
const gitRepoUrl = await getRemoteUrl()
const sessionIngressUrl =
process.env.USER_TYPE === 'ant' &&
process.env.CLAUDE_BRIDGE_SESSION_INGRESS_URL
? process.env.CLAUDE_BRIDGE_SESSION_INGRESS_URL
: baseUrl
// Assistant-mode sessions advertise a distinct worker_type so the web UI
// can filter them into a dedicated picker. KAIROS guard keeps the
// assistant module out of external builds entirely.
let workerType: BridgeWorkerType = 'claude_code'
if (feature('KAIROS')) {
/* eslint-disable @typescript-eslint/no-require-imports */
const { isAssistantMode } =
require('../assistant/index.js') as typeof import('../assistant/index.js')
/* eslint-enable @typescript-eslint/no-require-imports */
if (isAssistantMode()) {
workerType = 'claude_code_assistant'
}
}
// 6. Delegate. BridgeCoreHandle is a structural superset of
// ReplBridgeHandle (adds writeSdkMessages which REPL callers don't use),
// so no adapter needed — just the narrower type on the way out.
return initBridgeCore({
dir: getOriginalCwd(),
machineName: hostname(),
branch,
gitRepoUrl,
title,
baseUrl,
sessionIngressUrl,
workerType,
getAccessToken: getBridgeAccessToken,
createSession: opts =>
createBridgeSession({
...opts,
events: [],
baseUrl,
getAccessToken: getBridgeAccessToken,
}),
archiveSession: sessionId =>
archiveBridgeSession(sessionId, {
baseUrl,
getAccessToken: getBridgeAccessToken,
// gracefulShutdown.ts:407 races runCleanupFunctions against 2s.
// Teardown also does stopWork (parallel) + deregister (sequential),
// so archive can't have the full budget. 1.5s matches v2's
// teardown_archive_timeout_ms default.
timeoutMs: 1500,
}).catch((err: unknown) => {
// archiveBridgeSession has no try/catch — 5xx/timeout/network throw
// straight through. Previously swallowed silently, making archive
// failures BQ-invisible and undiagnosable from debug logs.
logForDebugging(
`[bridge:repl] archiveBridgeSession threw: ${errorMessage(err)}`,
{ level: 'error' },
)
}),
// getCurrentTitle is read on reconnect-after-env-lost to re-title the new
// session. /rename writes to session storage; onUserMessage mutates
// `title` directly — both paths are picked up here.
getCurrentTitle: () => getCurrentSessionTitle(getSessionId()) ?? title,
onUserMessage,
toSDKMessages,
onAuth401: handleOAuth401Error,
getPollIntervalConfig,
initialHistoryCap,
initialMessages,
previouslyFlushedUUIDs,
onInboundMessage,
onPermissionResponse,
onInterrupt,
onSetModel,
onSetMaxThinkingTokens,
onSetPermissionMode,
onStateChange,
perpetual,
})
}
/** Maximum placeholder title length, in UTF-16 code units (ellipsis included). */
const TITLE_MAX_LEN = 50
/**
 * Quick placeholder title: strip display tags, take the first sentence,
 * collapse whitespace, truncate to 50 chars. Replaced by
 * generateSessionTitle once Haiku resolves (~1-15s).
 *
 * Truncation never splits a UTF-16 surrogate pair: if the cut would land
 * between the two halves of an astral character (e.g. an emoji), the whole
 * character is dropped so the title stays well-formed.
 *
 * @param raw - Raw user message text, possibly containing display tags.
 * @returns The single-line title, or undefined if nothing is left after
 *   stripping tags and whitespace (e.g. message was only
 *   <local-command-stdout>).
 */
function deriveTitle(raw: string): string | undefined {
  // Strip <ide_opened_file>, <session-start-hook>, etc. — these appear in
  // user messages when IDE/hooks inject context. stripDisplayTagsAllowEmpty
  // returns '' (not the original) so pure-tag messages are skipped.
  const clean = stripDisplayTagsAllowEmpty(raw)
  // First sentence is usually the intent; rest is often context/detail.
  // Capture group instead of lookbehind — keeps YARR JIT happy.
  const firstSentence = /^(.*?[.!?])\s/.exec(clean)?.[1] ?? clean
  // Collapse newlines/tabs — titles are single-line in the claude.ai list.
  const flat = firstSentence.replace(/\s+/g, ' ').trim()
  if (!flat) return undefined
  if (flat.length <= TITLE_MAX_LEN) return flat
  // Reserve one code unit for the ellipsis.
  let cut = TITLE_MAX_LEN - 1
  // If the last kept unit is a high surrogate, its low half would be cut
  // off — back up one unit rather than emit a lone surrogate.
  const lastKept = flat.charCodeAt(cut - 1)
  if (lastKept >= 0xd800 && lastKept <= 0xdbff) cut -= 1
  return flat.slice(0, cut) + '\u2026'
}

256
bridge/jwtUtils.ts Normal file
View file

@ -0,0 +1,256 @@
import { logEvent } from '../services/analytics/index.js'
import { logForDebugging } from '../utils/debug.js'
import { logForDiagnosticsNoPII } from '../utils/diagLogs.js'
import { errorMessage } from '../utils/errors.js'
import { jsonParse } from '../utils/slowOperations.js'
/**
 * Format a millisecond duration as a human-readable string (e.g. "5m 30s").
 *
 * The duration is rounded to whole seconds once, up front, and only then
 * split into minutes and seconds. Rounding the seconds part separately can
 * yield "1m 60s" (119 600 ms) or "60s" (59 999 ms).
 *
 * @param ms - Duration in milliseconds.
 * @returns "<s>s" below one minute, otherwise "<m>m" or "<m>m <s>s".
 */
function formatDuration(ms: number): string {
  const totalSeconds = Math.round(ms / 1000)
  if (totalSeconds < 60) return `${totalSeconds}s`
  const minutes = Math.floor(totalSeconds / 60)
  const seconds = totalSeconds % 60
  return seconds > 0 ? `${minutes}m ${seconds}s` : `${minutes}m`
}
/**
 * Read the payload (middle) segment of a JWT. The signature is NOT checked.
 *
 * A leading `sk-ant-si-` session-ingress prefix is removed first. The token
 * must then consist of exactly three dot-separated segments with a non-empty
 * middle one, which is base64url-decoded as UTF-8 and parsed as JSON.
 *
 * @param token - Raw token, with or without the `sk-ant-si-` prefix.
 * @returns The parsed payload, or `null` when the token shape is wrong or the
 *   payload cannot be decoded/parsed.
 */
export function decodeJwtPayload(token: string): unknown | null {
  const ingressPrefix = 'sk-ant-si-'
  let bare = token
  if (token.startsWith(ingressPrefix)) {
    bare = token.slice(ingressPrefix.length)
  }
  const segments = bare.split('.')
  const payloadSegment = segments[1]
  if (segments.length !== 3 || !payloadSegment) return null
  try {
    const json = Buffer.from(payloadSegment, 'base64url').toString('utf8')
    return jsonParse(json)
  } catch {
    return null
  }
}
/**
 * Extract the numeric `exp` claim from a JWT. The signature is NOT checked.
 *
 * @param token - Token handed to decodeJwtPayload.
 * @returns The `exp` value (Unix seconds) when the payload is an object with
 *   a numeric `exp`; `null` in every other case.
 */
export function decodeJwtExpiry(token: string): number | null {
  const claims = decodeJwtPayload(token)
  // Only a non-null object can carry claims.
  if (claims === null || typeof claims !== 'object') return null
  if (!('exp' in claims)) return null
  const expiry = claims.exp
  return typeof expiry === 'number' ? expiry : null
}
/**
* Refresh buffer: request a new token this long before expiry (5 minutes).
* Default for createTokenRefreshScheduler's `refreshBufferMs` option.
*/
const TOKEN_REFRESH_BUFFER_MS = 5 * 60 * 1000
/**
* Interval of the follow-up refresh armed after every successful refresh,
* since the refreshed token's own expiry is not known to the scheduler.
*/
const FALLBACK_REFRESH_INTERVAL_MS = 30 * 60 * 1000 // 30 minutes
/**
* Max consecutive failures before giving up on the refresh chain: a retry is
* only armed while the failure count is below this value.
*/
const MAX_REFRESH_FAILURES = 3
/**
* Retry delay (60s) when no OAuth token was obtained — getAccessToken either
* returned undefined or threw.
*/
const REFRESH_RETRY_DELAY_MS = 60_000
/**
 * Creates a token refresh scheduler that proactively refreshes session tokens
 * before they expire. Used by both the standalone bridge and the REPL bridge.
 *
 * When a token is about to expire, the scheduler calls `onRefresh` with the
 * session ID and the bridge's OAuth access token. The caller is responsible
 * for delivering the token to the appropriate transport (child process stdin
 * for standalone bridge, WebSocket reconnect for REPL bridge).
 *
 * State is kept per session ID: at most one tracked timer, a consecutive
 * failure count, and a generation number. Every schedule/cancel bumps the
 * generation so a refresh that was superseded while it was in flight does
 * not arm further timers.
 *
 * @param getAccessToken - Returns the current OAuth token (sync or async);
 *   undefined or a throw counts as a failed attempt.
 * @param onRefresh - Called synchronously with the session ID and fresh
 *   OAuth token on each successful refresh.
 * @param label - Prefix used in debug log lines.
 * @param refreshBufferMs - How long before expiry to refresh (default 5 min).
 * @returns `schedule` (from a JWT's exp claim), `scheduleFromExpiresIn`
 *   (from an explicit TTL), `cancel` (one session) and `cancelAll`.
 */
export function createTokenRefreshScheduler({
  getAccessToken,
  onRefresh,
  label,
  refreshBufferMs = TOKEN_REFRESH_BUFFER_MS,
}: {
  getAccessToken: () => string | undefined | Promise<string | undefined>
  onRefresh: (sessionId: string, oauthToken: string) => void
  label: string
  /** How long before expiry to fire refresh. Defaults to 5 min. */
  refreshBufferMs?: number
}): {
  schedule: (sessionId: string, token: string) => void
  scheduleFromExpiresIn: (sessionId: string, expiresInSeconds: number) => void
  cancel: (sessionId: string) => void
  cancelAll: () => void
} {
  const timers = new Map<string, ReturnType<typeof setTimeout>>()
  const failureCounts = new Map<string, number>()
  // Generation counter per session — incremented by schedule() and cancel()
  // so that in-flight async doRefresh() calls can detect when they've been
  // superseded and should skip setting follow-up timers.
  const generations = new Map<string, number>()

  /** Bump and return the session's generation, invalidating older refreshes. */
  function nextGeneration(sessionId: string): number {
    const gen = (generations.get(sessionId) ?? 0) + 1
    generations.set(sessionId, gen)
    return gen
  }

  /**
   * Schedule a refresh `refreshBufferMs` before the JWT's `exp` claim, or
   * refresh immediately when that moment has already passed. A token whose
   * expiry cannot be decoded leaves the current schedule untouched.
   */
  function schedule(sessionId: string, token: string): void {
    const expiry = decodeJwtExpiry(token)
    if (!expiry) {
      // Token is not a decodable JWT (e.g. an OAuth token passed from the
      // REPL bridge WebSocket open handler). Preserve any existing timer
      // (such as the follow-up refresh set by doRefresh) so the refresh
      // chain is not broken.
      logForDebugging(
        `[${label}:token] Could not decode JWT expiry for sessionId=${sessionId}, token prefix=${token.slice(0, 15)}…, keeping existing timer`,
      )
      return
    }
    // Clear any existing refresh timer — we have a concrete expiry to replace it.
    const existing = timers.get(sessionId)
    if (existing) {
      clearTimeout(existing)
      // Drop the handle too: the immediate-refresh path below does not set a
      // replacement, and a cleared handle must not linger in the map.
      timers.delete(sessionId)
    }
    // Bump generation to invalidate any in-flight async doRefresh.
    const gen = nextGeneration(sessionId)
    const expiryDate = new Date(expiry * 1000).toISOString()
    const delayMs = expiry * 1000 - Date.now() - refreshBufferMs
    if (delayMs <= 0) {
      logForDebugging(
        `[${label}:token] Token for sessionId=${sessionId} expires=${expiryDate} (past or within buffer), refreshing immediately`,
      )
      void doRefresh(sessionId, gen)
      return
    }
    logForDebugging(
      `[${label}:token] Scheduled token refresh for sessionId=${sessionId} in ${formatDuration(delayMs)} (expires=${expiryDate}, buffer=${refreshBufferMs / 1000}s)`,
    )
    const timer = setTimeout(doRefresh, delayMs, sessionId, gen)
    timers.set(sessionId, timer)
  }

  /**
   * Schedule refresh using an explicit TTL (seconds until expiry) rather
   * than decoding a JWT's exp claim. Used by callers whose JWT is opaque
   * (e.g. POST /v1/code/sessions/{id}/bridge returns expires_in directly).
   */
  function scheduleFromExpiresIn(
    sessionId: string,
    expiresInSeconds: number,
  ): void {
    const existing = timers.get(sessionId)
    if (existing) clearTimeout(existing)
    const gen = nextGeneration(sessionId)
    // Clamp to 30s floor — if refreshBufferMs exceeds the server's expires_in
    // (e.g. very large buffer for frequent-refresh testing, or server shortens
    // expires_in unexpectedly), unclamped delayMs ≤ 0 would tight-loop.
    const delayMs = Math.max(expiresInSeconds * 1000 - refreshBufferMs, 30_000)
    logForDebugging(
      `[${label}:token] Scheduled token refresh for sessionId=${sessionId} in ${formatDuration(delayMs)} (expires_in=${expiresInSeconds}s, buffer=${refreshBufferMs / 1000}s)`,
    )
    const timer = setTimeout(doRefresh, delayMs, sessionId, gen)
    timers.set(sessionId, timer)
  }

  /**
   * Perform one refresh attempt for `sessionId`. `gen` is the generation the
   * attempt was scheduled under; if it no longer matches, the attempt was
   * superseded and must not call onRefresh or arm timers.
   */
  async function doRefresh(sessionId: string, gen: number): Promise<void> {
    let oauthToken: string | undefined
    try {
      oauthToken = await getAccessToken()
    } catch (err) {
      // Treated like "no token": falls through to the retry path below.
      logForDebugging(
        `[${label}:token] getAccessToken threw for sessionId=${sessionId}: ${errorMessage(err)}`,
        { level: 'error' },
      )
    }
    // If the session was cancelled or rescheduled while we were awaiting,
    // the generation will have changed — bail out to avoid orphaned timers.
    if (generations.get(sessionId) !== gen) {
      logForDebugging(
        `[${label}:token] doRefresh for sessionId=${sessionId} stale (gen ${gen} vs ${generations.get(sessionId)}), skipping`,
      )
      return
    }
    if (!oauthToken) {
      const failures = (failureCounts.get(sessionId) ?? 0) + 1
      failureCounts.set(sessionId, failures)
      logForDebugging(
        `[${label}:token] No OAuth token available for refresh, sessionId=${sessionId} (failure ${failures}/${MAX_REFRESH_FAILURES})`,
        { level: 'error' },
      )
      logForDiagnosticsNoPII('error', 'bridge_token_refresh_no_oauth')
      // Schedule a retry so the refresh chain can recover if the token
      // becomes available again (e.g. transient cache clear during refresh).
      // Cap retries to avoid spamming on genuine failures.
      if (failures < MAX_REFRESH_FAILURES) {
        const retryTimer = setTimeout(
          doRefresh,
          REFRESH_RETRY_DELAY_MS,
          sessionId,
          gen,
        )
        timers.set(sessionId, retryTimer)
      } else {
        // Giving up: whatever handle is still recorded has already fired
        // (the generation is unchanged, so nothing re-armed it). Drop it.
        timers.delete(sessionId)
      }
      return
    }
    // Reset failure counter on successful token retrieval
    failureCounts.delete(sessionId)
    logForDebugging(
      `[${label}:token] Refreshing token for sessionId=${sessionId}: new token prefix=${oauthToken.slice(0, 15)}…`,
    )
    logEvent('tengu_bridge_token_refreshed', {})
    onRefresh(sessionId, oauthToken)
    // onRefresh runs synchronously and may itself schedule() or cancel()
    // this session. In that case the schedule is no longer ours to manage:
    // arming a follow-up here would overwrite the newer timer's map entry
    // (so cancel() could not clear it) or re-arm a cancelled session.
    if (generations.get(sessionId) !== gen) {
      logForDebugging(
        `[${label}:token] doRefresh for sessionId=${sessionId} superseded during onRefresh (gen ${gen} vs ${generations.get(sessionId)}), skipping follow-up`,
      )
      return
    }
    // Schedule a follow-up refresh so long-running sessions stay authenticated.
    // Without this, the initial one-shot timer leaves the session vulnerable
    // to token expiry if it runs past the first refresh window.
    const timer = setTimeout(
      doRefresh,
      FALLBACK_REFRESH_INTERVAL_MS,
      sessionId,
      gen,
    )
    timers.set(sessionId, timer)
    logForDebugging(
      `[${label}:token] Scheduled follow-up refresh for sessionId=${sessionId} in ${formatDuration(FALLBACK_REFRESH_INTERVAL_MS)}`,
    )
  }

  /** Stop refreshing one session and forget its timer and failure count. */
  function cancel(sessionId: string): void {
    // Bump generation to invalidate any in-flight async doRefresh.
    nextGeneration(sessionId)
    const timer = timers.get(sessionId)
    if (timer) {
      clearTimeout(timer)
      timers.delete(sessionId)
    }
    failureCounts.delete(sessionId)
  }

  /** Stop refreshing every session known to this scheduler. */
  function cancelAll(): void {
    // Bump all generations so in-flight doRefresh calls are invalidated.
    for (const sessionId of generations.keys()) {
      nextGeneration(sessionId)
    }
    for (const timer of timers.values()) {
      clearTimeout(timer)
    }
    timers.clear()
    failureCounts.clear()
  }

  return { schedule, scheduleFromExpiresIn, cancel, cancelAll }
}

110
bridge/pollConfig.ts Normal file
View file

@ -0,0 +1,110 @@
import { z } from 'zod/v4'
import { getFeatureValue_CACHED_WITH_REFRESH } from '../services/analytics/growthbook.js'
import { lazySchema } from '../utils/lazySchema.js'
import {
DEFAULT_POLL_CONFIG,
type PollIntervalConfig,
} from './pollConfigDefaults.js'
// .min(100) on the seek-work intervals restores the old Math.max(..., 100)
// defense-in-depth floor against fat-fingered GrowthBook values. Unlike a
// clamp, Zod rejects the whole object on violation — a config with one bad
// field falls back to DEFAULT_POLL_CONFIG entirely rather than being
// partially trusted.
//
// The at_capacity intervals use a 0-or-≥100 refinement: 0 means "disabled"
// (heartbeat-only mode), ≥100 is the fat-finger floor. Values 1–99 are
// rejected so unit confusion (ops thinks seconds, enters 10) doesn't poll
// every 10ms against the VerifyEnvironmentSecretAuth DB path.
//
// The object-level refines require at least one at-capacity liveness
// mechanism enabled: heartbeat OR the relevant poll interval. Without this,
// the hb=0, atCapMs=0 drift config (ops disables heartbeat without
// restoring at_capacity) falls through every throttle site with no sleep —
// tight-looping /poll at HTTP-round-trip speed.
/** Shared error payload for the "0 or ≥100" interval refinement. */
const zeroOrAtLeast100 = {
  message: 'must be 0 (disabled) or ≥100ms',
}
/**
 * Lazily-built Zod schema for the served poll-interval config.
 *
 * Field rules:
 * - seek-work intervals: integer, at least 100;
 * - at-capacity intervals: integer, either 0 (disabled) or at least 100;
 * - heartbeat / keepalive: integer, at least 0;
 * - reclaim_older_than_ms: integer, at least 1.
 *
 * Two object-level refinements additionally require that, for both the
 * single-session and the multisession at-capacity interval, either the
 * heartbeat or that interval is > 0.
 */
const pollIntervalConfigSchema = lazySchema(() => {
  // Integer interval with the 100ms floor used while seeking work.
  const seekWorkInterval = () => z.number().int().min(100)
  // Integer interval where 0 means "disabled" and 1..99 is rejected.
  const atCapacityInterval = () =>
    z
      .number()
      .int()
      .refine(ms => ms === 0 || ms >= 100, zeroOrAtLeast100)

  const fields = z.object({
    poll_interval_ms_not_at_capacity: seekWorkInterval(),
    // 0 turns at-capacity polling off. Heartbeat is configured separately,
    // and both may be on at once.
    poll_interval_ms_at_capacity: atCapacityInterval(),
    // 0 = off; a positive value is the heartbeat interval while at capacity.
    // Defaults to 0 so configs that predate this key still parse.
    non_exclusive_heartbeat_interval_ms: z.number().int().min(0).default(0),
    // Multisession (bridgeMain.ts) intervals; each falls back to its
    // DEFAULT_POLL_CONFIG value when the key is missing.
    multisession_poll_interval_ms_not_at_capacity: seekWorkInterval().default(
      DEFAULT_POLL_CONFIG.multisession_poll_interval_ms_not_at_capacity,
    ),
    multisession_poll_interval_ms_partial_capacity: seekWorkInterval().default(
      DEFAULT_POLL_CONFIG.multisession_poll_interval_ms_partial_capacity,
    ),
    multisession_poll_interval_ms_at_capacity: atCapacityInterval().default(
      DEFAULT_POLL_CONFIG.multisession_poll_interval_ms_at_capacity,
    ),
    // .min(1) matches the server's ge=1 constraint (work_v1.py:230).
    reclaim_older_than_ms: z.number().int().min(1).default(5000),
    session_keepalive_interval_v2_ms: z.number().int().min(0).default(120_000),
  })

  return fields
    .refine(
      cfg =>
        cfg.non_exclusive_heartbeat_interval_ms > 0 ||
        cfg.poll_interval_ms_at_capacity > 0,
      {
        message:
          'at-capacity liveness requires non_exclusive_heartbeat_interval_ms > 0 or poll_interval_ms_at_capacity > 0',
      },
    )
    .refine(
      cfg =>
        cfg.non_exclusive_heartbeat_interval_ms > 0 ||
        cfg.multisession_poll_interval_ms_at_capacity > 0,
      {
        message:
          'at-capacity liveness requires non_exclusive_heartbeat_interval_ms > 0 or multisession_poll_interval_ms_at_capacity > 0',
      },
    )
})
/**
 * Read the bridge poll-interval config from the
 * `tengu_bridge_poll_interval_config` GrowthBook flag (5-minute refresh
 * window, DEFAULT_POLL_CONFIG as the flag's fallback value) and validate it.
 *
 * Shared by bridgeMain.ts (standalone) and replBridge.ts (REPL) so ops
 * can tune both poll rates fleet-wide with a single config push.
 *
 * @returns The schema-parsed config, or DEFAULT_POLL_CONFIG whenever the
 *   served value fails validation.
 */
export function getPollIntervalConfig(): PollIntervalConfig {
  const refreshWindowMs = 5 * 60 * 1000
  const served = getFeatureValue_CACHED_WITH_REFRESH<unknown>(
    'tengu_bridge_poll_interval_config',
    DEFAULT_POLL_CONFIG,
    refreshWindowMs,
  )
  const result = pollIntervalConfigSchema().safeParse(served)
  if (!result.success) {
    return DEFAULT_POLL_CONFIG
  }
  return result.data
}

View file

@ -0,0 +1,82 @@
/**
* Bridge poll interval defaults. Extracted from pollConfig.ts so callers
* that don't need live GrowthBook tuning (daemon via Agent SDK) can avoid
* the growthbook.ts → config.ts → file.ts → sessionStorage.ts → commands.ts
* transitive dependency chain.
*/
/**
 * Poll interval (ms) while actively seeking work — no transport yet, or
 * below maxSessions. Governs user-visible "connecting…" latency on initial
 * work pickup and recovery speed after the server re-dispatches a work item.
 */
const POLL_INTERVAL_MS_NOT_AT_CAPACITY = 2_000
/**
 * Poll interval (ms) while the transport is connected. Runs independently of
 * heartbeat — when both are enabled, the heartbeat loop breaks out to poll
 * at this interval. Set to 0 to disable at-capacity polling entirely.
 *
 * Server-side constraints that bound this value:
 * - BRIDGE_LAST_POLL_TTL = 4h (Redis key expiry → environment auto-archived)
 * - max_poll_stale_seconds = 24h (session-creation health gate, currently disabled)
 *
 * 10 minutes gives 24× headroom on the Redis TTL while still picking up
 * server-initiated token-rotation redispatches within one poll cycle.
 * The transport auto-reconnects internally for 10 minutes on transient WS
 * failures, so poll is not the recovery path — it's strictly a liveness
 * signal plus a backstop for permanent close.
 */
const POLL_INTERVAL_MS_AT_CAPACITY = 10 * 60 * 1000
/**
 * Multisession bridge (bridgeMain.ts) poll intervals. Each one aliases the
 * matching single-session value (partial capacity reuses the not-at-capacity
 * value), so existing GrowthBook configs without these fields preserve
 * current behavior. Ops can tune these independently via the
 * tengu_bridge_poll_interval_config GB flag.
 */
const MULTISESSION_POLL_INTERVAL_MS_AT_CAPACITY = POLL_INTERVAL_MS_AT_CAPACITY
const MULTISESSION_POLL_INTERVAL_MS_NOT_AT_CAPACITY =
  POLL_INTERVAL_MS_NOT_AT_CAPACITY
const MULTISESSION_POLL_INTERVAL_MS_PARTIAL_CAPACITY =
  POLL_INTERVAL_MS_NOT_AT_CAPACITY
/**
* Shape of the bridge poll-interval configuration. All values are
* milliseconds.
*/
export type PollIntervalConfig = {
// Poll interval while actively seeking work.
poll_interval_ms_not_at_capacity: number
// Poll interval while the transport is connected; 0 disables it.
poll_interval_ms_at_capacity: number
// Per-work-item heartbeat interval while at capacity; 0 = disabled.
non_exclusive_heartbeat_interval_ms: number
// Multisession (bridgeMain.ts) counterparts of the intervals above.
multisession_poll_interval_ms_not_at_capacity: number
multisession_poll_interval_ms_partial_capacity: number
multisession_poll_interval_ms_at_capacity: number
// Poll query param: reclaim unacknowledged work items older than this.
reclaim_older_than_ms: number
// Interval for the keep_alive frame pushed to session-ingress; 0 = disabled.
session_keepalive_interval_v2_ms: number
}
/**
* Built-in poll-interval configuration, built from the constants declared
* above in this file plus literal defaults for heartbeat, reclaim and
* keepalive.
*/
export const DEFAULT_POLL_CONFIG: PollIntervalConfig = {
poll_interval_ms_not_at_capacity: POLL_INTERVAL_MS_NOT_AT_CAPACITY,
poll_interval_ms_at_capacity: POLL_INTERVAL_MS_AT_CAPACITY,
// 0 = disabled. When > 0, at-capacity loops send per-work-item heartbeats
// at this interval. Independent of poll_interval_ms_at_capacity — both may
// run (heartbeat periodically yields to poll). 60s gives 5× headroom under
// the server's 300s heartbeat TTL. Named non_exclusive to distinguish from
// the old heartbeat_interval_ms field (either-or semantics in pre-#22145
// clients — heartbeat suppressed poll). Old clients ignore this key; ops
// can set both fields during rollout.
// NOTE(review): the default here is 0 (disabled); the 60s figure appears to
// be guidance for the value to use when enabling — confirm.
non_exclusive_heartbeat_interval_ms: 0,
multisession_poll_interval_ms_not_at_capacity:
MULTISESSION_POLL_INTERVAL_MS_NOT_AT_CAPACITY,
multisession_poll_interval_ms_partial_capacity:
MULTISESSION_POLL_INTERVAL_MS_PARTIAL_CAPACITY,
multisession_poll_interval_ms_at_capacity:
MULTISESSION_POLL_INTERVAL_MS_AT_CAPACITY,
// Poll query param: reclaim unacknowledged work items older than this.
// Matches the server's DEFAULT_RECLAIM_OLDER_THAN_MS (work_service.py:24).
// Enables picking up stale-pending work after JWT expiry, when the prior
// ack failed because the session_ingress_token was already stale.
reclaim_older_than_ms: 5000,
// 0 = disabled. When > 0, push a silent {type:'keep_alive'} frame to
// session-ingress at this interval so upstream proxies don't GC an idle
// remote-control session. 2 min is the default. _v2: bridge-only gate
// (pre-v2 clients read the old key, new clients ignore it).
session_keepalive_interval_v2_ms: 120_000,
}

1008
bridge/remoteBridgeCore.ts Normal file

File diff suppressed because it is too large Load diff

2406
bridge/replBridge.ts Normal file

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,36 @@
import { updateSessionBridgeId } from '../utils/concurrentSessions.js'
import type { ReplBridgeHandle } from './replBridge.js'
import { toCompatSessionId } from './sessionIdCompat.js'
/**
* Global pointer to the active REPL bridge handle, so callers outside
* useReplBridge's React tree (tools, slash commands) can invoke handle methods
* like subscribePR. Same one-bridge-per-process justification as
* bridgeDebug.ts — the handle's closure captures the sessionId and
* getAccessToken that created the session, and re-deriving those
* independently (BriefTool/upload.ts pattern) risks staging/prod token
* divergence.
*
* Set from useReplBridge.tsx when init completes; cleared on teardown.
* `null` means no bridge handle is currently registered.
*/
let handle: ReplBridgeHandle | null = null
/**
 * Register (or, with `null`, clear) the process-wide REPL bridge handle and
 * publish the resulting bridge session ID to the session record.
 *
 * The publish is fire-and-forget: its rejection is deliberately ignored.
 *
 * @param h - The new active handle, or `null` on teardown.
 */
export function setReplBridgeHandle(h: ReplBridgeHandle | null): void {
  handle = h
  // Computed after the assignment so it reflects the handle just stored.
  // Other local peers use the published ID to dedup us out of their bridge
  // list — local is preferred.
  const publishedId = getSelfBridgeCompatId() ?? null
  void updateSessionBridgeId(publishedId).catch(() => {})
}
/**
* Return the handle most recently stored by setReplBridgeHandle, or `null`
* when none is registered.
*/
export function getReplBridgeHandle(): ReplBridgeHandle | null {
return handle
}
/**
 * Our own bridge session ID, converted with toCompatSessionId to the
 * session_* compat format the API returns in /v1/sessions responses.
 *
 * @returns The compat-format ID, or undefined when no bridge handle is
 *   registered.
 */
export function getSelfBridgeCompatId(): string | undefined {
  const active = getReplBridgeHandle()
  if (!active) return undefined
  return toCompatSessionId(active.bridgeSessionId)
}

View file

@ -0,0 +1,370 @@
import type { StdoutMessage } from 'src/entrypoints/sdk/controlTypes.js'
import { CCRClient } from '../cli/transports/ccrClient.js'
import type { HybridTransport } from '../cli/transports/HybridTransport.js'
import { SSETransport } from '../cli/transports/SSETransport.js'
import { logForDebugging } from '../utils/debug.js'
import { errorMessage } from '../utils/errors.js'
import { updateSessionIngressAuthToken } from '../utils/sessionIngressAuth.js'
import type { SessionState } from '../utils/sessionState.js'
import { registerWorker } from './workSecret.js'
/**
* Transport abstraction for replBridge. Covers exactly the surface that
* replBridge.ts uses against HybridTransport so the v1/v2 choice is
* confined to the construction site.
*
* - v1: HybridTransport (WS reads + POST writes to Session-Ingress)
* - v2: SSETransport (reads) + CCRClient (writes to CCR v2 /worker/*)
*
* The v2 write path goes through CCRClient.writeEvent → SerialBatchEventUploader,
* NOT through SSETransport.write() — SSETransport.write() targets the
* Session-Ingress POST URL shape, which is wrong for CCR v2.
*/
export type ReplBridgeTransport = {
// Outbound writes: a single message, or several in one call.
write(message: StdoutMessage): Promise<void>
writeBatch(messages: StdoutMessage[]): Promise<void>
close(): void
isConnectedStatus(): boolean
getStateLabel(): string
// Callback registration for inbound data, close (with an optional close
// code) and connect events.
setOnData(callback: (data: string) => void): void
setOnClose(callback: (closeCode?: number) => void): void
setOnConnect(callback: () => void): void
connect(): void
/**
* High-water mark of the underlying read stream's event sequence numbers.
* replBridge reads this before swapping transports so the new one can
* resume from where the old one left off (otherwise the server replays
* the entire session history from seq 0).
*
* v1 returns 0 — Session-Ingress WS doesn't use SSE sequence numbers;
* replay-on-reconnect is handled by the server-side message cursor.
*/
getLastSequenceNum(): number
/**
* Monotonic count of batches dropped via maxConsecutiveFailures.
* Snapshot before writeBatch() and compare after to detect silent drops
* (writeBatch() resolves normally even when batches were dropped).
* v2 returns 0 — the v2 write path doesn't set maxConsecutiveFailures.
*/
readonly droppedBatchCount: number
/**
* PUT /worker state (v2 only; v1 is a no-op). `requires_action` tells
* the backend a permission prompt is pending — claude.ai shows the
* "waiting for input" indicator. REPL/daemon callers don't need this
* (user watches the REPL locally); multi-session worker callers do.
*/
reportState(state: SessionState): void
/** PUT /worker external_metadata (v2 only; v1 is a no-op). */
reportMetadata(metadata: Record<string, unknown>): void
/**
* POST /worker/events/{id}/delivery (v2 only; v1 is a no-op). Populates
* CCR's processing_at/processed_at columns. `received` is auto-fired by
* CCRClient on every SSE frame and is not exposed here.
*/
reportDelivery(eventId: string, status: 'processing' | 'processed'): void
/**
* Drain the write queue before close() (v2 only; v1 resolves
* immediately — HybridTransport POSTs are already awaited per-write).
*/
flush(): Promise<void>
}
/**
 * v1 adapter around a HybridTransport.
 *
 * Reads, writes, connection state and callback registration are forwarded
 * one-to-one to `hybrid`. The v2-only members are stubbed: the three
 * report* methods do nothing, flush() resolves immediately, and
 * getLastSequenceNum() is always 0. The wrapper exists so replBridge's
 * `transport` variable has a single type.
 *
 * @param hybrid - The underlying v1 transport to delegate to.
 * @returns A ReplBridgeTransport view over `hybrid`.
 */
export function createV1ReplTransport(
  hybrid: HybridTransport,
): ReplBridgeTransport {
  // Shared stub for the v2-only reporting hooks.
  const ignore = (): void => {}
  return {
    write(message) {
      return hybrid.write(message)
    },
    writeBatch(messages) {
      return hybrid.writeBatch(messages)
    },
    close() {
      return hybrid.close()
    },
    isConnectedStatus() {
      return hybrid.isConnectedStatus()
    },
    getStateLabel() {
      return hybrid.getStateLabel()
    },
    setOnData(callback) {
      return hybrid.setOnData(callback)
    },
    setOnClose(callback) {
      return hybrid.setOnClose(callback)
    },
    setOnConnect(callback) {
      return hybrid.setOnConnect(callback)
    },
    connect() {
      // Whatever hybrid.connect() returns is intentionally discarded.
      void hybrid.connect()
    },
    // v1 Session-Ingress WS doesn't use SSE sequence numbers, so the
    // seq-num carryover logic in replBridge sees a constant 0.
    getLastSequenceNum() {
      return 0
    },
    // Live view: re-read from the underlying transport on every access.
    get droppedBatchCount() {
      return hybrid.droppedBatchCount
    },
    reportState: ignore,
    reportMetadata: ignore,
    reportDelivery: ignore,
    flush() {
      return Promise.resolve()
    },
  }
}
/**
* v2 adapter: wrap SSETransport (reads) + CCRClient (writes, heartbeat,
* state, delivery tracking).
*
* Auth: v2 endpoints validate the JWT's session_id claim (register_worker.go:32)
* and worker role (environment_auth.py:856). OAuth tokens have neither.
* This is the inverse of the v1 replBridge path, which deliberately uses OAuth.
* The JWT is refreshed when the poll loop re-dispatches work the caller
* invokes createV2ReplTransport again with the fresh token.
*
* Registration happens here (not in the caller) so the entire v2 handshake
* is one async step. registerWorker failure propagates replBridge will
* catch it and stay on the poll loop.
*/
export async function createV2ReplTransport(opts: {
sessionUrl: string
ingressToken: string
sessionId: string
/**
* SSE sequence-number high-water mark from the previous transport.
* Passed to the new SSETransport so its first connect() sends
* from_sequence_num / Last-Event-ID and the server resumes from where
* the old stream left off. Without this, every transport swap asks the
* server to replay the entire session history from seq 0.
*/
initialSequenceNum?: number
/**
* Worker epoch from POST /bridge response. When provided, the server
* already bumped epoch (the /bridge call IS the register see server
* PR #293280). When omitted (v1 CCR-v2 path via replBridge.ts poll loop),
* call registerWorker as before.
*/
epoch?: number
/** CCRClient heartbeat interval. Defaults to 20s when omitted. */
heartbeatIntervalMs?: number
/** ±fraction per-beat jitter. Defaults to 0 (no jitter) when omitted. */
heartbeatJitterFraction?: number
/**
* When true, skip opening the SSE read stream only the CCRClient write
* path is activated. Use for mirror-mode attachments that forward events
* but never receive inbound prompts or control requests.
*/
outboundOnly?: boolean
/**
* Per-instance auth header source. When provided, CCRClient + SSETransport
* read auth from this closure instead of the process-wide
* CLAUDE_CODE_SESSION_ACCESS_TOKEN env var. Required for callers managing
* multiple concurrent sessions the env-var path stomps across sessions.
* When omitted, falls back to the env var (single-session callers).
*/
getAuthToken?: () => string | undefined
}): Promise<ReplBridgeTransport> {
const {
sessionUrl,
ingressToken,
sessionId,
initialSequenceNum,
getAuthToken,
} = opts
// Auth header builder. If getAuthToken is provided, read from it
// (per-instance, multi-session safe). Otherwise write ingressToken to
// the process-wide env var (legacy single-session path — CCRClient's
// default getAuthHeaders reads it via getSessionIngressAuthHeaders).
let getAuthHeaders: (() => Record<string, string>) | undefined
if (getAuthToken) {
getAuthHeaders = (): Record<string, string> => {
const token = getAuthToken()
if (!token) return {}
return { Authorization: `Bearer ${token}` }
}
} else {
// CCRClient.request() and SSETransport.connect() both read auth via
// getSessionIngressAuthHeaders() → this env var. Set it before either
// touches the network.
updateSessionIngressAuthToken(ingressToken)
}
const epoch = opts.epoch ?? (await registerWorker(sessionUrl, ingressToken))
logForDebugging(
`[bridge:repl] CCR v2: worker sessionId=${sessionId} epoch=${epoch}${opts.epoch !== undefined ? ' (from /bridge)' : ' (via registerWorker)'}`,
)
// Derive SSE stream URL. Same logic as transportUtils.ts:26-33 but
// starting from an http(s) base instead of a --sdk-url that might be ws://.
const sseUrl = new URL(sessionUrl)
sseUrl.pathname = sseUrl.pathname.replace(/\/$/, '') + '/worker/events/stream'
const sse = new SSETransport(
sseUrl,
{},
sessionId,
undefined,
initialSequenceNum,
getAuthHeaders,
)
let onCloseCb: ((closeCode?: number) => void) | undefined
const ccr = new CCRClient(sse, new URL(sessionUrl), {
getAuthHeaders,
heartbeatIntervalMs: opts.heartbeatIntervalMs,
heartbeatJitterFraction: opts.heartbeatJitterFraction,
// Default is process.exit(1) — correct for spawn-mode children. In-process,
// that kills the REPL. Close instead: replBridge's onClose wakes the poll
// loop, which picks up the server's re-dispatch (with fresh epoch).
onEpochMismatch: () => {
logForDebugging(
'[bridge:repl] CCR v2: epoch superseded (409) — closing for poll-loop recovery',
)
// Close resources in a try block so the throw always executes.
// If ccr.close() or sse.close() throw, we still need to unwind
// the caller (request()) — otherwise handleEpochMismatch's `never`
// return type is violated at runtime and control falls through.
try {
ccr.close()
sse.close()
onCloseCb?.(4090)
} catch (closeErr: unknown) {
logForDebugging(
`[bridge:repl] CCR v2: error during epoch-mismatch cleanup: ${errorMessage(closeErr)}`,
{ level: 'error' },
)
}
// Don't return — the calling request() code continues after the 409
// branch, so callers see the logged warning and a false return. We
// throw to unwind; the uploaders catch it as a send failure.
throw new Error('epoch superseded')
},
})
// CCRClient's constructor wired sse.setOnEvent → reportDelivery('received').
// remoteIO.ts additionally sends 'processing'/'processed' via
// setCommandLifecycleListener, which the in-process query loop fires. This
// transport's only caller (replBridge/daemonBridge) has no such wiring — the
// daemon's agent child is a separate process (ProcessTransport), and its
// notifyCommandLifecycle calls fire with listener=null in its own module
// scope. So events stay at 'received' forever, and reconnectSession re-queues
// them on every daemon restart (observed: 21→24→25 phantom prompts as
// "user sent a new message while you were working" system-reminders).
//
// Fix: ACK 'processed' immediately alongside 'received'. The window between
// SSE receipt and transcript-write is narrow (queue → SDK → child stdin →
// model); a crash there loses one prompt vs. the observed N-prompt flood on
// every restart. Overwrite the constructor's wiring to do both — setOnEvent
// replaces, not appends (SSETransport.ts:658).
sse.setOnEvent(event => {
ccr.reportDelivery(event.event_id, 'received')
ccr.reportDelivery(event.event_id, 'processed')
})
// Both sse.connect() and ccr.initialize() are deferred to connect() below.
// replBridge's calling order is newTransport → setOnConnect → setOnData →
// setOnClose → connect(), and both calls need those callbacks wired first:
// sse.connect() opens the stream (events flow to onData/onClose immediately),
// and ccr.initialize().then() fires onConnectCb.
//
// onConnect fires once ccr.initialize() resolves. Writes go via
// CCRClient HTTP POST (SerialBatchEventUploader), not SSE, so the
// write path is ready the moment workerEpoch is set. SSE.connect()
// awaits its read loop and never resolves — don't gate on it.
// The SSE stream opens in parallel (~30ms) and starts delivering
// inbound events via setOnData; outbound doesn't need to wait for it.
let onConnectCb: (() => void) | undefined
let ccrInitialized = false
let closed = false
return {
write(msg) {
return ccr.writeEvent(msg)
},
async writeBatch(msgs) {
// SerialBatchEventUploader already batches internally (maxBatchSize=100);
// sequential enqueue preserves order and the uploader coalesces.
// Check closed between writes to avoid sending partial batches after
// transport teardown (epoch mismatch, SSE drop).
for (const m of msgs) {
if (closed) break
await ccr.writeEvent(m)
}
},
close() {
closed = true
ccr.close()
sse.close()
},
isConnectedStatus() {
// Write-readiness, not read-readiness — replBridge checks this
// before calling writeBatch. SSE open state is orthogonal.
return ccrInitialized
},
getStateLabel() {
// SSETransport doesn't expose its state string; synthesize from
// what we can observe. replBridge only uses this for debug logging.
if (sse.isClosedStatus()) return 'closed'
if (sse.isConnectedStatus()) return ccrInitialized ? 'connected' : 'init'
return 'connecting'
},
setOnData(cb) {
sse.setOnData(cb)
},
setOnClose(cb) {
onCloseCb = cb
// SSE reconnect-budget exhaustion fires onClose(undefined) — map to
// 4092 so ws_closed telemetry can distinguish it from HTTP-status
// closes (SSETransport:280 passes response.status). Stop CCRClient's
// heartbeat timer before notifying replBridge. (sse.close() doesn't
// invoke this, so the epoch-mismatch path above isn't double-firing.)
sse.setOnClose(code => {
ccr.close()
cb(code ?? 4092)
})
},
setOnConnect(cb) {
onConnectCb = cb
},
getLastSequenceNum() {
return sse.getLastSequenceNum()
},
// v2 write path (CCRClient) doesn't set maxConsecutiveFailures — no drops.
droppedBatchCount: 0,
reportState(state) {
ccr.reportState(state)
},
reportMetadata(metadata) {
ccr.reportMetadata(metadata)
},
reportDelivery(eventId, status) {
ccr.reportDelivery(eventId, status)
},
flush() {
return ccr.flush()
},
connect() {
// Outbound-only: skip the SSE read stream entirely — no inbound
// events to receive, no delivery ACKs to send. Only the CCRClient
// write path (POST /worker/events) and heartbeat are needed.
if (!opts.outboundOnly) {
// Fire-and-forget — SSETransport.connect() awaits readStream()
// (the read loop) and only resolves on stream close/error. The
// spawn-mode path in remoteIO.ts does the same void discard.
void sse.connect()
}
void ccr.initialize(epoch).then(
() => {
ccrInitialized = true
logForDebugging(
`[bridge:repl] v2 transport ready for writes (epoch=${epoch}, sse=${sse.isConnectedStatus() ? 'open' : 'opening'})`,
)
onConnectCb?.()
},
(err: unknown) => {
logForDebugging(
`[bridge:repl] CCR v2 initialize failed: ${errorMessage(err)}`,
{ level: 'error' },
)
// Close transport resources and notify replBridge via onClose
// so the poll loop can retry on the next work dispatch.
// Without this callback, replBridge never learns the transport
// failed to initialize and sits with transport === null forever.
ccr.close()
sse.close()
onCloseCb?.(4091) // 4091 = init failure, distinguishable from 4090 epoch mismatch
},
)
},
}
}

57
bridge/sessionIdCompat.ts Normal file
View file

@ -0,0 +1,57 @@
/**
* Session ID tag translation helpers for the CCR v2 compat layer.
*
* Lives in its own file (rather than workSecret.ts) so that sessionHandle.ts
* and replBridgeTransport.ts (bridge.mjs entry points) can import from
* workSecret.ts without pulling in these retag functions.
*
* The isCseShimEnabled kill switch is injected via setCseShimGate() to avoid
* a static import of bridgeEnabled.ts → growthbook.ts → config.ts — all
* banned from the sdk.mjs bundle (scripts/build-agent-sdk.sh). Callers that
* already import bridgeEnabled.ts register the gate; the SDK path never does,
* so the shim defaults to active (matching isCseShimEnabled()'s own default).
*/
let _isCseShimEnabled: (() => boolean) | undefined

/**
 * Install the GrowthBook-backed kill switch for the cse_ shim. Meant to be
 * called by bridge init code that already imports bridgeEnabled.ts. Until a
 * gate is registered the shim is treated as enabled.
 */
export function setCseShimGate(gate: () => boolean): void {
  _isCseShimEnabled = gate
}

/**
 * Swap a `cse_*` session ID's tag for `session_*` so it can be used with the
 * v1 compat API.
 *
 * Worker endpoints (/v1/code/sessions/{id}/worker/*) want `cse_*`; that's
 * what the work poll delivers. Client-facing compat endpoints
 * (/v1/sessions/{id}, /v1/sessions/{id}/archive, /v1/sessions/{id}/events)
 * want `session_*` (compat/convert.go:27 validates TagSession). Same UUID,
 * different costume.
 *
 * bridgeMain holds one sessionId variable for both worker registration and
 * session-management calls. It arrives as `cse_*` from the work poll under
 * the compat gate, so archiveSession/fetchSessionTitle need this re-tag.
 *
 * @param id Session ID in either tag form.
 * @returns The `session_*` form when `id` starts with `cse_` and the shim is
 *   enabled; otherwise `id` unchanged.
 */
export function toCompatSessionId(id: string): string {
  const infraPrefix = 'cse_'
  if (!id.startsWith(infraPrefix)) {
    return id
  }
  // The gate is consulted only for cse_ IDs; with no gate registered the
  // shim counts as enabled.
  const gate = _isCseShimEnabled
  const shimEnabled = gate ? gate() : true
  if (!shimEnabled) {
    return id
  }
  return 'session_' + id.slice(infraPrefix.length)
}
/**
 * Swap a `session_*` session ID's tag for `cse_*` for infrastructure-layer
 * calls. Inverse of toCompatSessionId.
 *
 * POST /v1/environments/{id}/bridge/reconnect lives below the compat layer:
 * once ccr_v2_compat_enabled is on server-side, it looks sessions up by their
 * infra tag (`cse_*`). createBridgeSession still returns `session_*`
 * (compat/convert.go:41) and that's what bridge-pointer stores — so perpetual
 * reconnect passes the wrong costume and gets "Session not found" back.
 * Same UUID, wrong tag.
 *
 * @param id Session ID in either tag form.
 * @returns The `cse_*` form when `id` starts with `session_`; otherwise `id`
 *   unchanged.
 */
export function toInfraSessionId(id: string): string {
  const compatPrefix = 'session_'
  return id.startsWith(compatPrefix)
    ? 'cse_' + id.slice(compatPrefix.length)
    : id
}

550
bridge/sessionRunner.ts Normal file
View file

@ -0,0 +1,550 @@
import { type ChildProcess, spawn } from 'child_process'
import { createWriteStream, type WriteStream } from 'fs'
import { tmpdir } from 'os'
import { dirname, join } from 'path'
import { createInterface } from 'readline'
import { jsonParse, jsonStringify } from '../utils/slowOperations.js'
import { debugTruncate } from './debugUtils.js'
import type {
SessionActivity,
SessionDoneStatus,
SessionHandle,
SessionSpawner,
SessionSpawnOpts,
} from './types.js'
const MAX_ACTIVITIES = 10
const MAX_STDERR_LINES = 10
/**
* Sanitize a session ID for use in file names.
* Strips any characters that could cause path traversal (e.g. `../`, `/`)
* or other filesystem issues, replacing them with underscores.
*/
export function safeFilenameId(id: string): string {
  // Anything outside [A-Za-z0-9_-] (path separators, dots, whitespace, …)
  // is swapped for an underscore, one per UTF-16 code unit.
  const unsafeChar = /[^\w-]/g
  return id.replace(unsafeChar, '_')
}
/**
* A control_request emitted by the child CLI when it needs permission to
* execute a **specific** tool invocation (not a general capability check).
* The bridge forwards this to the server so the user can approve/deny.
*/
export type PermissionRequest = {
// Discriminator; the spawner's stdout parser matches on this exact value.
type: 'control_request'
// NOTE(review): presumably echoed back in the control response so the child can correlate it — confirm.
request_id: string
request: {
/** Per-invocation permission check — "may I run this tool with these inputs?" */
subtype: 'can_use_tool'
// Name of the tool the child wants to run.
tool_name: string
// The inputs the tool would be invoked with.
input: Record<string, unknown>
// NOTE(review): presumably the id of the assistant tool_use block that triggered the request — confirm.
tool_use_id: string
}
}
/**
 * Dependencies injected into createSessionSpawner(). They are shared by every
 * session the spawner starts; per-session values arrive via SessionSpawnOpts.
 */
type SessionSpawnerDeps = {
/** Executable handed to child_process.spawn() for every session. */
execPath: string
/**
* Arguments that must precede the CLI flags when spawning. Empty for
* compiled binaries (where execPath is the claude binary itself); contains
* the script path (process.argv[1]) for npm installs where execPath is the
* node runtime. Without this, node sees --sdk-url as a node option and
* exits with "bad option: --sdk-url" (see anthropics/claude-code#28334).
*/
scriptArgs: string[]
/** Base environment for the child; spawn() layers the bridge-specific variables on top of it. */
env: NodeJS.ProcessEnv
/**
* Passes --verbose to the child, mirrors the child's stdout/stderr lines to
* this process's stderr, and enables an auto-generated debug file when
* debugFile is not set.
*/
verbose: boolean
/** When true, the child is spawned with CLAUDE_CODE_FORCE_SANDBOX=1. */
sandbox: boolean
/**
* Base path for the child's --debug-file; the sanitized session ID is added
* to it so each session gets its own file. When set, a raw NDJSON
* transcript is also written into the same directory.
*/
debugFile?: string
/** Forwarded to the child as --permission-mode when set. */
permissionMode?: string
/** Sink for the spawner's debug log lines. */
onDebug: (msg: string) => void
/** Called once per activity extracted from the child's stdout. */
onActivity?: (sessionId: string, activity: SessionActivity) => void
/**
* Called when the child emits a control_request with subtype can_use_tool.
* accessToken is the token from the spawn options (opts.accessToken).
*/
onPermissionRequest?: (
sessionId: string,
request: PermissionRequest,
accessToken: string,
) => void
}
/** Map tool names to human-readable verbs for the status display. */
const TOOL_VERBS: Record<string, string> = {
  Read: 'Reading',
  Write: 'Writing',
  Edit: 'Editing',
  MultiEdit: 'Editing',
  Bash: 'Running',
  Glob: 'Searching',
  Grep: 'Searching',
  WebFetch: 'Fetching',
  WebSearch: 'Searching',
  Task: 'Running task',
  FileReadTool: 'Reading',
  FileWriteTool: 'Writing',
  FileEditTool: 'Editing',
  GlobTool: 'Searching',
  GrepTool: 'Searching',
  BashTool: 'Running',
  NotebookEditTool: 'Editing notebook',
  LSP: 'LSP',
}

/** Tool-input fields probed, in priority order, for a display target. */
const TOOL_TARGET_KEYS = [
  'file_path',
  'filePath',
  'pattern',
  'command',
  'url',
  'query',
] as const

/**
 * Build a one-line status summary for a tool invocation, e.g.
 * "Reading src/foo.ts" or "Running npm test".
 *
 * The input comes from parsed child output and is not validated upstream, so
 * only string-valued fields are considered; anything else is ignored rather
 * than stringified or crashed on.
 *
 * @param name Tool name. Unknown names are used verbatim as the verb.
 * @param input Tool input object as decoded from JSON.
 * @returns "<verb> <target>" when the first string-valued target field is
 *   non-empty, otherwise just "<verb>". A `command` target is cut to 60
 *   characters.
 */
function toolSummary(name: string, input: Record<string, unknown>): string {
  // Own-property check: a plain indexed lookup would also hit
  // Object.prototype, turning tool names like "constructor" or "toString"
  // into a function instead of a verb string.
  const verb = Object.prototype.hasOwnProperty.call(TOOL_VERBS, name)
    ? (TOOL_VERBS[name] ?? name)
    : name

  let target = ''
  for (const key of TOOL_TARGET_KEYS) {
    const value = input[key]
    if (typeof value !== 'string') continue
    // The first string field wins even when empty, matching the previous
    // first-non-nullish semantics for well-typed inputs.
    target = key === 'command' ? value.slice(0, 60) : value
    break
  }

  return target ? `${verb} ${target}` : verb
}
/**
 * Turn one NDJSON line from the child's stdout into zero or more activities
 * for the status display.
 *
 * - `assistant` messages yield a `tool_start` per tool_use block and a `text`
 *   per non-empty text block, in content order.
 * - `result` messages yield a `result` on subtype "success" and an `error`
 *   on any other non-empty subtype.
 * - Everything else, including lines that are not JSON objects, yields
 *   nothing.
 *
 * @param line Raw stdout line.
 * @param sessionId Used only to tag debug output.
 * @param onDebug Receives one debug line per recognized event.
 * @returns Activities found on this line, all stamped with the same time.
 */
function extractActivities(
  line: string,
  sessionId: string,
  onDebug: (msg: string) => void,
): SessionActivity[] {
  let decoded: unknown
  try {
    decoded = jsonParse(line)
  } catch {
    return []
  }
  if (typeof decoded !== 'object' || !decoded) {
    return []
  }

  const envelope = decoded as Record<string, unknown>
  const found: SessionActivity[] = []
  const timestamp = Date.now()

  if (envelope.type === 'assistant') {
    const message = envelope.message as Record<string, unknown> | undefined
    const blocks = message ? message.content : undefined
    if (Array.isArray(blocks)) {
      for (const entry of blocks) {
        if (typeof entry !== 'object' || !entry) continue
        const block = entry as Record<string, unknown>

        if (block.type === 'tool_use') {
          const name = (block.name as string) ?? 'Tool'
          const input = (block.input as Record<string, unknown>) ?? {}
          found.push({
            type: 'tool_start',
            summary: toolSummary(name, input),
            timestamp,
          })
          onDebug(
            `[bridge:activity] sessionId=${sessionId} tool_use name=${name} ${inputPreview(input)}`,
          )
          continue
        }

        if (block.type === 'text') {
          const text = (block.text as string) ?? ''
          if (!(text.length > 0)) continue
          found.push({
            type: 'text',
            summary: text.slice(0, 80),
            timestamp,
          })
          onDebug(
            `[bridge:activity] sessionId=${sessionId} text "${text.slice(0, 100)}"`,
          )
        }
      }
    }
    return found
  }

  if (envelope.type === 'result') {
    const subtype = envelope.subtype as string | undefined
    if (subtype === 'success') {
      found.push({
        type: 'result',
        summary: 'Session completed',
        timestamp,
      })
      onDebug(`[bridge:activity] sessionId=${sessionId} result subtype=success`)
    } else if (subtype) {
      const errors = envelope.errors as string[] | undefined
      // Prefer the first reported error; otherwise describe the subtype.
      const errorSummary = errors?.[0] ?? `Error: ${subtype}`
      found.push({
        type: 'error',
        summary: errorSummary,
        timestamp,
      })
      onDebug(
        `[bridge:activity] sessionId=${sessionId} result subtype=${subtype} error="${errorSummary}"`,
      )
    } else {
      onDebug(
        `[bridge:activity] sessionId=${sessionId} result subtype=undefined`,
      )
    }
  }

  return found
}
/**
 * Extract plain text from a user-message NDJSON line emitted by the child.
 *
 * @param msg The parsed line (already known to have type "user").
 * @returns The trimmed text when this looks like a real human-authored
 *   message; otherwise undefined so the caller keeps waiting for the first
 *   real message. Undefined is returned when:
 *   - the message has a parent_tool_use_id, or is flagged isSynthetic or
 *     isReplay;
 *   - the content is neither a string nor an array containing a text block;
 *   - the first text block's `text` is not a string;
 *   - the text is empty after trimming.
 */
function extractUserMessageText(
  msg: Record<string, unknown>,
): string | undefined {
  // Skip tool-result user messages (wrapped subagent results), synthetic
  // caveat messages, and messages flagged as replays.
  if (msg.parent_tool_use_id != null || msg.isSynthetic || msg.isReplay) {
    return undefined
  }

  const message = msg.message
  if (!message || typeof message !== 'object') {
    return undefined
  }
  const content = (message as Record<string, unknown>).content

  let text: unknown
  if (typeof content === 'string') {
    text = content
  } else if (Array.isArray(content)) {
    // Only the first text block is considered, even if its text is unusable.
    const blocks: unknown[] = content
    const firstTextBlock = blocks.find(
      block =>
        !!block &&
        typeof block === 'object' &&
        (block as Record<string, unknown>).type === 'text',
    ) as Record<string, unknown> | undefined
    text = firstTextBlock?.text
  }

  // The line is decoded JSON with no schema validation: guard before
  // trimming so a malformed block cannot throw inside the stdout handler.
  if (typeof text !== 'string') {
    return undefined
  }
  const trimmed = text.trim()
  return trimmed ? trimmed : undefined
}
/**
 * Render up to three string-valued tool-input fields as `key="value"` pairs
 * for debug logging. Values are cut to 100 characters; non-string fields are
 * skipped. Returns an empty string when no field qualifies.
 */
function inputPreview(input: Record<string, unknown>): string {
  return Object.entries(input)
    .filter(
      (entry): entry is [string, string] => typeof entry[1] === 'string',
    )
    .slice(0, 3)
    .map(([field, value]) => `${field}="${value.slice(0, 100)}"`)
    .join(' ')
}
/**
 * Create a SessionSpawner that runs each bridge session as a child CLI
 * process speaking stream-json over stdin/stdout.
 *
 * For every spawn() the returned spawner:
 * - resolves a per-session debug file (and, when deps.debugFile is set, a raw
 *   NDJSON transcript next to it);
 * - starts the child with the session's SDK URL, session ID and access token;
 * - parses stdout lines into activities (ring buffer of MAX_ACTIVITIES),
 *   forwards can_use_tool control requests to deps.onPermissionRequest, and
 *   reports the first human-authored user message via opts.onFirstUserMessage;
 * - keeps the last MAX_STDERR_LINES stderr lines for diagnostics.
 *
 * @param deps Shared spawner dependencies (executable, env, logging hooks).
 * @returns A spawner whose spawn() returns a handle whose `done` promise
 *   resolves to 'interrupted' (SIGTERM/SIGINT), 'completed' (exit code 0)
 *   or 'failed' (anything else, including spawn errors).
 */
export function createSessionSpawner(deps: SessionSpawnerDeps): SessionSpawner {
  return {
    spawn(opts: SessionSpawnOpts, dir: string): SessionHandle {
      // Debug file resolution:
      // 1. If deps.debugFile is provided, use it with session ID suffix for uniqueness
      // 2. If verbose or ant build, auto-generate a temp file path
      // 3. Otherwise, no debug file
      const safeId = safeFilenameId(opts.sessionId)
      let debugFile: string | undefined
      if (deps.debugFile) {
        debugFile = appendSessionSuffix(deps.debugFile, safeId)
      } else if (deps.verbose || process.env.USER_TYPE === 'ant') {
        debugFile = join(tmpdir(), 'claude', `bridge-session-${safeId}.log`)
      }

      // Transcript file: write raw NDJSON lines for post-hoc analysis.
      // Placed alongside the debug file when one is configured.
      let transcriptStream: WriteStream | null = null
      let transcriptPath: string | undefined
      if (deps.debugFile) {
        transcriptPath = join(
          dirname(deps.debugFile),
          `bridge-transcript-${safeId}.jsonl`,
        )
        transcriptStream = createWriteStream(transcriptPath, { flags: 'a' })
        // A failing transcript must never take the session down: log once
        // and stop writing.
        transcriptStream.on('error', err => {
          deps.onDebug(
            `[bridge:session] Transcript write error: ${err.message}`,
          )
          transcriptStream = null
        })
        deps.onDebug(`[bridge:session] Transcript log: ${transcriptPath}`)
      }

      const args = [
        ...deps.scriptArgs,
        '--print',
        '--sdk-url',
        opts.sdkUrl,
        '--session-id',
        opts.sessionId,
        '--input-format',
        'stream-json',
        '--output-format',
        'stream-json',
        '--replay-user-messages',
        ...(deps.verbose ? ['--verbose'] : []),
        ...(debugFile ? ['--debug-file', debugFile] : []),
        ...(deps.permissionMode
          ? ['--permission-mode', deps.permissionMode]
          : []),
      ]

      const env: NodeJS.ProcessEnv = {
        ...deps.env,
        // Strip the bridge's OAuth token so the child CC process uses
        // the session access token for inference instead.
        CLAUDE_CODE_OAUTH_TOKEN: undefined,
        CLAUDE_CODE_ENVIRONMENT_KIND: 'bridge',
        ...(deps.sandbox && { CLAUDE_CODE_FORCE_SANDBOX: '1' }),
        CLAUDE_CODE_SESSION_ACCESS_TOKEN: opts.accessToken,
        // v1: HybridTransport (WS reads + POST writes) to Session-Ingress.
        // Harmless in v2 mode — transportUtils checks CLAUDE_CODE_USE_CCR_V2 first.
        CLAUDE_CODE_POST_FOR_SESSION_INGRESS_V2: '1',
        // v2: SSETransport + CCRClient to CCR's /v1/code/sessions/* endpoints.
        // Same env vars environment-manager sets in the container path.
        ...(opts.useCcrV2 && {
          CLAUDE_CODE_USE_CCR_V2: '1',
          CLAUDE_CODE_WORKER_EPOCH: String(opts.workerEpoch),
        }),
      }

      deps.onDebug(
        `[bridge:session] Spawning sessionId=${opts.sessionId} sdkUrl=${opts.sdkUrl} accessToken=${opts.accessToken ? 'present' : 'MISSING'}`,
      )
      deps.onDebug(`[bridge:session] Child args: ${args.join(' ')}`)
      if (debugFile) {
        deps.onDebug(`[bridge:session] Debug log: ${debugFile}`)
      }

      // Pipe all three streams: stdin for control, stdout for NDJSON parsing,
      // stderr for error capture and diagnostics.
      const child: ChildProcess = spawn(deps.execPath, args, {
        cwd: dir,
        stdio: ['pipe', 'pipe', 'pipe'],
        env,
        windowsHide: true,
      })
      deps.onDebug(
        `[bridge:session] sessionId=${opts.sessionId} pid=${child.pid}`,
      )

      const activities: SessionActivity[] = []
      let currentActivity: SessionActivity | null = null
      const lastStderr: string[] = []
      let sigkillSent = false
      let firstUserMessageSeen = false

      // Buffer stderr for error diagnostics
      if (child.stderr) {
        const stderrRl = createInterface({ input: child.stderr })
        stderrRl.on('line', line => {
          // Forward stderr to bridge's stderr in verbose mode
          if (deps.verbose) {
            process.stderr.write(line + '\n')
          }
          // Ring buffer of last N lines
          if (lastStderr.length >= MAX_STDERR_LINES) {
            lastStderr.shift()
          }
          lastStderr.push(line)
        })
      }

      // Parse NDJSON from child stdout
      if (child.stdout) {
        const rl = createInterface({ input: child.stdout })
        rl.on('line', line => {
          // Write raw NDJSON to transcript file
          if (transcriptStream) {
            transcriptStream.write(line + '\n')
          }
          // Log all messages flowing from the child CLI to the bridge
          deps.onDebug(
            `[bridge:ws] sessionId=${opts.sessionId} <<< ${debugTruncate(line)}`,
          )
          // In verbose mode, forward raw output to stderr
          if (deps.verbose) {
            process.stderr.write(line + '\n')
          }
          const extracted = extractActivities(
            line,
            opts.sessionId,
            deps.onDebug,
          )
          for (const activity of extracted) {
            // Maintain ring buffer
            if (activities.length >= MAX_ACTIVITIES) {
              activities.shift()
            }
            activities.push(activity)
            currentActivity = activity
            deps.onActivity?.(opts.sessionId, activity)
          }
          // Detect control_request and replayed user messages.
          // extractActivities parses the same line but swallows parse errors
          // and skips 'user' type — re-parse here is cheap (NDJSON lines are
          // small) and keeps each path self-contained.
          {
            let parsed: unknown
            try {
              parsed = jsonParse(line)
            } catch {
              // Non-JSON line, skip detection
            }
            if (parsed && typeof parsed === 'object') {
              const msg = parsed as Record<string, unknown>
              if (msg.type === 'control_request') {
                const request = msg.request as
                  | Record<string, unknown>
                  | undefined
                if (
                  request?.subtype === 'can_use_tool' &&
                  deps.onPermissionRequest
                ) {
                  deps.onPermissionRequest(
                    opts.sessionId,
                    parsed as PermissionRequest,
                    opts.accessToken,
                  )
                }
                // interrupt is turn-level; the child handles it internally (print.ts)
              } else if (
                msg.type === 'user' &&
                !firstUserMessageSeen &&
                opts.onFirstUserMessage
              ) {
                const text = extractUserMessageText(msg)
                if (text) {
                  firstUserMessageSeen = true
                  opts.onFirstUserMessage(text)
                }
              }
            }
          }
        })
      }

      const done = new Promise<SessionDoneStatus>(resolve => {
        child.on('close', (code, signal) => {
          // Close transcript stream on exit
          if (transcriptStream) {
            transcriptStream.end()
            transcriptStream = null
          }
          if (signal === 'SIGTERM' || signal === 'SIGINT') {
            deps.onDebug(
              `[bridge:session] sessionId=${opts.sessionId} interrupted signal=${signal} pid=${child.pid}`,
            )
            resolve('interrupted')
          } else if (code === 0) {
            deps.onDebug(
              `[bridge:session] sessionId=${opts.sessionId} completed exit_code=0 pid=${child.pid}`,
            )
            resolve('completed')
          } else {
            deps.onDebug(
              `[bridge:session] sessionId=${opts.sessionId} failed exit_code=${code} pid=${child.pid}`,
            )
            resolve('failed')
          }
        })
        // Spawn failures surface here; a later resolve() from 'close' is a
        // no-op because a promise settles only once.
        child.on('error', err => {
          deps.onDebug(
            `[bridge:session] sessionId=${opts.sessionId} spawn error: ${err.message}`,
          )
          resolve('failed')
        })
      })

      const handle: SessionHandle = {
        sessionId: opts.sessionId,
        done,
        activities,
        accessToken: opts.accessToken,
        lastStderr,
        get currentActivity(): SessionActivity | null {
          return currentActivity
        },
        kill(): void {
          if (!child.killed) {
            deps.onDebug(
              `[bridge:session] Sending SIGTERM to sessionId=${opts.sessionId} pid=${child.pid}`,
            )
            // On Windows, child.kill('SIGTERM') throws; use default signal.
            if (process.platform === 'win32') {
              child.kill()
            } else {
              child.kill('SIGTERM')
            }
          }
        },
        forceKill(): void {
          // Use separate flag because child.killed is set when kill() is called,
          // not when the process exits. We need to send SIGKILL even after SIGTERM.
          if (!sigkillSent && child.pid) {
            sigkillSent = true
            deps.onDebug(
              `[bridge:session] Sending SIGKILL to sessionId=${opts.sessionId} pid=${child.pid}`,
            )
            if (process.platform === 'win32') {
              child.kill()
            } else {
              child.kill('SIGKILL')
            }
          }
        },
        writeStdin(data: string): void {
          if (child.stdin && !child.stdin.destroyed) {
            deps.onDebug(
              `[bridge:ws] sessionId=${opts.sessionId} >>> ${debugTruncate(data)}`,
            )
            child.stdin.write(data)
          }
        },
        updateAccessToken(token: string): void {
          handle.accessToken = token
          // Send the fresh token to the child process via stdin. The child's
          // StructuredIO handles update_environment_variables messages by
          // setting process.env directly, so getSessionIngressAuthToken()
          // picks up the new token on the next refreshHeaders call.
          handle.writeStdin(
            jsonStringify({
              type: 'update_environment_variables',
              variables: { CLAUDE_CODE_SESSION_ACCESS_TOKEN: token },
            }) + '\n',
          )
          deps.onDebug(
            `[bridge:session] Sent token refresh via stdin for sessionId=${opts.sessionId}`,
          )
        },
      }
      return handle
    },
  }
}

/**
 * Insert `-<safeId>` before the extension of `filePath`'s final path
 * component, or append it when that component has no extension.
 *
 * Only a dot inside the basename (and not its leading character) counts as
 * an extension separator. Searching the whole path would splice the suffix
 * into a dotted directory name: `~/.claude/bridge-debug` must become
 * `~/.claude/bridge-debug-<id>`, not `~/-<id>.claude/bridge-debug`.
 */
function appendSessionSuffix(filePath: string, safeId: string): string {
  const lastSeparator = Math.max(
    filePath.lastIndexOf('/'),
    filePath.lastIndexOf('\\'),
  )
  const dot = filePath.lastIndexOf('.')
  if (dot > lastSeparator + 1) {
    return `${filePath.slice(0, dot)}-${safeId}${filePath.slice(dot)}`
  }
  return `${filePath}-${safeId}`
}
export { extractActivities as _extractActivitiesForTesting }

210
bridge/trustedDevice.ts Normal file
View file

@ -0,0 +1,210 @@
import axios from 'axios'
import memoize from 'lodash-es/memoize.js'
import { hostname } from 'os'
import { getOauthConfig } from '../constants/oauth.js'
import {
checkGate_CACHED_OR_BLOCKING,
getFeatureValue_CACHED_MAY_BE_STALE,
} from '../services/analytics/growthbook.js'
import { logForDebugging } from '../utils/debug.js'
import { errorMessage } from '../utils/errors.js'
import { isEssentialTrafficOnly } from '../utils/privacyLevel.js'
import { getSecureStorage } from '../utils/secureStorage/index.js'
import { jsonStringify } from '../utils/slowOperations.js'
/**
* Trusted device token source for bridge (remote-control) sessions.
*
* Bridge sessions have SecurityTier=ELEVATED on the server (CCR v2).
* The server gates ConnectBridgeWorker on its own flag
* (sessions_elevated_auth_enforcement in Anthropic Main); this CLI-side
* flag controls whether the CLI sends X-Trusted-Device-Token at all.
* Two flags so rollout can be staged: flip CLI-side first (headers
* start flowing, server still no-ops), then flip server-side.
*
* Enrollment (POST /auth/trusted_devices) is gated server-side by
* account_session.created_at < 10min, so it must happen during /login.
* Token is persistent (90d rolling expiry) and stored in keychain.
*
* See anthropics/anthropic#274559 (spec), #310375 (B1b tenant RPCs),
* #295987 (B2 Python routes), #307150 (C1' CCR v2 gate).
*/
const TRUSTED_DEVICE_GATE = 'tengu_sessions_elevated_auth_enforcement'

/**
 * Read the CLI-side trusted-device gate from the cached GrowthBook value
 * (which may be stale), defaulting to off.
 */
function isGateEnabled(): boolean {
  const gateValue = getFeatureValue_CACHED_MAY_BE_STALE(
    TRUSTED_DEVICE_GATE,
    false,
  )
  return gateValue
}
// Memoized — secureStorage.read() spawns a macOS `security` subprocess (~40ms).
// bridgeApi.ts calls this from getHeaders() on every poll/heartbeat/ack.
// Cache cleared after enrollment (below) and on logout (clearAuthRelatedCaches).
//
// Only the storage read is memoized — the GrowthBook gate is checked live so
// that a gate flip after GrowthBook refresh takes effect without a restart.
const readStoredToken = memoize((): string | undefined => {
  // A non-empty env var wins over secure storage (testing/canary override).
  const override = process.env.CLAUDE_TRUSTED_DEVICE_TOKEN
  return override
    ? override
    : getSecureStorage().read()?.trustedDeviceToken
})
/**
 * Return the trusted device token to send with bridge calls, or undefined
 * when the CLI-side gate is off or no token is available. The gate is read
 * on every call; only the token lookup is memoized.
 */
export function getTrustedDeviceToken(): string | undefined {
  return isGateEnabled() ? readStoredToken() : undefined
}
/**
 * Forget the memoized token so the next lookup re-reads the env var and
 * secure storage. Does not touch the stored token itself.
 */
export function clearTrustedDeviceTokenCache(): void {
  const memoCache = readStoredToken.cache
  memoCache?.clear?.()
}
/**
 * Remove the stored trusted device token from secure storage and drop the
 * memoized copy. Does nothing when the CLI-side gate is off.
 *
 * Called before enrollTrustedDevice() during /login so a stale token from the
 * previous account isn't sent as X-Trusted-Device-Token while enrollment is
 * in flight. enrollTrustedDevice is async — bridge API calls made between
 * login and enrollment completion would otherwise still read the old cached
 * token.
 */
export function clearTrustedDeviceToken(): void {
  if (isGateEnabled()) {
    const storage = getSecureStorage()
    try {
      const stored = storage.read()
      if (stored?.trustedDeviceToken) {
        delete stored.trustedDeviceToken
        storage.update(stored)
      }
    } catch {
      // Best-effort — don't block login if storage is inaccessible
    }
    // Runs even when storage failed, so the memoized value is never reused.
    readStoredToken.cache?.clear?.()
  }
}
/**
 * Enroll this device via POST /auth/trusted_devices and persist the token
 * to keychain. Best-effort — logs and returns on failure so callers
 * (post-login hooks) don't block the login flow. Never throws: every failure
 * path is caught and reported through logForDebugging.
 *
 * The server gates enrollment on account_session.created_at < 10min, so
 * this must be called immediately after a fresh /login. Calling it later
 * (e.g. lazy enrollment on /bridge 403) will fail with 403 stale_session.
 *
 * Enrollment is skipped (with a debug log) when the gate is off, when
 * CLAUDE_TRUSTED_DEVICE_TOKEN is set, when no OAuth access token is
 * available, or when only essential traffic is allowed.
 */
export async function enrollTrustedDevice(): Promise<void> {
try {
// checkGate_CACHED_OR_BLOCKING awaits any in-flight GrowthBook re-init
// (triggered by refreshGrowthBookAfterAuthChange in login.tsx) before
// reading the gate, so we get the post-refresh value.
if (!(await checkGate_CACHED_OR_BLOCKING(TRUSTED_DEVICE_GATE))) {
logForDebugging(
`[trusted-device] Gate ${TRUSTED_DEVICE_GATE} is off, skipping enrollment`,
)
return
}
// If CLAUDE_TRUSTED_DEVICE_TOKEN is set (e.g. by an enterprise wrapper),
// skip enrollment — the env var takes precedence in readStoredToken() so
// any enrolled token would be shadowed and never used.
if (process.env.CLAUDE_TRUSTED_DEVICE_TOKEN) {
logForDebugging(
'[trusted-device] CLAUDE_TRUSTED_DEVICE_TOKEN env var is set, skipping enrollment (env var takes precedence)',
)
return
}
// Lazy require — utils/auth.ts transitively pulls ~1300 modules
// (config → file → permissions → sessionStorage → commands). Daemon callers
// of getTrustedDeviceToken() don't need this; only /login does.
/* eslint-disable @typescript-eslint/no-require-imports */
const { getClaudeAIOAuthTokens } =
require('../utils/auth.js') as typeof import('../utils/auth.js')
/* eslint-enable @typescript-eslint/no-require-imports */
const accessToken = getClaudeAIOAuthTokens()?.accessToken
if (!accessToken) {
logForDebugging('[trusted-device] No OAuth token, skipping enrollment')
return
}
// Always re-enroll on /login — the existing token may belong to a
// different account (account-switch without /logout). Skipping enrollment
// would send the old account's token on the new account's bridge calls.
const secureStorage = getSecureStorage()
if (isEssentialTrafficOnly()) {
logForDebugging(
'[trusted-device] Essential traffic only, skipping enrollment',
)
return
}
const baseUrl = getOauthConfig().BASE_API_URL
let response
// Network/timeout errors and 5xx responses land in this catch; everything
// below 500 resolves and is handled by the status check that follows.
try {
response = await axios.post<{
device_token?: string
device_id?: string
}>(
`${baseUrl}/api/auth/trusted_devices`,
{ display_name: `Claude Code on ${hostname()} · ${process.platform}` },
{
headers: {
Authorization: `Bearer ${accessToken}`,
'Content-Type': 'application/json',
},
timeout: 10_000,
// Resolve instead of throwing for any status below 500, so 4xx
// responses reach the status check and get logged with their body.
validateStatus: s => s < 500,
},
)
} catch (err: unknown) {
logForDebugging(
`[trusted-device] Enrollment request failed: ${errorMessage(err)}`,
)
return
}
if (response.status !== 200 && response.status !== 201) {
// Log at most the first 200 characters of the serialized response body.
logForDebugging(
`[trusted-device] Enrollment failed ${response.status}: ${jsonStringify(response.data).slice(0, 200)}`,
)
return
}
const token = response.data?.device_token
if (!token || typeof token !== 'string') {
logForDebugging(
'[trusted-device] Enrollment response missing device_token field',
)
return
}
// Read-modify-write: only trustedDeviceToken is changed on the object
// read back from storage before it is written again.
try {
const storageData = secureStorage.read()
if (!storageData) {
logForDebugging(
'[trusted-device] Cannot read storage, skipping token persist',
)
return
}
storageData.trustedDeviceToken = token
const result = secureStorage.update(storageData)
if (!result.success) {
logForDebugging(
`[trusted-device] Failed to persist token: ${result.warning ?? 'unknown'}`,
)
return
}
// Drop the memoized value so the next getTrustedDeviceToken() call
// re-reads storage and sees the newly enrolled token.
readStoredToken.cache?.clear?.()
logForDebugging(
`[trusted-device] Enrolled device_id=${response.data.device_id ?? 'unknown'}`,
)
} catch (err: unknown) {
logForDebugging(
`[trusted-device] Storage write failed: ${errorMessage(err)}`,
)
}
} catch (err: unknown) {
// Catch-all so nothing above (gate check, lazy require, …) can reject
// the returned promise.
logForDebugging(`[trusted-device] Enrollment error: ${errorMessage(err)}`)
}
}

262
bridge/types.ts Normal file
View file

@ -0,0 +1,262 @@
/** Default per-session timeout: 24 hours, expressed in milliseconds. */
export const DEFAULT_SESSION_TIMEOUT_MS = 1000 * 60 * 60 * 24
/** Login guidance reused at the end of bridge auth error messages. */
export const BRIDGE_LOGIN_INSTRUCTION =
  'Remote Control is only available with claude.ai subscriptions. Please use `/login` to sign in with your claude.ai account.'
/** Complete error text printed when `claude remote-control` runs without auth. */
export const BRIDGE_LOGIN_ERROR =
  'Error: You must be logged in to use Remote Control.\n\n'.concat(
    BRIDGE_LOGIN_INSTRUCTION,
  )
/** Message shown when the user disconnects Remote Control (via /remote-control or ultraplan launch). */
export const REMOTE_CONTROL_DISCONNECTED_MSG = 'Remote Control disconnected.'
// --- Protocol types for the environments API ---
/** Payload of a work item: the kind of work plus an identifier. */
export type WorkData = {
type: 'session' | 'healthcheck'
// NOTE(review): presumably the session ID when type is 'session' — confirm against the poll handler.
id: string
}
/** A work item as returned by the environments API. */
export type WorkResponse = {
id: string
type: 'work'
environment_id: string
state: string
data: WorkData
// NOTE(review): presumably decodes to a WorkSecret — confirm against the decoder.
secret: string // base64url-encoded JSON
created_at: string
}
/**
 * Per-work-item secret bundle: ingress token, API base URL, sources, auth
 * entries and optional session configuration.
 */
export type WorkSecret = {
version: number
session_ingress_token: string
api_base_url: string
sources: Array<{
type: string
git_info?: { type: string; repo: string; ref?: string; token?: string }
}>
auth: Array<{ type: string; token: string }>
claude_code_args?: Record<string, string> | null
mcp_config?: unknown | null
environment_variables?: Record<string, string> | null
/**
* Server-driven CCR v2 selector. Set by prepare_work_secret() when the
* session was created via the v2 compat layer (ccr_v2_compat_enabled).
* Same field the BYOC runner reads at environment-runner/sessionExecutor.ts.
*/
use_code_sessions?: boolean
}
/**
 * Terminal state of a spawned session. The session runner resolves
 * 'interrupted' when the child exits on SIGTERM/SIGINT, 'completed' on exit
 * code 0, and 'failed' otherwise (including spawn errors).
 */
export type SessionDoneStatus = 'completed' | 'failed' | 'interrupted'
/**
 * Kind of activity shown in the status display: 'tool_start' for a tool_use
 * block, 'text' for assistant text, 'result' for a successful result
 * message, 'error' for a non-success result.
 */
export type SessionActivityType = 'tool_start' | 'text' | 'result' | 'error'
/** One entry in a session's recent-activity list. */
export type SessionActivity = {
type: SessionActivityType
summary: string // e.g. "Editing src/foo.ts", "Reading package.json"
// Milliseconds since the epoch (the session runner stamps it with Date.now()).
timestamp: number
}
/**
* How `claude remote-control` chooses session working directories.
* - `single-session`: one session in cwd, bridge tears down when it ends
* - `worktree`: persistent server, every session gets an isolated git worktree
* - `same-dir`: persistent server, every session shares cwd (can stomp each other)
*/
export type SpawnMode = 'single-session' | 'worktree' | 'same-dir'
/**
* Well-known worker_type values THIS codebase produces. Sent as
* `metadata.worker_type` at environment registration so claude.ai can filter
* the session picker by origin (e.g. assistant tab only shows assistant
* workers). The backend treats this as an opaque string — desktop cowork
* sends `"cowork"`, which isn't in this union. REPL code uses this narrow
* type for its own exhaustiveness; wire-level fields accept any string.
*/
export type BridgeWorkerType = 'claude_code' | 'claude_code_assistant'
/**
 * Configuration for one bridge instance. Passed to
 * `BridgeApiClient.registerBridgeEnvironment` and `BridgeLogger.printBanner`.
 */
export type BridgeConfig = {
dir: string
machineName: string
branch: string
gitRepoUrl: string | null
maxSessions: number
spawnMode: SpawnMode
verbose: boolean
sandbox: boolean
/** Client-generated UUID identifying this bridge instance. */
bridgeId: string
/**
 * Sent as metadata.worker_type so web clients can filter by origin.
 * Backend treats this as opaque — any string, not just BridgeWorkerType.
 */
workerType: string
/** Client-generated UUID for idempotent environment registration. */
environmentId: string
/**
 * Backend-issued environment_id to reuse on re-register. When set, the
 * backend treats registration as a reconnect to the existing environment
 * instead of creating a new one. Used by `claude remote-control
 * --session-id` resume. Must be a backend-format ID — client UUIDs are
 * rejected with 400.
 */
reuseEnvironmentId?: string
/** API base URL the bridge is connected to (used for polling). */
apiBaseUrl: string
/** Session ingress base URL for WebSocket connections (may differ from apiBaseUrl locally). */
sessionIngressUrl: string
/** Debug file path passed via --debug-file. */
debugFile?: string
/**
 * Per-session timeout in milliseconds. Sessions exceeding this are killed.
 * NOTE(review): presumably falls back to DEFAULT_SESSION_TIMEOUT_MS when
 * omitted — the fallback is not visible in this file; confirm at the use site.
 */
sessionTimeoutMs?: number
}
// --- Dependency interfaces (for testability) ---
/**
 * A control_response event sent back to a session (e.g. a permission decision).
 * The `subtype` is `'success'` per the SDK protocol; the inner `response`
 * carries the permission decision payload (e.g. `{ behavior: 'allow' }`).
 *
 * Note the two nesting levels: the outer `response` is the envelope
 * (subtype + request_id), the inner `response` is the free-form payload.
 */
export type PermissionResponseEvent = {
type: 'control_response'
response: {
subtype: 'success'
request_id: string
response: Record<string, unknown>
}
}
/**
 * Client contract for the environments/session APIs used by the bridge.
 * Declared as a type so callers can substitute a fake implementation.
 */
export type BridgeApiClient = {
/** Register the bridge described by `config`; resolves with the environment's ID and secret. */
registerBridgeEnvironment(config: BridgeConfig): Promise<{
environment_id: string
environment_secret: string
}>
/**
 * Poll the environment for a work item.
 * NOTE(review): `null` presumably means "no work available", and
 * `reclaimOlderThanMs` presumably controls reclaiming of stale work —
 * neither is established by this file; confirm against the implementation.
 */
pollForWork(
environmentId: string,
environmentSecret: string,
signal?: AbortSignal,
reclaimOlderThanMs?: number,
): Promise<WorkResponse | null>
/** Acknowledge a received work item, authenticating with the session token. */
acknowledgeWork(
environmentId: string,
workId: string,
sessionToken: string,
): Promise<void>
/** Stop a work item via the environments API. */
stopWork(environmentId: string, workId: string, force: boolean): Promise<void>
/** Deregister/delete the bridge environment on graceful shutdown. */
deregisterEnvironment(environmentId: string): Promise<void>
/** Send a permission response (control_response) to a session via the session events API. */
sendPermissionResponseEvent(
sessionId: string,
event: PermissionResponseEvent,
sessionToken: string,
): Promise<void>
/** Archive a session so it no longer appears as active on the server. */
archiveSession(sessionId: string): Promise<void>
/**
 * Force-stop stale worker instances and re-queue a session on an environment.
 * Used by `--session-id` to resume a session after the original bridge died.
 */
reconnectSession(environmentId: string, sessionId: string): Promise<void>
/**
 * Send a lightweight heartbeat for an active work item, extending its lease.
 * Uses SessionIngressAuth (JWT, no DB hit) instead of EnvironmentSecretAuth.
 * Returns the server's response with lease status.
 */
heartbeatWork(
environmentId: string,
workId: string,
sessionToken: string,
): Promise<{ lease_extended: boolean; state: string }>
}
/** Handle to a spawned session, as returned by `SessionSpawner.spawn`. */
export type SessionHandle = {
sessionId: string
/** Resolves with the session's terminal status. */
done: Promise<SessionDoneStatus>
// NOTE(review): the difference between kill() and forceKill() (e.g. which
// signal each sends) is not visible in this file — confirm in the spawner.
kill(): void
forceKill(): void
activities: SessionActivity[] // ring buffer of recent activities (last ~10)
currentActivity: SessionActivity | null // most recent
accessToken: string // session_ingress_token for API calls
lastStderr: string[] // ring buffer of last stderr lines
writeStdin(data: string): void // write directly to child stdin
/** Update the access token for a running session (e.g. after token refresh). */
updateAccessToken(token: string): void
}
/** Options passed to `SessionSpawner.spawn` for a single session. */
export type SessionSpawnOpts = {
sessionId: string
/** URL handed to the child; see buildSdkUrl / buildCCRv2SdkUrl in bridge/workSecret.ts for the two shapes built there. */
sdkUrl: string
accessToken: string
/** When true, spawn the child with CCR v2 env vars (SSE transport + CCRClient). */
useCcrV2?: boolean
/** Required when useCcrV2 is true. Obtained from POST /worker/register. */
workerEpoch?: number
/**
 * Fires once with the text of the first real user message seen on the
 * child's stdout (via --replay-user-messages). Lets the caller derive a
 * session title when none exists yet. Tool-result and synthetic user
 * messages are skipped.
 */
onFirstUserMessage?: (text: string) => void
}
/**
 * Factory for sessions. `spawn` is synchronous: it returns a `SessionHandle`
 * immediately and completion is observed through `handle.done`.
 */
export type SessionSpawner = {
/** Start a session with `opts`; `dir` is presumably the working directory for the child — confirm in the implementation. */
spawn(opts: SessionSpawnOpts, dir: string): SessionHandle
}
/**
 * Logging and live status-display surface used by the bridge.
 * This type only declares the contract; how each call renders is defined by
 * the implementation, which is not part of this file.
 */
export type BridgeLogger = {
printBanner(config: BridgeConfig, environmentId: string): void
logSessionStart(sessionId: string, prompt: string): void
logSessionComplete(sessionId: string, durationMs: number): void
logSessionFailed(sessionId: string, error: string): void
logStatus(message: string): void
logVerbose(message: string): void
logError(message: string): void
/** Log a reconnection success event after recovering from connection errors. */
logReconnected(disconnectedMs: number): void
/** Show idle status with repo/branch info and shimmer animation. */
updateIdleStatus(): void
/** Show reconnecting status in the live display. */
updateReconnectingStatus(delayStr: string, elapsedStr: string): void
// NOTE(review): `elapsed` and `trail` arrive pre-formatted as strings; their
// exact format is decided by the caller and is not visible here.
updateSessionStatus(
sessionId: string,
elapsed: string,
activity: SessionActivity,
trail: string[],
): void
clearStatus(): void
/** Set repository info for status line display. */
setRepoInfo(repoName: string, branch: string): void
/** Set debug log glob shown above the status line (ant users). */
setDebugLogPath(path: string): void
/** Transition to "Attached" state when a session starts. */
setAttached(sessionId: string): void
/** Show failed status in the live display. */
updateFailedStatus(error: string): void
/** Toggle QR code visibility. */
toggleQr(): void
/** Update the "<n> of <m> sessions" indicator and spawn mode hint. */
updateSessionCount(active: number, max: number, mode: SpawnMode): void
/** Update the spawn mode shown in the session-count line. Pass null to hide (single-session or toggle unavailable). */
setSpawnModeDisplay(mode: 'same-dir' | 'worktree' | null): void
/** Register a new session for multi-session display (called after spawn succeeds). */
addSession(sessionId: string, url: string): void
/** Update the per-session activity summary (tool being run) in the multi-session list. */
updateSessionActivity(sessionId: string, activity: SessionActivity): void
/**
 * Set a session's display title. In multi-session mode, updates the bullet list
 * entry. In single-session mode, also shows the title in the main status line.
 * Triggers a render (guarded against reconnecting/failed states).
 */
setSessionTitle(sessionId: string, title: string): void
/** Remove a session from the multi-session display when it ends. */
removeSession(sessionId: string): void
/** Force a re-render of the status display (for multi-session activity refresh). */
refreshDisplay(): void
}

127
bridge/workSecret.ts Normal file
View file

@ -0,0 +1,127 @@
import axios from 'axios'
import { jsonParse, jsonStringify } from '../utils/slowOperations.js'
import type { WorkSecret } from './types.js'
/**
 * Decode a base64url-encoded work secret and check its required fields.
 *
 * Only three things are verified at runtime: `version` must be exactly 1,
 * `session_ingress_token` must be a non-empty string, and `api_base_url`
 * must be a string. All remaining fields are returned unchecked.
 *
 * @param secret base64url text wrapping a JSON document
 * @returns the parsed document, typed as WorkSecret
 * @throws Error when the version is unsupported or a required field is bad
 */
export function decodeWorkSecret(secret: string): WorkSecret {
  const decoded: unknown = jsonParse(
    Buffer.from(secret, 'base64url').toString('utf-8'),
  )

  // Anything that is not a non-null object cannot carry a version at all.
  const fields: Record<string, unknown> | null =
    typeof decoded === 'object' && decoded !== null
      ? (decoded as Record<string, unknown>)
      : null

  if (fields === null || !('version' in fields) || fields.version !== 1) {
    const seenVersion =
      fields !== null && 'version' in fields ? fields.version : 'unknown'
    throw new Error(`Unsupported work secret version: ${seenVersion}`)
  }

  const ingressToken = fields.session_ingress_token
  if (typeof ingressToken !== 'string' || ingressToken.length === 0) {
    throw new Error(
      'Invalid work secret: missing or empty session_ingress_token',
    )
  }

  if (typeof fields.api_base_url !== 'string') {
    throw new Error('Invalid work secret: missing api_base_url')
  }

  return decoded as WorkSecret
}
/**
 * Build the WebSocket SDK URL for a session from the API base URL.
 *
 * The http(s):// prefix and any trailing slashes are removed from
 * `apiBaseUrl`, and the remainder is used as the host of a ws(s):// URL.
 *
 * A base URL containing "localhost" or "127.0.0.1" gets `ws://` and `/v2/`
 * (direct to session-ingress, no Envoy rewrite); everything else gets
 * `wss://` and `/v1/` (Envoy rewrites /v1/ to /v2/ in production).
 */
export function buildSdkUrl(apiBaseUrl: string, sessionId: string): string {
  const pointsAtLocalhost = ['localhost', '127.0.0.1'].some(marker =>
    apiBaseUrl.includes(marker),
  )
  const [scheme, apiVersion] = pointsAtLocalhost ? ['ws', 'v2'] : ['wss', 'v1']
  const authority = apiBaseUrl.replace(/^https?:\/\//, '').replace(/\/+$/, '')
  return [
    `${scheme}://${authority}`,
    apiVersion,
    'session_ingress',
    'ws',
    sessionId,
  ].join('/')
}
/**
 * Decide whether two session IDs refer to the same session, ignoring the
 * tagged-ID prefix.
 *
 * Tagged IDs look like {tag}_{body} or {tag}_staging_{body}, where the body
 * encodes a UUID. CCR v2's compat layer hands `session_*` IDs to v1 API
 * clients (compat/convert.go:41) while the infrastructure layer
 * (sandbox-gateway work queue, work poll response) uses `cse_*`
 * (compat/CLAUDE.md:13); both share the same underlying UUID.
 *
 * Without this comparison, replBridge rejects its own session as "foreign"
 * at the work-received check when the ccr_v2_compat_enabled gate is on.
 */
export function sameSessionId(a: string, b: string): boolean {
  if (a === b) return true

  // The body is whatever follows the final underscore, which covers both
  // tagged forms. With no underscore, lastIndexOf yields -1 and the whole
  // string is the body; exact equality was already handled above.
  const bodyOf = (id: string): string => id.substring(id.lastIndexOf('_') + 1)

  const bodyA = bodyOf(a)
  // Very short bodies (e.g. leftovers of malformed IDs) never count as a
  // match, so stray suffixes cannot collide by accident.
  if (bodyA.length < 4) return false
  return bodyA === bodyOf(b)
}
/**
 * Build the CCR v2 session URL for a session from the API base URL.
 *
 * In contrast to buildSdkUrl, the scheme of `apiBaseUrl` is kept (so the
 * result is http(s), not ws) and the path is /v1/code/sessions/{id} — the
 * child CC derives the SSE stream path and worker endpoints from this base.
 * Trailing slashes on `apiBaseUrl` are dropped first.
 */
export function buildCCRv2SdkUrl(
  apiBaseUrl: string,
  sessionId: string,
): string {
  const trimmedBase = apiBaseUrl.replace(/\/+$/, '')
  return [trimmedBase, 'v1', 'code', 'sessions', sessionId].join('/')
}
/**
 * Register this bridge as the worker for a CCR v2 session.
 *
 * POSTs an empty JSON body to `${sessionUrl}/worker/register` with a bearer
 * token and a 10 second timeout, then extracts `worker_epoch` from the reply.
 * The epoch must be handed to the child CC process so its CCRClient can
 * include it in every heartbeat/state/event request.
 *
 * Mirrors what environment-manager does in the container path
 * (api-go/environment-manager/cmd/cmd_task_run.go RegisterWorker).
 *
 * @returns the worker epoch as a safe integer
 * @throws Error when `worker_epoch` is absent or not a safe integer; axios
 *         errors from the request itself propagate unchanged
 */
export async function registerWorker(
  sessionUrl: string,
  accessToken: string,
): Promise<number> {
  const headers = {
    Authorization: `Bearer ${accessToken}`,
    'Content-Type': 'application/json',
    'anthropic-version': '2023-06-01',
  }
  const { data } = await axios.post(
    `${sessionUrl}/worker/register`,
    {},
    { headers, timeout: 10_000 },
  )

  // protojson writes int64 as a string to dodge JS precision loss, but the
  // Go encoder may also emit a plain number — accept either form.
  const rawEpoch = data?.worker_epoch
  const epoch = typeof rawEpoch === 'string' ? Number(rawEpoch) : rawEpoch

  // isSafeInteger already rejects NaN and +/-Infinity.
  if (typeof epoch === 'number' && Number.isSafeInteger(epoch)) {
    return epoch
  }
  throw new Error(
    `registerWorker: invalid worker_epoch in response: ${jsonStringify(data)}`,
  )
}

371
buddy/CompanionSprite.tsx Normal file

File diff suppressed because one or more lines are too long

133
buddy/companion.ts Normal file
View file

@ -0,0 +1,133 @@
import { getGlobalConfig } from '../utils/config.js'
import {
type Companion,
type CompanionBones,
EYES,
HATS,
RARITIES,
RARITY_WEIGHTS,
type Rarity,
SPECIES,
STAT_NAMES,
type StatName,
} from './types.js'
// Mulberry32 — a tiny seeded PRNG; plenty for picking ducks.
// Returns a generator whose successive calls yield floats in [0, 1). The same
// seed always produces the same sequence.
function mulberry32(seed: number): () => number {
  let state = seed >>> 0
  return () => {
    // Advance the 32-bit state, wrapping on overflow.
    state = ((state | 0) + 0x6d2b79f5) | 0
    const mixed = Math.imul(state ^ (state >>> 15), state | 1)
    const folded = (mixed + Math.imul(mixed ^ (mixed >>> 7), mixed | 61)) ^ mixed
    // Reinterpret as unsigned and scale by 2^32.
    return ((folded ^ (folded >>> 14)) >>> 0) / 0x100000000
  }
}
// Hash a string to an unsigned 32-bit integer.
// Under Bun the low 32 bits of Bun.hash are used; elsewhere this falls back
// to 32-bit FNV-1a over the string's UTF-16 code units. The two paths do not
// produce the same value for the same input.
function hashString(s: string): number {
  if (typeof Bun !== 'undefined') {
    return Number(BigInt(Bun.hash(s)) & 0xffffffffn)
  }
  const FNV_OFFSET_BASIS = 2166136261
  const FNV_PRIME = 16777619
  const hashed = s
    .split('')
    .reduce(
      (acc, unit) => Math.imul(acc ^ unit.charCodeAt(0), FNV_PRIME),
      FNV_OFFSET_BASIS,
    )
  return hashed >>> 0
}
// Choose one element of `arr` using a single draw from `rng`, which is
// expected to return a value in [0, 1). Callers pass non-empty arrays.
function pick<T>(rng: () => number, arr: readonly T[]): T {
  const index = Math.floor(rng() * arr.length)
  return arr[index]!
}
// Weighted rarity draw: consumes exactly one value from `rng` and walks
// RARITIES in order, subtracting each weight until the draw goes negative.
function rollRarity(rng: () => number): Rarity {
  let weightSum = 0
  for (const weight of Object.values(RARITY_WEIGHTS)) {
    weightSum += weight
  }

  let remaining = rng() * weightSum
  for (const candidate of RARITIES) {
    remaining -= RARITY_WEIGHTS[candidate]
    if (remaining < 0) {
      return candidate
    }
  }
  // Only reached if the walk never goes negative.
  return 'common'
}
// Base stat value per rarity; every rolled stat is offset from this.
const RARITY_FLOOR: Record<Rarity, number> = {
  common: 5,
  uncommon: 15,
  rare: 25,
  epic: 35,
  legendary: 50,
}
// One peak stat, one dump stat, the rest scattered; rarity raises the floor.
// Draw order is fixed (peak, dump — re-drawn until it differs from peak —
// then one draw per stat in STAT_NAMES order) so results stay deterministic
// for a given rng.
function rollStats(
  rng: () => number,
  rarity: Rarity,
): Record<StatName, number> {
  const base = RARITY_FLOOR[rarity]
  const peakStat = pick(rng, STAT_NAMES)
  let dumpStat: StatName
  do {
    dumpStat = pick(rng, STAT_NAMES)
  } while (dumpStat === peakStat)

  const rolled = {} as Record<StatName, number>
  for (const statName of STAT_NAMES) {
    // Exactly one draw per stat, scaled differently per role.
    const draw = rng()
    const spread = (width: number): number => Math.floor(draw * width)
    rolled[statName] =
      statName === peakStat
        ? Math.min(100, base + 50 + spread(30))
        : statName === dumpStat
          ? Math.max(1, base - 10 + spread(15))
          : base + spread(40)
  }
  return rolled
}
// Appended to the user ID before hashing in roll(); changing it changes the
// seed, and therefore the bones, derived for every user.
const SALT = 'friend-2026-401'
/** Result of one deterministic roll: the companion's bones plus an extra seed. */
export type Roll = {
bones: CompanionBones
// Integer in [0, 1e9) drawn from the same rng after the bones.
// NOTE(review): its consumer is not in this file — presumably seeds the
// model-generated "soul"; confirm at the call site.
inspirationSeed: number
}
// Produce a full Roll from `rng`. The draw order is part of the contract:
// rarity, species, eye, hat (skipped for common), shiny, stats, then the
// inspiration seed. Reordering would change every existing companion.
function rollFrom(rng: () => number): Roll {
  const rarity = rollRarity(rng)
  const species = pick(rng, SPECIES)
  const eye = pick(rng, EYES)
  // Commons never wear a hat and do not consume a draw for one.
  const hat = rarity === 'common' ? 'none' : pick(rng, HATS)
  const shiny = rng() < 0.01
  const stats = rollStats(rng, rarity)
  const inspirationSeed = Math.floor(rng() * 1e9)

  const bones: CompanionBones = { rarity, species, eye, hat, shiny, stats }
  return { bones, inspirationSeed }
}
// roll() is hit from three hot paths (500ms sprite tick, per-keystroke
// PromptInput, per-turn observer), all with the same userId, and the result
// is deterministic — so the most recent result is memoized.
let rollCache: { key: string; value: Roll } | undefined
export function roll(userId: string): Roll {
  const key = userId + SALT
  if (rollCache !== undefined && rollCache.key === key) {
    return rollCache.value
  }
  const seed = hashString(key)
  rollCache = { key, value: rollFrom(mulberry32(seed)) }
  return rollCache.value
}
// Roll from an arbitrary seed string. Unlike roll(), no SALT is appended and
// the result is not cached.
export function rollWithSeed(seed: string): Roll {
  const rng = mulberry32(hashString(seed))
  return rollFrom(rng)
}
// Identity used to seed the companion: the OAuth account UUID when present,
// otherwise the config's userID, otherwise the literal 'anon'.
export function companionUserId(): string {
  const { oauthAccount, userID } = getGlobalConfig()
  return oauthAccount?.accountUuid ?? userID ?? 'anon'
}
// Bones are recomputed from the user ID on every read and merged over the
// stored soul. They are never persisted, so species renames and edits to the
// SPECIES array cannot break stored companions, and hand-editing
// config.companion cannot fake a rarity.
export function getCompanion(): Companion | undefined {
  const soul = getGlobalConfig().companion
  if (!soul) {
    return undefined
  }
  const freshBones = roll(companionUserId()).bones
  // Bones are spread last so stale bones fields left in old-format configs
  // are overridden.
  return { ...soul, ...freshBones }
}

36
buddy/prompt.ts Normal file
View file

@ -0,0 +1,36 @@
import { feature } from 'bun:bundle'
import type { Message } from '../types/message.js'
import type { Attachment } from '../utils/attachments.js'
import { getGlobalConfig } from '../utils/config.js'
import { getCompanion } from './companion.js'
/**
 * Build the system-prompt section that introduces the companion to the model.
 *
 * The sentence-joining dashes were missing from this text, leaving run-on
 * fragments such as "You're not NAME it's a separate watcher"; they are
 * restored here so the instructions parse unambiguously.
 *
 * @param name    the companion's display name
 * @param species the companion's species, inserted verbatim
 * @returns a markdown block starting with the "# Companion" heading
 */
export function companionIntroText(name: string, species: string): string {
  return `# Companion
A small ${species} named ${name} sits beside the user's input box and occasionally comments in a speech bubble. You're not ${name} — it's a separate watcher.
When the user addresses ${name} directly (by name), its bubble will answer. Your job in that moment is to stay out of the way: respond in ONE line or less, or just answer any part of the message meant for you. Don't explain that you're not ${name} — they know. Don't narrate what ${name} might say — the bubble handles that.`
}
/**
 * Produce the one-off `companion_intro` attachment for the current companion.
 *
 * Returns an empty list when the BUDDY feature is off, when there is no
 * companion, when the companion is muted, or when `messages` already holds
 * a `companion_intro` attachment carrying this companion's name.
 */
export function getCompanionIntroAttachment(
  messages: Message[] | undefined,
): Attachment[] {
  if (!feature('BUDDY')) return []

  const companion = getCompanion()
  if (!companion || getGlobalConfig().companionMuted) return []

  // An intro is announced at most once per companion name.
  const alreadyAnnounced = (messages ?? []).some(
    msg =>
      msg.type === 'attachment' &&
      msg.attachment.type === 'companion_intro' &&
      msg.attachment.name === companion.name,
  )
  if (alreadyAnnounced) return []

  const intro: Attachment = {
    type: 'companion_intro',
    name: companion.name,
    species: companion.species,
  }
  return [intro]
}

514
buddy/sprites.ts Normal file
View file

@ -0,0 +1,514 @@
import type { CompanionBones, Eye, Hat, Species } from './types.js'
import {
axolotl,
blob,
cactus,
capybara,
cat,
chonk,
dragon,
duck,
ghost,
goose,
mushroom,
octopus,
owl,
penguin,
rabbit,
robot,
snail,
turtle,
} from './types.js'
// ASCII-art bodies, keyed by species. Each species has several frames used
// for the idle fidget animation; each frame is an array of text rows.
// Each sprite is 5 lines tall, 12 wide (after {E}→1char substitution).
// `{E}` is a placeholder that renderSprite replaces with the companion's eye.
// Line 0 is the hat slot — must be blank in frames 0-1; frame 2 may use it.
// NOTE(review): many rows below are shorter than the stated 12 columns —
// confirm that padding spaces were not lost somewhere upstream of this file.
const BODIES: Record<Species, string[][]> = {
[duck]: [
[
' ',
' __ ',
' <({E} )___ ',
' ( ._> ',
' `--´ ',
],
[
' ',
' __ ',
' <({E} )___ ',
' ( ._> ',
' `--´~ ',
],
[
' ',
' __ ',
' <({E} )___ ',
' ( .__> ',
' `--´ ',
],
],
[goose]: [
[
' ',
' ({E}> ',
' || ',
' _(__)_ ',
' ^^^^ ',
],
[
' ',
' ({E}> ',
' || ',
' _(__)_ ',
' ^^^^ ',
],
[
' ',
' ({E}>> ',
' || ',
' _(__)_ ',
' ^^^^ ',
],
],
[blob]: [
[
' ',
' .----. ',
' ( {E} {E} ) ',
' ( ) ',
' `----´ ',
],
[
' ',
' .------. ',
' ( {E} {E} ) ',
' ( ) ',
' `------´ ',
],
[
' ',
' .--. ',
' ({E} {E}) ',
' ( ) ',
' `--´ ',
],
],
[cat]: [
[
' ',
' /\\_/\\ ',
' ( {E} {E}) ',
' ( ω ) ',
' (")_(") ',
],
[
' ',
' /\\_/\\ ',
' ( {E} {E}) ',
' ( ω ) ',
' (")_(")~ ',
],
[
' ',
' /\\-/\\ ',
' ( {E} {E}) ',
' ( ω ) ',
' (")_(") ',
],
],
[dragon]: [
[
' ',
' /^\\ /^\\ ',
' < {E} {E} > ',
' ( ~~ ) ',
' `-vvvv-´ ',
],
[
' ',
' /^\\ /^\\ ',
' < {E} {E} > ',
' ( ) ',
' `-vvvv-´ ',
],
[
' ~ ~ ',
' /^\\ /^\\ ',
' < {E} {E} > ',
' ( ~~ ) ',
' `-vvvv-´ ',
],
],
[octopus]: [
[
' ',
' .----. ',
' ( {E} {E} ) ',
' (______) ',
' /\\/\\/\\/\\ ',
],
[
' ',
' .----. ',
' ( {E} {E} ) ',
' (______) ',
' \\/\\/\\/\\/ ',
],
[
' o ',
' .----. ',
' ( {E} {E} ) ',
' (______) ',
' /\\/\\/\\/\\ ',
],
],
[owl]: [
[
' ',
' /\\ /\\ ',
' (({E})({E})) ',
' ( >< ) ',
' `----´ ',
],
[
' ',
' /\\ /\\ ',
' (({E})({E})) ',
' ( >< ) ',
' .----. ',
],
[
' ',
' /\\ /\\ ',
' (({E})(-)) ',
' ( >< ) ',
' `----´ ',
],
],
[penguin]: [
[
' ',
' .---. ',
' ({E}>{E}) ',
' /( )\\ ',
' `---´ ',
],
[
' ',
' .---. ',
' ({E}>{E}) ',
' |( )| ',
' `---´ ',
],
[
' .---. ',
' ({E}>{E}) ',
' /( )\\ ',
' `---´ ',
' ~ ~ ',
],
],
[turtle]: [
[
' ',
' _,--._ ',
' ( {E} {E} ) ',
' /[______]\\ ',
' `` `` ',
],
[
' ',
' _,--._ ',
' ( {E} {E} ) ',
' /[______]\\ ',
' `` `` ',
],
[
' ',
' _,--._ ',
' ( {E} {E} ) ',
' /[======]\\ ',
' `` `` ',
],
],
[snail]: [
[
' ',
' {E} .--. ',
' \\ ( @ ) ',
' \\_`--´ ',
' ~~~~~~~ ',
],
[
' ',
' {E} .--. ',
' | ( @ ) ',
' \\_`--´ ',
' ~~~~~~~ ',
],
[
' ',
' {E} .--. ',
' \\ ( @ ) ',
' \\_`--´ ',
' ~~~~~~ ',
],
],
[ghost]: [
[
' ',
' .----. ',
' / {E} {E} \\ ',
' | | ',
' ~`~``~`~ ',
],
[
' ',
' .----. ',
' / {E} {E} \\ ',
' | | ',
' `~`~~`~` ',
],
[
' ~ ~ ',
' .----. ',
' / {E} {E} \\ ',
' | | ',
' ~~`~~`~~ ',
],
],
[axolotl]: [
[
' ',
'}~(______)~{',
'}~({E} .. {E})~{',
' ( .--. ) ',
' (_/ \\_) ',
],
[
' ',
'~}(______){~',
'~}({E} .. {E}){~',
' ( .--. ) ',
' (_/ \\_) ',
],
[
' ',
'}~(______)~{',
'}~({E} .. {E})~{',
' ( -- ) ',
' ~_/ \\_~ ',
],
],
[capybara]: [
[
' ',
' n______n ',
' ( {E} {E} ) ',
' ( oo ) ',
' `------´ ',
],
[
' ',
' n______n ',
' ( {E} {E} ) ',
' ( Oo ) ',
' `------´ ',
],
[
' ~ ~ ',
' u______n ',
' ( {E} {E} ) ',
' ( oo ) ',
' `------´ ',
],
],
[cactus]: [
[
' ',
' n ____ n ',
' | |{E} {E}| | ',
' |_| |_| ',
' | | ',
],
[
' ',
' ____ ',
' n |{E} {E}| n ',
' |_| |_| ',
' | | ',
],
[
' n n ',
' | ____ | ',
' | |{E} {E}| | ',
' |_| |_| ',
' | | ',
],
],
[robot]: [
[
' ',
' .[||]. ',
' [ {E} {E} ] ',
' [ ==== ] ',
' `------´ ',
],
[
' ',
' .[||]. ',
' [ {E} {E} ] ',
' [ -==- ] ',
' `------´ ',
],
[
' * ',
' .[||]. ',
' [ {E} {E} ] ',
' [ ==== ] ',
' `------´ ',
],
],
[rabbit]: [
[
' ',
' (\\__/) ',
' ( {E} {E} ) ',
' =( .. )= ',
' (")__(") ',
],
[
' ',
' (|__/) ',
' ( {E} {E} ) ',
' =( .. )= ',
' (")__(") ',
],
[
' ',
' (\\__/) ',
' ( {E} {E} ) ',
' =( . . )= ',
' (")__(") ',
],
],
[mushroom]: [
[
' ',
' .-o-OO-o-. ',
'(__________)',
' |{E} {E}| ',
' |____| ',
],
[
' ',
' .-O-oo-O-. ',
'(__________)',
' |{E} {E}| ',
' |____| ',
],
[
' . o . ',
' .-o-OO-o-. ',
'(__________)',
' |{E} {E}| ',
' |____| ',
],
],
[chonk]: [
[
' ',
' /\\ /\\ ',
' ( {E} {E} ) ',
' ( .. ) ',
' `------´ ',
],
[
' ',
' /\\ /| ',
' ( {E} {E} ) ',
' ( .. ) ',
' `------´ ',
],
[
' ',
' /\\ /\\ ',
' ( {E} {E} ) ',
' ( .. ) ',
' `------´~ ',
],
],
}
// Text row drawn in a sprite's hat slot (row 0) for each hat.
// renderSprite only reads this table when the hat is not 'none', so the
// empty 'none' entry exists to satisfy Record<Hat, string>.
const HAT_LINES: Record<Hat, string> = {
none: '',
crown: ' \\^^^/ ',
tophat: ' [___] ',
propeller: ' -+- ',
halo: ' ( ) ',
wizard: ' /^\\ ',
beanie: ' (___) ',
tinyduck: ' ,> ',
}
/**
 * Render one animation frame of a companion as an array of text rows.
 *
 * The frame index wraps around the species' frame count. Every `{E}`
 * placeholder becomes the companion's eye character, and the hat is drawn
 * into row 0 when that row is blank.
 *
 * @param bones the companion's deterministic traits (species, eye, hat, …)
 * @param frame animation frame index; defaults to 0
 * @returns a fresh array of rows; the source frame is never mutated
 */
export function renderSprite(bones: CompanionBones, frame = 0): string[] {
  const speciesFrames = BODIES[bones.species]
  const template = speciesFrames[frame % speciesFrames.length]!
  const rows = template.map(row => row.replaceAll('{E}', bones.eye))

  const isBlank = (row: string): boolean => row.trim() === ''

  // The hat only goes in when row 0 is empty — some fidget frames use that
  // row for smoke, antennae and the like.
  if (bones.hat !== 'none' && isBlank(rows[0]!)) {
    rows[0] = HAT_LINES[bones.hat]
  }

  // A blank hat slot wastes a row in the Card and the ambient sprite, so it
  // is dropped — but only when EVERY frame of the species leaves row 0 blank,
  // otherwise the sprite height would oscillate between frames.
  const hatSlotUnused =
    isBlank(rows[0]!) && speciesFrames.every(f => isBlank(f[0]!))
  return hatSlotUnused ? rows.slice(1) : rows
}
/** Number of animation frames defined in BODIES for `species`. */
export function spriteFrameCount(species: Species): number {
  const { length: frameCount } = BODIES[species]
  return frameCount
}
/**
 * Render a compact one-line face for a companion, built from its species
 * and eye character.
 */
export function renderFace(bones: CompanionBones): string {
  const eye: Eye = bones.eye
  // One entry per species; duck and goose share the same face, as do
  // cactus and mushroom.
  const faces: Record<Species, string> = {
    [duck]: `(${eye}>`,
    [goose]: `(${eye}>`,
    [blob]: `(${eye}${eye})`,
    [cat]: `=${eye}ω${eye}=`,
    [dragon]: `<${eye}~${eye}>`,
    [octopus]: `~(${eye}${eye})~`,
    [owl]: `(${eye})(${eye})`,
    [penguin]: `(${eye}>)`,
    [turtle]: `[${eye}_${eye}]`,
    [snail]: `${eye}(@)`,
    [ghost]: `/${eye}${eye}\\`,
    [axolotl]: `}${eye}.${eye}{`,
    [capybara]: `(${eye}oo${eye})`,
    [cactus]: `|${eye} ${eye}|`,
    [robot]: `[${eye}${eye}]`,
    [rabbit]: `(${eye}..${eye})`,
    [mushroom]: `|${eye} ${eye}|`,
    [chonk]: `(${eye}.${eye})`,
  }
  return faces[bones.species]
}

148
buddy/types.ts Normal file
View file

@ -0,0 +1,148 @@
// Rarity tiers. The array order matters: rollRarity (buddy/companion.ts)
// walks this list in order when doing its weighted draw.
export const RARITIES = [
'common',
'uncommon',
'rare',
'epic',
'legendary',
] as const
/** Union of the rarity names listed in RARITIES. */
export type Rarity = (typeof RARITIES)[number]
// One species name collides with a model-codename canary in excluded-strings.txt.
// The check greps build output (not source), so runtime-constructing the value keeps
// the literal out of the bundle while the check stays armed for the actual codename.
// All species encoded uniformly; `as` casts are type-position only (erased pre-bundle).
// Each argument list below is the name spelled out as character codes.
const c = String.fromCharCode
// biome-ignore format: keep the species list compact
export const duck = c(0x64,0x75,0x63,0x6b) as 'duck'
export const goose = c(0x67, 0x6f, 0x6f, 0x73, 0x65) as 'goose'
export const blob = c(0x62, 0x6c, 0x6f, 0x62) as 'blob'
export const cat = c(0x63, 0x61, 0x74) as 'cat'
export const dragon = c(0x64, 0x72, 0x61, 0x67, 0x6f, 0x6e) as 'dragon'
export const octopus = c(0x6f, 0x63, 0x74, 0x6f, 0x70, 0x75, 0x73) as 'octopus'
export const owl = c(0x6f, 0x77, 0x6c) as 'owl'
export const penguin = c(0x70, 0x65, 0x6e, 0x67, 0x75, 0x69, 0x6e) as 'penguin'
export const turtle = c(0x74, 0x75, 0x72, 0x74, 0x6c, 0x65) as 'turtle'
export const snail = c(0x73, 0x6e, 0x61, 0x69, 0x6c) as 'snail'
export const ghost = c(0x67, 0x68, 0x6f, 0x73, 0x74) as 'ghost'
export const axolotl = c(0x61, 0x78, 0x6f, 0x6c, 0x6f, 0x74, 0x6c) as 'axolotl'
export const capybara = c(
0x63,
0x61,
0x70,
0x79,
0x62,
0x61,
0x72,
0x61,
) as 'capybara'
export const cactus = c(0x63, 0x61, 0x63, 0x74, 0x75, 0x73) as 'cactus'
export const robot = c(0x72, 0x6f, 0x62, 0x6f, 0x74) as 'robot'
export const rabbit = c(0x72, 0x61, 0x62, 0x62, 0x69, 0x74) as 'rabbit'
export const mushroom = c(
0x6d,
0x75,
0x73,
0x68,
0x72,
0x6f,
0x6f,
0x6d,
) as 'mushroom'
export const chonk = c(0x63, 0x68, 0x6f, 0x6e, 0x6b) as 'chonk'
// Every species a companion can be. rollFrom (buddy/companion.ts) picks from
// this array by index, so reordering or resizing it changes which species a
// given user ID maps to.
export const SPECIES = [
duck,
goose,
blob,
cat,
dragon,
octopus,
owl,
penguin,
turtle,
snail,
ghost,
axolotl,
capybara,
cactus,
robot,
rabbit,
mushroom,
chonk,
] as const
/** Union of the species names listed in SPECIES. */
export type Species = (typeof SPECIES)[number] // biome-ignore format: keep compact
// Eye glyphs; each is substituted for the `{E}` placeholder in sprite rows.
export const EYES = ['·', '✦', '×', '◉', '@', '°'] as const
/** Union of the eye glyphs listed in EYES. */
export type Eye = (typeof EYES)[number]
// Hat choices. 'none' is a real member: rollFrom assigns it to every common
// companion, and non-common companions can still draw it from this list.
export const HATS = [
'none',
'crown',
'tophat',
'propeller',
'halo',
'wizard',
'beanie',
'tinyduck',
] as const
/** Union of the hat names listed in HATS. */
export type Hat = (typeof HATS)[number]
// Stat names; rollStats produces one number per entry, in this order.
export const STAT_NAMES = [
'DEBUGGING',
'PATIENCE',
'CHAOS',
'WISDOM',
'SNARK',
] as const
/** Union of the stat names listed in STAT_NAMES. */
export type StatName = (typeof STAT_NAMES)[number]
// Deterministic parts — derived from hash(userId)
export type CompanionBones = {
rarity: Rarity
species: Species
eye: Eye
hat: Hat
shiny: boolean
stats: Record<StatName, number>
}
// Model-generated soul — stored in config after first hatch
export type CompanionSoul = {
name: string
personality: string
}
// The full in-memory companion: regenerated bones + stored soul + hatch time.
export type Companion = CompanionBones &
CompanionSoul & {
// NOTE(review): unit not visible here — presumably a ms-since-epoch timestamp; confirm.
hatchedAt: number
}
// What actually persists in config. Bones are regenerated from hash(userId)
// on every read so species renames don't break stored companions and users
// can't edit their way to a legendary.
export type StoredCompanion = CompanionSoul & { hatchedAt: number }
// Relative draw weights per rarity. The values sum to 100, so each one reads
// as a percentage; rollRarity sums them at runtime rather than assuming that.
export const RARITY_WEIGHTS = {
common: 60,
uncommon: 25,
rare: 10,
epic: 4,
legendary: 1,
} as const satisfies Record<Rarity, number>
// Star string shown for each rarity (one star per tier).
export const RARITY_STARS = {
common: '★',
uncommon: '★★',
rare: '★★★',
epic: '★★★★',
legendary: '★★★★★',
} as const satisfies Record<Rarity, string>
// Theme color key used for each rarity; `satisfies` checks every value is a
// real key of Theme.
export const RARITY_COLORS = {
common: 'inactive',
uncommon: 'success',
rare: 'permission',
epic: 'autoAccept',
legendary: 'warning',
} as const satisfies Record<Rarity, keyof import('../utils/theme.js').Theme>

File diff suppressed because one or more lines are too long

31
cli/exit.ts Normal file
View file

@ -0,0 +1,31 @@
/**
* CLI exit helpers for subcommand handlers.
*
* Consolidates the 4-5 line "print + lint-suppress + exit" block that was
* copy-pasted ~60 times across `claude mcp *` / `claude plugin *` handlers.
* The `: never` return type lets TypeScript narrow control flow at call sites
* without a trailing `return`.
*/
/* eslint-disable custom-rules/no-process-exit -- centralized CLI exit point */
// `return undefined as never` (not a post-exit throw) — tests spy on
// process.exit and let it return. Call sites write `return cliError(...)`
// where subsequent code would dereference narrowed-away values under mock.
// cliError uses console.error (tests spy on console.error); cliOk uses
// process.stdout.write (tests spy on process.stdout.write — Bun's console.log
// doesn't route through a spied process.stdout.write).
/**
 * Print `msg` to stderr when one is supplied (an empty string counts as
 * absent), then exit the process with status 1.
 *
 * The trailing `return` only matters when process.exit has been mocked and
 * therefore returns.
 */
export function cliError(msg?: string): never {
  const hasMessage = Boolean(msg)
  if (hasMessage) {
    // biome-ignore lint/suspicious/noConsole: centralized CLI error output
    console.error(msg)
  }
  process.exit(1)
  return undefined as never
}
/**
 * Write `msg` plus a newline to stdout when one is supplied (an empty string
 * counts as absent), then exit the process with status 0.
 *
 * The trailing `return` only matters when process.exit has been mocked and
 * therefore returns.
 */
export function cliOk(msg?: string): never {
  if (msg) {
    const line = msg + '\n'
    process.stdout.write(line)
  }
  process.exit(0)
  return undefined as never
}

70
cli/handlers/agents.ts Normal file
View file

@ -0,0 +1,70 @@
/**
* Agents subcommand handler prints the list of configured agents.
* Dynamically imported only when `claude agents` runs.
*/
import {
AGENT_SOURCE_GROUPS,
compareAgentsByName,
getOverrideSourceLabel,
type ResolvedAgent,
resolveAgentModelDisplay,
resolveAgentOverrides,
} from '../../tools/AgentTool/agentDisplay.js'
import {
getActiveAgentsFromList,
getAgentDefinitionsWithOverrides,
} from '../../tools/AgentTool/loadAgentsDir.js'
import { getCwd } from '../../utils/cwd.js'
// One-line summary of an agent: its type, then the model display and the
// memory label when those are present, joined by " · ".
function formatAgent(agent: ResolvedAgent): string {
  const modelLabel = resolveAgentModelDisplay(agent)
  const memoryLabel = agent.memory ? `${agent.memory} memory` : undefined
  // The agent type is always listed; only the optional parts are filtered.
  const optionalParts = [modelLabel, memoryLabel].filter(part => Boolean(part))
  return [agent.agentType, ...optionalParts].join(' · ')
}
/**
 * Handler for `claude agents`: prints the configured agents grouped by
 * source, in AGENT_SOURCE_GROUPS order and sorted by name within a group.
 *
 * Agents overridden by another source are listed with a "(shadowed by …)"
 * prefix and are not counted as active. Prints "No agents found." when no
 * group has any agent.
 */
export async function agentsHandler(): Promise<void> {
  const cwd = getCwd()
  const { allAgents } = await getAgentDefinitionsWithOverrides(cwd)
  const resolved = resolveAgentOverrides(
    allAgents,
    getActiveAgentsFromList(allAgents),
  )

  const output: string[] = []
  let activeCount = 0

  for (const group of AGENT_SOURCE_GROUPS) {
    const members = resolved
      .filter(candidate => candidate.source === group.source)
      .sort(compareAgentsByName)
    if (members.length === 0) continue

    output.push(`${group.label}:`)
    for (const member of members) {
      if (!member.overriddenBy) {
        output.push(` ${formatAgent(member)}`)
        activeCount++
        continue
      }
      const winnerLabel = getOverrideSourceLabel(member.overriddenBy)
      output.push(` (shadowed by ${winnerLabel}) ${formatAgent(member)}`)
    }
    // Blank separator after each group; the trailing one is trimmed below.
    output.push('')
  }

  if (output.length === 0) {
    // biome-ignore lint/suspicious/noConsole:: intentional console output
    console.log('No agents found.')
    return
  }
  // biome-ignore lint/suspicious/noConsole:: intentional console output
  console.log(`${activeCount} active agents\n`)
  // biome-ignore lint/suspicious/noConsole:: intentional console output
  console.log(output.join('\n').trimEnd())
}

330
cli/handlers/auth.ts Normal file
View file

@ -0,0 +1,330 @@
/* eslint-disable custom-rules/no-process-exit -- CLI subcommand handler intentionally exits */
import {
clearAuthRelatedCaches,
performLogout,
} from '../../commands/logout/logout.js'
import {
type AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
logEvent,
} from '../../services/analytics/index.js'
import { getSSLErrorHint } from '../../services/api/errorUtils.js'
import { fetchAndStoreClaudeCodeFirstTokenDate } from '../../services/api/firstTokenDate.js'
import {
createAndStoreApiKey,
fetchAndStoreUserRoles,
refreshOAuthToken,
shouldUseClaudeAIAuth,
storeOAuthAccountInfo,
} from '../../services/oauth/client.js'
import { getOauthProfileFromOauthToken } from '../../services/oauth/getOauthProfile.js'
import { OAuthService } from '../../services/oauth/index.js'
import type { OAuthTokens } from '../../services/oauth/types.js'
import {
clearOAuthTokenCache,
getAnthropicApiKeyWithSource,
getAuthTokenSource,
getOauthAccountInfo,
getSubscriptionType,
isUsing3PServices,
saveOAuthTokensIfNeeded,
validateForceLoginOrg,
} from '../../utils/auth.js'
import { saveGlobalConfig } from '../../utils/config.js'
import { logForDebugging } from '../../utils/debug.js'
import { isRunningOnHomespace } from '../../utils/envUtils.js'
import { errorMessage } from '../../utils/errors.js'
import { logError } from '../../utils/log.js'
import { getAPIProvider } from '../../utils/model/providers.js'
import { getInitialSettings } from '../../utils/settings/settings.js'
import { jsonStringify } from '../../utils/slowOperations.js'
import {
buildAccountProperties,
buildAPIProviderProperties,
} from '../../utils/status.js'
/**
 * Installs freshly acquired OAuth tokens as the active local credentials.
 *
 * Previous auth state is cleared first. Account info is then recorded from
 * the profile (bundled with the tokens or fetched), or from the token
 * exchange account data when no profile is available. Finally the tokens are
 * persisted and the scope-dependent follow-up calls run.
 *
 * @param tokens - Tokens produced by the OAuth flow or a refresh exchange.
 * @throws When the tokens are not claude.ai-scoped and no API key could be
 *   created (either `createAndStoreApiKey` throws or it returns nothing).
 */
export async function installOAuthTokens(tokens: OAuthTokens): Promise<void> {
  // Optional steps report their failure to the debug log and carry on.
  const logFailure = (err: unknown): void => {
    logForDebugging(String(err), { level: 'error' })
  }

  // Drop whatever was stored before so old and new credentials never mix.
  await performLogout({ clearOnboarding: false })

  // A profile shipped with the tokens wins; otherwise fetch one now.
  const oauthProfile =
    tokens.profile ?? (await getOauthProfileFromOauthToken(tokens.accessToken))

  if (oauthProfile) {
    const { account, organization } = oauthProfile
    storeOAuthAccountInfo({
      accountUuid: account.uuid,
      emailAddress: account.email,
      organizationUuid: organization.uuid,
      displayName: account.display_name || undefined,
      hasExtraUsageEnabled: organization.has_extra_usage_enabled ?? undefined,
      billingType: organization.billing_type ?? undefined,
      subscriptionCreatedAt: organization.subscription_created_at ?? undefined,
      accountCreatedAt: account.created_at,
    })
  } else if (tokens.tokenAccount) {
    // No profile: fall back to the account data from the token exchange.
    const { uuid, emailAddress, organizationUuid } = tokens.tokenAccount
    storeOAuthAccountInfo({
      accountUuid: uuid,
      emailAddress,
      organizationUuid,
    })
  }

  const { warning: storageWarning } = saveOAuthTokensIfNeeded(tokens)
  clearOAuthTokenCache()
  if (storageWarning) {
    logEvent('tengu_oauth_storage_warning', {
      warning:
        storageWarning as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
    })
  }

  // Roles and first-token-date can fail for limited-scope tokens (e.g.
  // inference-only from setup-token); core auth does not depend on them.
  await fetchAndStoreUserRoles(tokens.accessToken).catch(logFailure)

  if (!shouldUseClaudeAIAuth(tokens.scopes)) {
    // Console users cannot proceed without an API key, so failures propagate.
    const apiKey = await createAndStoreApiKey(tokens.accessToken)
    if (!apiKey) {
      throw new Error(
        'Unable to create API key. The server accepted the request but did not return a key.',
      )
    }
  } else {
    await fetchAndStoreClaudeCodeFirstTokenDate().catch(logFailure)
  }

  await clearAuthRelatedCaches()
}
/**
 * Login subcommand handler: obtains OAuth tokens, installs them, and exits.
 *
 * Two paths are supported:
 * - When CLAUDE_CODE_OAUTH_REFRESH_TOKEN is set, the refresh token is
 *   exchanged directly for tokens; CLAUDE_CODE_OAUTH_SCOPES must then be set.
 * - Otherwise an OAuthService flow is started and the sign-in URL is printed.
 *
 * Both paths call installOAuthTokens and validateForceLoginOrg, then end the
 * process with process.exit (0 on success, 1 on any failure).
 *
 * @param email - Forwarded to the OAuth flow as `loginHint`.
 * @param sso - When true, the flow is started with `loginMethod: 'sso'`.
 * @param console - Selects Console login unless `forceLoginMethod` is set.
 * @param claudeai - Only checked here for mutual exclusion with `console`;
 *   claude.ai login is already the default when `console` is not given.
 */
export async function authLogin({
email,
sso,
console: useConsole,
claudeai,
}: {
email?: string
sso?: boolean
console?: boolean
claudeai?: boolean
}): Promise<void> {
// The two login-target flags are mutually exclusive.
if (useConsole && claudeai) {
process.stderr.write(
'Error: --console and --claudeai cannot be used together.\n',
)
process.exit(1)
}
const settings = getInitialSettings()
// forceLoginMethod is a hard constraint (enterprise setting) — matches ConsoleOAuthFlow behavior.
// Without it, --console selects Console; --claudeai (or no flag) selects claude.ai.
const loginWithClaudeAi = settings.forceLoginMethod
? settings.forceLoginMethod === 'claudeai'
: !useConsole
const orgUUID = settings.forceLoginOrgUUID
// Fast path: if a refresh token is provided via env var, skip the browser
// OAuth flow and exchange it directly for tokens.
const envRefreshToken = process.env.CLAUDE_CODE_OAUTH_REFRESH_TOKEN
if (envRefreshToken) {
const envScopes = process.env.CLAUDE_CODE_OAUTH_SCOPES
if (!envScopes) {
process.stderr.write(
'CLAUDE_CODE_OAUTH_SCOPES is required when using CLAUDE_CODE_OAUTH_REFRESH_TOKEN.\n' +
'Set it to the space-separated scopes the refresh token was issued with\n' +
'(e.g. "user:inference" or "user:profile user:inference user:sessions:claude_code user:mcp_servers").\n',
)
process.exit(1)
}
// Split on any whitespace run and drop empty entries from leading/trailing spaces.
const scopes = envScopes.split(/\s+/).filter(Boolean)
try {
logEvent('tengu_login_from_refresh_token', {})
const tokens = await refreshOAuthToken(envRefreshToken, { scopes })
await installOAuthTokens(tokens)
// Org validation runs after the tokens are installed; a mismatch fails the login.
const orgResult = await validateForceLoginOrg()
if (!orgResult.valid) {
process.stderr.write(orgResult.message + '\n')
process.exit(1)
}
// Mark onboarding complete — interactive paths handle this via
// the Onboarding component, but the env var path skips it.
saveGlobalConfig(current => {
if (current.hasCompletedOnboarding) return current
return { ...current, hasCompletedOnboarding: true }
})
// Here the analytics flag is derived from the scopes on the returned tokens,
// not from the CLI flags.
logEvent('tengu_oauth_success', {
loginWithClaudeAi: shouldUseClaudeAIAuth(tokens.scopes),
})
process.stdout.write('Login successful.\n')
process.exit(0)
} catch (err) {
logError(err)
const sslHint = getSSLErrorHint(err)
process.stderr.write(
`Login failed: ${errorMessage(err)}\n${sslHint ? sslHint + '\n' : ''}`,
)
process.exit(1)
}
}
// Interactive path: run the OAuth flow through OAuthService.
const resolvedLoginMethod = sso ? 'sso' : undefined
const oauthService = new OAuthService()
try {
logEvent('tengu_oauth_flow_start', { loginWithClaudeAi })
const result = await oauthService.startOAuthFlow(
// Callback receives the sign-in URL; it is printed so the user can open it manually.
async url => {
process.stdout.write('Opening browser to sign in…\n')
process.stdout.write(`If the browser didn't open, visit: ${url}\n`)
},
{
loginWithClaudeAi,
loginHint: email,
loginMethod: resolvedLoginMethod,
orgUUID,
},
)
await installOAuthTokens(result)
const orgResult = await validateForceLoginOrg()
if (!orgResult.valid) {
process.stderr.write(orgResult.message + '\n')
process.exit(1)
}
logEvent('tengu_oauth_success', { loginWithClaudeAi })
process.stdout.write('Login successful.\n')
process.exit(0)
} catch (err) {
logError(err)
const sslHint = getSSLErrorHint(err)
process.stderr.write(
`Login failed: ${errorMessage(err)}\n${sslHint ? sslHint + '\n' : ''}`,
)
process.exit(1)
} finally {
// NOTE(review): every path above calls process.exit, so this cleanup
// presumably only runs if process.exit is stubbed (e.g. in tests) — confirm.
oauthService.cleanup()
}
}
/**
 * Reports the current authentication state, then exits.
 *
 * With `opts.text` set, the account and API-provider properties are printed
 * as `label: value` lines; otherwise a JSON object is printed. The process
 * exits with code 0 when any credential source is detected and 1 otherwise.
 *
 * @param opts - Output selection: `text` picks the plain-text report; any
 *   other combination produces JSON.
 */
export async function authStatus(opts: {
  json?: boolean
  text?: boolean
}): Promise<void> {
  const { source: authTokenSource, hasToken } = getAuthTokenSource()
  const { source: apiKeySource } = getAnthropicApiKeyWithSource()
  const hasApiKeyEnvVar =
    Boolean(process.env.ANTHROPIC_API_KEY) && !isRunningOnHomespace()
  const oauthAccount = getOauthAccountInfo()
  const subscriptionType = getSubscriptionType()
  const using3P = isUsing3PServices()
  const loggedIn =
    hasToken || apiKeySource !== 'none' || hasApiKeyEnvVar || using3P

  // First matching rule wins; the order below is the precedence.
  const authMethod = ((): string => {
    if (using3P) return 'third_party'
    if (authTokenSource === 'claude.ai') return 'claude.ai'
    if (authTokenSource === 'apiKeyHelper') return 'api_key_helper'
    if (authTokenSource !== 'none') return 'oauth_token'
    if (apiKeySource === 'ANTHROPIC_API_KEY' || hasApiKeyEnvVar) return 'api_key'
    if (apiKeySource === '/login managed key') return 'claude.ai'
    return 'none'
  })()

  if (opts.text) {
    const properties = [
      ...buildAccountProperties(),
      ...buildAPIProviderProperties(),
    ]
    let printedAny = false
    for (const prop of properties) {
      // Only plain strings and arrays are printable; anything else is skipped.
      let rendered: string | null = null
      if (typeof prop.value === 'string') {
        rendered = prop.value
      } else if (Array.isArray(prop.value)) {
        rendered = prop.value.join(', ')
      }
      if (rendered === null || rendered === 'none') {
        continue
      }
      printedAny = true
      process.stdout.write(
        prop.label ? `${prop.label}: ${rendered}\n` : `${rendered}\n`,
      )
    }
    // Nothing printable but the env var is set: still mention the API key.
    if (!printedAny && hasApiKeyEnvVar) {
      process.stdout.write('API key: ANTHROPIC_API_KEY\n')
    }
    if (!loggedIn) {
      process.stdout.write(
        'Not logged in. Run claude auth login to authenticate.\n',
      )
    }
  } else {
    const report: Record<string, string | boolean | null> = {
      loggedIn,
      authMethod,
      apiProvider: getAPIProvider(),
    }
    // Prefer the detected key source; fall back to the env var when present.
    let keySource: string | null = null
    if (apiKeySource !== 'none') {
      keySource = apiKeySource
    } else if (hasApiKeyEnvVar) {
      keySource = 'ANTHROPIC_API_KEY'
    }
    if (keySource) {
      report.apiKeySource = keySource
    }
    // Account details are only reported for claude.ai-backed auth.
    if (authMethod === 'claude.ai') {
      Object.assign(report, {
        email: oauthAccount?.emailAddress ?? null,
        orgId: oauthAccount?.organizationUuid ?? null,
        orgName: oauthAccount?.organizationName ?? null,
        subscriptionType: subscriptionType ?? null,
      })
    }
    process.stdout.write(`${jsonStringify(report, null, 2)}\n`)
  }

  process.exit(loggedIn ? 0 : 1)
}
/**
 * Logout subcommand handler: clears the stored credentials and exits.
 *
 * Exits with code 0 after a successful logout. If `performLogout` throws,
 * the error is logged, a failure message is written to stderr, and the
 * process exits with code 1.
 */
export async function authLogout(): Promise<void> {
  try {
    await performLogout({ clearOnboarding: false })
  } catch (err) {
    // Log the underlying cause instead of discarding it, as authLogin does,
    // so a failed logout can be diagnosed.
    logError(err)
    process.stderr.write('Failed to log out.\n')
    process.exit(1)
  }
  process.stdout.write('Successfully logged out from your Anthropic account.\n')
  process.exit(0)
}

170
cli/handlers/autoMode.ts Normal file
View file

@ -0,0 +1,170 @@
/**
* Auto mode subcommand handlers dump default/merged classifier rules and
* critique user-written rules. Dynamically imported when `claude auto-mode ...` runs.
*/
import { errorMessage } from '../../utils/errors.js'
import {
getMainLoopModel,
parseUserSpecifiedModel,
} from '../../utils/model/model.js'
import {
type AutoModeRules,
buildDefaultExternalSystemPrompt,
getDefaultExternalAutoModeRules,
} from '../../utils/permissions/yoloClassifier.js'
import { getAutoModeConfig } from '../../utils/settings/settings.js'
import { sideQuery } from '../../utils/sideQuery.js'
import { jsonStringify } from '../../utils/slowOperations.js'
/** Writes `rules` to stdout as 2-space-indented JSON followed by a newline. */
function writeRules(rules: AutoModeRules): void {
  const serialized = jsonStringify(rules, null, 2)
  process.stdout.write(`${serialized}\n`)
}
/** Prints the default external auto mode rules as JSON. */
export function autoModeDefaultsHandler(): void {
  const defaultRules = getDefaultExternalAutoModeRules()
  writeRules(defaultRules)
}
/**
 * Prints the effective auto mode config as JSON.
 *
 * Each section (`allow`, `soft_deny`, `environment`) is resolved on its own:
 * a non-empty user section replaces that section's defaults entirely, while
 * an empty or absent one falls through to the external defaults. This is the
 * same per-section REPLACE behaviour buildYoloSystemPrompt applies when it
 * resolves the external template.
 */
export function autoModeConfigHandler(): void {
  const userConfig = getAutoModeConfig()
  const defaults = getDefaultExternalAutoModeRules()

  // A user section counts only when it has at least one rule.
  const resolveSection = (
    custom: string[] | undefined,
    fallback: string[],
  ): string[] => (custom?.length ? custom : fallback)

  writeRules({
    allow: resolveSection(userConfig?.allow, defaults.allow),
    soft_deny: resolveSection(userConfig?.soft_deny, defaults.soft_deny),
    environment: resolveSection(userConfig?.environment, defaults.environment),
  })
}
/**
 * System prompt for the rule-critique side query. Each array entry is one
 * output line (an empty entry is a blank line); entries are joined with '\n'.
 */
const CRITIQUE_SYSTEM_PROMPT = [
  'You are an expert reviewer of auto mode classifier rules for Claude Code.',
  '',
  'Claude Code has an "auto mode" that uses an AI classifier to decide whether ' +
    'tool calls should be auto-approved or require user confirmation. Users can ' +
    'write custom rules in three categories:',
  '',
  '- **allow**: Actions the classifier should auto-approve',
  '- **soft_deny**: Actions the classifier should block (require user confirmation)',
  "- **environment**: Context about the user's setup that helps the classifier make decisions",
  '',
  "Your job is to critique the user's custom rules for clarity, completeness, " +
    'and potential issues. The classifier is an LLM that reads these rules as ' +
    'part of its system prompt.',
  '',
  'For each rule, evaluate:',
  '1. **Clarity**: Is the rule unambiguous? Could the classifier misinterpret it?',
  "2. **Completeness**: Are there gaps or edge cases the rule doesn't cover?",
  '3. **Conflicts**: Do any of the rules conflict with each other?',
  '4. **Actionability**: Is the rule specific enough for the classifier to act on?',
  '',
  'Be concise and constructive. Only comment on rules that could be improved. ' +
    'If all rules look good, say so.',
].join('\n')
/**
 * Asks a model to critique the user's custom auto mode rules and prints the
 * first text block of the reply.
 *
 * If no custom rules are configured, a hint is printed and nothing is
 * queried. If the query fails, the error goes to stderr and
 * `process.exitCode` is set to 1.
 *
 * @param options - `model` optionally overrides the model used for the
 *   critique; otherwise the main loop model is used.
 */
export async function autoModeCritiqueHandler(options: {
  model?: string
}): Promise<void> {
  const config = getAutoModeConfig()

  // Nothing to critique unless at least one section has a rule.
  const customSections = [config?.allow, config?.soft_deny, config?.environment]
  if (!customSections.some(rules => (rules?.length ?? 0) > 0)) {
    process.stdout.write(
      'No custom auto mode rules found.\n\n' +
        'Add rules to your settings file under autoMode.{allow, soft_deny, environment}.\n' +
        'Run `claude auto-mode defaults` to see the default rules for reference.\n',
    )
    return
  }

  const model = options.model
    ? parseUserSpecifiedModel(options.model)
    : getMainLoopModel()
  const defaults = getDefaultExternalAutoModeRules()
  const classifierPrompt = buildDefaultExternalSystemPrompt()

  // Sections without custom rules contribute an empty string.
  const userRulesSummary = [
    formatRulesForCritique('allow', config?.allow ?? [], defaults.allow),
    formatRulesForCritique(
      'soft_deny',
      config?.soft_deny ?? [],
      defaults.soft_deny,
    ),
    formatRulesForCritique(
      'environment',
      config?.environment ?? [],
      defaults.environment,
    ),
  ].join('')

  process.stdout.write('Analyzing your auto mode rules…\n\n')

  const userMessage = [
    'Here is the full classifier system prompt that the auto mode classifier receives:\n\n',
    '<classifier_system_prompt>\n',
    classifierPrompt,
    '\n</classifier_system_prompt>\n\n',
    "Here are the user's custom rules that REPLACE the corresponding default sections:\n\n",
    userRulesSummary,
    '\nPlease critique these custom rules.',
  ].join('')

  let critique
  try {
    critique = await sideQuery({
      querySource: 'auto_mode_critique',
      model,
      system: CRITIQUE_SYSTEM_PROMPT,
      skipSystemPromptPrefix: true,
      max_tokens: 4096,
      messages: [{ role: 'user', content: userMessage }],
    })
  } catch (error) {
    process.stderr.write(
      'Failed to analyze rules: ' + errorMessage(error) + '\n',
    )
    // Signal failure through the exit code and return normally.
    process.exitCode = 1
    return
  }

  const firstText = critique.content.find(block => block.type === 'text')
  const report =
    firstText?.type === 'text'
      ? `${firstText.text}\n`
      : 'No critique was generated. Please try again.\n'
  process.stdout.write(report)
}
/**
 * Renders one rule section for the critique prompt: the user's custom rules
 * followed by the defaults they replace, each as a `- ` bullet list.
 *
 * @param section - Section name shown in the heading.
 * @param userRules - Custom rules for the section.
 * @param defaultRules - Default rules that the custom ones replace.
 * @returns The formatted section ending in a blank line, or an empty string
 *   when `userRules` is empty.
 */
function formatRulesForCritique(
  section: string,
  userRules: string[],
  defaultRules: string[],
): string {
  if (userRules.length === 0) {
    return ''
  }
  const toBullets = (rules: string[]): string =>
    rules.map(rule => `- ${rule}`).join('\n')
  // The two trailing empty entries produce the closing blank line.
  return [
    `## ${section} (custom rules replacing defaults)`,
    'Custom:',
    toBullets(userRules),
    '',
    'Defaults being replaced:',
    toBullets(defaultRules),
    '',
    '',
  ].join('\n')
}

362
cli/handlers/mcp.tsx Normal file

File diff suppressed because one or more lines are too long

878
cli/handlers/plugins.ts Normal file
View file

@ -0,0 +1,878 @@
/**
* Plugin and marketplace subcommand handlers extracted from main.tsx for lazy loading.
* These are dynamically imported only when `claude plugin *` or `claude plugin marketplace *` runs.
*/
/* eslint-disable custom-rules/no-process-exit -- CLI subcommand handlers intentionally exit */
import figures from 'figures'
import { basename, dirname } from 'path'
import { setUseCoworkPlugins } from '../../bootstrap/state.js'
import {
type AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
type AnalyticsMetadata_I_VERIFIED_THIS_IS_PII_TAGGED,
logEvent,
} from '../../services/analytics/index.js'
import {
disableAllPlugins,
disablePlugin,
enablePlugin,
installPlugin,
uninstallPlugin,
updatePluginCli,
VALID_INSTALLABLE_SCOPES,
VALID_UPDATE_SCOPES,
} from '../../services/plugins/pluginCliCommands.js'
import { getPluginErrorMessage } from '../../types/plugin.js'
import { errorMessage } from '../../utils/errors.js'
import { logError } from '../../utils/log.js'
import { clearAllCaches } from '../../utils/plugins/cacheUtils.js'
import { getInstallCounts } from '../../utils/plugins/installCounts.js'
import {
isPluginInstalled,
loadInstalledPluginsV2,
} from '../../utils/plugins/installedPluginsManager.js'
import {
createPluginId,
loadMarketplacesWithGracefulDegradation,
} from '../../utils/plugins/marketplaceHelpers.js'
import {
addMarketplaceSource,
loadKnownMarketplacesConfig,
refreshAllMarketplaces,
refreshMarketplace,
removeMarketplaceSource,
saveMarketplaceToSettings,
} from '../../utils/plugins/marketplaceManager.js'
import { loadPluginMcpServers } from '../../utils/plugins/mcpPluginIntegration.js'
import { parseMarketplaceInput } from '../../utils/plugins/parseMarketplaceInput.js'
import {
parsePluginIdentifier,
scopeToSettingSource,
} from '../../utils/plugins/pluginIdentifier.js'
import { loadAllPlugins } from '../../utils/plugins/pluginLoader.js'
import type { PluginSource } from '../../utils/plugins/schemas.js'
import {
type ValidationResult,
validateManifest,
validatePluginContents,
} from '../../utils/plugins/validatePlugin.js'
import { jsonStringify } from '../../utils/slowOperations.js'
import { plural } from '../../utils/stringUtils.js'
import { cliError, cliOk } from '../exit.js'
// Re-export for main.tsx to reference in option definitions
export { VALID_INSTALLABLE_SCOPES, VALID_UPDATE_SCOPES }
/**
 * Shared failure path for marketplace commands: logs the error, then reports
 * `Failed to <action>: <reason>` through cliError.
 *
 * @param error - The caught value.
 * @param action - Verb phrase describing what was attempted.
 */
export function handleMarketplaceError(error: unknown, action: string): never {
  logError(error)
  const reason = errorMessage(error)
  cliError(`${figures.cross} Failed to ${action}: ${reason}`)
}
/**
 * Prints the errors and then the warnings of a validation result. Each
 * non-empty group gets a count header, one line per issue, and a trailing
 * blank line; empty groups print nothing.
 */
function printValidationResult(result: ValidationResult): void {
  const printIssues = (
    icon: string,
    noun: string,
    issues: ReadonlyArray<{ path: string; message: string }>,
  ): void => {
    if (issues.length === 0) {
      return
    }
    // biome-ignore lint/suspicious/noConsole:: intentional console output
    console.log(
      `${icon} Found ${issues.length} ${plural(issues.length, noun)}:\n`,
    )
    for (const issue of issues) {
      // biome-ignore lint/suspicious/noConsole:: intentional console output
      console.log(` ${figures.pointer} ${issue.path}: ${issue.message}`)
    }
    // biome-ignore lint/suspicious/noConsole:: intentional console output
    console.log('')
  }

  printIssues(figures.cross, 'error', result.errors)
  printIssues(figures.warning, 'warning', result.warnings)
}
// plugin validate
/**
 * Validates a manifest and, for a plugin manifest that sits inside a
 * `.claude-plugin` directory, the plugin's content files as well.
 *
 * Exit behaviour: success is reported through cliOk, a failed validation
 * exits with code 1, and an unexpected error exits with code 2.
 *
 * @param manifestPath - Path handed to validateManifest.
 * @param options - `cowork` enables cowork plugins before validating.
 */
export async function pluginValidateHandler(
  manifestPath: string,
  options: { cowork?: boolean },
): Promise<void> {
  if (options.cowork) {
    setUseCoworkPlugins(true)
  }
  try {
    const manifestResult = await validateManifest(manifestPath)
    // biome-ignore lint/suspicious/noConsole:: intentional console output
    console.log(
      `Validating ${manifestResult.fileType} manifest: ${manifestResult.filePath}\n`,
    )
    printValidationResult(manifestResult)

    // Content files (skills, agents, commands, hooks) are checked only for a
    // plugin manifest whose parent directory is `.claude-plugin`; the plugin
    // root is then one level above that directory.
    let contentResults: ValidationResult[] = []
    if (
      manifestResult.fileType === 'plugin' &&
      basename(dirname(manifestResult.filePath)) === '.claude-plugin'
    ) {
      const pluginRoot = dirname(dirname(manifestResult.filePath))
      contentResults = await validatePluginContents(pluginRoot)
      for (const contentResult of contentResults) {
        // biome-ignore lint/suspicious/noConsole:: intentional console output
        console.log(
          `Validating ${contentResult.fileType}: ${contentResult.filePath}\n`,
        )
        printValidationResult(contentResult)
      }
    }

    const passed =
      manifestResult.success && contentResults.every(r => r.success)
    const hasWarnings =
      manifestResult.warnings.length > 0 ||
      contentResults.some(r => r.warnings.length > 0)

    if (!passed) {
      // biome-ignore lint/suspicious/noConsole:: intentional console output
      console.log(`${figures.cross} Validation failed`)
      process.exit(1)
    } else {
      cliOk(
        `${figures.tick} Validation passed${hasWarnings ? ' with warnings' : ''}`,
      )
    }
  } catch (error) {
    logError(error)
    // biome-ignore lint/suspicious/noConsole:: intentional console output
    console.error(
      `${figures.cross} Unexpected error during validation: ${errorMessage(error)}`,
    )
    process.exit(2)
  }
}
// plugin list (lines 5217–5416)
/**
 * Lists plugins recorded as installed plus session-only (inline) plugins.
 *
 * With `options.json`, emits a JSON array of plugin records, or an object
 * `{ installed, available }` when `options.available` is also set, where
 * `available` holds marketplace plugins that are not installed. Otherwise
 * prints a human-readable listing. In both modes load errors are attached to
 * the plugin they belong to, and inline path-level failures are shown too.
 *
 * NOTE(review): the human-readable path after the JSON branch presumably
 * relies on cliOk terminating the process — confirm in ../exit.js.
 *
 * @param options - `json` selects JSON output; `available` is only consulted
 *   in JSON mode; `cowork` calls setUseCoworkPlugins(true) before any data is
 *   loaded.
 */
export async function pluginListHandler(options: {
json?: boolean
available?: boolean
cowork?: boolean
}): Promise<void> {
if (options.cowork) setUseCoworkPlugins(true)
logEvent('tengu_plugin_list_command', {})
const installedData = loadInstalledPluginsV2()
// Imported on demand rather than at module top level.
const { getPluginEditableScopes } = await import(
'../../utils/plugins/pluginStartupCheck.js'
)
// Used below via .has(pluginId) to decide whether a plugin is enabled.
const enabledPlugins = getPluginEditableScopes()
const pluginIds = Object.keys(installedData.plugins)
// Load all plugins once. The JSON and human paths both need:
// - loadErrors (to show load failures per plugin)
// - inline plugins (session-only via --plugin-dir, source='name@inline')
// which are NOT in installedData.plugins (V2 bookkeeping) — they must
// be surfaced separately or `plugin list` silently ignores --plugin-dir.
const {
enabled: loadedEnabled,
disabled: loadedDisabled,
errors: loadErrors,
} = await loadAllPlugins()
const allLoadedPlugins = [...loadedEnabled, ...loadedDisabled]
const inlinePlugins = allLoadedPlugins.filter(p =>
p.source.endsWith('@inline'),
)
// Path-level inline failures (dir doesn't exist, parse error before
// manifest is read) use source='inline[N]'. Plugin-level errors after
// manifest read use source='name@inline'. Collect both for the session
// section — these are otherwise invisible since they have no pluginId.
const inlineLoadErrors = loadErrors.filter(
e => e.source.endsWith('@inline') || e.source.startsWith('inline['),
)
if (options.json) {
// Create a map of plugin source to loaded plugin for quick lookup
const loadedPluginMap = new Map(allLoadedPlugins.map(p => [p.source, p]))
const plugins: Array<{
id: string
version: string
scope: string
enabled: boolean
installPath: string
installedAt?: string
lastUpdated?: string
projectPath?: string
mcpServers?: Record<string, unknown>
errors?: string[]
}> = []
// Note: sort() orders pluginIds in place; one record is emitted per installation.
for (const pluginId of pluginIds.sort()) {
const installations = installedData.plugins[pluginId]
if (!installations || installations.length === 0) continue
// Find loading errors for this plugin
const pluginName = parsePluginIdentifier(pluginId).name
const pluginErrors = loadErrors
.filter(
e =>
e.source === pluginId || ('plugin' in e && e.plugin === pluginName),
)
.map(getPluginErrorMessage)
for (const installation of installations) {
// Try to find the loaded plugin to get MCP servers
const loadedPlugin = loadedPluginMap.get(pluginId)
let mcpServers: Record<string, unknown> | undefined
if (loadedPlugin) {
// Load MCP servers if not already cached
const servers =
loadedPlugin.mcpServers ||
(await loadPluginMcpServers(loadedPlugin))
// An empty server map is reported as absent rather than as {}.
if (servers && Object.keys(servers).length > 0) {
mcpServers = servers
}
}
plugins.push({
id: pluginId,
version: installation.version || 'unknown',
scope: installation.scope,
enabled: enabledPlugins.has(pluginId),
installPath: installation.installPath,
installedAt: installation.installedAt,
lastUpdated: installation.lastUpdated,
projectPath: installation.projectPath,
mcpServers,
errors: pluginErrors.length > 0 ? pluginErrors : undefined,
})
}
}
// Session-only plugins: scope='session', no install metadata.
// Filter from inlineLoadErrors (not loadErrors) so an installed plugin
// with the same manifest name doesn't cross-contaminate via e.plugin.
// The e.plugin fallback catches the dirName≠manifestName case:
// createPluginFromPath tags errors with `${dirName}@inline` but
// plugin.source is reassigned to `${manifest.name}@inline` afterward
// (pluginLoader.ts loadInlinePlugins), so e.source !== p.source when
// a dev checkout dir like ~/code/my-fork/ has manifest name 'cool-plugin'.
for (const p of inlinePlugins) {
const servers = p.mcpServers || (await loadPluginMcpServers(p))
const pErrors = inlineLoadErrors
.filter(
e => e.source === p.source || ('plugin' in e && e.plugin === p.name),
)
.map(getPluginErrorMessage)
plugins.push({
id: p.source,
version: p.manifest.version ?? 'unknown',
scope: 'session',
enabled: p.enabled !== false,
installPath: p.path,
mcpServers:
servers && Object.keys(servers).length > 0 ? servers : undefined,
errors: pErrors.length > 0 ? pErrors : undefined,
})
}
// Path-level inline failures (--plugin-dir /nonexistent): no LoadedPlugin
// exists so the loop above can't surface them. Mirror the human-path
// handling so JSON consumers see the failure instead of silent omission.
for (const e of inlineLoadErrors.filter(e =>
e.source.startsWith('inline['),
)) {
plugins.push({
id: e.source,
version: 'unknown',
scope: 'session',
enabled: false,
installPath: 'path' in e ? e.path : '',
errors: [getPluginErrorMessage(e)],
})
}
// If --available is set, also load available plugins from marketplaces
if (options.available) {
const available: Array<{
pluginId: string
name: string
description?: string
marketplaceName: string
version?: string
source: PluginSource
installCount?: number
}> = []
try {
// The marketplace config and install counts are independent; fetch both concurrently.
const [config, installCounts] = await Promise.all([
loadKnownMarketplacesConfig(),
getInstallCounts(),
])
const { marketplaces } =
await loadMarketplacesWithGracefulDegradation(config)
for (const {
name: marketplaceName,
data: marketplace,
} of marketplaces) {
// Entries without data are skipped.
if (marketplace) {
for (const entry of marketplace.plugins) {
const pluginId = createPluginId(entry.name, marketplaceName)
// Only include plugins that are not already installed
if (!isPluginInstalled(pluginId)) {
available.push({
pluginId,
name: entry.name,
description: entry.description,
marketplaceName,
version: entry.version,
source: entry.source,
installCount: installCounts?.get(pluginId),
})
}
}
}
}
} catch {
// Silently ignore marketplace loading errors
}
cliOk(jsonStringify({ installed: plugins, available }, null, 2))
} else {
cliOk(jsonStringify(plugins, null, 2))
}
}
// Human-readable output from here on.
if (pluginIds.length === 0 && inlinePlugins.length === 0) {
// inlineLoadErrors can exist with zero inline plugins (e.g. --plugin-dir
// points at a nonexistent path). Don't early-exit over them — fall
// through to the session section so the failure is visible.
if (inlineLoadErrors.length === 0) {
cliOk(
'No plugins installed. Use `claude plugin install` to install a plugin.',
)
}
}
if (pluginIds.length > 0) {
// biome-ignore lint/suspicious/noConsole:: intentional console output
console.log('Installed plugins:\n')
}
for (const pluginId of pluginIds.sort()) {
const installations = installedData.plugins[pluginId]
if (!installations || installations.length === 0) continue
// Find loading errors for this plugin
const pluginName = parsePluginIdentifier(pluginId).name
const pluginErrors = loadErrors.filter(
e => e.source === pluginId || ('plugin' in e && e.plugin === pluginName),
)
for (const installation of installations) {
const isEnabled = enabledPlugins.has(pluginId)
// A load failure takes precedence over the enabled/disabled state.
const status =
pluginErrors.length > 0
? `${figures.cross} failed to load`
: isEnabled
? `${figures.tick} enabled`
: `${figures.cross} disabled`
const version = installation.version || 'unknown'
const scope = installation.scope
// biome-ignore lint/suspicious/noConsole:: intentional console output
console.log(` ${figures.pointer} ${pluginId}`)
// biome-ignore lint/suspicious/noConsole:: intentional console output
console.log(` Version: ${version}`)
// biome-ignore lint/suspicious/noConsole:: intentional console output
console.log(` Scope: ${scope}`)
// biome-ignore lint/suspicious/noConsole:: intentional console output
console.log(` Status: ${status}`)
for (const error of pluginErrors) {
// biome-ignore lint/suspicious/noConsole:: intentional console output
console.log(` Error: ${getPluginErrorMessage(error)}`)
}
// biome-ignore lint/suspicious/noConsole:: intentional console output
console.log('')
}
}
if (inlinePlugins.length > 0 || inlineLoadErrors.length > 0) {
// biome-ignore lint/suspicious/noConsole:: intentional console output
console.log('Session-only plugins (--plugin-dir):\n')
for (const p of inlinePlugins) {
// Same dirName≠manifestName fallback as the JSON path above — error
// sources use the dir basename but p.source uses the manifest name.
const pErrors = inlineLoadErrors.filter(
e => e.source === p.source || ('plugin' in e && e.plugin === p.name),
)
const status =
pErrors.length > 0
? `${figures.cross} loaded with errors`
: `${figures.tick} loaded`
// biome-ignore lint/suspicious/noConsole:: intentional console output
console.log(` ${figures.pointer} ${p.source}`)
// biome-ignore lint/suspicious/noConsole:: intentional console output
console.log(` Version: ${p.manifest.version ?? 'unknown'}`)
// biome-ignore lint/suspicious/noConsole:: intentional console output
console.log(` Path: ${p.path}`)
// biome-ignore lint/suspicious/noConsole:: intentional console output
console.log(` Status: ${status}`)
for (const e of pErrors) {
// biome-ignore lint/suspicious/noConsole:: intentional console output
console.log(` Error: ${getPluginErrorMessage(e)}`)
}
// biome-ignore lint/suspicious/noConsole:: intentional console output
console.log('')
}
// Path-level failures: no LoadedPlugin object exists. Show them so
// `--plugin-dir /typo` doesn't just silently produce nothing.
for (const e of inlineLoadErrors.filter(e =>
e.source.startsWith('inline['),
)) {
// biome-ignore lint/suspicious/noConsole:: intentional console output
console.log(
` ${figures.pointer} ${e.source}: ${figures.cross} ${getPluginErrorMessage(e)}\n`,
)
}
}
cliOk()
}
// marketplace add (lines 5433–5487)
/**
 * Adds a marketplace source and declares it in settings at the given scope.
 *
 * @param source - User input naming the marketplace; parsed by
 *   parseMarketplaceInput.
 * @param options - `scope` is one of `user` (default), `project` or `local`;
 *   `sparse` sets sparse paths and is accepted only for github/git sources;
 *   `cowork` enables cowork plugins first.
 */
export async function marketplaceAddHandler(
  source: string,
  options: { cowork?: boolean; sparse?: string[]; scope?: string },
): Promise<void> {
  if (options.cowork) {
    setUseCoworkPlugins(true)
  }
  try {
    const parsed = await parseMarketplaceInput(source)
    if (!parsed) {
      cliError(
        `${figures.cross} Invalid marketplace source format. Try: owner/repo, https://..., or ./path`,
      )
    }
    if ('error' in parsed) {
      cliError(`${figures.cross} ${parsed.error}`)
    }

    // Reject unknown scopes before touching anything on disk.
    const scope = options.scope ?? 'user'
    if (scope !== 'user' && scope !== 'project' && scope !== 'local') {
      cliError(
        `${figures.cross} Invalid scope '${scope}'. Use: user, project, or local`,
      )
    }
    const settingSource = scopeToSettingSource(scope)

    // Attach sparse paths when given; only github and git sources take them.
    const sparsePaths = options.sparse
    let marketplaceSource = parsed
    if (sparsePaths && sparsePaths.length > 0) {
      if (
        marketplaceSource.source !== 'github' &&
        marketplaceSource.source !== 'git'
      ) {
        cliError(
          `${figures.cross} --sparse is only supported for github and git marketplace sources (got: ${marketplaceSource.source})`,
        )
      } else {
        marketplaceSource = { ...marketplaceSource, sparsePaths }
      }
    }

    // biome-ignore lint/suspicious/noConsole:: intentional console output
    console.log('Adding marketplace...')
    const added = await addMarketplaceSource(marketplaceSource, message => {
      // biome-ignore lint/suspicious/noConsole:: intentional console output
      console.log(message)
    })
    const { name, alreadyMaterialized, resolvedSource } = added

    // Record the intent in settings at the requested scope.
    saveMarketplaceToSettings(name, { source: resolvedSource }, settingSource)
    clearAllCaches()

    // For github sources the repo is reported in place of the source kind.
    const sourceType =
      marketplaceSource.source === 'github'
        ? (marketplaceSource.repo as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS)
        : marketplaceSource.source
    logEvent('tengu_marketplace_added', {
      source_type:
        sourceType as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
    })

    let successMessage: string
    if (alreadyMaterialized) {
      successMessage = `${figures.tick} Marketplace '${name}' already on disk — declared in ${scope} settings`
    } else {
      successMessage = `${figures.tick} Successfully added marketplace: ${name} (declared in ${scope} settings)`
    }
    cliOk(successMessage)
  } catch (error) {
    handleMarketplaceError(error, 'add marketplace')
  }
}
// marketplace list (lines 5497–5565)
/**
 * Lists the configured marketplaces.
 *
 * With `options.json`, emits a name-sorted JSON array. Each entry carries the
 * source kind, the locator matching that kind (`repo`, `url` or `path`), and
 * the install location. Otherwise prints a human-readable list.
 *
 * @param options - `json` selects JSON output; `cowork` enables cowork
 *   plugins first.
 */
export async function marketplaceListHandler(options: {
  json?: boolean
  cowork?: boolean
}): Promise<void> {
  if (options.cowork) {
    setUseCoworkPlugins(true)
  }
  try {
    const config = await loadKnownMarketplacesConfig()
    const names = Object.keys(config)

    if (options.json) {
      const marketplaces = names.sort().map(name => {
        const marketplace = config[name]
        const source = marketplace?.source
        // Keys are added in output order: name, source, locator, installLocation.
        const entry: Record<string, unknown> = { name, source: source?.source }
        if (source?.source === 'github') {
          entry.repo = source.repo
        } else if (source?.source === 'git' || source?.source === 'url') {
          entry.url = source.url
        } else if (
          source?.source === 'directory' ||
          source?.source === 'file'
        ) {
          entry.path = source.path
        }
        entry.installLocation = marketplace?.installLocation
        return entry
      })
      cliOk(jsonStringify(marketplaces, null, 2))
    }

    if (names.length === 0) {
      cliOk('No marketplaces configured')
    }

    // biome-ignore lint/suspicious/noConsole:: intentional console output
    console.log('Configured marketplaces:\n')
    for (const name of names) {
      // biome-ignore lint/suspicious/noConsole:: intentional console output
      console.log(` ${figures.pointer} ${name}`)

      // Describe the source when its kind is one of the known ones.
      const src = config[name]?.source
      let sourceLabel: string | undefined
      if (src?.source === 'github') {
        sourceLabel = `GitHub (${src.repo})`
      } else if (src?.source === 'git') {
        sourceLabel = `Git (${src.url})`
      } else if (src?.source === 'url') {
        sourceLabel = `URL (${src.url})`
      } else if (src?.source === 'directory') {
        sourceLabel = `Directory (${src.path})`
      } else if (src?.source === 'file') {
        sourceLabel = `File (${src.path})`
      }
      if (sourceLabel !== undefined) {
        // biome-ignore lint/suspicious/noConsole:: intentional console output
        console.log(` Source: ${sourceLabel}`)
      }
      // biome-ignore lint/suspicious/noConsole:: intentional console output
      console.log('')
    }
    cliOk()
  } catch (error) {
    handleMarketplaceError(error, 'list marketplaces')
  }
}
// marketplace remove (lines 5576-5598)
/**
 * Handler for `marketplace remove`: removes the named marketplace source,
 * clears all caches, records `tengu_marketplace_removed` and reports success.
 * Any thrown error is handed to handleMarketplaceError.
 *
 * @param name - Name of the marketplace to remove.
 * @param options.cowork - Calls setUseCoworkPlugins(true) before removal.
 */
export async function marketplaceRemoveHandler(
  name: string,
  options: { cowork?: boolean },
): Promise<void> {
  if (options.cowork) {
    setUseCoworkPlugins(true)
  }
  try {
    await removeMarketplaceSource(name)
    clearAllCaches()

    const analyticsName =
      name as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS
    logEvent('tengu_marketplace_removed', { marketplace_name: analyticsName })

    cliOk(`${figures.tick} Successfully removed marketplace: ${name}`)
  } catch (error) {
    handleMarketplaceError(error, 'remove marketplace')
  }
}
// marketplace update (lines 5609-5672)
/**
 * Handler for `marketplace update`: refreshes one marketplace when `name` is
 * given, otherwise every configured marketplace. Caches are cleared after the
 * refresh and an analytics event is recorded before success is reported.
 *
 * NOTE(review): the "No marketplaces configured" branch does not return, so
 * it presumably relies on cliOk() ending the process — confirm.
 *
 * @param name - Marketplace to refresh; a falsy value means "all".
 * @param options.cowork - Calls setUseCoworkPlugins(true) before refreshing.
 */
export async function marketplaceUpdateHandler(
  name: string | undefined,
  options: { cowork?: boolean },
): Promise<void> {
  if (options.cowork) setUseCoworkPlugins(true)
  try {
    if (!name) {
      // No name given: refresh everything that is configured.
      const known = await loadKnownMarketplacesConfig()
      const total = Object.keys(known).length
      if (total === 0) {
        cliOk('No marketplaces configured')
      }
      // biome-ignore lint/suspicious/noConsole:: intentional console output
      console.log(`Updating ${total} marketplace(s)...`)
      await refreshAllMarketplaces()
      clearAllCaches()
      logEvent('tengu_marketplace_updated_all', {
        count:
          total as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
      })
      cliOk(`${figures.tick} Successfully updated ${total} marketplace(s)`)
    } else {
      // biome-ignore lint/suspicious/noConsole:: intentional console output
      console.log(`Updating marketplace: ${name}...`)
      // Progress messages from the refresh are echoed as they arrive.
      const reportProgress = (message: string): void => {
        // biome-ignore lint/suspicious/noConsole:: intentional console output
        console.log(message)
      }
      await refreshMarketplace(name, reportProgress)
      clearAllCaches()
      logEvent('tengu_marketplace_updated', {
        marketplace_name:
          name as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
      })
      cliOk(`${figures.tick} Successfully updated marketplace: ${name}`)
    }
  } catch (error) {
    handleMarketplaceError(error, 'update marketplace(s)')
  }
}
// plugin install (lines 5690-5721)
/**
 * Handler for `plugin install`: validates the requested scope, records the
 * `tengu_plugin_install_command` analytics event and installs the plugin.
 *
 * NOTE(review): validation failures call cliError() and then fall through,
 * so this presumably relies on cliError() ending the process — confirm.
 *
 * @param plugin - Plugin identifier; parsed with parsePluginIdentifier for
 *   analytics and passed unchanged to installPlugin.
 * @param options.scope - Target scope; 'user' when omitted or empty.
 * @param options.cowork - Calls setUseCoworkPlugins(true); rejected unless
 *   the scope is 'user'.
 */
export async function pluginInstallHandler(
  plugin: string,
  options: { scope?: string; cowork?: boolean },
): Promise<void> {
  type InstallableScope = (typeof VALID_INSTALLABLE_SCOPES)[number]

  if (options.cowork) setUseCoworkPlugins(true)

  const scope = options.scope || 'user'
  if (options.cowork && scope !== 'user') {
    cliError('--cowork can only be used with user scope')
  }
  const scopeIsInstallable = VALID_INSTALLABLE_SCOPES.includes(
    scope as InstallableScope,
  )
  if (!scopeIsInstallable) {
    cliError(
      `Invalid scope: ${scope}. Must be one of: ${VALID_INSTALLABLE_SCOPES.join(', ')}.`,
    )
  }

  // _PROTO_* routes to PII-tagged plugin_name/marketplace_name BQ columns.
  // Unredacted plugin arg was previously logged to general-access
  // additional_metadata for all users — dropped in favor of the privileged
  // column route. marketplace may be undefined (fires before resolution).
  const { name, marketplace } = parsePluginIdentifier(plugin)
  const marketplaceField = marketplace
    ? {
        _PROTO_marketplace_name:
          marketplace as AnalyticsMetadata_I_VERIFIED_THIS_IS_PII_TAGGED,
      }
    : {}
  logEvent('tengu_plugin_install_command', {
    _PROTO_plugin_name: name as AnalyticsMetadata_I_VERIFIED_THIS_IS_PII_TAGGED,
    ...marketplaceField,
    scope: scope as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
  })

  await installPlugin(plugin, scope as 'user' | 'project' | 'local')
}
// plugin uninstall (lines 5738-5769)
/**
 * Handler for `plugin uninstall`: validates the requested scope, records the
 * `tengu_plugin_uninstall_command` analytics event and uninstalls the plugin.
 *
 * NOTE(review): validation failures call cliError() and then fall through,
 * so this presumably relies on cliError() ending the process — confirm.
 *
 * @param plugin - Plugin identifier; parsed with parsePluginIdentifier for
 *   analytics and passed unchanged to uninstallPlugin.
 * @param options.scope - Target scope; 'user' when omitted or empty.
 * @param options.cowork - Calls setUseCoworkPlugins(true); rejected unless
 *   the scope is 'user'.
 * @param options.keepData - Forwarded as the third argument of uninstallPlugin.
 */
export async function pluginUninstallHandler(
  plugin: string,
  options: { scope?: string; cowork?: boolean; keepData?: boolean },
): Promise<void> {
  type InstallableScope = (typeof VALID_INSTALLABLE_SCOPES)[number]

  if (options.cowork) setUseCoworkPlugins(true)

  const scope = options.scope || 'user'
  if (options.cowork && scope !== 'user') {
    cliError('--cowork can only be used with user scope')
  }
  const scopeIsInstallable = VALID_INSTALLABLE_SCOPES.includes(
    scope as InstallableScope,
  )
  if (!scopeIsInstallable) {
    cliError(
      `Invalid scope: ${scope}. Must be one of: ${VALID_INSTALLABLE_SCOPES.join(', ')}.`,
    )
  }

  const { name, marketplace } = parsePluginIdentifier(plugin)
  // The marketplace column is only populated when the identifier carried one.
  const marketplaceField = marketplace
    ? {
        _PROTO_marketplace_name:
          marketplace as AnalyticsMetadata_I_VERIFIED_THIS_IS_PII_TAGGED,
      }
    : {}
  logEvent('tengu_plugin_uninstall_command', {
    _PROTO_plugin_name: name as AnalyticsMetadata_I_VERIFIED_THIS_IS_PII_TAGGED,
    ...marketplaceField,
    scope: scope as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
  })

  const uninstallScope = scope as 'user' | 'project' | 'local'
  await uninstallPlugin(plugin, uninstallScope, options.keepData)
}
// plugin enable (lines 5783-5818)
/**
 * Handler for `plugin enable`: resolves the optional scope, records the
 * `tengu_plugin_enable_command` analytics event and enables the plugin.
 *
 * When no scope is given the plugin is enabled with an undefined scope
 * (logged as 'auto'), except under `--cowork`, which pins it to 'user'.
 *
 * NOTE(review): validation failures call cliError() and then fall through,
 * so this presumably relies on cliError() ending the process — confirm.
 *
 * @param plugin - Plugin identifier; parsed with parsePluginIdentifier for
 *   analytics and passed unchanged to enablePlugin.
 * @param options.scope - Optional scope; must be one of VALID_INSTALLABLE_SCOPES.
 * @param options.cowork - Calls setUseCoworkPlugins(true); only valid with
 *   user scope.
 */
export async function pluginEnableHandler(
  plugin: string,
  options: { scope?: string; cowork?: boolean },
): Promise<void> {
  type InstallableScope = (typeof VALID_INSTALLABLE_SCOPES)[number]

  if (options.cowork) setUseCoworkPlugins(true)

  let scope: InstallableScope | undefined
  const requested = options.scope
  if (requested) {
    const isKnownScope = VALID_INSTALLABLE_SCOPES.includes(
      requested as InstallableScope,
    )
    if (!isKnownScope) {
      cliError(
        `Invalid scope "${requested}". Valid scopes: ${VALID_INSTALLABLE_SCOPES.join(', ')}`,
      )
    }
    scope = requested as InstallableScope
  }

  if (options.cowork) {
    if (scope === undefined) {
      // --cowork always operates at user scope
      scope = 'user'
    } else if (scope !== 'user') {
      cliError('--cowork can only be used with user scope')
    }
  }

  const { name, marketplace } = parsePluginIdentifier(plugin)
  const marketplaceField = marketplace
    ? {
        _PROTO_marketplace_name:
          marketplace as AnalyticsMetadata_I_VERIFIED_THIS_IS_PII_TAGGED,
      }
    : {}
  const loggedScope = (scope ??
    'auto') as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS
  logEvent('tengu_plugin_enable_command', {
    _PROTO_plugin_name: name as AnalyticsMetadata_I_VERIFIED_THIS_IS_PII_TAGGED,
    ...marketplaceField,
    scope: loggedScope,
  })

  await enablePlugin(plugin, scope)
}
// plugin disable (lines 5833-5902)
/**
 * Handler for `plugin disable`: disables either one plugin or, with `--all`,
 * every plugin, recording `tengu_plugin_disable_command` in both cases.
 *
 * `--all` cannot be combined with a plugin name or with `--scope`. For a
 * single plugin the scope handling is: an explicit scope must be one of
 * VALID_INSTALLABLE_SCOPES, `--cowork` pins an omitted scope to 'user', and
 * otherwise the scope stays undefined (logged as 'auto').
 *
 * NOTE(review): validation failures call cliError() and then fall through,
 * so this presumably relies on cliError() ending the process — confirm.
 *
 * @param plugin - Plugin identifier; required unless `options.all` is set.
 * @param options.scope - Optional scope for the single-plugin form.
 * @param options.cowork - Calls setUseCoworkPlugins(true); only valid with
 *   user scope.
 * @param options.all - Disable every plugin via disableAllPlugins().
 */
export async function pluginDisableHandler(
  plugin: string | undefined,
  options: { scope?: string; cowork?: boolean; all?: boolean },
): Promise<void> {
  type InstallableScope = (typeof VALID_INSTALLABLE_SCOPES)[number]

  if (options.all && plugin) {
    cliError('Cannot use --all with a specific plugin')
  }
  if (!options.all && !plugin) {
    cliError('Please specify a plugin name or use --all to disable all plugins')
  }

  if (options.cowork) setUseCoworkPlugins(true)

  if (options.all) {
    if (options.scope) {
      cliError('Cannot use --scope with --all')
    }
    // No _PROTO_plugin_name here — --all disables all plugins.
    // Distinguishable from the specific-plugin branch by plugin_name IS NULL.
    logEvent('tengu_plugin_disable_command', {})
    await disableAllPlugins()
    return
  }

  let scope: InstallableScope | undefined
  const requested = options.scope
  if (requested) {
    const isKnownScope = VALID_INSTALLABLE_SCOPES.includes(
      requested as InstallableScope,
    )
    if (!isKnownScope) {
      cliError(
        `Invalid scope "${requested}". Valid scopes: ${VALID_INSTALLABLE_SCOPES.join(', ')}`,
      )
    }
    scope = requested as InstallableScope
  }

  if (options.cowork) {
    if (scope === undefined) {
      // --cowork always operates at user scope
      scope = 'user'
    } else if (scope !== 'user') {
      cliError('--cowork can only be used with user scope')
    }
  }

  // The checks above reject a missing plugin when --all is not set; the
  // compiler cannot see that, hence the assertion.
  const target = plugin!
  const { name, marketplace } = parsePluginIdentifier(target)
  const marketplaceField = marketplace
    ? {
        _PROTO_marketplace_name:
          marketplace as AnalyticsMetadata_I_VERIFIED_THIS_IS_PII_TAGGED,
      }
    : {}
  const loggedScope = (scope ??
    'auto') as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS
  logEvent('tengu_plugin_disable_command', {
    _PROTO_plugin_name: name as AnalyticsMetadata_I_VERIFIED_THIS_IS_PII_TAGGED,
    ...marketplaceField,
    scope: loggedScope,
  })

  await disablePlugin(target, scope)
}
// plugin update (lines 5918-5948)
/**
 * Handler for `plugin update`: records `tengu_plugin_update_command`, then
 * validates the scope and updates the plugin.
 *
 * The analytics event is sent before the scope is validated and carries no
 * scope field.
 *
 * NOTE(review): validation failures call cliError() and then fall through,
 * so this presumably relies on cliError() ending the process — confirm.
 *
 * @param plugin - Plugin identifier; parsed with parsePluginIdentifier for
 *   analytics and passed unchanged to updatePluginCli.
 * @param options.scope - Optional scope from VALID_UPDATE_SCOPES; 'user'
 *   when omitted or empty.
 * @param options.cowork - Calls setUseCoworkPlugins(true); only valid with
 *   user scope.
 */
export async function pluginUpdateHandler(
  plugin: string,
  options: { scope?: string; cowork?: boolean },
): Promise<void> {
  type UpdateScope = (typeof VALID_UPDATE_SCOPES)[number]

  if (options.cowork) setUseCoworkPlugins(true)

  const { name, marketplace } = parsePluginIdentifier(plugin)
  const marketplaceField = marketplace
    ? {
        _PROTO_marketplace_name:
          marketplace as AnalyticsMetadata_I_VERIFIED_THIS_IS_PII_TAGGED,
      }
    : {}
  logEvent('tengu_plugin_update_command', {
    _PROTO_plugin_name: name as AnalyticsMetadata_I_VERIFIED_THIS_IS_PII_TAGGED,
    ...marketplaceField,
  })

  let scope: UpdateScope = 'user'
  const requested = options.scope
  if (requested) {
    const isKnownScope = VALID_UPDATE_SCOPES.includes(requested as UpdateScope)
    if (!isKnownScope) {
      cliError(
        `Invalid scope "${requested}". Valid scopes: ${VALID_UPDATE_SCOPES.join(', ')}`,
      )
    }
    scope = requested as UpdateScope
  }
  if (options.cowork && scope !== 'user') {
    cliError('--cowork can only be used with user scope')
  }

  await updatePluginCli(plugin, scope)
}

110
cli/handlers/util.tsx Normal file

File diff suppressed because one or more lines are too long

View file

@ -0,0 +1,32 @@
import { jsonStringify } from '../utils/slowOperations.js'
// JSON.stringify emits U+2028/U+2029 raw (valid per ECMA-404). When the
// output is a single NDJSON line, any receiver that uses JavaScript
// line-terminator semantics (ECMA-262 §11.3 — \n \r U+2028 U+2029) to
// split the stream will cut the JSON mid-string. ProcessTransport now
// silently skips non-JSON lines rather than crashing (gh-28405), but
// the truncated fragment is still lost — the message is silently dropped.
//
// The \uXXXX form is equivalent JSON (parses to the same string) but
// can never be mistaken for a line terminator by ANY receiver. This is
// what ES2019's "Subsume JSON" proposal and Node's util.inspect do.
//
// Single regex with alternation: the callback's one dispatch per match
// is cheaper than two full-string scans.
const JS_LINE_TERMINATORS = /\u2028|\u2029/g
function escapeJsLineTerminators(json: string): string {
  // The pattern matches exactly one of U+2028 / U+2029, so the escape can be
  // derived from the matched character's code unit ("2028" or "2029").
  return json.replace(JS_LINE_TERMINATORS, terminator => {
    const hex = terminator.charCodeAt(0).toString(16)
    return '\\u' + hex
  })
}
/**
* JSON.stringify for one-message-per-line transports. Escapes U+2028
* LINE SEPARATOR and U+2029 PARAGRAPH SEPARATOR so the serialized output
* cannot be broken by a line-splitting receiver. Output is still valid
* JSON and parses to the same value.
*/
export function ndjsonSafeStringify(value: unknown): string {
  // Serialize first, then rewrite the two raw line terminators as \uXXXX
  // escapes so the result stays on one line for any receiver.
  const serialized = jsonStringify(value)
  return escapeJsLineTerminators(serialized)
}

5594
cli/print.ts Normal file

File diff suppressed because it is too large Load diff

255
cli/remoteIO.ts Normal file
View file

@ -0,0 +1,255 @@
import type { StdoutMessage } from 'src/entrypoints/sdk/controlTypes.js'
import { PassThrough } from 'stream'
import { URL } from 'url'
import { getSessionId } from '../bootstrap/state.js'
import { getPollIntervalConfig } from '../bridge/pollConfig.js'
import { registerCleanup } from '../utils/cleanupRegistry.js'
import { setCommandLifecycleListener } from '../utils/commandLifecycle.js'
import { isDebugMode, logForDebugging } from '../utils/debug.js'
import { logForDiagnosticsNoPII } from '../utils/diagLogs.js'
import { isEnvTruthy } from '../utils/envUtils.js'
import { errorMessage } from '../utils/errors.js'
import { gracefulShutdown } from '../utils/gracefulShutdown.js'
import { logError } from '../utils/log.js'
import { writeToStdout } from '../utils/process.js'
import { getSessionIngressAuthToken } from '../utils/sessionIngressAuth.js'
import {
setSessionMetadataChangedListener,
setSessionStateChangedListener,
} from '../utils/sessionState.js'
import {
setInternalEventReader,
setInternalEventWriter,
} from '../utils/sessionStorage.js'
import { ndjsonSafeStringify } from './ndjsonSafeStringify.js'
import { StructuredIO } from './structuredIO.js'
import { CCRClient, CCRInitError } from './transports/ccrClient.js'
import { SSETransport } from './transports/SSETransport.js'
import type { Transport } from './transports/Transport.js'
import { getTransportForUrl } from './transports/transportUtils.js'
/**
* Bidirectional streaming for SDK mode with session tracking.
* The transport is chosen by getTransportForUrl() from the stream URL
* (WebSocket, or SSE when CCR v2 is enabled).
*/
export class RemoteIO extends StructuredIO {
// Parsed form of the streamUrl passed to the constructor.
private url: URL
// Transport returned by getTransportForUrl() for `url`.
private transport: Transport
// Stream handed to StructuredIO as its input; data received from the
// transport (and the optional initial prompt) is written into it.
private inputStream: PassThrough
// True when CLAUDE_CODE_ENVIRONMENT_KIND === 'bridge'; set in the constructor.
private readonly isBridge: boolean = false
// Value of isDebugMode() captured in the constructor.
private readonly isDebug: boolean = false
// Non-null only when CLAUDE_CODE_USE_CCR_V2 is truthy.
private ccrClient: CCRClient | null = null
// Interval handle for the bridge keep_alive frames; null when not running.
private keepAliveTimer: ReturnType<typeof setInterval> | null = null
/**
 * Builds the transport, wires its callbacks, optionally sets up the CCR v2
 * client, starts the connection and registers cleanup.
 *
 * @param streamUrl - URL of the remote stream; parsed with `new URL`.
 * @param initialPrompt - Optional chunks that are written to the input
 *   stream, one line per chunk, as if they had arrived from the transport.
 * @param replayUserMessages - Forwarded unchanged to StructuredIO.
 */
constructor(
streamUrl: string,
initialPrompt?: AsyncIterable<string>,
replayUserMessages?: boolean,
) {
const inputStream = new PassThrough({ encoding: 'utf8' })
super(inputStream, replayUserMessages)
this.inputStream = inputStream
this.url = new URL(streamUrl)
// Prepare headers with session token if available
const headers: Record<string, string> = {}
const sessionToken = getSessionIngressAuthToken()
if (sessionToken) {
headers['Authorization'] = `Bearer ${sessionToken}`
} else {
logForDebugging('[remote-io] No session ingress token available', {
level: 'error',
})
}
// Add environment runner version if available (set by Environment Manager)
const erVersion = process.env.CLAUDE_CODE_ENVIRONMENT_RUNNER_VERSION
if (erVersion) {
headers['x-environment-runner-version'] = erVersion
}
// Provide a callback that re-reads the session token dynamically.
// When the parent process refreshes the token (via token file or env var),
// the transport can pick it up on reconnection.
// It rebuilds the same two headers as above, without the missing-token log.
const refreshHeaders = (): Record<string, string> => {
const h: Record<string, string> = {}
const freshToken = getSessionIngressAuthToken()
if (freshToken) {
h['Authorization'] = `Bearer ${freshToken}`
}
const freshErVersion = process.env.CLAUDE_CODE_ENVIRONMENT_RUNNER_VERSION
if (freshErVersion) {
h['x-environment-runner-version'] = freshErVersion
}
return h
}
// Get appropriate transport based on URL protocol
this.transport = getTransportForUrl(
this.url,
headers,
getSessionId(),
refreshHeaders,
)
// Set up data callback
this.isBridge = process.env.CLAUDE_CODE_ENVIRONMENT_KIND === 'bridge'
this.isDebug = isDebugMode()
this.transport.setOnData((data: string) => {
this.inputStream.write(data)
// In bridge + debug mode, mirror inbound data to stdout, newline-terminated.
if (this.isBridge && this.isDebug) {
writeToStdout(data.endsWith('\n') ? data : data + '\n')
}
})
// Set up close callback to handle connection failures
this.transport.setOnClose(() => {
// End the input stream to trigger graceful shutdown
this.inputStream.end()
})
// Initialize CCR v2 client (heartbeats, epoch, state reporting, event writes).
// The CCRClient constructor wires the SSE received-ack handler
// synchronously, so new CCRClient() MUST run before transport.connect() —
// otherwise early SSE frames hit an unwired onEventCallback and their
// 'received' delivery acks are silently dropped.
if (isEnvTruthy(process.env.CLAUDE_CODE_USE_CCR_V2)) {
// CCR v2 is SSE+POST by definition. getTransportForUrl returns
// SSETransport under the same env var, but the two checks live in
// different files — assert the invariant so a future decoupling
// fails loudly here instead of confusingly inside CCRClient.
if (!(this.transport instanceof SSETransport)) {
throw new Error(
'CCR v2 requires SSETransport; check getTransportForUrl',
)
}
this.ccrClient = new CCRClient(this.transport, this.url)
const init = this.ccrClient.initialize()
// Readers of restoredWorkerState see null instead of a rejection when
// initialization fails.
this.restoredWorkerState = init.catch(() => null)
// Separately, a failed initialization is logged and triggers a graceful
// shutdown with exit code 1.
init.catch((error: unknown) => {
logForDiagnosticsNoPII('error', 'cli_worker_lifecycle_init_failed', {
reason: error instanceof CCRInitError ? error.reason : 'unknown',
})
logError(
new Error(`CCRClient initialization failed: ${errorMessage(error)}`),
)
void gracefulShutdown(1, 'other')
})
registerCleanup(async () => this.ccrClient?.close())
// Register internal event writer for transcript persistence.
// When set, sessionStorage writes transcript messages as CCR v2
// internal events instead of v1 Session Ingress.
setInternalEventWriter((eventType, payload, options) =>
this.ccrClient!.writeInternalEvent(eventType, payload, options),
)
// Register internal event readers for session resume.
// When set, hydrateFromCCRv2InternalEvents() can fetch foreground
// and subagent internal events to reconstruct conversation state.
setInternalEventReader(
() => this.ccrClient!.readInternalEvents(),
() => this.ccrClient!.readSubagentInternalEvents(),
)
// Maps command lifecycle states to the delivery status reported to CCR.
const LIFECYCLE_TO_DELIVERY = {
started: 'processing',
completed: 'processed',
} as const
setCommandLifecycleListener((uuid, state) => {
this.ccrClient?.reportDelivery(uuid, LIFECYCLE_TO_DELIVERY[state])
})
setSessionStateChangedListener((state, details) => {
this.ccrClient?.reportState(state, details)
})
setSessionMetadataChangedListener(metadata => {
this.ccrClient?.reportMetadata(metadata)
})
}
// Start connection only after all callbacks are wired (setOnData above,
// setOnEvent inside new CCRClient() when CCR v2 is enabled).
void this.transport.connect()
// Push a silent keep_alive frame on a fixed interval so upstream
// proxies and the session-ingress layer don't GC an otherwise-idle
// remote control session. The keep_alive type is filtered before
// reaching any client UI (Query.ts drops it; structuredIO.ts drops it;
// web/iOS/Android never see it in their message loop). Interval comes
// from GrowthBook
// (tengu_bridge_poll_interval_config.session_keepalive_interval_v2_ms,
// default 120s); 0 = disabled.
// Bridge-only: fixes Envoy idle timeout on bridge-topology sessions
// (#21931). byoc workers ran without this before #21931 and do not
// need it — different network path.
const keepAliveIntervalMs =
getPollIntervalConfig().session_keepalive_interval_v2_ms
if (this.isBridge && keepAliveIntervalMs > 0) {
this.keepAliveTimer = setInterval(() => {
logForDebugging('[remote-io] keep_alive sent')
// A failed keep_alive write is only logged.
void this.write({ type: 'keep_alive' }).catch(err => {
logForDebugging(
`[remote-io] keep_alive write failed: ${errorMessage(err)}`,
)
})
}, keepAliveIntervalMs)
// unref (when available) so the timer alone does not keep the process alive.
this.keepAliveTimer.unref?.()
}
// Register for graceful shutdown cleanup
registerCleanup(async () => this.close())
// If initial prompt is provided, send it through the input stream
if (initialPrompt) {
// Convert the initial prompt to the input stream format.
// Chunks from stdin may already contain trailing newlines, so strip
// them before appending our own to avoid double-newline issues that
// cause structuredIO to parse empty lines. String() handles both
// string chunks and Buffer objects from process.stdin.
// NOTE(review): the async pump below is fire-and-forget with no catch, so
// an error thrown while iterating initialPrompt surfaces as an unhandled
// rejection — confirm that is intended.
const stream = this.inputStream
void (async () => {
for await (const chunk of initialPrompt) {
stream.write(String(chunk).replace(/\n$/, '') + '\n')
}
})()
}
}
/** Delegates to the CCR client when present; otherwise resolves immediately. */
override flushInternalEvents(): Promise<void> {
return this.ccrClient?.flushInternalEvents() ?? Promise.resolve()
}
/** Internal-event queue depth reported by the CCR client, or 0 without one. */
override get internalEventsPending(): number {
return this.ccrClient?.internalEventsPending ?? 0
}
/**
* Send output to the transport.
* With a CCR client the message goes through ccrClient.writeEvent();
* otherwise it is written to the transport directly.
* In bridge mode, control_request messages are always echoed to stdout so the
* bridge parent can detect permission requests. Other messages are echoed only
* in debug mode.
*/
async write(message: StdoutMessage): Promise<void> {
if (this.ccrClient) {
await this.ccrClient.writeEvent(message)
} else {
await this.transport.write(message)
}
if (this.isBridge) {
if (message.type === 'control_request' || this.isDebug) {
writeToStdout(ndjsonSafeStringify(message) + '\n')
}
}
}
/**
* Clean up connections gracefully: stops the keep_alive timer, closes the
* transport and ends the input stream. The CCR client is not closed here;
* the constructor registers a separate cleanup for it.
*/
close(): void {
if (this.keepAliveTimer) {
clearInterval(this.keepAliveTimer)
this.keepAliveTimer = null
}
this.transport.close()
this.inputStream.end()
}
}

859
cli/structuredIO.ts Normal file
View file

@ -0,0 +1,859 @@
import { feature } from 'bun:bundle'
import type {
ElicitResult,
JSONRPCMessage,
} from '@modelcontextprotocol/sdk/types.js'
import { randomUUID } from 'crypto'
import type { AssistantMessage } from 'src//types/message.js'
import type {
HookInput,
HookJSONOutput,
PermissionUpdate,
SDKMessage,
SDKUserMessage,
} from 'src/entrypoints/agentSdkTypes.js'
import { SDKControlElicitationResponseSchema } from 'src/entrypoints/sdk/controlSchemas.js'
import type {
SDKControlRequest,
SDKControlResponse,
StdinMessage,
StdoutMessage,
} from 'src/entrypoints/sdk/controlTypes.js'
import type { CanUseToolFn } from 'src/hooks/useCanUseTool.js'
import type { Tool, ToolUseContext } from 'src/Tool.js'
import { type HookCallback, hookJSONOutputSchema } from 'src/types/hooks.js'
import { logForDebugging } from 'src/utils/debug.js'
import { logForDiagnosticsNoPII } from 'src/utils/diagLogs.js'
import { AbortError } from 'src/utils/errors.js'
import {
type Output as PermissionToolOutput,
permissionPromptToolResultToPermissionDecision,
outputSchema as permissionToolOutputSchema,
} from 'src/utils/permissions/PermissionPromptToolResultSchema.js'
import type {
PermissionDecision,
PermissionDecisionReason,
} from 'src/utils/permissions/PermissionResult.js'
import { hasPermissionsToUseTool } from 'src/utils/permissions/permissions.js'
import { writeToStdout } from 'src/utils/process.js'
import { jsonStringify } from 'src/utils/slowOperations.js'
import { z } from 'zod/v4'
import { notifyCommandLifecycle } from '../utils/commandLifecycle.js'
import { normalizeControlMessageKeys } from '../utils/controlMessageCompat.js'
import { executePermissionRequestHooks } from '../utils/hooks.js'
import {
applyPermissionUpdates,
persistPermissionUpdates,
} from '../utils/permissions/PermissionUpdate.js'
import {
notifySessionStateChanged,
type RequiresActionDetails,
type SessionExternalMetadata,
} from '../utils/sessionState.js'
import { jsonParse } from '../utils/slowOperations.js'
import { Stream } from '../utils/stream.js'
import { ndjsonSafeStringify } from './ndjsonSafeStringify.js'
/**
* Synthetic tool name used when forwarding sandbox network permission
* requests via the can_use_tool control_request protocol. SDK hosts
* see this as a normal tool permission prompt.
*/
export const SANDBOX_NETWORK_ACCESS_TOOL_NAME = 'SandboxNetworkAccess'
/**
 * Reduces a permission decision reason to the free-text string it carries,
 * if any.
 *
 * @param reason - Decision reason, or undefined when there is none.
 * @returns `reason.reason` for the hook, asyncAgent, sandboxOverride,
 *   workingDir, safetyCheck and other types, and for the classifier type
 *   when the BASH_CLASSIFIER or TRANSCRIPT_CLASSIFIER feature is on;
 *   undefined for a missing reason and for the rule, mode,
 *   subcommandResults and permissionPromptTool types.
 */
function serializeDecisionReason(
reason: PermissionDecisionReason | undefined,
): string | undefined {
if (!reason) {
return undefined
}
// Classifier reasons are only serialized when one of the classifier
// features is enabled.
// NOTE(review): with both features off, a 'classifier' reason matches no
// case below and the function presumably falls out returning undefined.
if (
(feature('BASH_CLASSIFIER') || feature('TRANSCRIPT_CLASSIFIER')) &&
reason.type === 'classifier'
) {
return reason.reason
}
switch (reason.type) {
// These reason types yield no text.
case 'rule':
case 'mode':
case 'subcommandResults':
case 'permissionPromptTool':
return undefined
// These reason types pass their reason string through.
case 'hook':
case 'asyncAgent':
case 'sandboxOverride':
case 'workingDir':
case 'safetyCheck':
case 'other':
return reason.reason
}
}
/**
 * Assembles the RequiresActionDetails payload for a tool permission request.
 *
 * The description comes from the first of getActivityDescription,
 * getToolUseSummary and userFacingName that yields a non-nullish value, and
 * falls back to the tool name if any of them throws.
 *
 * @param tool - Tool the request is about.
 * @param input - Tool input; passed to the description methods and included
 *   in the result as-is.
 * @param toolUseID - Returned as `tool_use_id`.
 * @param requestId - Returned as `request_id`.
 */
function buildRequiresActionDetails(
  tool: Tool,
  input: Record<string, unknown>,
  toolUseID: string,
  requestId: string,
): RequiresActionDetails {
  // Per-tool summary methods may throw on malformed input; permission
  // handling must not break because of a bad description.
  const describeAction = (): string => {
    try {
      return (
        tool.getActivityDescription?.(input) ??
        tool.getToolUseSummary?.(input) ??
        tool.userFacingName(input)
      )
    } catch {
      return tool.name
    }
  }

  const actionDescription = describeAction()
  return {
    tool_name: tool.name,
    action_description: actionDescription,
    tool_use_id: toolUseID,
    request_id: requestId,
    input,
  }
}
/** Bookkeeping for one control_request that is still awaiting its response. */
type PendingRequest<T> = {
// Settles the waiting caller with the response value.
resolve: (result: T) => void
// Settles the waiting caller with a failure.
reject: (error: unknown) => void
// When present, the response payload is parsed with this schema before
// resolve is called; without it the request resolves with an empty object.
schema?: z.Schema
// The control_request this entry is waiting on.
request: SDKControlRequest
}
/**
* Provides a structured way to read and write SDK messages from stdio,
* capturing the SDK protocol.
*/
// Maximum number of resolved tool_use IDs to track. Once exceeded, the oldest
// entry is evicted. This bounds memory in very long sessions while keeping
// enough history to catch duplicate control_response deliveries.
const MAX_RESOLVED_TOOL_USE_IDS = 1000
export class StructuredIO {
readonly structuredInput: AsyncGenerator<StdinMessage | SDKMessage>
private readonly pendingRequests = new Map<string, PendingRequest<unknown>>()
// CCR external_metadata read back on worker start; null when the
// transport doesn't restore. Assigned by RemoteIO.
restoredWorkerState: Promise<SessionExternalMetadata | null> =
Promise.resolve(null)
private inputClosed = false
private unexpectedResponseCallback?: (
response: SDKControlResponse,
) => Promise<void>
// Tracks tool_use IDs that have been resolved through the normal permission
// flow (or aborted by a hook). When a duplicate control_response arrives
// after the original was already handled, this Set prevents the orphan
// handler from re-processing it — which would push duplicate assistant
// messages into mutableMessages and cause a 400 "tool_use ids must be unique"
// error from the API.
private readonly resolvedToolUseIds = new Set<string>()
private prependedLines: string[] = []
private onControlRequestSent?: (request: SDKControlRequest) => void
private onControlRequestResolved?: (requestId: string) => void
// sendRequest() and print.ts both enqueue here; the drain loop is the
// only writer. Prevents control_request from overtaking queued stream_events.
readonly outbound = new Stream<StdoutMessage>()
/**
 * @param input - Async source of raw text chunks; read() buffers them and
 *   splits them into lines.
 * @param replayUserMessages - When truthy, matched control_response messages
 *   are yielded from the input stream instead of being swallowed.
 */
constructor(
  private readonly input: AsyncIterable<string>,
  private readonly replayUserMessages?: boolean,
) {
  // Both parameters are parameter properties, so they are already assigned
  // to `this`; no explicit `this.input = input` is needed.
  // read() is a generator: nothing is consumed from `input` until
  // structuredInput is iterated.
  this.structuredInput = this.read()
}
/**
* Records a tool_use ID as resolved so that late/duplicate control_response
* messages for the same tool are ignored by the orphan handler.
*/
private trackResolvedToolUseId(request: SDKControlRequest): void {
  const inner = request.request
  // Only can_use_tool requests carry a tool_use_id to remember.
  if (inner.subtype !== 'can_use_tool') {
    return
  }

  const resolved = this.resolvedToolUseIds
  resolved.add(inner.tool_use_id)
  if (resolved.size <= MAX_RESOLVED_TOOL_USE_IDS) {
    return
  }

  // Over the cap: evict the oldest entry (Sets iterate in insertion order).
  const oldest = resolved.values().next().value
  if (oldest === undefined) {
    return
  }
  resolved.delete(oldest)
}
/** Flush pending internal events. No-op for non-remote IO. Overridden by RemoteIO. */
flushInternalEvents(): Promise<void> {
// Base implementation does no work and resolves immediately.
return Promise.resolve()
}
/** Internal-event queue depth. Overridden by RemoteIO; zero otherwise. */
get internalEventsPending(): number {
// Base implementation always reports an empty queue.
return 0
}
/**
* Queue a user turn to be yielded before the next message from this.input.
* Works both before iteration starts and mid-stream: read() re-checks
* prependedLines between each yielded message.
*/
prependUserMessage(content: string): void {
  const userTurn = {
    type: 'user',
    session_id: '',
    message: { role: 'user', content },
    parent_tool_use_id: null,
  } satisfies SDKUserMessage
  // Stored as one newline-terminated JSON line, the same framing read()
  // expects from the real input.
  const line = `${jsonStringify(userTurn)}\n`
  this.prependedLines.push(line)
}
/**
 * Async generator behind structuredInput. Buffers chunks from this.input,
 * splits the buffer on '\n', runs each complete line through processLine()
 * and yields every message it returns.
 *
 * When the input ends, any remaining unterminated text is processed as a
 * final line, inputClosed is set, and every request still pending is
 * rejected.
 */
private async *read() {
// Text received so far that has not yet been split into complete lines.
let content = ''
// Called once before for-await (an empty this.input otherwise skips the
// loop body entirely), then again per block. prependedLines re-check is
// inside the while so a prepend pushed between two messages in the SAME
// block still lands first.
const splitAndProcess = async function* (this: StructuredIO) {
for (;;) {
// Prepended lines go in front of whatever is already buffered.
if (this.prependedLines.length > 0) {
content = this.prependedLines.join('') + content
this.prependedLines = []
}
const newline = content.indexOf('\n')
// No complete line buffered: wait for more input.
if (newline === -1) break
const line = content.slice(0, newline)
content = content.slice(newline + 1)
const message = await this.processLine(line)
if (message) {
logForDiagnosticsNoPII('info', 'cli_stdin_message_parsed', {
type: message.type,
})
yield message
}
}
}.bind(this)
yield* splitAndProcess()
for await (const block of this.input) {
content += block
yield* splitAndProcess()
}
// Input ended without a trailing newline: treat the remainder as a line.
// This path does not emit the cli_stdin_message_parsed diagnostic.
if (content) {
const message = await this.processLine(content)
if (message) {
yield message
}
}
this.inputClosed = true
for (const request of this.pendingRequests.values()) {
// Reject all pending requests now that the input stream has ended.
request.reject(
new Error('Tool permission stream closed before response received'),
)
}
}
/**
 * Returns the control requests that are still awaiting a response and whose
 * subtype is `can_use_tool`, in the pending map's iteration order.
 */
getPendingPermissionRequests() {
  const permissionRequests: SDKControlRequest[] = []
  for (const { request } of this.pendingRequests.values()) {
    if (request.request.subtype === 'can_use_tool') {
      permissionRequests.push(request)
    }
  }
  return permissionRequests
}
/**
 * Registers the callback that processLine() awaits when a control_response
 * arrives whose request_id matches no pending request and which is not
 * recognized as a duplicate for an already-resolved tool_use ID.
 * A later call replaces the previous callback.
 */
setUnexpectedResponseCallback(
callback: (response: SDKControlResponse) => Promise<void>,
): void {
this.unexpectedResponseCallback = callback
}
/**
* Inject a control_response message to resolve a pending permission request.
* Used by the bridge to feed permission responses from claude.ai into the
* SDK permission flow.
*
* Also sends a control_cancel_request to the SDK consumer so its canUseTool
* callback is aborted via the signal; otherwise the callback hangs.
*/
injectControlResponse(response: SDKControlResponse): void {
  const requestId = response.response?.request_id
  if (!requestId) return

  const pending = this.pendingRequests.get(requestId)
  // Nothing is waiting on this id, so there is nothing to settle.
  if (!pending) return

  this.trackResolvedToolUseId(pending.request)
  this.pendingRequests.delete(requestId)

  // Cancel the SDK consumer's canUseTool callback — the bridge won.
  void this.write({
    type: 'control_cancel_request',
    request_id: requestId,
  })

  const payload = response.response
  if (payload.subtype === 'error') {
    pending.reject(new Error(payload.error))
    return
  }

  const result = payload.response
  if (!pending.schema) {
    // Without a schema the payload is ignored and the request resolves empty.
    pending.resolve({})
    return
  }
  try {
    pending.resolve(pending.schema.parse(result))
  } catch (error) {
    // A payload that fails schema validation rejects the request instead.
    pending.reject(error)
  }
}
/**
* Register a callback invoked whenever a can_use_tool control_request
* is written to stdout. Used by the bridge to forward permission
* requests to claude.ai.
*/
setOnControlRequestSent(
callback: ((request: SDKControlRequest) => void) | undefined,
): void {
// Replaces any previously registered callback; undefined clears it.
this.onControlRequestSent = callback
}
/**
* Register a callback invoked when a can_use_tool control_response arrives
* from the SDK consumer (via stdin). Used by the bridge to cancel the
* stale permission prompt on claude.ai when the SDK consumer wins the race.
*/
setOnControlRequestResolved(
callback: ((requestId: string) => void) | undefined,
): void {
// Replaces any previously registered callback; undefined clears it.
this.onControlRequestResolved = callback
}
/**
 * Parses one input line and decides whether it is yielded to the consumer.
 *
 * - Empty lines and `keep_alive` messages are dropped.
 * - `update_environment_variables` is applied to process.env and dropped.
 * - `control_response` settles the matching pending request; it is yielded
 *   only when replayUserMessages is set and the response is not an error.
 *   Responses with no matching request are dropped after duplicate
 *   detection and the unexpected-response callback.
 * - `user`, `assistant`, `system` and `control_request` messages are
 *   returned; any other type is logged and dropped.
 *
 * Any error thrown inside the body is reported as a parse error and
 * terminates the process with exit code 1.
 * NOTE(review): the try block also covers the awaited
 * unexpectedResponseCallback, so a failure there is reported as an input
 * parse error too — confirm that is intended.
 *
 * @param line - One line of input without its trailing newline.
 * @returns The message to yield, or undefined when the line is consumed here.
 */
private async processLine(
line: string,
): Promise<StdinMessage | SDKMessage | undefined> {
// Skip empty lines (e.g. from double newlines in piped stdin)
if (!line) {
return undefined
}
try {
const message = normalizeControlMessageKeys(jsonParse(line)) as
| StdinMessage
| SDKMessage
if (message.type === 'keep_alive') {
// Silently ignore keep-alive messages
return undefined
}
if (message.type === 'update_environment_variables') {
// Apply environment variable updates directly to process.env.
// Used by bridge session runner for auth token refresh
// (CLAUDE_CODE_SESSION_ACCESS_TOKEN) which must be readable
// by the REPL process itself, not just child Bash commands.
const keys = Object.keys(message.variables)
for (const [key, value] of Object.entries(message.variables)) {
process.env[key] = value
}
// Only the variable names are logged, not their values.
logForDebugging(
`[structuredIO] applied update_environment_variables: ${keys.join(', ')}`,
)
return undefined
}
if (message.type === 'control_response') {
// Close lifecycle for every control_response, including duplicates
// and orphans — orphans don't yield to print.ts's main loop, so this
// is the only path that sees them. uuid is server-injected into the
// payload.
const uuid =
'uuid' in message && typeof message.uuid === 'string'
? message.uuid
: undefined
if (uuid) {
notifyCommandLifecycle(uuid, 'completed')
}
const request = this.pendingRequests.get(message.response.request_id)
if (!request) {
// Check if this tool_use was already resolved through the normal
// permission flow. Duplicate control_response deliveries (e.g. from
// WebSocket reconnects) arrive after the original was handled, and
// re-processing them would push duplicate assistant messages into
// the conversation, causing API 400 errors.
const responsePayload =
message.response.subtype === 'success'
? message.response.response
: undefined
const toolUseID = responsePayload?.toolUseID
if (
typeof toolUseID === 'string' &&
this.resolvedToolUseIds.has(toolUseID)
) {
logForDebugging(
`Ignoring duplicate control_response for already-resolved toolUseID=${toolUseID} request_id=${message.response.request_id}`,
)
return undefined
}
if (this.unexpectedResponseCallback) {
await this.unexpectedResponseCallback(message)
}
return undefined // Ignore responses for requests we don't know about
}
// Matched a pending request: record it as resolved and stop tracking it
// before settling the promise.
this.trackResolvedToolUseId(request.request)
this.pendingRequests.delete(message.response.request_id)
// Notify the bridge when the SDK consumer resolves a can_use_tool
// request, so it can cancel the stale permission prompt on claude.ai.
if (
request.request.request.subtype === 'can_use_tool' &&
this.onControlRequestResolved
) {
this.onControlRequestResolved(message.response.request_id)
}
if (message.response.subtype === 'error') {
request.reject(new Error(message.response.error))
return undefined
}
const result = message.response.response
if (request.schema) {
// A payload that fails schema validation rejects the request.
try {
request.resolve(request.schema.parse(result))
} catch (error) {
request.reject(error)
}
} else {
// Without a schema the payload is ignored and the request resolves empty.
request.resolve({})
}
// Propagate control responses when replay is enabled
if (this.replayUserMessages) {
return message
}
return undefined
}
if (
message.type !== 'user' &&
message.type !== 'control_request' &&
message.type !== 'assistant' &&
message.type !== 'system'
) {
logForDebugging(`Ignoring unknown message type: ${message.type}`, {
level: 'warn',
})
return undefined
}
if (message.type === 'control_request') {
if (!message.request) {
exitWithMessage(`Error: Missing request on control_request`)
}
return message
}
if (message.type === 'assistant' || message.type === 'system') {
return message
}
// Only 'user' messages reach this point; their role must be 'user' as well.
if (message.message.role !== 'user') {
exitWithMessage(
`Error: Expected message role 'user', got '${message.message.role}'`,
)
}
return message
} catch (error) {
// The raw line and the error are printed to stderr before exiting.
// biome-ignore lint/suspicious/noConsole:: intentional console output
console.error(`Error parsing streaming input line: ${line}: ${error}`)
// eslint-disable-next-line custom-rules/no-process-exit
process.exit(1)
}
}
/**
 * Emit one outbound message on stdout: the message is stringified with
 * ndjsonSafeStringify, terminated with a newline, and handed to
 * writeToStdout.
 */
async write(message: StdoutMessage): Promise<void> {
  const line = ndjsonSafeStringify(message) + '\n'
  writeToStdout(line)
}
/**
 * Send a control_request on the outbound stream and wait for its response.
 *
 * The request is registered in `pendingRequests` under `requestId`; the
 * returned promise settles when that entry's resolve/reject is invoked, or
 * rejects with AbortError as soon as `signal` aborts.
 *
 * @param request - Control request payload; `subtype` selects the kind.
 * @param schema - Schema stored with the pending entry for validating the response.
 * @param signal - Optional abort signal. Aborting enqueues a
 *   control_cancel_request and rejects immediately, without waiting for the
 *   host to acknowledge.
 * @param requestId - Request identifier; defaults to a fresh random UUID.
 * @throws Error('Stream closed') when the input stream is already closed.
 * @throws Error('Request aborted') when `signal` is already aborted.
 */
private async sendRequest<Response>(
  request: SDKControlRequest['request'],
  schema: z.Schema,
  signal?: AbortSignal,
  requestId: string = randomUUID(),
): Promise<Response> {
  const message: SDKControlRequest = {
    type: 'control_request',
    request_id: requestId,
    request,
  }
  if (this.inputClosed) {
    throw new Error('Stream closed')
  }
  if (signal?.aborted) {
    throw new Error('Request aborted')
  }
  this.outbound.enqueue(message)
  if (request.subtype === 'can_use_tool' && this.onControlRequestSent) {
    this.onControlRequestSent(message)
  }
  const aborted = () => {
    this.outbound.enqueue({
      type: 'control_cancel_request',
      request_id: requestId,
    })
    // Immediately reject the outstanding promise, without
    // waiting for the host to acknowledge the cancellation.
    const pending = this.pendingRequests.get(requestId)
    if (pending) {
      // Track the tool_use ID as resolved before rejecting, so that a
      // late response from the host is ignored by the orphan handler.
      this.trackResolvedToolUseId(pending.request)
      // Drop the entry synchronously. The `finally` below only runs once the
      // awaiting continuation is resumed (a microtask later), so without
      // this an aborted request is still visible to code that inspects
      // `pendingRequests` right after calling abort() — e.g. a caller's own
      // `finally` that counts outstanding permission requests.
      // NOTE(review): presumably getPendingPermissionRequests() reads this
      // map — confirm.
      this.pendingRequests.delete(requestId)
      pending.reject(new AbortError())
    }
  }
  if (signal) {
    signal.addEventListener('abort', aborted, {
      once: true,
    })
  }
  try {
    return await new Promise<Response>((resolve, reject) => {
      this.pendingRequests.set(requestId, {
        request: {
          type: 'control_request',
          request_id: requestId,
          request,
        },
        resolve: result => {
          resolve(result as Response)
        },
        reject,
        schema,
      })
    })
  } finally {
    if (signal) {
      signal.removeEventListener('abort', aborted)
    }
    // No-op when the abort handler already removed the entry.
    this.pendingRequests.delete(requestId)
  }
}
/**
* Build the CanUseToolFn used to decide tool permissions through the SDK host.
*
* The returned function first consults hasPermissionsToUseTool (or uses
* `forceDecision` when one is supplied). An 'allow' or 'deny' result is
* returned as-is. Otherwise PermissionRequest hooks are raced against a
* can_use_tool control request sent to the host, and whichever produces a
* decision first wins. Any error along the way is converted into a 'deny'
* decision whose message embeds the error.
*
* @param onPermissionPrompt - Optional callback invoked with the prompt
* details immediately before the can_use_tool request is sent.
* @returns A CanUseToolFn resolving to the final PermissionDecision.
*/
createCanUseTool(
onPermissionPrompt?: (details: RequiresActionDetails) => void,
): CanUseToolFn {
return async (
tool: Tool,
input: { [key: string]: unknown },
toolUseContext: ToolUseContext,
assistantMessage: AssistantMessage,
toolUseID: string,
forceDecision?: PermissionDecision,
): Promise<PermissionDecision> => {
const mainPermissionResult =
forceDecision ??
(await hasPermissionsToUseTool(
tool,
input,
toolUseContext,
assistantMessage,
toolUseID,
))
// If the tool is allowed or denied, return the result
if (
mainPermissionResult.behavior === 'allow' ||
mainPermissionResult.behavior === 'deny'
) {
return mainPermissionResult
}
// Run PermissionRequest hooks in parallel with the SDK permission
// prompt. In the terminal CLI, hooks race against the interactive
// prompt so that e.g. a hook with --delay 20 doesn't block the UI.
// We need the same behavior here: the SDK host (VS Code, etc.) shows
// its permission dialog immediately while hooks run in the background.
// Whichever resolves first wins; the loser is cancelled/ignored.
// AbortController used to cancel the SDK request if a hook decides first
const hookAbortController = new AbortController()
const parentSignal = toolUseContext.abortController.signal
// Forward parent abort to our local controller
const onParentAbort = () => hookAbortController.abort()
parentSignal.addEventListener('abort', onParentAbort, { once: true })
try {
// Start the hook evaluation (runs in background)
const hookPromise = executePermissionRequestHooksForSDK(
tool.name,
toolUseID,
input,
toolUseContext,
mainPermissionResult.suggestions,
).then(decision => ({ source: 'hook' as const, decision }))
// Start the SDK permission prompt immediately (don't wait for hooks)
const requestId = randomUUID()
onPermissionPrompt?.(
buildRequiresActionDetails(tool, input, toolUseID, requestId),
)
const sdkPromise = this.sendRequest<PermissionToolOutput>(
{
subtype: 'can_use_tool',
tool_name: tool.name,
input,
permission_suggestions: mainPermissionResult.suggestions,
blocked_path: mainPermissionResult.blockedPath,
decision_reason: serializeDecisionReason(
mainPermissionResult.decisionReason,
),
tool_use_id: toolUseID,
agent_id: toolUseContext.agentId,
},
permissionToolOutputSchema(),
hookAbortController.signal,
requestId,
).then(result => ({ source: 'sdk' as const, result }))
// Race: hook completion vs SDK prompt response.
// The hook promise always resolves (never rejects), returning
// undefined if no hook made a decision.
const winner = await Promise.race([hookPromise, sdkPromise])
if (winner.source === 'hook') {
if (winner.decision) {
// Hook decided — abort the pending SDK request.
// Suppress the expected AbortError rejection from sdkPromise.
// The catch handler must be attached before abort() so the rejection
// is never observed as unhandled.
sdkPromise.catch(() => {})
hookAbortController.abort()
return winner.decision
}
// Hook passed through (no decision) — wait for the SDK prompt
const sdkResult = await sdkPromise
return permissionPromptToolResultToPermissionDecision(
sdkResult.result,
tool,
input,
toolUseContext,
)
}
// SDK prompt responded first — use its result (hook still running
// in background but its result will be ignored)
return permissionPromptToolResultToPermissionDecision(
winner.result,
tool,
input,
toolUseContext,
)
} catch (error) {
// Any failure (including an aborted SDK request) becomes a deny decision.
return permissionPromptToolResultToPermissionDecision(
{
behavior: 'deny',
message: `Tool permission request failed: ${error}`,
toolUseID,
},
tool,
input,
toolUseContext,
)
} finally {
// Only transition back to 'running' if no other permission prompts
// are pending (concurrent tool execution can have multiple in-flight).
if (this.getPendingPermissionRequests().length === 0) {
notifySessionStateChanged('running')
}
parentSignal.removeEventListener('abort', onParentAbort)
}
}
}
/**
 * Wrap a host-side hook, identified by `callbackId`, as a HookCallback.
 *
 * Invoking the callback forwards the hook input to the SDK host as a
 * hook_callback control request. If that request fails, the error is
 * written to stderr and an empty output object is returned instead.
 *
 * @param callbackId - Identifier sent to the host as `callback_id`.
 * @param timeout - Optional timeout copied onto the returned HookCallback.
 */
createHookCallback(callbackId: string, timeout?: number): HookCallback {
  const callback = async (
    input: HookInput,
    toolUseID: string | null,
    abort: AbortSignal | undefined,
  ): Promise<HookJSONOutput> => {
    try {
      // `await` inside the try so a rejected request lands in the catch below.
      return await this.sendRequest<HookJSONOutput>(
        {
          subtype: 'hook_callback',
          callback_id: callbackId,
          input,
          tool_use_id: toolUseID || undefined,
        },
        hookJSONOutputSchema(),
        abort,
      )
    } catch (error) {
      // biome-ignore lint/suspicious/noConsole:: intentional console output
      console.error(`Error in hook callback ${callbackId}:`, error)
      return {}
    }
  }
  return { type: 'callback', timeout, callback }
}
/**
 * Forward an elicitation request to the SDK consumer and return its answer.
 * If the request fails for any reason, the result is `{ action: 'cancel' }`.
 */
async handleElicitation(
  serverName: string,
  message: string,
  requestedSchema?: Record<string, unknown>,
  signal?: AbortSignal,
  mode?: 'form' | 'url',
  url?: string,
  elicitationId?: string,
): Promise<ElicitResult> {
  try {
    // `await` inside the try so a rejection is turned into a cancel result.
    return await this.sendRequest<ElicitResult>(
      {
        subtype: 'elicitation',
        mcp_server_name: serverName,
        message,
        mode,
        url,
        elicitation_id: elicitationId,
        requested_schema: requestedSchema,
      },
      SDKControlElicitationResponseSchema(),
      signal,
    )
  } catch {
    return { action: 'cancel' as const }
  }
}
/**
 * Build a SandboxAskCallback that asks the SDK host whether a sandboxed
 * network connection may proceed.
 *
 * The question is sent as a can_use_tool control request carrying a
 * synthetic tool name (SANDBOX_NETWORK_ACCESS_TOOL_NAME), so SDK hosts
 * (VS Code, CCR, etc.) can prompt the user without a dedicated protocol
 * subtype. The callback resolves to true only when the host answers
 * 'allow'; any failure resolves to false.
 */
createSandboxAskCallback(): (hostPattern: {
  host: string
  port?: number
}) => Promise<boolean> {
  const askHost = async (hostPattern: {
    host: string
    port?: number
  }): Promise<boolean> => {
    try {
      const answer = await this.sendRequest<PermissionToolOutput>(
        {
          subtype: 'can_use_tool',
          tool_name: SANDBOX_NETWORK_ACCESS_TOOL_NAME,
          input: { host: hostPattern.host },
          tool_use_id: randomUUID(),
          description: `Allow network connection to ${hostPattern.host}?`,
        },
        permissionToolOutputSchema(),
      )
      return answer.behavior === 'allow'
    } catch {
      // Stream closed, abort, etc. — treat every failure as a denial.
      return false
    }
  }
  return askHost
}
/**
 * Relay a JSON-RPC message to the named SDK MCP server via an mcp_message
 * control request and return the `mcp_response` field of the reply.
 */
async sendMcpMessage(
  serverName: string,
  message: JSONRPCMessage,
): Promise<JSONRPCMessage> {
  const reply = await this.sendRequest<{ mcp_response: JSONRPCMessage }>(
    { subtype: 'mcp_message', server_name: serverName, message },
    z.object({ mcp_response: z.any() as z.Schema<JSONRPCMessage> }),
  )
  return reply.mcp_response
}
}
/**
* Print `message` to stderr and terminate the process with exit code 1.
* Never returns.
*/
function exitWithMessage(message: string): never {
// biome-ignore lint/suspicious/noConsole:: intentional console output
console.error(message)
// eslint-disable-next-line custom-rules/no-process-exit
process.exit(1)
}
/**
 * Run the PermissionRequest hooks for a tool call and translate the first
 * allow/deny result they yield into a PermissionDecision.
 *
 * On 'allow', any permission updates supplied by the hook are persisted and
 * applied to the app state's tool permission context before returning.
 * Resolves to undefined when no hook yields an allow or deny result.
 */
async function executePermissionRequestHooksForSDK(
  toolName: string,
  toolUseID: string,
  input: Record<string, unknown>,
  toolUseContext: ToolUseContext,
  suggestions: PermissionUpdate[] | undefined,
): Promise<PermissionDecision | undefined> {
  const permissionMode = toolUseContext.getAppState().toolPermissionContext.mode
  // Consume the generator lazily so we can stop at the first decision.
  const hookResults = executePermissionRequestHooks(
    toolName,
    toolUseID,
    input,
    toolUseContext,
    permissionMode,
    suggestions,
    toolUseContext.abortController.signal,
  )
  for await (const hookResult of hookResults) {
    const decision = hookResult.permissionRequestResult
    if (!decision) continue

    if (decision.behavior === 'deny') {
      // Hook denied the permission
      return {
        behavior: 'deny',
        message:
          decision.message || 'Permission denied by PermissionRequest hook',
        decisionReason: {
          type: 'hook',
          hookName: 'PermissionRequest',
        },
      }
    }
    if (decision.behavior !== 'allow') continue

    // Apply permission updates if provided by hook ("always allow")
    const permissionUpdates = decision.updatedPermissions ?? []
    if (permissionUpdates.length > 0) {
      persistPermissionUpdates(permissionUpdates)
      const updatedContext = applyPermissionUpdates(
        toolUseContext.getAppState().toolPermissionContext,
        permissionUpdates,
      )
      // Update permission context via setAppState
      toolUseContext.setAppState(prev =>
        prev.toolPermissionContext === updatedContext
          ? prev
          : { ...prev, toolPermissionContext: updatedContext },
      )
    }
    return {
      behavior: 'allow',
      updatedInput: decision.updatedInput || input,
      userModified: false,
      decisionReason: {
        type: 'hook',
        hookName: 'PermissionRequest',
      },
    }
  }
  return undefined
}

View file

@ -0,0 +1,282 @@
import axios, { type AxiosError } from 'axios'
import type { StdoutMessage } from 'src/entrypoints/sdk/controlTypes.js'
import { logForDebugging } from '../../utils/debug.js'
import { logForDiagnosticsNoPII } from '../../utils/diagLogs.js'
import { getSessionIngressAuthToken } from '../../utils/sessionIngressAuth.js'
import { SerialBatchEventUploader } from './SerialBatchEventUploader.js'
import {
WebSocketTransport,
type WebSocketTransportOptions,
} from './WebSocketTransport.js'
const BATCH_FLUSH_INTERVAL_MS = 100
// Per-attempt POST timeout. Bounds how long a single stuck POST can block
// the serialized queue. Without this, a hung connection stalls all writes.
const POST_TIMEOUT_MS = 15_000
// Grace period for queued writes on close(). Covers a healthy POST (~100ms)
// plus headroom; best-effort, not a delivery guarantee under degraded network.
// Void-ed (nothing awaits it) so this is a last resort — replBridge teardown
// now closes AFTER archive so archive latency is the primary drain window.
// NOTE: gracefulShutdown's cleanup budget is 2s (not the 5s outer failsafe);
// 3s here exceeds it, but the process lives ~2s longer for hooks+analytics.
const CLOSE_GRACE_MS = 3000
/**
* Hybrid transport: WebSocket for reads, HTTP POST for writes.
*
* Write flow:
*
*   write(stream_event) --+
*                         | (100ms timer)
*                         v
*   write(other) ------> uploader.enqueue()   (SerialBatchEventUploader)
*                         ^        |
*   writeBatch() ---------+        | serial, batched, retries indefinitely,
*                                  | backpressure at maxQueueSize
*                                  v
*                         postOnce()   (single HTTP POST, throws on retryable)
*
* stream_event messages accumulate in streamEventBuffer for up to 100ms
* before enqueue (reduces POST count for high-volume content deltas). A
* non-stream write flushes any buffered stream_events first to preserve order.
*
* Serialization + retry + backpressure are delegated to SerialBatchEventUploader
* (same primitive CCR uses). At most one POST in-flight; events arriving during
* a POST batch into the next one. On failure, the uploader re-queues and retries
* with exponential backoff + jitter. If the queue fills past maxQueueSize,
* enqueue() blocks, giving awaiting callers backpressure.
*
* Why serialize? Bridge mode fires writes via `void transport.write()`
* (fire-and-forget). Without this: concurrent POSTs -> concurrent Firestore
* writes to the same document -> collisions -> retry storms -> pages oncall.
*/
export class HybridTransport extends WebSocketTransport {
private postUrl: string
private uploader: SerialBatchEventUploader<StdoutMessage>
// stream_event delay buffer — accumulates content deltas for up to
// BATCH_FLUSH_INTERVAL_MS before enqueueing (reduces POST count)
private streamEventBuffer: StdoutMessage[] = []
private streamEventTimer: ReturnType<typeof setTimeout> | null = null
/**
* All arguments are forwarded unchanged to the WebSocketTransport
* constructor. `url` is additionally converted to the POST endpoint via
* convertWsUrlToPostUrl, and `options.maxConsecutiveFailures` /
* `options.onBatchDropped` are passed through to the uploader.
*/
constructor(
url: URL,
headers: Record<string, string> = {},
sessionId?: string,
refreshHeaders?: () => Record<string, string>,
options?: WebSocketTransportOptions & {
maxConsecutiveFailures?: number
onBatchDropped?: (batchSize: number, failures: number) => void
},
) {
super(url, headers, sessionId, refreshHeaders, options)
const { maxConsecutiveFailures, onBatchDropped } = options ?? {}
this.postUrl = convertWsUrlToPostUrl(url)
this.uploader = new SerialBatchEventUploader<StdoutMessage>({
// Large cap — session-ingress accepts arbitrary batch sizes. Events
// naturally batch during in-flight POSTs; this just bounds the payload.
maxBatchSize: 500,
// Bridge callers use `void transport.write()` — backpressure doesn't
// apply (they don't await). A batch >maxQueueSize deadlocks (see
// SerialBatchEventUploader backpressure check). So set it high enough
// to be a memory bound only. Wire real backpressure in a follow-up
// once callers await.
maxQueueSize: 100_000,
baseDelayMs: 500,
maxDelayMs: 8000,
jitterMs: 1000,
// Optional cap so a persistently-failing server can't pin the drain
// loop for the lifetime of the process. Undefined = indefinite retry.
// replBridge sets this; the 1P transportUtils path does not.
maxConsecutiveFailures,
// Always emit a diagnostic, then forward to the caller's handler if any.
onBatchDropped: (batchSize, failures) => {
logForDiagnosticsNoPII(
'error',
'cli_hybrid_batch_dropped_max_failures',
{
batchSize,
failures,
},
)
onBatchDropped?.(batchSize, failures)
},
send: batch => this.postOnce(batch),
})
logForDebugging(`HybridTransport: POST URL = ${this.postUrl}`)
logForDiagnosticsNoPII('info', 'cli_hybrid_transport_initialized')
}
/**
* Enqueue a message and wait for the queue to drain. Returning flush()
* preserves the contract that `await write()` resolves after the event is
* POSTed (relied on by tests and replBridge's initial flush). Fire-and-forget
* callers (`void transport.write()`) are unaffected — they don't await,
* so the later resolution doesn't add latency.
*
* stream_event messages are the exception: they are only buffered here and
* the returned promise resolves immediately.
*/
override async write(message: StdoutMessage): Promise<void> {
if (message.type === 'stream_event') {
// Delay: accumulate stream_events briefly before enqueueing.
// Promise resolves immediately — callers don't await stream_events.
this.streamEventBuffer.push(message)
if (!this.streamEventTimer) {
this.streamEventTimer = setTimeout(
() => this.flushStreamEvents(),
BATCH_FLUSH_INTERVAL_MS,
)
}
return
}
// Immediate: flush any buffered stream_events (ordering), then this event.
await this.uploader.enqueue([...this.takeStreamEvents(), message])
return this.uploader.flush()
}
/**
* Enqueue several messages at once, preceded by any buffered stream_events,
* and wait for the uploader to flush.
*/
async writeBatch(messages: StdoutMessage[]): Promise<void> {
await this.uploader.enqueue([...this.takeStreamEvents(), ...messages])
return this.uploader.flush()
}
/** Snapshot before/after writeBatch() to detect silent drops. */
get droppedBatchCount(): number {
return this.uploader.droppedBatchCount
}
/**
* Block until all pending events are POSTed. Used by bridge's initial
* history flush so onStateChange('connected') fires after persistence.
*/
flush(): Promise<void> {
// Buffered stream_events are handed to the uploader without awaiting the
// enqueue; the returned promise is the uploader's flush().
void this.uploader.enqueue(this.takeStreamEvents())
return this.uploader.flush()
}
/** Take ownership of buffered stream_events and clear the delay timer. */
private takeStreamEvents(): StdoutMessage[] {
if (this.streamEventTimer) {
clearTimeout(this.streamEventTimer)
this.streamEventTimer = null
}
const buffered = this.streamEventBuffer
this.streamEventBuffer = []
return buffered
}
/** Delay timer fired — enqueue accumulated stream_events. */
private flushStreamEvents(): void {
this.streamEventTimer = null
void this.uploader.enqueue(this.takeStreamEvents())
}
/**
* Close the transport. Synchronous: cancels the stream_event timer,
* discards stream_events still sitting in the delay buffer, and closes the
* WebSocket side via super.close(). The uploader is closed later, once its
* flush() settles or CLOSE_GRACE_MS elapses, whichever comes first.
*/
override close(): void {
if (this.streamEventTimer) {
clearTimeout(this.streamEventTimer)
this.streamEventTimer = null
}
// Note: events still in the delay buffer are dropped here, not enqueued.
this.streamEventBuffer = []
// Grace period for queued writes — fallback. replBridge teardown now
// awaits archive between write and close (see CLOSE_GRACE_MS), so
// archive latency is the primary drain window and this is a last
// resort. Keep close() sync (returns immediately) but defer
// uploader.close() so any remaining queue gets a chance to finish.
const uploader = this.uploader
let graceTimer: ReturnType<typeof setTimeout> | undefined
void Promise.race([
uploader.flush(),
new Promise<void>(r => {
// eslint-disable-next-line no-restricted-syntax -- need timer ref for clearTimeout
graceTimer = setTimeout(r, CLOSE_GRACE_MS)
}),
]).finally(() => {
clearTimeout(graceTimer)
uploader.close()
})
super.close()
}
/**
* Single-attempt POST. Throws on retryable failures (429, 5xx, network)
* so SerialBatchEventUploader re-queues and retries. Returns on success
* and on permanent failures (4xx non-429, no token) so the uploader moves on.
*/
private async postOnce(events: StdoutMessage[]): Promise<void> {
const sessionToken = getSessionIngressAuthToken()
if (!sessionToken) {
logForDebugging('HybridTransport: No session token available for POST')
logForDiagnosticsNoPII('warn', 'cli_hybrid_post_no_token')
return
}
const headers: Record<string, string> = {
Authorization: `Bearer ${sessionToken}`,
'Content-Type': 'application/json',
}
let response
try {
response = await axios.post(
this.postUrl,
{ events },
{
headers,
// Never throw on HTTP status; status handling is done explicitly below.
validateStatus: () => true,
timeout: POST_TIMEOUT_MS,
},
)
} catch (error) {
// Reached for transport-level failures (including the timeout) — rethrow
// so the uploader treats it as retryable.
const axiosError = error as AxiosError
logForDebugging(`HybridTransport: POST error: ${axiosError.message}`)
logForDiagnosticsNoPII('warn', 'cli_hybrid_post_network_error')
throw error
}
if (response.status >= 200 && response.status < 300) {
logForDebugging(`HybridTransport: POST success count=${events.length}`)
return
}
// 4xx (except 429) are permanent — drop, don't retry.
if (
response.status >= 400 &&
response.status < 500 &&
response.status !== 429
) {
logForDebugging(
`HybridTransport: POST returned ${response.status} (permanent), dropping`,
)
logForDiagnosticsNoPII('warn', 'cli_hybrid_post_client_error', {
status: response.status,
})
return
}
// 429 / 5xx — retryable. Throw so uploader re-queues and backs off.
logForDebugging(
`HybridTransport: POST returned ${response.status} (retryable)`,
)
logForDiagnosticsNoPII('warn', 'cli_hybrid_post_retryable_error', {
status: response.status,
})
throw new Error(`POST failed with ${response.status}`)
}
}
/**
 * Derive the HTTP POST events endpoint from a session-ingress WebSocket URL.
 *
 *   wss://api.example.com/v2/session_ingress/ws/<session_id>
 *     becomes
 *   https://api.example.com/v2/session_ingress/session/<session_id>/events
 *
 * `wss:` maps to `https:` and every other scheme to `http:`. The first
 * `/ws/` path segment becomes `/session/`, `/events` is appended unless the
 * path already ends with it, and the host and query string are preserved.
 */
function convertWsUrlToPostUrl(wsUrl: URL): string {
  const scheme = wsUrl.protocol === 'wss:' ? 'https:' : 'http:'
  const sessionPath = wsUrl.pathname.replace('/ws/', '/session/')

  let eventsPath = sessionPath
  if (!sessionPath.endsWith('/events')) {
    // Avoid a doubled slash when the path already ends with one.
    const separator = sessionPath.endsWith('/') ? '' : '/'
    eventsPath = sessionPath + separator + 'events'
  }

  return [scheme, '//', wsUrl.host, eventsPath, wsUrl.search].join('')
}

View file

@ -0,0 +1,711 @@
import axios, { type AxiosError } from 'axios'
import type { StdoutMessage } from 'src/entrypoints/sdk/controlTypes.js'
import { logForDebugging } from '../../utils/debug.js'
import { logForDiagnosticsNoPII } from '../../utils/diagLogs.js'
import { errorMessage } from '../../utils/errors.js'
import { getSessionIngressAuthHeaders } from '../../utils/sessionIngressAuth.js'
import { sleep } from '../../utils/sleep.js'
import { jsonParse, jsonStringify } from '../../utils/slowOperations.js'
import { getClaudeCodeUserAgent } from '../../utils/userAgent.js'
import type { Transport } from './Transport.js'
// ---------------------------------------------------------------------------
// Configuration
// ---------------------------------------------------------------------------
const RECONNECT_BASE_DELAY_MS = 1000
const RECONNECT_MAX_DELAY_MS = 30_000
/** Time budget for reconnection attempts before giving up (10 minutes). */
const RECONNECT_GIVE_UP_MS = 600_000
/** Server sends keepalives every 15s; treat connection as dead after 45s of silence. */
const LIVENESS_TIMEOUT_MS = 45_000
/**
* HTTP status codes that indicate a permanent server-side rejection.
* The transport transitions to 'closed' immediately without retrying.
*/
const PERMANENT_HTTP_CODES = new Set([401, 403, 404])
// POST retry configuration (matches HybridTransport)
const POST_MAX_RETRIES = 10
const POST_BASE_DELAY_MS = 500
const POST_MAX_DELAY_MS = 8000
/** Hoisted TextDecoder options to avoid per-chunk allocation in readStream. */
const STREAM_DECODE_OPTS: TextDecodeOptions = { stream: true }
/**
 * Predicate that accepts every HTTP status. Intended as a shared axios
 * `validateStatus` callback, defined once at module scope so requests do
 * not each allocate their own closure.
 */
function alwaysValidStatus(): boolean {
  const accepted = true
  return accepted
}
// ---------------------------------------------------------------------------
// SSE Frame Parser
// ---------------------------------------------------------------------------
/** One parsed SSE frame; fields absent from the frame are left undefined. */
type SSEFrame = {
  event?: string
  id?: string
  data?: string
}
/**
 * Incrementally parse SSE frames from a text buffer.
 *
 * Frames are separated by a blank line. Both LF and CRLF line endings are
 * accepted: CRLF pairs are normalized to LF before parsing, so `remaining`
 * is returned in normalized form. A lone trailing `\r` is kept as-is, since
 * it may be the first half of a CRLF split across two chunks; it is
 * normalized on the next call once the `\n` has been appended.
 *
 * A frame is emitted when it carries non-empty data, or when it contains a
 * comment line (e.g. `:keepalive`), which callers can use as a liveness
 * signal.
 *
 * @param buffer - Accumulated, not-yet-parsed stream text.
 * @returns The complete frames found, and the unparsed tail to prepend to
 *   the next chunk.
 *
 * @internal exported for testing
 */
export function parseSSEFrames(buffer: string): {
  frames: SSEFrame[]
  remaining: string
} {
  // Skip the regex pass entirely for the common LF-only case.
  const text = buffer.includes('\r') ? buffer.replace(/\r\n/g, '\n') : buffer
  const frames: SSEFrame[] = []
  let pos = 0
  // SSE frames are delimited by double newlines
  let idx: number
  while ((idx = text.indexOf('\n\n', pos)) !== -1) {
    const rawFrame = text.slice(pos, idx)
    pos = idx + 2
    // Skip empty frames
    if (!rawFrame.trim()) continue
    const frame: SSEFrame = {}
    let isComment = false
    for (const line of rawFrame.split('\n')) {
      if (line.startsWith(':')) {
        // SSE comment (e.g., `:keepalive`)
        isComment = true
        continue
      }
      const colonIdx = line.indexOf(':')
      if (colonIdx === -1) continue
      const field = line.slice(0, colonIdx)
      // Per SSE spec, strip one leading space after colon if present
      const value =
        line[colonIdx + 1] === ' '
          ? line.slice(colonIdx + 2)
          : line.slice(colonIdx + 1)
      switch (field) {
        case 'event':
          frame.event = value
          break
        case 'id':
          frame.id = value
          break
        case 'data':
          // Per SSE spec, multiple data: lines are concatenated with \n.
          // Compare against undefined rather than truthiness so that an
          // empty first data line still contributes its newline.
          frame.data =
            frame.data === undefined ? value : frame.data + '\n' + value
          break
        // Ignore other fields (retry:, etc.)
      }
    }
    // Only emit frames that have data (or are pure comments which reset liveness)
    if (frame.data || isComment) {
      frames.push(frame)
    }
  }
  return { frames, remaining: text.slice(pos) }
}
// ---------------------------------------------------------------------------
// Types
// ---------------------------------------------------------------------------
type SSETransportState =
| 'idle'
| 'connected'
| 'reconnecting'
| 'closing'
| 'closed'
/**
* Payload for `event: client_event` frames, matching the StreamClientEvent
* proto message in session_stream.proto. This is the only event type sent
* to worker subscribers; delivery_update, session_update, ephemeral_event,
* and catch_up_truncated are client-channel-only (see notifier.go and
* the SubscriberClient guard in event_stream.go).
*/
export type StreamClientEvent = {
event_id: string
sequence_num: number
event_type: string
source: string
payload: Record<string, unknown>
created_at: string
}
// ---------------------------------------------------------------------------
// SSETransport
// ---------------------------------------------------------------------------
/**
* Transport that uses SSE for reading and HTTP POST for writing.
*
* Reads events via Server-Sent Events from the CCR v2 event stream endpoint.
* Writes events via HTTP POST with retry logic (same pattern as HybridTransport).
*
* Each `event: client_event` frame carries a StreamClientEvent proto JSON
* directly in `data:`. The transport extracts `payload` and passes it to
* `onData` as newline-delimited JSON for StructuredIO consumers.
*
* Supports automatic reconnection with exponential backoff and Last-Event-ID
* for resumption after disconnection.
*/
export class SSETransport implements Transport {
private state: SSETransportState = 'idle'
private onData?: (data: string) => void
private onCloseCallback?: (closeCode?: number) => void
private onEventCallback?: (event: StreamClientEvent) => void
private headers: Record<string, string>
private sessionId?: string
private refreshHeaders?: () => Record<string, string>
private readonly getAuthHeaders: () => Record<string, string>
// SSE connection state
private abortController: AbortController | null = null
private lastSequenceNum = 0
private seenSequenceNums = new Set<number>()
// Reconnection state
private reconnectAttempts = 0
private reconnectStartTime: number | null = null
private reconnectTimer: NodeJS.Timeout | null = null
// Liveness detection
private livenessTimer: NodeJS.Timeout | null = null
// POST URL (derived from SSE URL)
private postUrl: string
// Runtime epoch for CCR v2 event format
constructor(
  private readonly url: URL,
  headers: Record<string, string> = {},
  sessionId?: string,
  refreshHeaders?: () => Record<string, string>,
  initialSequenceNum?: number,
  /**
   * Per-instance auth header source. Omit to read the process-wide
   * CLAUDE_CODE_SESSION_ACCESS_TOKEN (single-session callers). Required
   * for concurrent multi-session callers — the env-var path is a process
   * global and would stomp across sessions.
   */
  getAuthHeaders?: () => Record<string, string>,
) {
  this.headers = headers
  this.sessionId = sessionId
  this.refreshHeaders = refreshHeaders
  this.getAuthHeaders = getAuthHeaders ?? getSessionIngressAuthHeaders
  this.postUrl = convertSSEUrlToPostUrl(url)

  // A caller-supplied high-water mark lets the very first connect() send
  // from_sequence_num / Last-Event-ID. Without it a fresh SSETransport asks
  // the server to replay from sequence 0, i.e. the whole session history on
  // every transport swap. Missing or non-positive seeds are ignored.
  const seedSequenceNum = initialSequenceNum ?? 0
  if (seedSequenceNum > 0) {
    this.lastSequenceNum = seedSequenceNum
  }

  logForDebugging(`SSETransport: SSE URL = ${url.href}`)
  logForDebugging(`SSETransport: POST URL = ${this.postUrl}`)
  logForDiagnosticsNoPII('info', 'cli_sse_transport_initialized')
}
/**
 * Highest sequence number observed on this stream so far (or the seed
 * passed to the constructor). A caller that replaces the transport, e.g.
 * replBridge onWorkReceived, reads this before close() and passes it as
 * `initialSequenceNum` to the new instance, so the server resumes from
 * that point rather than replaying everything.
 */
getLastSequenceNum(): number {
  const { lastSequenceNum } = this
  return lastSequenceNum
}
/**
 * Open the SSE stream and process it until it ends.
 *
 * Only valid from the 'idle' or 'reconnecting' state; in any other state
 * the call is logged and ignored. Once a sequence number is known, the
 * request resumes from it via both the `from_sequence_num` query parameter
 * and the `Last-Event-ID` header.
 *
 * An HTTP status in PERMANENT_HTTP_CODES moves the transport to 'closed'
 * and invokes the close callback with that status. Any other failure is
 * handed to handleConnectionError(). An error raised after this
 * transport's own AbortController was aborted is treated as an intentional
 * close and ignored.
 */
async connect(): Promise<void> {
  if (this.state !== 'idle' && this.state !== 'reconnecting') {
    logForDebugging(
      `SSETransport: Cannot connect, current state is ${this.state}`,
      { level: 'error' },
    )
    logForDiagnosticsNoPII('error', 'cli_sse_connect_failed')
    return
  }
  this.state = 'reconnecting'
  const connectStartTime = Date.now()
  // Build SSE URL with sequence number for resumption
  const sseUrl = new URL(this.url.href)
  if (this.lastSequenceNum > 0) {
    sseUrl.searchParams.set('from_sequence_num', String(this.lastSequenceNum))
  }
  // Build headers -- use fresh auth headers (supports Cookie for session keys).
  // Remove stale Authorization header from this.headers when Cookie auth is used,
  // since sending both confuses the auth interceptor.
  const authHeaders = this.getAuthHeaders()
  const headers: Record<string, string> = {
    ...this.headers,
    ...authHeaders,
    Accept: 'text/event-stream',
    'anthropic-version': '2023-06-01',
    'User-Agent': getClaudeCodeUserAgent(),
  }
  if (authHeaders['Cookie']) {
    delete headers['Authorization']
  }
  if (this.lastSequenceNum > 0) {
    headers['Last-Event-ID'] = String(this.lastSequenceNum)
  }
  logForDebugging(`SSETransport: Opening ${sseUrl.href}`)
  logForDiagnosticsNoPII('info', 'cli_sse_connect_opening')
  this.abortController = new AbortController()
  try {
    // eslint-disable-next-line eslint-plugin-n/no-unsupported-features/node-builtins
    const response = await fetch(sseUrl.href, {
      headers,
      signal: this.abortController.signal,
    })
    if (!response.ok) {
      // The error body is never read. Cancel it so the underlying connection
      // is released now rather than whenever the Response is garbage
      // collected; this path can repeat on every reconnect attempt.
      // Fire-and-forget: a failed cancel must not affect error handling.
      void response.body?.cancel().catch(() => {})
      const isPermanent = PERMANENT_HTTP_CODES.has(response.status)
      logForDebugging(
        `SSETransport: HTTP ${response.status}${isPermanent ? ' (permanent)' : ''}`,
        { level: 'error' },
      )
      logForDiagnosticsNoPII('error', 'cli_sse_connect_http_error', {
        status: response.status,
      })
      if (isPermanent) {
        this.state = 'closed'
        this.onCloseCallback?.(response.status)
        return
      }
      this.handleConnectionError()
      return
    }
    if (!response.body) {
      logForDebugging('SSETransport: No response body')
      this.handleConnectionError()
      return
    }
    // Successfully connected
    const connectDuration = Date.now() - connectStartTime
    logForDebugging('SSETransport: Connected')
    logForDiagnosticsNoPII('info', 'cli_sse_connect_connected', {
      duration_ms: connectDuration,
    })
    this.state = 'connected'
    this.reconnectAttempts = 0
    this.reconnectStartTime = null
    this.resetLivenessTimer()
    // Read the SSE stream
    await this.readStream(response.body)
  } catch (error) {
    if (this.abortController?.signal.aborted) {
      // Intentional close
      return
    }
    logForDebugging(
      `SSETransport: Connection error: ${errorMessage(error)}`,
      { level: 'error' },
    )
    logForDiagnosticsNoPII('error', 'cli_sse_connect_error')
    this.handleConnectionError()
  }
}
/**
* Read and process the SSE stream body.
*
* Decodes chunks incrementally, splits them into frames with parseSSEFrames,
* tracks the sequence number carried in each frame's `id:` field, and
* dispatches frames that have both `event:` and `data:` to handleSSEFrame.
* When the stream ends or a read fails, and the transport is not closing
* or closed, handleConnectionError() is invoked to reconnect.
*/
// eslint-disable-next-line eslint-plugin-n/no-unsupported-features/node-builtins
private async readStream(body: ReadableStream<Uint8Array>): Promise<void> {
const reader = body.getReader()
const decoder = new TextDecoder()
// Holds the unparsed tail (an incomplete frame) between chunks.
let buffer = ''
try {
while (true) {
const { done, value } = await reader.read()
if (done) break
buffer += decoder.decode(value, STREAM_DECODE_OPTS)
const { frames, remaining } = parseSSEFrames(buffer)
buffer = remaining
for (const frame of frames) {
// Any frame (including keepalive comments) proves the connection is alive
this.resetLivenessTimer()
if (frame.id) {
const seqNum = parseInt(frame.id, 10)
if (!isNaN(seqNum)) {
if (this.seenSequenceNums.has(seqNum)) {
// Duplicates are only reported here; the frame is still dispatched below.
logForDebugging(
`SSETransport: DUPLICATE frame seq=${seqNum} (lastSequenceNum=${this.lastSequenceNum}, seenCount=${this.seenSequenceNums.size})`,
{ level: 'warn' },
)
logForDiagnosticsNoPII('warn', 'cli_sse_duplicate_sequence')
} else {
this.seenSequenceNums.add(seqNum)
// Prevent unbounded growth: once we have many entries, prune
// old sequence numbers that are well below the high-water mark.
// Only sequence numbers near lastSequenceNum matter for dedup.
if (this.seenSequenceNums.size > 1000) {
// Uses the high-water mark from before this frame; it is advanced below.
const threshold = this.lastSequenceNum - 200
for (const s of this.seenSequenceNums) {
if (s < threshold) {
this.seenSequenceNums.delete(s)
}
}
}
}
// Advance the high-water mark; it never moves backwards.
if (seqNum > this.lastSequenceNum) {
this.lastSequenceNum = seqNum
}
}
}
if (frame.event && frame.data) {
this.handleSSEFrame(frame.event, frame.data)
} else if (frame.data) {
// data: without event: — server is emitting the old envelope format
// or a bug. Log so incidents show as a signal instead of silent drops.
logForDebugging(
'SSETransport: Frame has data: but no event: field — dropped',
{ level: 'warn' },
)
logForDiagnosticsNoPII('warn', 'cli_sse_frame_missing_event_field')
}
}
}
} catch (error) {
// A read error after an intentional abort is expected; skip reconnect.
if (this.abortController?.signal.aborted) return
logForDebugging(
`SSETransport: Stream read error: ${errorMessage(error)}`,
{ level: 'error' },
)
logForDiagnosticsNoPII('error', 'cli_sse_stream_read_error')
} finally {
reader.releaseLock()
}
// Stream ended — reconnect unless we're closing
if (this.state !== 'closing' && this.state !== 'closed') {
logForDebugging('SSETransport: Stream ended, reconnecting')
this.handleConnectionError()
}
}
/**
 * Handle a single SSE frame. The event: field names the variant; data:
 * carries the inner proto JSON directly (no envelope).
 *
 * Worker subscribers only receive client_event frames (see notifier.go) —
 * any other event type indicates a server-side change that CC doesn't yet
 * understand. Log a diagnostic so we notice in telemetry.
 *
 * For a client_event whose payload is an object with a `type` key, the
 * payload is re-serialized as one newline-terminated JSON line and passed to
 * onData. The full event is then passed to onEventCallback, whether or not
 * the payload was forwarded.
 *
 * @param eventType Value of the frame's `event:` field.
 * @param data Raw value of the frame's `data:` field (expected to be JSON).
 */
private handleSSEFrame(eventType: string, data: string): void {
  if (eventType !== 'client_event') {
    logForDebugging(
      `SSETransport: Unexpected SSE event type '${eventType}' on worker stream`,
      { level: 'warn' },
    )
    logForDiagnosticsNoPII('warn', 'cli_sse_unexpected_event_type', {
      event_type: eventType,
    })
    return
  }
  let parsed: unknown
  try {
    parsed = jsonParse(data)
  } catch (error) {
    logForDebugging(
      `SSETransport: Failed to parse client_event data: ${errorMessage(error)}`,
      { level: 'error' },
    )
    return
  }
  // Valid JSON is not necessarily an object: `null` would make the property
  // reads below throw (tearing down the whole stream from inside the read
  // loop), and primitives would be handed to onEventCallback as if they were
  // events. Drop such frames instead.
  if (parsed === null || typeof parsed !== 'object') {
    logForDebugging(
      'SSETransport: Ignoring client_event whose data is not a JSON object',
      { level: 'warn' },
    )
    return
  }
  const ev = parsed as StreamClientEvent
  const payload = ev.payload
  if (payload && typeof payload === 'object' && 'type' in payload) {
    const sessionLabel = this.sessionId ? ` session=${this.sessionId}` : ''
    logForDebugging(
      `SSETransport: Event seq=${ev.sequence_num} event_id=${ev.event_id} event_type=${ev.event_type} payload_type=${String(payload.type)}${sessionLabel}`,
    )
    logForDiagnosticsNoPII('info', 'cli_sse_message_received')
    // Pass the unwrapped payload as newline-delimited JSON,
    // matching the format that StructuredIO/WebSocketTransport consumers expect
    this.onData?.(jsonStringify(payload) + '\n')
  } else {
    logForDebugging(
      `SSETransport: Ignoring client_event with no type in payload: event_id=${ev.event_id}`,
    )
  }
  this.onEventCallback?.(ev)
}
/**
 * React to a lost or failed SSE connection.
 *
 * Stops the liveness timer, aborts any in-flight fetch, and then either
 * schedules another connect() with jittered exponential backoff or, once
 * RECONNECT_GIVE_UP_MS has elapsed since the first failure of this outage,
 * marks the transport closed and invokes the close callback. Does nothing
 * beyond clearing the liveness timer when the transport is already closing
 * or closed.
 */
private handleConnectionError(): void {
  this.clearLivenessTimer()
  const shuttingDown = this.state === 'closing' || this.state === 'closed'
  if (shuttingDown) return

  // Abort any in-flight SSE fetch
  this.abortController?.abort()
  this.abortController = null

  // The budget clock starts at the first failure of this outage.
  const now = Date.now()
  if (!this.reconnectStartTime) {
    this.reconnectStartTime = now
  }
  const elapsed = now - this.reconnectStartTime
  const withinBudget = elapsed < RECONNECT_GIVE_UP_MS

  if (!withinBudget) {
    logForDebugging(
      `SSETransport: Reconnection time budget exhausted after ${Math.round(elapsed / 1000)}s`,
      { level: 'error' },
    )
    logForDiagnosticsNoPII('error', 'cli_sse_reconnect_exhausted', {
      reconnectAttempts: this.reconnectAttempts,
      elapsedMs: elapsed,
    })
    this.state = 'closed'
    this.onCloseCallback?.()
    return
  }

  // Never keep two reconnect timers alive at once.
  if (this.reconnectTimer) {
    clearTimeout(this.reconnectTimer)
    this.reconnectTimer = null
  }

  // Refresh headers before reconnecting
  if (this.refreshHeaders) {
    const freshHeaders = this.refreshHeaders()
    Object.assign(this.headers, freshHeaders)
    logForDebugging('SSETransport: Refreshed headers for reconnect')
  }

  this.state = 'reconnecting'
  this.reconnectAttempts++

  // Doubles per attempt, capped at RECONNECT_MAX_DELAY_MS.
  const uncapped = RECONNECT_BASE_DELAY_MS * 2 ** (this.reconnectAttempts - 1)
  const baseDelay = Math.min(uncapped, RECONNECT_MAX_DELAY_MS)
  // Add ±25% jitter
  const jittered = baseDelay + baseDelay * 0.25 * (2 * Math.random() - 1)
  const delay = Math.max(0, jittered)

  logForDebugging(
    `SSETransport: Reconnecting in ${Math.round(delay)}ms (attempt ${this.reconnectAttempts}, ${Math.round(elapsed / 1000)}s elapsed)`,
  )
  logForDiagnosticsNoPII('error', 'cli_sse_reconnect_attempt', {
    reconnectAttempts: this.reconnectAttempts,
  })
  this.reconnectTimer = setTimeout(() => {
    this.reconnectTimer = null
    void this.connect()
  }, delay)
}
/**
 * Liveness-timer callback, kept as a single bound property so that
 * resetLivenessTimer() (which runs once per SSE frame) can pass the same
 * function object to setTimeout every time instead of allocating a closure.
 *
 * Aborts the current fetch and routes through handleConnectionError().
 */
private readonly onLivenessTimeout = (): void => {
  // The timer has already fired; drop the stale handle.
  this.livenessTimer = null
  logForDebugging('SSETransport: Liveness timeout, reconnecting', {
    level: 'error',
  })
  logForDiagnosticsNoPII('error', 'cli_sse_liveness_timeout')
  const controller = this.abortController
  if (controller) {
    controller.abort()
  }
  this.handleConnectionError()
}
/**
 * Restart the liveness countdown. If LIVENESS_TIMEOUT_MS passes without
 * another reset, onLivenessTimeout fires and the connection is treated as
 * dead.
 */
private resetLivenessTimer(): void {
  this.clearLivenessTimer()
  const timer = setTimeout(this.onLivenessTimeout, LIVENESS_TIMEOUT_MS)
  this.livenessTimer = timer
}
/** Cancel the pending liveness timer, if there is one. */
private clearLivenessTimer(): void {
  const timer = this.livenessTimer
  if (!timer) return
  clearTimeout(timer)
  this.livenessTimer = null
}
// -----------------------------------------------------------------------
// Write (HTTP POST) — same pattern as HybridTransport
// -----------------------------------------------------------------------
/**
 * POST one message to the events endpoint.
 *
 * Makes up to POST_MAX_RETRIES attempts with capped exponential backoff
 * between them. Network errors, 429 and 5xx responses are retried; any
 * other 4xx is treated as permanent. The method never throws on delivery
 * failure: it logs and resolves, so a failed POST does not interrupt the
 * caller. If no auth headers are available the message is skipped.
 *
 * @param message Message to send as the JSON request body.
 */
async write(message: StdoutMessage): Promise<void> {
  const authHeaders = this.getAuthHeaders()
  const hasAuth = Object.keys(authHeaders).length > 0
  if (!hasAuth) {
    logForDebugging('SSETransport: No session token available for POST')
    logForDiagnosticsNoPII('warn', 'cli_sse_post_no_token')
    return
  }

  const headers: Record<string, string> = {
    ...authHeaders,
    'Content-Type': 'application/json',
    'anthropic-version': '2023-06-01',
    'User-Agent': getClaudeCodeUserAgent(),
  }
  const bodyKeys = Object.keys(message as Record<string, unknown>).join(',')
  logForDebugging(`SSETransport: POST body keys=${bodyKeys}`)

  for (let attempt = 1; attempt <= POST_MAX_RETRIES; attempt++) {
    try {
      // alwaysValidStatus makes axios resolve for every HTTP status, so
      // only transport-level failures land in the catch below.
      const { status, data } = await axios.post(this.postUrl, message, {
        headers,
        validateStatus: alwaysValidStatus,
      })
      if (status === 200 || status === 201) {
        logForDebugging(`SSETransport: POST success type=${message.type}`)
        return
      }
      logForDebugging(
        `SSETransport: POST ${status} body=${jsonStringify(data).slice(0, 200)}`,
      )
      // 4xx errors (except 429) are permanent - don't retry
      const isPermanentClientError =
        status >= 400 && status < 500 && status !== 429
      if (isPermanentClientError) {
        logForDebugging(
          `SSETransport: POST returned ${status} (client error), not retrying`,
        )
        logForDiagnosticsNoPII('warn', 'cli_sse_post_client_error', {
          status,
        })
        return
      }
      // 429 or 5xx - retry
      logForDebugging(
        `SSETransport: POST returned ${status}, attempt ${attempt}/${POST_MAX_RETRIES}`,
      )
      logForDiagnosticsNoPII('warn', 'cli_sse_post_retryable_error', {
        status,
        attempt,
      })
    } catch (error) {
      const { message: reason } = error as AxiosError
      logForDebugging(
        `SSETransport: POST error: ${reason}, attempt ${attempt}/${POST_MAX_RETRIES}`,
      )
      logForDiagnosticsNoPII('warn', 'cli_sse_post_network_error', {
        attempt,
      })
    }

    const isLastAttempt = attempt === POST_MAX_RETRIES
    if (isLastAttempt) {
      logForDebugging(
        `SSETransport: POST failed after ${POST_MAX_RETRIES} attempts, continuing`,
      )
      logForDiagnosticsNoPII('warn', 'cli_sse_post_retries_exhausted')
      return
    }

    // Doubles per attempt, capped at POST_MAX_DELAY_MS.
    const backoffMs = POST_BASE_DELAY_MS * 2 ** (attempt - 1)
    await sleep(Math.min(backoffMs, POST_MAX_DELAY_MS))
  }
}
// -----------------------------------------------------------------------
// Transport interface
// -----------------------------------------------------------------------
/** @returns true only while the transport state is 'connected'. */
isConnectedStatus(): boolean {
return this.state === 'connected'
}
/**
 * @returns true only when the transport state is 'closed'. Note that close()
 * leaves the state at 'closing', for which this returns false.
 */
isClosedStatus(): boolean {
return this.state === 'closed'
}
/**
 * Register the data callback. handleSSEFrame() calls it with each forwarded
 * payload serialized as one newline-terminated JSON line. Replaces any
 * previously registered callback.
 */
setOnData(callback: (data: string) => void): void {
this.onData = callback
}
/**
 * Register the close callback. It is invoked with no argument when the
 * reconnect time budget is exhausted, and with a status code from connect().
 * Replaces any previously registered callback.
 */
setOnClose(callback: (closeCode?: number) => void): void {
this.onCloseCallback = callback
}
/**
 * Register the event callback. handleSSEFrame() calls it with every parsed
 * client_event, including ones whose payload was not forwarded to onData.
 * Replaces any previously registered callback.
 */
setOnEvent(callback: (event: StreamClientEvent) => void): void {
this.onEventCallback = callback
}
/**
 * Shut the transport down: cancel the pending reconnect and liveness
 * timers, mark the state as 'closing' (which stops further reconnects), and
 * abort any in-flight SSE request.
 */
close(): void {
  const pendingReconnect = this.reconnectTimer
  if (pendingReconnect) {
    clearTimeout(pendingReconnect)
    this.reconnectTimer = null
  }
  this.clearLivenessTimer()
  // Set before aborting so the abort is not mistaken for a dropped connection.
  this.state = 'closing'
  const controller = this.abortController
  if (controller) {
    controller.abort()
  }
  this.abortController = null
}
}
// ---------------------------------------------------------------------------
// URL Conversion
// ---------------------------------------------------------------------------
/**
 * Derive the HTTP POST endpoint from the SSE stream URL.
 *
 * Both share the same base; the POST endpoint is the stream URL without its
 * trailing `/stream` segment. Only protocol, host and path are carried over —
 * any query string or fragment on the input is not part of the result.
 *
 * From: https://api.example.com/v2/session_ingress/session/<session_id>/events/stream
 * To:   https://api.example.com/v2/session_ingress/session/<session_id>/events
 *
 * @param sseUrl URL of the SSE stream endpoint.
 * @returns The POST endpoint URL as a string.
 */
function convertSSEUrlToPostUrl(sseUrl: URL): string {
  const streamSuffix = '/stream'
  const { protocol, host, pathname } = sseUrl
  // Strip the suffix only when it is the final path segment.
  const postPath = pathname.endsWith(streamSuffix)
    ? pathname.slice(0, pathname.length - streamSuffix.length)
    : pathname
  return [protocol, '//', host, postPath].join('')
}

View file

@ -0,0 +1,275 @@
import { jsonStringify } from '../../utils/slowOperations.js'
/**
* Serial ordered event uploader with batching, retry, and backpressure.
*
* - enqueue() adds events to a pending buffer
* - At most 1 POST in-flight at a time
* - Drains up to maxBatchSize items per POST
* - New events accumulate while in-flight
* - On failure: exponential backoff (clamped), retries indefinitely
*   until success or close() — unless maxConsecutiveFailures is set,
*   in which case the failing batch is dropped and drain advances
* - flush() blocks until pending is empty and kicks drain if needed
* - Backpressure: enqueue() blocks when maxQueueSize is reached
*/
/**
 * Throw from config.send() to make the uploader wait a server-supplied
 * duration before retrying (e.g. 429 with Retry-After).
 *
 * When retryAfterMs is set, it overrides exponential backoff for that
 * attempt — clamped to [baseDelayMs, maxDelayMs] and jittered — so a
 * misbehaving server can neither hot-loop nor stall the client, and many
 * sessions sharing a rate limit don't all pounce at the same instant.
 * Without retryAfterMs, it behaves like any other thrown error
 * (exponential backoff).
 */
export class RetryableError extends Error {
  /** Server-suggested wait before the next attempt, in milliseconds. */
  readonly retryAfterMs?: number

  constructor(message: string, retryAfterMs?: number) {
    super(message)
    this.retryAfterMs = retryAfterMs
  }
}
/**
 * Construction-time settings for SerialBatchEventUploader. T is the event
 * type; the uploader hands batches of T to send() and never inspects them
 * beyond measuring their serialized size when maxBatchBytes is set.
 */
type SerialBatchEventUploaderConfig<T> = {
/** Max items per POST (1 = no batching) */
maxBatchSize: number
/**
* Max serialized bytes per POST. First item always goes in regardless of
* size; subsequent items only if cumulative JSON bytes stay under this.
* Undefined = no byte limit (count-only batching).
*/
maxBatchBytes?: number
/** Max pending items before enqueue() blocks */
maxQueueSize: number
/**
* The actual HTTP call — caller controls payload format. A rejection marks
* the batch as failed and triggers the retry/drop logic.
*/
send: (batch: T[]) => Promise<void>
/** Base delay for exponential backoff (ms) */
baseDelayMs: number
/** Max delay cap (ms) */
maxDelayMs: number
/** Random jitter range added to retry delay (ms) */
jitterMs: number
/**
* After this many consecutive send() failures, drop the failing batch
* and move on to the next pending item with a fresh failure budget.
* Undefined = retry indefinitely (default).
*/
maxConsecutiveFailures?: number
/** Called when a batch is dropped for hitting maxConsecutiveFailures. */
onBatchDropped?: (batchSize: number, failures: number) => void
}
/**
 * Serial, ordered event uploader with batching, retry and backpressure.
 *
 * Events are appended to a pending queue and sent by a single drain loop, so
 * at most one send() is in flight and batches go out in enqueue order. A
 * failed batch is put back at the front of the queue and retried after a
 * backoff delay, or dropped once maxConsecutiveFailures is reached.
 */
export class SerialBatchEventUploader<T> {
  /** Events accepted but not yet handed to send(), oldest first. */
  private pending: T[] = []
  /** Queue depth captured by close(); reported by pendingCount afterwards. */
  private pendingAtClose = 0
  /** True while the single drain loop is running. */
  private draining = false
  private closed = false
  /** Wakers for enqueue() callers blocked on backpressure. */
  private backpressureResolvers: Array<() => void> = []
  /** Ends the current retry sleep early (and cancels its timer), if any. */
  private sleepResolve: (() => void) | null = null
  /** Wakers for flush() callers. */
  private flushResolvers: Array<() => void> = []
  private droppedBatches = 0
  private readonly config: SerialBatchEventUploaderConfig<T>

  constructor(config: SerialBatchEventUploaderConfig<T>) {
    this.config = config
  }

  /**
   * Monotonic count of batches dropped via maxConsecutiveFailures. Callers
   * can snapshot before flush() and compare after to detect silent drops
   * (flush() resolves normally even when batches were dropped).
   */
  get droppedBatchCount(): number {
    return this.droppedBatches
  }

  /**
   * Pending queue depth. After close(), returns the count at close time —
   * close() clears the queue but shutdown diagnostics may read this after.
   */
  get pendingCount(): number {
    return this.closed ? this.pendingAtClose : this.pending.length
  }

  /**
   * Add events to the pending buffer. Returns immediately if space is
   * available. Blocks (awaits) if the buffer is full — the caller pauses
   * until drain frees space. A no-op after close() or for an empty array.
   *
   * A call carrying more items than maxQueueSize is admitted as soon as the
   * queue is empty; it could never fit otherwise and would block forever.
   *
   * @param events One event or an array of events, appended in order.
   */
  async enqueue(events: T | T[]): Promise<void> {
    if (this.closed) return
    const items = Array.isArray(events) ? events : [events]
    if (items.length === 0) return
    // Backpressure: wait until there's space. The `pending.length > 0` term
    // is what lets an oversized call through once the queue has emptied.
    while (
      !this.closed &&
      this.pending.length > 0 &&
      this.pending.length + items.length > this.config.maxQueueSize
    ) {
      await new Promise<void>(resolve => {
        this.backpressureResolvers.push(resolve)
      })
    }
    if (this.closed) return
    this.pending.push(...items)
    void this.drain()
  }

  /**
   * Block until all pending events have been sent (or dropped).
   * Used at turn boundaries and graceful shutdown. Resolves immediately
   * once the uploader is closed, since nothing more will be sent.
   */
  flush(): Promise<void> {
    if (this.closed) return Promise.resolve()
    if (this.pending.length === 0 && !this.draining) {
      return Promise.resolve()
    }
    void this.drain()
    return new Promise<void>(resolve => {
      this.flushResolvers.push(resolve)
    })
  }

  /**
   * Drop pending events and stop processing.
   * Resolves any blocked enqueue() and flush() callers and cuts short any
   * retry sleep. A send() that is already in flight is not cancelled, but
   * its outcome is ignored. Idempotent.
   */
  close(): void {
    if (this.closed) return
    this.closed = true
    this.pendingAtClose = this.pending.length
    this.pending = []
    this.sleepResolve?.()
    this.sleepResolve = null
    for (const resolve of this.backpressureResolvers) resolve()
    this.backpressureResolvers = []
    for (const resolve of this.flushResolvers) resolve()
    this.flushResolvers = []
  }

  /**
   * Drain loop. At most one instance runs at a time (guarded by
   * this.draining). Sends batches serially. On failure, backs off and
   * retries — indefinitely, unless maxConsecutiveFailures is configured, in
   * which case the failing batch is dropped and the loop moves on.
   */
  private async drain(): Promise<void> {
    if (this.draining || this.closed) return
    this.draining = true
    let failures = 0
    try {
      while (this.pending.length > 0 && !this.closed) {
        const batch = this.takeBatch()
        if (batch.length === 0) {
          // takeBatch() discarded un-serializable items and found nothing
          // to send. That still freed queue space, so wake blocked producers.
          this.releaseBackpressure()
          continue
        }
        try {
          await this.config.send(batch)
          failures = 0
        } catch (err) {
          // close() ran while this send was in flight. The queue has been
          // discarded, so do not resurrect the batch or sleep for a retry.
          if (this.closed) break
          failures++
          if (
            this.config.maxConsecutiveFailures !== undefined &&
            failures >= this.config.maxConsecutiveFailures
          ) {
            this.droppedBatches++
            this.config.onBatchDropped?.(batch.length, failures)
            failures = 0
            this.releaseBackpressure()
            continue
          }
          // Re-queue the failed batch at the front. Use concat (single
          // allocation) instead of unshift(...batch) which shifts every
          // pending item batch.length times. Only hit on failure path.
          this.pending = batch.concat(this.pending)
          const retryAfterMs =
            err instanceof RetryableError ? err.retryAfterMs : undefined
          await this.sleep(this.retryDelay(failures, retryAfterMs))
          continue
        }
        // Release backpressure waiters if space opened up
        this.releaseBackpressure()
      }
    } finally {
      this.draining = false
      // Notify flush waiters if queue is empty
      if (this.pending.length === 0) {
        for (const resolve of this.flushResolvers) resolve()
        this.flushResolvers = []
      }
    }
  }

  /**
   * Pull the next batch from pending. Respects both maxBatchSize and
   * maxBatchBytes. The first item is always taken; subsequent items only
   * if adding them keeps the cumulative JSON size under maxBatchBytes.
   *
   * Un-serializable items (BigInt, circular refs, throwing toJSON) are
   * dropped in place — they can never be sent and leaving them at
   * pending[0] would poison the queue and hang flush() forever.
   *
   * @returns The batch to send; empty only if every inspected item was
   * dropped as un-serializable.
   */
  private takeBatch(): T[] {
    const { maxBatchBytes } = this.config
    // A non-positive maxBatchSize would yield empty batches forever and
    // make drain() spin without ever yielding; always allow one item.
    const maxBatchSize = Math.max(1, this.config.maxBatchSize)
    if (maxBatchBytes === undefined) {
      return this.pending.splice(0, maxBatchSize)
    }
    let bytes = 0
    let count = 0
    while (count < this.pending.length && count < maxBatchSize) {
      let itemBytes: number
      try {
        itemBytes = Buffer.byteLength(jsonStringify(this.pending[count]))
      } catch {
        // Remove the bad item; `count` now points at its successor.
        this.pending.splice(count, 1)
        continue
      }
      if (count > 0 && bytes + itemBytes > maxBatchBytes) break
      bytes += itemBytes
      count++
    }
    return this.pending.splice(0, count)
  }

  /**
   * Compute the wait before the next retry.
   *
   * @param failures Consecutive failures so far (1 for the first retry).
   * @param retryAfterMs Optional server hint; replaces the exponential term.
   * @returns Delay in ms: the clamped base term plus up to jitterMs.
   */
  private retryDelay(failures: number, retryAfterMs?: number): number {
    const jitter = Math.random() * this.config.jitterMs
    if (retryAfterMs !== undefined) {
      // Jitter on top of the server's hint prevents thundering herd when
      // many sessions share a rate limit and all receive the same
      // Retry-After. Clamp first, then spread — same shape as the
      // exponential path (effective ceiling is maxDelayMs + jitterMs).
      const clamped = Math.max(
        this.config.baseDelayMs,
        Math.min(retryAfterMs, this.config.maxDelayMs),
      )
      return clamped + jitter
    }
    const exponential = Math.min(
      this.config.baseDelayMs * 2 ** (failures - 1),
      this.config.maxDelayMs,
    )
    return exponential + jitter
  }

  /** Wake every enqueue() caller blocked on backpressure so it re-checks. */
  private releaseBackpressure(): void {
    const resolvers = this.backpressureResolvers
    this.backpressureResolvers = []
    for (const resolve of resolvers) resolve()
  }

  /**
   * Sleep for `ms`, or until close() wakes us through sleepResolve.
   * An early wake also cancels the timer, so a closed uploader does not
   * keep the event loop alive for the remainder of the backoff.
   */
  private sleep(ms: number): Promise<void> {
    return new Promise<void>(resolve => {
      const timer = setTimeout(() => {
        this.sleepResolve = null
        resolve()
      }, ms)
      this.sleepResolve = () => {
        clearTimeout(timer)
        resolve()
      }
    })
  }
}

View file

@ -0,0 +1,800 @@
import type { StdoutMessage } from 'src/entrypoints/sdk/controlTypes.js'
import type WsWebSocket from 'ws'
import { logEvent } from '../../services/analytics/index.js'
import { CircularBuffer } from '../../utils/CircularBuffer.js'
import { logForDebugging } from '../../utils/debug.js'
import { logForDiagnosticsNoPII } from '../../utils/diagLogs.js'
import { isEnvTruthy } from '../../utils/envUtils.js'
import { getWebSocketTLSOptions } from '../../utils/mtls.js'
import {
getWebSocketProxyAgent,
getWebSocketProxyUrl,
} from '../../utils/proxy.js'
import {
registerSessionActivityCallback,
unregisterSessionActivityCallback,
} from '../../utils/sessionActivity.js'
import { jsonStringify } from '../../utils/slowOperations.js'
import type { Transport } from './Transport.js'
// Newline-terminated JSON keep_alive frame.
const KEEP_ALIVE_FRAME = '{"type":"keep_alive"}\n'
// Capacity passed to the CircularBuffer that holds messages for replay.
const DEFAULT_MAX_BUFFER_SIZE = 1000
// NOTE(review): presumably the first reconnect backoff delay in ms — its use
// is outside this view; confirm.
const DEFAULT_BASE_RECONNECT_DELAY = 1000
// NOTE(review): presumably the reconnect backoff cap in ms — confirm. The
// sleep-detection threshold below is derived from it.
const DEFAULT_MAX_RECONNECT_DELAY = 30000
/** Time budget for reconnection attempts before giving up (10 minutes). */
const DEFAULT_RECONNECT_GIVE_UP_MS = 600_000
// NOTE(review): presumably the ping period in ms — its use is outside this
// view; confirm.
const DEFAULT_PING_INTERVAL = 10000
const DEFAULT_KEEPALIVE_INTERVAL = 300_000 // 5 minutes
/**
* Threshold for detecting system sleep/wake. If the gap between consecutive
* reconnection attempts exceeds this, the machine likely slept. We reset
* the reconnection budget and retry — the server will reject with permanent
* close codes (4001/1002) if the session was reaped during sleep.
*/
const SLEEP_DETECTION_THRESHOLD_MS = DEFAULT_MAX_RECONNECT_DELAY * 2 // 60s
/**
* WebSocket close codes that indicate a permanent server-side rejection.
* The transport transitions to 'closed' immediately without retrying.
*/
const PERMANENT_CLOSE_CODES = new Set([
1002, // protocol error — server rejected handshake (e.g. session reaped)
4001, // session expired / not found
4003, // unauthorized
])
/** Optional behavior switches accepted by the WebSocketTransport constructor. */
export type WebSocketTransportOptions = {
/** When false, the transport does not attempt automatic reconnection on
* disconnect. Use this when the caller has its own recovery mechanism
* (e.g. the REPL bridge poll loop). Defaults to true. */
autoReconnect?: boolean
/** Gates the tengu_ws_transport_* telemetry events. Set true at the
* REPL-bridge construction site so only Remote Control sessions (the
* Cloudflare-idle-timeout population) emit; print-mode workers stay
* silent. Defaults to false. */
isBridge?: boolean
}
/**
 * Lifecycle states of the transport. 'idle' is the initial value;
 * 'reconnecting' is set by connect() for every connection attempt, the
 * first one included; 'connected' is set when the socket opens; 'closed'
 * is terminal. connect() only proceeds from 'idle' or 'reconnecting'.
 */
type WebSocketTransportState =
| 'idle'
| 'connected'
| 'reconnecting'
| 'closing'
| 'closed'
// Common interface between globalThis.WebSocket and ws.WebSocket.
// Only the members this transport calls are listed; ping is optional
// because it is not part of the DOM WebSocket typings.
type WebSocketLike = {
close(): void
send(data: string): void
ping?(): void // Bun & ws both support this
}
export class WebSocketTransport implements Transport {
private ws: WebSocketLike | null = null
private lastSentId: string | null = null
protected url: URL
protected state: WebSocketTransportState = 'idle'
protected onData?: (data: string) => void
private onCloseCallback?: (closeCode?: number) => void
private onConnectCallback?: () => void
private headers: Record<string, string>
private sessionId?: string
private autoReconnect: boolean
private isBridge: boolean
// Reconnection state
private reconnectAttempts = 0
private reconnectStartTime: number | null = null
private reconnectTimer: NodeJS.Timeout | null = null
private lastReconnectAttemptTime: number | null = null
// Wall-clock of last WS data-frame activity (inbound message or outbound
// ws.send). Used to compute idle time at close — the signal for diagnosing
// proxy idle-timeout RSTs (e.g. Cloudflare 5-min). Excludes ping/pong
// control frames (proxies don't count those).
private lastActivityTime = 0
// Ping interval for connection health checks
private pingInterval: NodeJS.Timeout | null = null
private pongReceived = true
// Periodic keep_alive data frames to reset proxy idle timers
private keepAliveInterval: NodeJS.Timeout | null = null
// Message buffering for replay on reconnection
private messageBuffer: CircularBuffer<StdoutMessage>
// Track which runtime's WS we're using so we can detach listeners
// with the matching API (removeEventListener vs. off).
private isBunWs = false
// Captured at connect() time for handleOpenEvent timing. Stored as an
// instance field so the onOpen handler can be a stable class-property
// arrow function (removable in doDisconnect) instead of a closure over
// a local variable.
private connectStartTime = 0
private refreshHeaders?: () => Record<string, string>
/**
 * @param url WebSocket endpoint to connect to.
 * @param headers Headers sent with the upgrade request.
 * @param sessionId Optional session identifier.
 * @param refreshHeaders Optional provider of fresh headers for reconnects.
 * @param options Behavior switches; see WebSocketTransportOptions.
 */
constructor(
  url: URL,
  headers: Record<string, string> = {},
  sessionId?: string,
  refreshHeaders?: () => Record<string, string>,
  options?: WebSocketTransportOptions,
) {
  // Resolve option defaults first.
  const autoReconnect = options?.autoReconnect ?? true
  const isBridge = options?.isBridge ?? false

  this.url = url
  this.headers = headers
  this.sessionId = sessionId
  this.refreshHeaders = refreshHeaders
  this.autoReconnect = autoReconnect
  this.isBridge = isBridge
  this.messageBuffer = new CircularBuffer(DEFAULT_MAX_BUFFER_SIZE)
}
/**
 * Open the WebSocket and attach the event handlers.
 *
 * Only proceeds from the 'idle' or 'reconnecting' state; otherwise it logs
 * and returns. Uses the native WebSocket when running under Bun and the
 * `ws` package (loaded lazily) otherwise. When a previous message id is
 * known it is sent as `X-Last-Request-Id` on the upgrade request.
 *
 * The returned promise resolves once the socket object has been created and
 * wired up, not once the connection is open — that is reported through the
 * open handler.
 */
public async connect(): Promise<void> {
  if (this.state !== 'idle' && this.state !== 'reconnecting') {
    logForDebugging(
      `WebSocketTransport: Cannot connect, current state is ${this.state}`,
      { level: 'error' },
    )
    logForDiagnosticsNoPII('error', 'cli_websocket_connect_failed')
    return
  }
  this.state = 'reconnecting'
  this.connectStartTime = Date.now()
  logForDebugging(`WebSocketTransport: Opening ${this.url.href}`)
  logForDiagnosticsNoPII('info', 'cli_websocket_connect_opening')
  // Start with provided headers and add runtime headers
  const headers = { ...this.headers }
  if (this.lastSentId) {
    headers['X-Last-Request-Id'] = this.lastSentId
    logForDebugging(
      `WebSocketTransport: Adding X-Last-Request-Id header: ${this.lastSentId}`,
    )
  }
  if (typeof Bun !== 'undefined') {
    // Bun's WebSocket supports headers/proxy options but the DOM typings don't
    // eslint-disable-next-line eslint-plugin-n/no-unsupported-features/node-builtins
    const ws = new globalThis.WebSocket(this.url.href, {
      headers,
      proxy: getWebSocketProxyUrl(this.url.href),
      tls: getWebSocketTLSOptions() || undefined,
    } as unknown as string[])
    this.ws = ws
    this.isBunWs = true
    ws.addEventListener('open', this.onBunOpen)
    ws.addEventListener('message', this.onBunMessage)
    ws.addEventListener('error', this.onBunError)
    // eslint-disable-next-line eslint-plugin-n/no-unsupported-features/node-builtins
    ws.addEventListener('close', this.onBunClose)
    // 'pong' is Bun-specific — not in DOM typings.
    ws.addEventListener('pong', this.onPong)
  } else {
    const { default: WS } = await import('ws')
    // The transport may have been closed while the dynamic import was
    // pending. Opening a socket now would leak it, and its open event would
    // flip a closed transport back to 'connected'. The cast undoes the
    // compiler's narrowing from the assignment above, which does not account
    // for the await.
    if ((this.state as WebSocketTransportState) !== 'reconnecting') {
      logForDebugging(
        'WebSocketTransport: State changed while loading ws, aborting connect',
      )
      return
    }
    const ws = new WS(this.url.href, {
      headers,
      agent: getWebSocketProxyAgent(this.url.href),
      ...getWebSocketTLSOptions(),
    })
    this.ws = ws
    this.isBunWs = false
    ws.on('open', this.onNodeOpen)
    ws.on('message', this.onNodeMessage)
    ws.on('error', this.onNodeError)
    ws.on('close', this.onNodeClose)
    ws.on('pong', this.onPong)
  }
}
// --- Bun (native WebSocket) event handlers ---
// Stored as class-property arrow functions so they can be removed in
// doDisconnect(). Without removal, each reconnect orphans the old WS
// object + its 5 closures until GC, which accumulates under network
// instability. Mirrors the pattern in src/utils/mcpWebSocketTransport.ts.
/** 'open' handler for Bun's native WebSocket. */
private onBunOpen = () => {
  this.handleOpenEvent()
  // Nothing was sent before, so there is nothing to replay.
  if (!this.lastSentId) return
  // Bun's WebSocket doesn't expose upgrade response headers,
  // so replay all buffered messages. The server deduplicates by UUID.
  this.replayBufferedMessages('')
}
/**
 * 'message' handler for Bun's native WebSocket: records the activity time
 * and forwards the message text to the data callback, if one is set.
 */
private onBunMessage = (event: MessageEvent) => {
  const { data } = event
  // Non-string frames are coerced to their string form.
  const message = typeof data === 'string' ? data : String(data)
  this.lastActivityTime = Date.now()
  logForDiagnosticsNoPII('info', 'cli_websocket_message_received', {
    length: message.length,
  })
  this.onData?.(message)
}
/**
 * 'error' handler for Bun's native WebSocket. Only logs: the close event
 * fires after error and is what calls handleConnectionError.
 */
private onBunError = () => {
  const asError = { level: 'error' } as const
  logForDebugging('WebSocketTransport: Error', asError)
  logForDiagnosticsNoPII('error', 'cli_websocket_connect_error')
}
/**
 * 'close' handler for Bun's native WebSocket: logs the close code (at error
 * level unless it is 1000 or 1001) and hands it to handleConnectionError.
 */
// eslint-disable-next-line eslint-plugin-n/no-unsupported-features/node-builtins
private onBunClose = (event: CloseEvent) => {
  const { code } = event
  const cleanClose = code === 1000 || code === 1001
  const logOptions = cleanClose ? undefined : { level: 'error' as const }
  logForDebugging(`WebSocketTransport: Closed: ${code}`, logOptions)
  logForDiagnosticsNoPII('error', 'cli_websocket_connect_closed')
  this.handleConnectionError(code)
}
// --- Node (ws package) event handlers ---
/**
 * 'open' handler for the `ws` package. If the upgrade headers carry
 * `x-last-request-id`, buffered messages are replayed relative to that id.
 */
private onNodeOpen = () => {
  // Capture ws before handleOpenEvent() invokes onConnectCallback — if the
  // callback synchronously closes the transport, this.ws becomes null.
  // The old inline-closure code had this safety implicitly via closure capture.
  const socket = this.ws
  this.handleOpenEvent()
  if (!socket) return
  // Check for last-id in upgrade response headers (ws package only)
  const nodeSocket = socket as unknown as WsWebSocket & {
    upgradeReq?: { headers?: Record<string, string> }
  }
  const serverLastId = nodeSocket.upgradeReq?.headers?.['x-last-request-id']
  if (serverLastId) {
    this.replayBufferedMessages(serverLastId)
  }
}
/**
 * 'message' handler for the `ws` package: records the activity time and
 * forwards the decoded message text to the data callback, if one is set.
 */
private onNodeMessage = (data: Buffer) => {
  const text = data.toString()
  this.lastActivityTime = Date.now()
  logForDiagnosticsNoPII('info', 'cli_websocket_message_received', {
    length: text.length,
  })
  this.onData?.(text)
}
/**
 * 'error' handler for the `ws` package. Only logs: the close event fires
 * after error and is what calls handleConnectionError.
 */
private onNodeError = (err: Error) => {
  const { message } = err
  logForDebugging(`WebSocketTransport: Error: ${message}`, {
    level: 'error',
  })
  logForDiagnosticsNoPII('error', 'cli_websocket_connect_error')
}
/**
 * 'close' handler for the `ws` package: logs the close code (at error level
 * unless it is 1000 or 1001) and hands it to handleConnectionError.
 */
private onNodeClose = (code: number, _reason: Buffer) => {
  const cleanClose = code === 1000 || code === 1001
  const logOptions = cleanClose ? undefined : { level: 'error' as const }
  logForDebugging(`WebSocketTransport: Closed: ${code}`, logOptions)
  logForDiagnosticsNoPII('error', 'cli_websocket_connect_closed')
  this.handleConnectionError(code)
}
// --- Shared handlers ---
// 'pong' handler shared by both runtimes: records that a pong arrived.
// NOTE(review): presumably read by the ping interval to detect a dead
// connection — that code is outside this view; confirm.
private onPong = () => {
this.pongReceived = true
}
/**
 * Shared bookkeeping for a successful open on either runtime.
 *
 * Logs the connect duration, emits the reconnect telemetry event for bridge
 * sessions, resets the reconnect state, marks the transport 'connected',
 * and notifies the connect callback. If the transport is still connected
 * after that callback, it starts the ping and keepalive intervals and
 * registers the session-activity callback.
 */
private handleOpenEvent(): void {
  const connectDuration = Date.now() - this.connectStartTime
  logForDebugging('WebSocketTransport: Connected')
  logForDiagnosticsNoPII('info', 'cli_websocket_connect_connected', {
    duration_ms: connectDuration,
  })
  // Reconnect success — capture attempt count + downtime before resetting.
  // reconnectStartTime is null on first connect, non-null on reopen.
  if (this.isBridge && this.reconnectStartTime !== null) {
    logEvent('tengu_ws_transport_reconnected', {
      attempts: this.reconnectAttempts,
      downtimeMs: Date.now() - this.reconnectStartTime,
    })
  }
  this.reconnectAttempts = 0
  this.reconnectStartTime = null
  this.lastReconnectAttemptTime = null
  this.lastActivityTime = Date.now()
  this.state = 'connected'
  this.onConnectCallback?.()
  // The connect callback may synchronously close the transport (or trigger
  // a disconnect). Disconnecting already stopped the timers and
  // unregistered the activity callback, so starting them now would leave
  // them running against a dead transport. The cast undoes the compiler's
  // narrowing from the assignment above, which ignores the callback.
  if ((this.state as WebSocketTransportState) !== 'connected') return
  // Start periodic pings to detect dead connections
  this.startPingInterval()
  // Start periodic keep_alive data frames to reset proxy idle timers
  this.startKeepaliveInterval()
  // Register callback for session activity signals
  registerSessionActivityCallback(() => {
    void this.write({ type: 'keep_alive' })
  })
}
/**
 * Send one already-serialized line over the socket.
 *
 * @param line Text to send as a single WebSocket message.
 * @returns true if the socket accepted the data; false if the transport is
 * not connected or send() threw. A throw is also treated as a connection
 * error.
 */
protected sendLine(line: string): boolean {
  const socket = this.ws
  const canSend = Boolean(socket) && this.state === 'connected'
  if (!socket || !canSend) {
    logForDebugging('WebSocketTransport: Not connected')
    logForDiagnosticsNoPII('info', 'cli_websocket_send_not_connected')
    return false
  }
  try {
    socket.send(line)
    this.lastActivityTime = Date.now()
  } catch (error) {
    logForDebugging(`WebSocketTransport: Failed to send: ${error}`, {
      level: 'error',
    })
    logForDiagnosticsNoPII('error', 'cli_websocket_send_error')
    // Don't null this.ws here — let doDisconnect() (via handleConnectionError)
    // handle cleanup so listeners are removed before the WS is released.
    this.handleConnectionError()
    return false
  }
  return true
}
/**
 * Detach every listener that connect() attached to the given WebSocket,
 * using the API that matches the runtime recorded in isBunWs
 * (removeEventListener for Bun's native socket, off() for the ws package).
 *
 * Without this, each reconnect orphans the old WS object + its closures
 * until GC — these accumulate under network instability. Mirrors the
 * pattern in src/utils/mcpWebSocketTransport.ts.
 *
 * @param ws The socket whose listeners should be removed.
 */
private removeWsListeners(ws: WebSocketLike): void {
  if (!this.isBunWs) {
    const nodeSocket = ws as unknown as WsWebSocket
    nodeSocket.off('open', this.onNodeOpen)
    nodeSocket.off('message', this.onNodeMessage)
    nodeSocket.off('error', this.onNodeError)
    nodeSocket.off('close', this.onNodeClose)
    nodeSocket.off('pong', this.onPong)
    return
  }
  const bunSocket = ws as unknown as globalThis.WebSocket
  bunSocket.removeEventListener('open', this.onBunOpen)
  bunSocket.removeEventListener('message', this.onBunMessage)
  bunSocket.removeEventListener('error', this.onBunError)
  // eslint-disable-next-line eslint-plugin-n/no-unsupported-features/node-builtins
  bunSocket.removeEventListener('close', this.onBunClose)
  // 'pong' is Bun-specific — not in DOM typings
  bunSocket.removeEventListener('pong' as 'message', this.onPong)
}
/**
 * Tear down the current socket: stop the ping and keepalive intervals,
 * unregister the session-activity callback, then detach listeners from the
 * socket, close it and drop the reference. Safe to call with no socket.
 */
protected doDisconnect(): void {
  // Stop pinging and keepalive when disconnecting
  this.stopPingInterval()
  this.stopKeepaliveInterval()
  // Unregister session activity callback
  unregisterSessionActivityCallback()

  const socket = this.ws
  if (!socket) return
  // Remove listeners BEFORE close() so the old WS + closures can be
  // GC'd promptly instead of lingering until the next mark-and-sweep.
  this.removeWsListeners(socket)
  socket.close()
  this.ws = null
}
/**
 * Central handler for a dropped or failed connection.
 *
 * Logs the disconnect, tears the socket down via doDisconnect(), then
 * decides what happens next:
 * - state is already 'closing' or 'closed': nothing further;
 * - the close code is in PERMANENT_CLOSE_CODES (and is not a 4003 whose
 *   Authorization header was just refreshed): state becomes 'closed' and
 *   onCloseCallback fires;
 * - autoReconnect is off: state becomes 'closed' and onCloseCallback fires;
 * - otherwise a reconnect is scheduled with exponential backoff and ±25%
 *   jitter while the elapsed time is under DEFAULT_RECONNECT_GIVE_UP_MS;
 *   once that budget is spent, state becomes 'closed' and onCloseCallback
 *   fires.
 *
 * @param closeCode WebSocket close code when one is known. Callers in this
 *   class that detect a dead link themselves (tick-gap detection, pong
 *   timeout, failed replay send) call this without a code.
 */
private handleConnectionError(closeCode?: number): void {
logForDebugging(
`WebSocketTransport: Disconnected from ${this.url.href}` +
(closeCode != null ? ` (code ${closeCode})` : ''),
)
logForDiagnosticsNoPII('info', 'cli_websocket_disconnected')
if (this.isBridge) {
// Fire on every close — including intermediate ones during a reconnect
// storm (those never surface to the onCloseCallback consumer). For the
// Cloudflare-5min-idle hypothesis: cluster msSinceLastActivity; if the
// peak sits at ~300s with closeCode 1006, that's the proxy RST.
logEvent('tengu_ws_transport_closed', {
closeCode,
msSinceLastActivity:
this.lastActivityTime > 0 ? Date.now() - this.lastActivityTime : -1,
// 'connected' = healthy drop (the Cloudflare case); 'reconnecting' =
// connect-rejection mid-storm. State isn't mutated until the branches
// below, so this reads the pre-close value.
wasConnected: this.state === 'connected',
reconnectAttempts: this.reconnectAttempts,
})
}
// Socket, timers and the activity callback go away regardless of which
// branch below is taken.
this.doDisconnect()
// An explicit close() (state 'closing') or an earlier terminal outcome
// ('closed') must not trigger a reconnect.
if (this.state === 'closing' || this.state === 'closed') return
// Permanent codes: don't retry — server has definitively ended the session.
// Exception: 4003 (unauthorized) can be retried when refreshHeaders is
// available and returns a new token (e.g. after the parent process mints
// a fresh session ingress token during reconnection).
let headersRefreshed = false
if (closeCode === 4003 && this.refreshHeaders) {
const freshHeaders = this.refreshHeaders()
// Only a changed Authorization value counts as a refresh.
if (freshHeaders.Authorization !== this.headers.Authorization) {
Object.assign(this.headers, freshHeaders)
headersRefreshed = true
logForDebugging(
'WebSocketTransport: 4003 received but headers refreshed, scheduling reconnect',
)
logForDiagnosticsNoPII('info', 'cli_websocket_4003_token_refreshed')
}
}
if (
closeCode != null &&
PERMANENT_CLOSE_CODES.has(closeCode) &&
!headersRefreshed
) {
logForDebugging(
`WebSocketTransport: Permanent close code ${closeCode}, not reconnecting`,
{ level: 'error' },
)
logForDiagnosticsNoPII('error', 'cli_websocket_permanent_close', {
closeCode,
})
this.state = 'closed'
this.onCloseCallback?.(closeCode)
return
}
// When autoReconnect is disabled, go straight to closed state.
// The caller (e.g. REPL bridge poll loop) handles recovery.
if (!this.autoReconnect) {
this.state = 'closed'
this.onCloseCallback?.(closeCode)
return
}
// Schedule reconnection with exponential backoff and time budget
const now = Date.now()
// The budget clock starts at the first failure of this outage.
if (!this.reconnectStartTime) {
this.reconnectStartTime = now
}
// Detect system sleep/wake: if the gap since our last reconnection
// attempt greatly exceeds the max delay, the machine likely slept
// (e.g. laptop lid closed). Reset the budget and retry from scratch —
// the server will reject with permanent close codes (4001/1002) if
// the session was reaped while we were asleep.
if (
this.lastReconnectAttemptTime !== null &&
now - this.lastReconnectAttemptTime > SLEEP_DETECTION_THRESHOLD_MS
) {
logForDebugging(
`WebSocketTransport: Detected system sleep (${Math.round((now - this.lastReconnectAttemptTime) / 1000)}s gap), resetting reconnection budget`,
)
logForDiagnosticsNoPII('info', 'cli_websocket_sleep_detected', {
gapMs: now - this.lastReconnectAttemptTime,
})
this.reconnectStartTime = now
this.reconnectAttempts = 0
}
this.lastReconnectAttemptTime = now
const elapsed = now - this.reconnectStartTime
if (elapsed < DEFAULT_RECONNECT_GIVE_UP_MS) {
// Clear any existing reconnection timer to avoid duplicates
if (this.reconnectTimer) {
clearTimeout(this.reconnectTimer)
this.reconnectTimer = null
}
// Refresh headers before reconnecting (e.g. to pick up a new session token).
// Skip if already refreshed by the 4003 path above.
if (!headersRefreshed && this.refreshHeaders) {
const freshHeaders = this.refreshHeaders()
Object.assign(this.headers, freshHeaders)
logForDebugging('WebSocketTransport: Refreshed headers for reconnect')
}
this.state = 'reconnecting'
this.reconnectAttempts++
// Delay doubles per attempt, starting at DEFAULT_BASE_RECONNECT_DELAY and
// capped at DEFAULT_MAX_RECONNECT_DELAY.
const baseDelay = Math.min(
DEFAULT_BASE_RECONNECT_DELAY * Math.pow(2, this.reconnectAttempts - 1),
DEFAULT_MAX_RECONNECT_DELAY,
)
// Add ±25% jitter to avoid thundering herd
const delay = Math.max(
0,
baseDelay + baseDelay * 0.25 * (2 * Math.random() - 1),
)
logForDebugging(
`WebSocketTransport: Reconnecting in ${Math.round(delay)}ms (attempt ${this.reconnectAttempts}, ${Math.round(elapsed / 1000)}s elapsed)`,
)
logForDiagnosticsNoPII('error', 'cli_websocket_reconnect_attempt', {
reconnectAttempts: this.reconnectAttempts,
})
if (this.isBridge) {
logEvent('tengu_ws_transport_reconnecting', {
attempt: this.reconnectAttempts,
elapsedMs: elapsed,
delayMs: Math.round(delay),
})
}
this.reconnectTimer = setTimeout(() => {
this.reconnectTimer = null
void this.connect()
}, delay)
} else {
logForDebugging(
`WebSocketTransport: Reconnection time budget exhausted after ${Math.round(elapsed / 1000)}s for ${this.url.href}`,
{ level: 'error' },
)
logForDiagnosticsNoPII('error', 'cli_websocket_reconnect_exhausted', {
reconnectAttempts: this.reconnectAttempts,
elapsedMs: elapsed,
})
this.state = 'closed'
// Notify close callback
if (this.onCloseCallback) {
this.onCloseCallback(closeCode)
}
}
}
/**
 * Explicitly shut the transport down.
 *
 * Cancels a scheduled reconnect, stops the ping and keepalive timers,
 * unregisters the session activity callback, marks the state as 'closing'
 * and disconnects the socket. handleConnectionError() returns early while
 * the state is 'closing', so no reconnect is scheduled afterwards.
 */
close(): void {
  // A reconnect that is already scheduled must not fire after close().
  const pendingReconnect = this.reconnectTimer
  if (pendingReconnect) {
    clearTimeout(pendingReconnect)
    this.reconnectTimer = null
  }

  // Ping and keepalive intervals.
  this.stopPingInterval()
  this.stopKeepaliveInterval()

  // Session activity callback.
  unregisterSessionActivityCallback()

  this.state = 'closing'
  this.doDisconnect()
}
/**
 * Re-send buffered messages after a (re)connect.
 *
 * When `lastId` matches the uuid of a buffered message, that message and
 * everything before it are treated as confirmed by the server and evicted
 * from the buffer; only the messages after it are replayed. When `lastId` is
 * empty or not found, the whole buffer is replayed.
 *
 * Replayed messages stay in the buffer: they are only evicted once a later
 * reconnection confirms them, so a drop right after replay loses nothing.
 *
 * @param lastId uuid of the last message the server reports having received.
 */
private replayBufferedMessages(lastId: string): void {
  const buffered = this.messageBuffer.toArray()
  if (buffered.length === 0) return

  // Index of the first message the server has not confirmed.
  let replayFrom = 0
  const confirmedAt = lastId
    ? buffered.findIndex(
        message => 'uuid' in message && message.uuid === lastId,
      )
    : -1

  if (confirmedAt >= 0) {
    // Everything up to and including confirmedAt is confirmed — evict it
    // and keep only the unconfirmed tail in the buffer.
    replayFrom = confirmedAt + 1
    const unconfirmed = buffered.slice(replayFrom)
    this.messageBuffer.clear()
    this.messageBuffer.addAll(unconfirmed)
    if (unconfirmed.length === 0) {
      this.lastSentId = null
    }
    logForDebugging(
      `WebSocketTransport: Evicted ${replayFrom} confirmed messages, ${unconfirmed.length} remaining`,
    )
    logForDiagnosticsNoPII(
      'info',
      'cli_websocket_evicted_confirmed_messages',
      {
        evicted: replayFrom,
        remaining: unconfirmed.length,
      },
    )
  }

  const toReplay = buffered.slice(replayFrom)
  if (toReplay.length === 0) {
    logForDebugging('WebSocketTransport: No new messages to replay')
    logForDiagnosticsNoPII('info', 'cli_websocket_no_messages_to_replay')
    return
  }

  logForDebugging(
    `WebSocketTransport: Replaying ${toReplay.length} buffered messages`,
  )
  logForDiagnosticsNoPII('info', 'cli_websocket_messages_to_replay', {
    count: toReplay.length,
  })

  for (const message of toReplay) {
    const sent = this.sendLine(jsonStringify(message) + '\n')
    if (sent) continue
    // A failed send means the link is gone again; hand over to the
    // connection-error path and stop replaying.
    this.handleConnectionError()
    break
  }
  // The buffer is deliberately NOT cleared here — see the method comment.
}
/** @returns true only while the transport state is 'connected'. */
isConnectedStatus(): boolean {
  const { state } = this
  return state === 'connected'
}
/**
 * @returns true only when the transport state is 'closed'. The intermediate
 *   'closing' state does not count.
 */
isClosedStatus(): boolean {
  const { state } = this
  return state === 'closed'
}
/**
 * Store `callback` as the transport's data handler, replacing any previous
 * one.
 *
 * @param callback Receives a string; presumably the incoming payload — the
 *   call site is outside this view, confirm there.
 */
setOnData(callback: (data: string) => void): void {
this.onData = callback
}
/**
 * Store `callback` as the transport's connect handler, replacing any
 * previous one.
 *
 * @param callback Zero-argument handler; presumably invoked once a
 *   connection is established — the call site is outside this view.
 */
setOnConnect(callback: () => void): void {
this.onConnectCallback = callback
}
/**
 * Store `callback` as the transport's close handler, replacing any previous
 * one.
 *
 * handleConnectionError() invokes it when the transport ends up in the
 * 'closed' state (permanent close code, autoReconnect disabled, or the
 * reconnect budget exhausted).
 *
 * @param callback Receives the close code when one is known.
 */
setOnClose(callback: (closeCode?: number) => void): void {
this.onCloseCallback = callback
}
/** @returns the current transport state as a plain string. */
getStateLabel(): string {
  const { state } = this
  return state
}
/**
 * Send one message over the socket, buffering it for replay when it carries
 * a string `uuid`.
 *
 * While the transport is not 'connected' nothing is sent: a message with a
 * uuid waits in the buffer for replayBufferedMessages(); a message without
 * one is dropped.
 *
 * @param message Message to serialize as one newline-terminated JSON line.
 */
async write(message: StdoutMessage): Promise<void> {
  // Only uuid-bearing messages are retained for replay.
  if ('uuid' in message && typeof message.uuid === 'string') {
    this.messageBuffer.add(message)
    this.lastSentId = message.uuid
  }

  const serialized = jsonStringify(message) + '\n'

  if (this.state === 'connected') {
    const sessionPart = this.sessionId ? ` session=${this.sessionId}` : ''
    const detailPart = this.getControlMessageDetailLabel(message)
    logForDebugging(
      `WebSocketTransport: Sending message type=${message.type}${sessionPart}${detailPart}`,
    )
    this.sendLine(serialized)
  }
}
/**
 * Build the debug-log suffix describing a control message.
 *
 * @returns ` subtype=… request_id=…` for control requests and responses
 *   (plus ` tool=…` for a `can_use_tool` request with a non-empty tool
 *   name), or an empty string for every other message type.
 */
private getControlMessageDetailLabel(message: StdoutMessage): string {
  switch (message.type) {
    case 'control_request': {
      const { request_id, request } = message
      const toolPart =
        request.subtype === 'can_use_tool' && request.tool_name
          ? ` tool=${request.tool_name}`
          : ''
      return ` subtype=${request.subtype} request_id=${request_id}${toolPart}`
    }
    case 'control_response': {
      const { subtype, request_id } = message.response
      return ` subtype=${subtype} request_id=${request_id}`
    }
    default:
      return ''
  }
}
/**
 * (Re)start the ping timer used to detect dead connections.
 *
 * Every DEFAULT_PING_INTERVAL ms, while the transport is 'connected' and has
 * a socket, the tick:
 * 1. forces a reconnect if the wall-clock gap since the previous tick exceeds
 *    SLEEP_DETECTION_THRESHOLD_MS (the process was suspended);
 * 2. forces a reconnect if the previous ping never received a pong;
 * 3. otherwise clears `pongReceived` and sends a new ping.
 *
 * `pongReceived` starts out true so the first tick always pings.
 */
private startPingInterval(): void {
// Clear any existing interval
this.stopPingInterval()
this.pongReceived = true
// Wall-clock time of the previous tick, captured by the timer closure.
let lastTickTime = Date.now()
// Send ping periodically to detect dead connections.
// If the previous ping got no pong, treat the connection as dead.
this.pingInterval = setInterval(() => {
if (this.state === 'connected' && this.ws) {
const now = Date.now()
const gap = now - lastTickTime
lastTickTime = now
// Process-suspension detector. If the wall-clock gap between ticks
// greatly exceeds the 10s interval, the process was suspended
// (laptop lid, SIGSTOP, VM pause). setInterval does not queue
// missed ticks — it coalesces — so on wake this callback fires
// once with a huge gap. The socket is almost certainly dead:
// NAT mappings drop in 30s–5min, and the server has been
// retransmitting into the void. Don't wait for a ping/pong
// round-trip to confirm (ws.ping() on a dead socket returns
// immediately with no error — bytes go into the kernel send
// buffer). Assume dead and reconnect now. A spurious reconnect
// after a short sleep is cheap — replayBufferedMessages() handles
// it and the server dedups by UUID.
if (gap > SLEEP_DETECTION_THRESHOLD_MS) {
logForDebugging(
`WebSocketTransport: ${Math.round(gap / 1000)}s tick gap detected — process was suspended, forcing reconnect`,
)
logForDiagnosticsNoPII(
'info',
'cli_websocket_sleep_detected_on_ping',
{ gapMs: gap },
)
this.handleConnectionError()
return
}
// The ping sent on the previous tick was never answered.
if (!this.pongReceived) {
logForDebugging(
'WebSocketTransport: No pong received, connection appears dead',
{ level: 'error' },
)
logForDiagnosticsNoPII('error', 'cli_websocket_pong_timeout')
this.handleConnectionError()
return
}
// Arm the check for the next tick, then ping. ping is called optionally:
// a socket object without a ping method is skipped silently.
this.pongReceived = false
try {
this.ws.ping?.()
} catch (error) {
// A failed ping is only logged; the missing pong is caught next tick.
logForDebugging(`WebSocketTransport: Ping failed: ${error}`, {
level: 'error',
})
logForDiagnosticsNoPII('error', 'cli_websocket_ping_failed')
}
}
}, DEFAULT_PING_INTERVAL)
}
/** Cancel the ping timer if one is running; otherwise do nothing. */
private stopPingInterval(): void {
  if (!this.pingInterval) return
  clearInterval(this.pingInterval)
  this.pingInterval = null
}
/**
 * (Re)start the periodic keep-alive data frame.
 *
 * Any previous timer is stopped first. When CLAUDE_CODE_REMOTE is truthy no
 * timer is started: in CCR sessions the session activity heartbeats handle
 * keep-alives. Otherwise, every DEFAULT_KEEPALIVE_INTERVAL ms while
 * connected, KEEP_ALIVE_FRAME is sent and `lastActivityTime` is updated. A
 * failed send is logged and otherwise ignored.
 */
private startKeepaliveInterval(): void {
  this.stopKeepaliveInterval()

  // CCR sessions rely on session activity heartbeats instead.
  if (isEnvTruthy(process.env.CLAUDE_CODE_REMOTE)) return

  const sendKeepAlive = (): void => {
    if (this.state !== 'connected' || !this.ws) return
    try {
      this.ws.send(KEEP_ALIVE_FRAME)
      this.lastActivityTime = Date.now()
      logForDebugging(
        'WebSocketTransport: Sent periodic keep_alive data frame',
      )
    } catch (error) {
      logForDebugging(
        `WebSocketTransport: Periodic keep_alive failed: ${error}`,
        { level: 'error' },
      )
      logForDiagnosticsNoPII('error', 'cli_websocket_keepalive_failed')
    }
  }

  this.keepAliveInterval = setInterval(
    sendKeepAlive,
    DEFAULT_KEEPALIVE_INTERVAL,
  )
}
/** Cancel the keep-alive timer if one is running; otherwise do nothing. */
private stopKeepaliveInterval(): void {
  if (!this.keepAliveInterval) return
  clearInterval(this.keepAliveInterval)
  this.keepAliveInterval = null
}
}

View file

@ -0,0 +1,131 @@
import { sleep } from '../../utils/sleep.js'
/**
 * Coalescing uploader for PUT /worker (session state + metadata).
 *
 * - 1 in-flight PUT + 1 pending patch
 * - New calls coalesce into pending (never grows beyond 1 slot)
 * - On success: send pending if exists
 * - On failure: exponential backoff (clamped), retries indefinitely
 *   until success or close(). Absorbs any pending patches before each retry.
 * - No backpressure needed — naturally bounded at 2 slots
 *
 * Coalescing rules:
 * - Top-level keys (worker_status, external_metadata) — last value wins
 * - Inside external_metadata / internal_metadata — RFC 7396 merge:
 *   keys are added/overwritten, null values preserved (server deletes)
 */
type WorkerStateUploaderConfig = {
/**
 * Performs one upload attempt with the (possibly coalesced) patch body.
 * Resolving to true counts as success; false makes the uploader back off
 * and retry.
 */
send: (body: Record<string, unknown>) => Promise<boolean>
/** Base delay for exponential backoff (ms) */
baseDelayMs: number
/** Max delay cap (ms) */
maxDelayMs: number
/** Random jitter range added to retry delay (ms) */
jitterMs: number
}
/**
 * Serializes PUT /worker uploads: at most one request is in flight and at
 * most one coalesced patch is waiting behind it.
 *
 * A failed attempt — `send` resolving to false, rejecting, or throwing — is
 * retried with clamped exponential backoff until it succeeds or close() is
 * called. Patches enqueued while a retry is pending are folded into the
 * payload before the next attempt.
 */
export class WorkerStateUploader {
  /** The running upload chain, or null when idle. */
  private inflight: Promise<void> | null = null
  /** Coalesced patch waiting for the in-flight upload to finish. */
  private pending: Record<string, unknown> | null = null
  private closed = false
  private readonly config: WorkerStateUploaderConfig

  constructor(config: WorkerStateUploaderConfig) {
    this.config = config
  }

  /**
   * Enqueue a patch to PUT /worker. Coalesces with any existing pending
   * patch. Fire-and-forget — callers don't need to await. Ignored after
   * close().
   */
  enqueue(patch: Record<string, unknown>): void {
    if (this.closed) return
    this.pending = this.pending ? coalescePatches(this.pending, patch) : patch
    void this.drain()
  }

  /**
   * Stop the uploader: drops the pending patch and makes the retry loop exit
   * at its next check. An attempt that is already awaiting `send` is not
   * cancelled.
   */
  close(): void {
    this.closed = true
    this.pending = null
  }

  /** Start an upload for the pending patch unless one is already running. */
  private async drain(): Promise<void> {
    if (this.inflight || this.closed) return
    if (!this.pending) return
    const payload = this.pending
    this.pending = null
    this.inflight = this.sendWithRetry(payload).then(() => {
      this.inflight = null
      // Anything enqueued while we were sending goes out next.
      if (this.pending && !this.closed) {
        void this.drain()
      }
    })
  }

  /** Retries indefinitely with exponential backoff until success or close(). */
  private async sendWithRetry(payload: Record<string, unknown>): Promise<void> {
    let current = payload
    let failures = 0
    while (!this.closed) {
      let ok: boolean
      try {
        ok = await this.config.send(current)
      } catch {
        // A rejected or throwing send is just another failed attempt. Letting
        // it escape would reject the inflight chain, leave `inflight` set
        // forever, and silently wedge every later enqueue().
        ok = false
      }
      if (ok) return
      failures++
      await sleep(this.retryDelay(failures))
      // Absorb any patches that arrived during the retry
      if (this.pending && !this.closed) {
        current = coalescePatches(current, this.pending)
        this.pending = null
      }
    }
  }

  /**
   * Backoff for the nth consecutive failure: baseDelayMs doubled per failure,
   * capped at maxDelayMs, plus a random jitter in [0, jitterMs).
   */
  private retryDelay(failures: number): number {
    const exponential = Math.min(
      this.config.baseDelayMs * 2 ** (failures - 1),
      this.config.maxDelayMs,
    )
    const jitter = Math.random() * this.config.jitterMs
    return exponential + jitter
  }
}
/**
 * Fold `overlay` into `base` and return the result as a new object; neither
 * input is mutated.
 *
 * - Ordinary top-level keys: the overlay value replaces the base value.
 * - `external_metadata` / `internal_metadata`: when both sides hold an
 *   object, the two are merged one level deep (RFC 7396 style) — overlay
 *   keys are added or overwritten, and null values are kept so the server
 *   can interpret them as deletes. In every other case the overlay value
 *   simply replaces the base value.
 */
function coalescePatches(
  base: Record<string, unknown>,
  overlay: Record<string, unknown>,
): Record<string, unknown> {
  const result: Record<string, unknown> = { ...base }

  Object.keys(overlay).forEach(name => {
    const incoming = overlay[name]
    const existing = result[name]

    const isMetadataKey =
      name === 'external_metadata' || name === 'internal_metadata'
    const bothObjects =
      Boolean(existing) &&
      typeof existing === 'object' &&
      typeof incoming === 'object' &&
      incoming !== null

    // One-level merge for metadata; last value wins for everything else.
    result[name] =
      isMetadataKey && bothObjects
        ? {
            ...(existing as Record<string, unknown>),
            ...(incoming as Record<string, unknown>),
          }
        : incoming
  })

  return result
}

998
cli/transports/ccrClient.ts Normal file
View file

@ -0,0 +1,998 @@
import { randomUUID } from 'crypto'
import type {
SDKPartialAssistantMessage,
StdoutMessage,
} from 'src/entrypoints/sdk/controlTypes.js'
import { decodeJwtExpiry } from '../../bridge/jwtUtils.js'
import { logForDebugging } from '../../utils/debug.js'
import { logForDiagnosticsNoPII } from '../../utils/diagLogs.js'
import { errorMessage, getErrnoCode } from '../../utils/errors.js'
import { createAxiosInstance } from '../../utils/proxy.js'
import {
registerSessionActivityCallback,
unregisterSessionActivityCallback,
} from '../../utils/sessionActivity.js'
import {
getSessionIngressAuthHeaders,
getSessionIngressAuthToken,
} from '../../utils/sessionIngressAuth.js'
import type {
RequiresActionDetails,
SessionState,
} from '../../utils/sessionState.js'
import { sleep } from '../../utils/sleep.js'
import { getClaudeCodeUserAgent } from '../../utils/userAgent.js'
import {
RetryableError,
SerialBatchEventUploader,
} from './SerialBatchEventUploader.js'
import type { SSETransport, StreamClientEvent } from './SSETransport.js'
import { WorkerStateUploader } from './WorkerStateUploader.js'
/** Default interval between heartbeat events (20s; server TTL is 60s). */
const DEFAULT_HEARTBEAT_INTERVAL_MS = 20_000
/**
 * stream_event messages accumulate in a delay buffer for up to this many ms
 * before enqueue. Mirrors HybridTransport's batching window. text_delta
 * events for the same content block accumulate into a single full-so-far
 * snapshot per flush — each emitted event is self-contained so a client
 * connecting mid-stream sees complete text, not a fragment.
 */
const STREAM_EVENT_FLUSH_INTERVAL_MS = 100
/**
 * axios `validateStatus` callback that accepts every HTTP status, so
 * non-2xx responses are returned to the caller instead of being thrown.
 * Hoisted to module scope to avoid allocating a closure per request.
 */
function alwaysValidStatus(): boolean {
return true
}
/** Why CCRClient.initialize() gave up. */
export type CCRInitFailReason =
  | 'no_auth_headers'
  | 'missing_epoch'
  | 'worker_register_failed'

/**
 * Error thrown by initialize(). The typed `reason` lets the diagnostics
 * classifier distinguish failure modes without parsing the message.
 */
export class CCRInitError extends Error {
  /** Machine-readable failure reason; also embedded in the message. */
  readonly reason: CCRInitFailReason

  constructor(reason: CCRInitFailReason) {
    super(`CCRClient init failed: ${reason}`)
    this.reason = reason
  }
}
/**
 * Consecutive 401/403 with a VALID-LOOKING token before giving up. An
 * expired JWT short-circuits this (exits immediately — deterministic,
 * retry is futile). This threshold is for the uncertain case: token's
 * exp is in the future but server says 401 (userauth down, KMS hiccup,
 * clock skew). 10 × 20s heartbeat ≈ 200s to ride it out.
 */
const MAX_CONSECUTIVE_AUTH_FAILURES = 10
/**
 * Minimal shape of an event payload: a uuid, a type tag, and any further
 * fields.
 */
type EventPayload = {
uuid: string
type: string
[key: string]: unknown
}
/** One entry of the client-event upload queue (POST /worker/events). */
type ClientEvent = {
payload: EventPayload
// NOTE(review): not read in the visible part of this file; presumably marks
// events the server should not persist — confirm against the API.
ephemeral?: boolean
}
/**
 * Structural subset of a stream_event carrying a text_delta. Not a narrowing
 * of SDKPartialAssistantMessage — RawMessageStreamEvent's delta is a union and
 * narrowing through two levels defeats the discriminant.
 *
 * accumulateStreamEvents() builds values of this type for its full-so-far
 * text snapshots.
 */
type CoalescedStreamEvent = {
type: 'stream_event'
uuid: string
session_id: string
parent_tool_use_id: string | null
event: {
type: 'content_block_delta'
index: number
delta: { type: 'text_delta'; text: string }
}
}
/**
 * Accumulator state for text_delta coalescing. Keyed by API message ID so
 * lifetime is tied to the assistant message — cleared when the complete
 * SDKAssistantMessage arrives (writeEvent), which is reliable even when
 * abort/error paths skip content_block_stop/message_stop delivery.
 */
export type StreamAccumulatorState = {
/** API message ID (msg_...) → blocks[blockIndex] → chunk array. */
byMessage: Map<string, string[][]>
/**
 * {session_id}:{parent_tool_use_id} → active message ID.
 * content_block_delta events don't carry the message ID (only
 * message_start does), so we track which message is currently streaming
 * for each scope. At most one message streams per scope at a time.
 */
scopeToMessage: Map<string, string>
}
/** Create an empty accumulator: no tracked messages and no active scopes. */
export function createStreamAccumulator(): StreamAccumulatorState {
  const byMessage = new Map<string, string[][]>()
  const scopeToMessage = new Map<string, string>()
  return { byMessage, scopeToMessage }
}
/**
 * Key identifying the streaming scope of a message:
 * `{session_id}:{parent_tool_use_id}`, with a null parent rendered as an
 * empty string.
 */
function scopeKey(m: {
  session_id: string
  parent_tool_use_id: string | null
}): string {
  const parent = m.parent_tool_use_id ?? ''
  return `${m.session_id}:${parent}`
}
/**
 * Turn one flush window of stream_events into the events to upload,
 * coalescing text_delta events into full-so-far snapshots.
 *
 * - `message_start` records the message as the active one for its scope
 *   (dropping the chunks of a message it supersedes) and passes through.
 * - A `content_block_delta` carrying a `text_delta` appends its text to the
 *   chunk list of that block. Each block touched in this flush contributes
 *   ONE output event holding the complete text from the start of the block,
 *   so a client connecting mid-stream gets a self-contained snapshot. The
 *   snapshot reuses the uuid of the first text_delta seen for that block in
 *   this flush, keeping server-side idempotency stable across retries.
 * - A text_delta with no known active message (no message_start seen for
 *   its scope) passes through unchanged — there is nothing to build a
 *   snapshot from.
 * - Every other event passes through unchanged.
 *
 * Accumulated chunks are not released here: cleanup happens in writeEvent
 * when the complete assistant message arrives, because abort/error paths can
 * skip the stop events.
 *
 * @param buffer Events collected during the flush window, in order.
 * @param state Accumulator shared across flushes; mutated in place.
 * @returns Events to enqueue, in order.
 */
export function accumulateStreamEvents(
  buffer: SDKPartialAssistantMessage[],
  state: StreamAccumulatorState,
): EventPayload[] {
  const emitted: EventPayload[] = []
  // chunk list → snapshot already emitted during this flush. The chunk list
  // reference is stable per {messageId, blockIndex}, so later deltas rewrite
  // the existing snapshot instead of adding one event per delta.
  const snapshotsThisFlush = new Map<string[], CoalescedStreamEvent>()

  for (const msg of buffer) {
    if (msg.event.type === 'message_start') {
      const startedId = msg.event.message.id
      const scope = scopeKey(msg)
      const supersededId = state.scopeToMessage.get(scope)
      if (supersededId) state.byMessage.delete(supersededId)
      state.scopeToMessage.set(scope, startedId)
      state.byMessage.set(startedId, [])
      emitted.push(msg)
      continue
    }

    if (msg.event.type !== 'content_block_delta') {
      emitted.push(msg)
      continue
    }

    if (msg.event.delta.type !== 'text_delta') {
      emitted.push(msg)
      continue
    }

    const activeId = state.scopeToMessage.get(scopeKey(msg))
    const blocks = activeId ? state.byMessage.get(activeId) : undefined
    if (!blocks) {
      // No message_start on record (mid-stream reconnect, or it sat in an
      // earlier buffer that was dropped): forward the raw delta.
      emitted.push(msg)
      continue
    }

    const chunks = (blocks[msg.event.index] ??= [])
    chunks.push(msg.event.delta.text)

    const priorSnapshot = snapshotsThisFlush.get(chunks)
    if (priorSnapshot) {
      priorSnapshot.event.delta.text = chunks.join('')
      continue
    }

    const snapshot: CoalescedStreamEvent = {
      type: 'stream_event',
      uuid: msg.uuid,
      session_id: msg.session_id,
      parent_tool_use_id: msg.parent_tool_use_id,
      event: {
        type: 'content_block_delta',
        index: msg.event.index,
        delta: { type: 'text_delta', text: chunks.join('') },
      },
    }
    snapshotsThisFlush.set(chunks, snapshot)
    emitted.push(snapshot)
  }

  return emitted
}
/**
 * Release the accumulator entries of a finished assistant message.
 *
 * Called from writeEvent when the complete SDKAssistantMessage arrives —
 * the reliable end-of-stream signal, which fires even when abort, interrupt
 * or error paths skip the SSE stop events.
 *
 * The message's chunks are always dropped. The scope → message mapping is
 * only removed while it still points at this message, so a newer message
 * already streaming in the same scope is left alone.
 */
export function clearStreamAccumulatorForMessage(
  state: StreamAccumulatorState,
  assistant: {
    session_id: string
    parent_tool_use_id: string | null
    message: { id: string }
  },
): void {
  const { id: finishedId } = assistant.message
  state.byMessage.delete(finishedId)

  const scope = scopeKey(assistant)
  const activeId = state.scopeToMessage.get(scope)
  if (activeId !== finishedId) return
  state.scopeToMessage.delete(scope)
}
/**
 * Outcome of CCRClient.request(). On failure, `retryAfterMs` is set only
 * when the server answered 429 with a usable Retry-After header.
 */
type RequestResult = { ok: true } | { ok: false; retryAfterMs?: number }
/** One entry of the internal-event upload queue (POST /worker/internal-events). */
type WorkerEvent = {
payload: EventPayload
is_compaction?: boolean
agent_id?: string
}
/**
 * Internal event as listed by the server.
 * NOTE(review): field meanings are not established by the visible code —
 * confirm against the CCR API before relying on them.
 */
export type InternalEvent = {
event_id: string
event_type: string
payload: Record<string, unknown>
event_metadata?: Record<string, unknown> | null
is_compaction: boolean
created_at: string
agent_id?: string
}
/** Response page for an internal-events listing; `next_cursor` is optional. */
type ListInternalEventsResponse = {
data: InternalEvent[]
next_cursor?: string
}
/**
 * Response body of GET /worker as read by getWorkerState(); only
 * `worker.external_metadata` is consumed.
 */
type WorkerStateResponse = {
worker?: {
external_metadata?: Record<string, unknown>
}
}
/**
* Manages the worker lifecycle protocol with CCR v2:
* - Epoch management: reads worker_epoch from CLAUDE_CODE_WORKER_EPOCH env var
* - Runtime state reporting: PUT /sessions/{id}/worker
* - Heartbeat: POST /sessions/{id}/worker/heartbeat for liveness detection
*
* All writes go through this.request().
*/
export class CCRClient {
private workerEpoch = 0
private readonly heartbeatIntervalMs: number
private readonly heartbeatJitterFraction: number
private heartbeatTimer: NodeJS.Timeout | null = null
private heartbeatInFlight = false
private closed = false
private consecutiveAuthFailures = 0
private currentState: SessionState | null = null
private readonly sessionBaseUrl: string
private readonly sessionId: string
private readonly http = createAxiosInstance({ keepAlive: true })
// stream_event delay buffer — accumulates content deltas for up to
// STREAM_EVENT_FLUSH_INTERVAL_MS before enqueueing (reduces POST count
// and enables text_delta coalescing). Mirrors HybridTransport's pattern.
private streamEventBuffer: SDKPartialAssistantMessage[] = []
private streamEventTimer: ReturnType<typeof setTimeout> | null = null
// Full-so-far text accumulator. Persists across flushes so each emitted
// text_delta event carries the complete text from the start of the block —
// mid-stream reconnects see a self-contained snapshot. Keyed by API message
// ID; cleared in writeEvent when the complete assistant message arrives.
private streamTextAccumulator = createStreamAccumulator()
private readonly workerState: WorkerStateUploader
private readonly eventUploader: SerialBatchEventUploader<ClientEvent>
private readonly internalEventUploader: SerialBatchEventUploader<WorkerEvent>
private readonly deliveryUploader: SerialBatchEventUploader<{
eventId: string
status: 'received' | 'processing' | 'processed'
}>
/**
* Called when the server returns 409 (a newer worker epoch superseded ours).
* Default: process.exit(1) — correct for spawn-mode children where the
* parent bridge re-spawns. In-process callers (replBridge) MUST override
* this to close gracefully instead; exit would kill the user's REPL.
*/
private readonly onEpochMismatch: () => never
/**
* Auth header source. Defaults to the process-wide session-ingress token
* (CLAUDE_CODE_SESSION_ACCESS_TOKEN env var). Callers managing multiple
* concurrent sessions with distinct JWTs MUST inject this — the env-var
* path is a process global and would stomp across sessions.
*/
private readonly getAuthHeaders: () => Record<string, string>
/**
 * Wire up a CCR worker client for one session.
 *
 * Resolves the options to their defaults, derives the session base URL and
 * session ID from `sessionUrl`, builds the four uploaders (worker state,
 * client events, internal events, delivery acks) on top of this.request(),
 * and registers an onEvent handler on `transport` that acks every received
 * event as 'received'. No network traffic is started here; see initialize().
 *
 * @param transport SSE transport whose incoming events are acked.
 * @param sessionUrl http(s) URL of the session. One trailing slash is
 *   stripped and the last path segment is taken as the session ID.
 * @param opts Optional overrides for the epoch-mismatch handler, heartbeat
 *   timing and the auth header source.
 * @throws Error when `sessionUrl` uses a protocol other than http: or https:.
 */
constructor(
transport: SSETransport,
sessionUrl: URL,
opts?: {
onEpochMismatch?: () => never
heartbeatIntervalMs?: number
heartbeatJitterFraction?: number
/**
 * Per-instance auth header source. Omit to read the process-wide
 * CLAUDE_CODE_SESSION_ACCESS_TOKEN (single-session callers — REPL,
 * daemon). Required for concurrent multi-session callers.
 */
getAuthHeaders?: () => Record<string, string>
},
) {
this.onEpochMismatch =
opts?.onEpochMismatch ??
(() => {
// eslint-disable-next-line custom-rules/no-process-exit
process.exit(1)
})
this.heartbeatIntervalMs =
opts?.heartbeatIntervalMs ?? DEFAULT_HEARTBEAT_INTERVAL_MS
this.heartbeatJitterFraction = opts?.heartbeatJitterFraction ?? 0
this.getAuthHeaders = opts?.getAuthHeaders ?? getSessionIngressAuthHeaders
// Session URL: https://host/v1/code/sessions/{id}
if (sessionUrl.protocol !== 'http:' && sessionUrl.protocol !== 'https:') {
throw new Error(
`CCRClient: Expected http(s) URL, got ${sessionUrl.protocol}`,
)
}
// Rebuilt from protocol, host and path only, so any query string or
// fragment on sessionUrl is not carried over.
const pathname = sessionUrl.pathname.replace(/\/$/, '')
this.sessionBaseUrl = `${sessionUrl.protocol}//${sessionUrl.host}${pathname}`
// Extract session ID from the URL path (last segment)
this.sessionId = pathname.split('/').pop() || ''
// PUT /worker patches; worker_epoch is stamped at send time, and a caller
// supplied worker_epoch in the body would override it (spread order).
this.workerState = new WorkerStateUploader({
send: body =>
this.request(
'put',
'/worker',
{ worker_epoch: this.workerEpoch, ...body },
'PUT worker',
).then(r => r.ok),
baseDelayMs: 500,
maxDelayMs: 30_000,
jitterMs: 500,
})
this.eventUploader = new SerialBatchEventUploader<ClientEvent>({
maxBatchSize: 100,
maxBatchBytes: 10 * 1024 * 1024,
// flushStreamEventBuffer() enqueues a full 100ms window of accumulated
// stream_events in one call. A burst of mixed delta types that don't
// fold into a single snapshot could exceed the old cap (50) and deadlock
// on the SerialBatchEventUploader backpressure check. Match
// HybridTransport's bound — high enough to be memory-only.
maxQueueSize: 100_000,
send: async batch => {
const result = await this.request(
'post',
'/worker/events',
{ worker_epoch: this.workerEpoch, events: batch },
'client events',
)
// A failed POST is surfaced as RetryableError, carrying the server's
// Retry-After hint when there was one.
if (!result.ok) {
throw new RetryableError(
'client event POST failed',
result.retryAfterMs,
)
}
},
baseDelayMs: 500,
maxDelayMs: 30_000,
jitterMs: 500,
})
this.internalEventUploader = new SerialBatchEventUploader<WorkerEvent>({
maxBatchSize: 100,
maxBatchBytes: 10 * 1024 * 1024,
maxQueueSize: 200,
send: async batch => {
const result = await this.request(
'post',
'/worker/internal-events',
{ worker_epoch: this.workerEpoch, events: batch },
'internal events',
)
if (!result.ok) {
throw new RetryableError(
'internal event POST failed',
result.retryAfterMs,
)
}
},
baseDelayMs: 500,
maxDelayMs: 30_000,
jitterMs: 500,
})
this.deliveryUploader = new SerialBatchEventUploader<{
eventId: string
status: 'received' | 'processing' | 'processed'
}>({
maxBatchSize: 64,
maxQueueSize: 64,
send: async batch => {
const result = await this.request(
'post',
'/worker/events/delivery',
{
worker_epoch: this.workerEpoch,
// Queue entries are camelCase; the wire format is snake_case.
updates: batch.map(d => ({
event_id: d.eventId,
status: d.status,
})),
},
'delivery batch',
)
if (!result.ok) {
throw new RetryableError('delivery POST failed', result.retryAfterMs)
}
},
baseDelayMs: 500,
maxDelayMs: 30_000,
jitterMs: 500,
})
// Ack each received client_event so CCR can track delivery status.
// Wired here (not in initialize()) so the callback is registered the
// moment new CCRClient() returns — remoteIO must be free to call
// transport.connect() immediately after without racing the first
// SSE catch-up frame against an unwired onEventCallback.
transport.setOnEvent((event: StreamClientEvent) => {
this.reportDelivery(event.event_id, 'received')
})
}
/**
 * Initialize the session worker:
 * 1. Take worker_epoch from the argument, or fall back to
 *    CLAUDE_CODE_WORKER_EPOCH (set by env-manager / bridge spawner)
 * 2. Report state as 'idle'
 * 3. Start heartbeat timer
 *
 * In-process callers (replBridge) pass the epoch directly — they
 * registered the worker themselves and there is no parent process
 * setting env vars.
 *
 * @param epoch Worker epoch. When omitted, CLAUDE_CODE_WORKER_EPOCH is
 *   parsed as a base-10 integer.
 * @returns The `external_metadata` read back from GET /worker, or null when
 *   none was returned.
 * @throws CCRInitError with reason 'no_auth_headers' when the auth header
 *   source yields nothing, 'missing_epoch' when no numeric epoch is
 *   available, or 'worker_register_failed' when the init PUT does not
 *   succeed.
 */
async initialize(epoch?: number): Promise<Record<string, unknown> | null> {
const startMs = Date.now()
if (Object.keys(this.getAuthHeaders()).length === 0) {
throw new CCRInitError('no_auth_headers')
}
if (epoch === undefined) {
const rawEpoch = process.env.CLAUDE_CODE_WORKER_EPOCH
// Unset or empty env var becomes NaN and is rejected just below.
epoch = rawEpoch ? parseInt(rawEpoch, 10) : NaN
}
if (isNaN(epoch)) {
throw new CCRInitError('missing_epoch')
}
this.workerEpoch = epoch
// Concurrent with the init PUT — neither depends on the other.
// NOTE(review): if the PUT below fails, this promise is never awaited;
// confirm getWithRetry() cannot reject, or a rejection would be unhandled.
const restoredPromise = this.getWorkerState()
const result = await this.request(
'put',
'/worker',
{
worker_status: 'idle',
worker_epoch: this.workerEpoch,
// Clear stale pending_action/task_summary left by a prior
// worker crash — the in-session clears don't survive process restart.
external_metadata: {
pending_action: null,
task_summary: null,
},
},
'PUT worker (init)',
)
if (!result.ok) {
// 409 → onEpochMismatch may throw, but request() catches it and returns
// false. Without this check we'd continue to startHeartbeat(), leaking a
// 20s timer against a dead epoch. Throw so connect()'s rejection handler
// fires instead of the success path.
throw new CCRInitError('worker_register_failed')
}
this.currentState = 'idle'
this.startHeartbeat()
// sessionActivity's refcount-gated timer fires while an API call or tool
// is in-flight; without a write the container lease can expire mid-wait.
// v1 wires this in WebSocketTransport per-connection.
registerSessionActivityCallback(() => {
void this.writeEvent({ type: 'keep_alive' })
})
logForDebugging(`CCRClient: initialized, epoch=${this.workerEpoch}`)
logForDiagnosticsNoPII('info', 'cli_worker_lifecycle_initialized', {
epoch: this.workerEpoch,
duration_ms: Date.now() - startMs,
})
// Await the concurrent GET and log state_restored here, after the PUT
// has succeeded — logging inside getWorkerState() raced: if the GET
// resolved before the PUT failed, diagnostics showed both init_failed
// and state_restored for the same session.
const { metadata, durationMs } = await restoredPromise
// Skip the diagnostic if the client was closed while the GET was pending.
if (!this.closed) {
logForDiagnosticsNoPII('info', 'cli_worker_state_restored', {
duration_ms: durationMs,
had_state: metadata !== null,
})
}
return metadata
}
/**
 * Read back the worker state written by a prior worker (GET /worker).
 *
 * Control_requests are marked processed and are not re-delivered after a
 * restart, so whatever the previous worker recorded in `external_metadata`
 * has to be fetched explicitly.
 *
 * @returns The restored `external_metadata` (null when absent) and how long
 *   the read took in ms. Without auth headers no request is made and the
 *   result is `{ metadata: null, durationMs: 0 }`.
 */
private async getWorkerState(): Promise<{
  metadata: Record<string, unknown> | null
  durationMs: number
}> {
  const startedAt = Date.now()
  const headers = this.getAuthHeaders()
  const hasAuth = Object.keys(headers).length !== 0
  if (!hasAuth) {
    return { metadata: null, durationMs: 0 }
  }

  const response = await this.getWithRetry<WorkerStateResponse>(
    `${this.sessionBaseUrl}/worker`,
    headers,
    'worker_state',
  )
  const metadata = response?.worker?.external_metadata ?? null
  const durationMs = Date.now() - startedAt
  return { metadata, durationMs }
}
/**
 * Send an authenticated HTTP request to CCR. Handles auth headers,
 * 409 epoch mismatch, and error logging. Returns { ok: true } on 2xx.
 * On 429, reads Retry-After (integer seconds) so the uploader can honor
 * the server's backoff hint instead of blindly exponentiating.
 *
 * Never rejects: transport errors, and anything thrown by the mismatch or
 * auth-failure handlers inside the try block, are logged and reported as
 * { ok: false }.
 *
 * @param method HTTP method, 'post' or 'put'.
 * @param path Path appended to the session base URL (e.g. '/worker').
 * @param body JSON request body.
 * @param label Short name used in debug log lines.
 * @param options `timeout` in ms; defaults to 10_000.
 * @returns { ok: true } on 2xx; otherwise { ok: false }, with
 *   `retryAfterMs` when a 429 carried a non-negative integer Retry-After.
 *   Also { ok: false }, without sending anything, when no auth headers are
 *   available.
 */
private async request(
method: 'post' | 'put',
path: string,
body: unknown,
label: string,
{ timeout = 10_000 }: { timeout?: number } = {},
): Promise<RequestResult> {
const authHeaders = this.getAuthHeaders()
if (Object.keys(authHeaders).length === 0) return { ok: false }
try {
const response = await this.http[method](
`${this.sessionBaseUrl}${path}`,
body,
{
headers: {
...authHeaders,
'Content-Type': 'application/json',
'anthropic-version': '2023-06-01',
'User-Agent': getClaudeCodeUserAgent(),
},
// Every status resolves; non-2xx responses are classified below.
validateStatus: alwaysValidStatus,
timeout,
},
)
if (response.status >= 200 && response.status < 300) {
// Any success resets the auth-failure streak.
this.consecutiveAuthFailures = 0
return { ok: true }
}
if (response.status === 409) {
this.handleEpochMismatch()
}
if (response.status === 401 || response.status === 403) {
// A 401 with an expired JWT is deterministic — no retry will
// ever succeed. Check the token's own exp before burning
// wall-clock on the threshold loop.
// NOTE(review): this reads the process-wide token, while the request
// itself used this.getAuthHeaders(), which can be injected per
// instance. Confirm the two agree for multi-session callers.
const tok = getSessionIngressAuthToken()
const exp = tok ? decodeJwtExpiry(tok) : null
// exp is compared in milliseconds, i.e. treated as epoch seconds.
if (exp !== null && exp * 1000 < Date.now()) {
logForDebugging(
`CCRClient: session_token expired (exp=${new Date(exp * 1000).toISOString()}) — no refresh was delivered, exiting`,
{ level: 'error' },
)
logForDiagnosticsNoPII('error', 'cli_worker_token_expired_no_refresh')
// The epoch-mismatch handler doubles as the "give up" path here.
this.onEpochMismatch()
}
// Token looks valid but server says 401 — possible server-side
// blip (userauth down, KMS hiccup). Count toward threshold.
this.consecutiveAuthFailures++
if (this.consecutiveAuthFailures >= MAX_CONSECUTIVE_AUTH_FAILURES) {
logForDebugging(
`CCRClient: ${this.consecutiveAuthFailures} consecutive auth failures with a valid-looking token — server-side auth unrecoverable, exiting`,
{ level: 'error' },
)
logForDiagnosticsNoPII('error', 'cli_worker_auth_failures_exhausted')
this.onEpochMismatch()
}
}
logForDebugging(`CCRClient: ${label} returned ${response.status}`, {
level: 'warn',
})
logForDiagnosticsNoPII('warn', 'cli_worker_request_failed', {
method,
path,
status: response.status,
})
if (response.status === 429) {
const raw = response.headers?.['retry-after']
// Only the integer-seconds form is parsed; anything else falls through
// to a plain failure.
const seconds = typeof raw === 'string' ? parseInt(raw, 10) : NaN
if (!isNaN(seconds) && seconds >= 0) {
return { ok: false, retryAfterMs: seconds * 1000 }
}
}
return { ok: false }
} catch (error) {
logForDebugging(`CCRClient: ${label} failed: ${errorMessage(error)}`, {
level: 'warn',
})
logForDiagnosticsNoPII('warn', 'cli_worker_request_error', {
method,
path,
error_code: getErrnoCode(error),
})
return { ok: false }
}
}
/** Report worker state to CCR via PUT /sessions/{id}/worker. */
reportState(state: SessionState, details?: RequiresActionDetails): void {
  // A repeat of the current state is only worth sending when it carries
  // action details; otherwise it is a no-op.
  const unchanged = state === this.currentState
  if (unchanged && !details) return
  this.currentState = state

  if (!details) {
    // Explicit null is sent for requires_action_details when none are given.
    this.workerState.enqueue({
      worker_status: state,
      requires_action_details: null,
    })
    return
  }

  // Forward only the three known fields rather than the whole details object.
  const { tool_name, action_description, request_id } = details
  this.workerState.enqueue({
    worker_status: state,
    requires_action_details: { tool_name, action_description, request_id },
  })
}
/** Report external metadata to CCR via PUT /worker. */
reportMetadata(metadata: Record<string, unknown>): void {
  // Queued on the same workerState uploader that reportState() uses.
  const patch = { external_metadata: metadata }
  this.workerState.enqueue(patch)
}
/**
* Handle epoch mismatch (409 Conflict). A newer CC instance has replaced
* this one — exit immediately.
*/
private handleEpochMismatch(): never {
  // Record the mismatch in both the debug log and the PII-free diagnostics
  // stream before handing control to the shutdown callback.
  const debugMessage = 'CCRClient: Epoch mismatch (409), shutting down'
  logForDebugging(debugMessage, { level: 'error' })
  logForDiagnosticsNoPII('error', 'cli_worker_epoch_mismatch')
  // onEpochMismatch does not return (this method is typed `never`).
  return this.onEpochMismatch()
}
/** Start periodic heartbeat. */
private startHeartbeat(): void {
  // Restarting replaces any timer that is already pending.
  this.stopHeartbeat()

  // Arms a one-shot timer at interval ± (interval * jitterFraction).
  const armNextTick = (): void => {
    const baseMs = this.heartbeatIntervalMs
    const jitterMs =
      baseMs * this.heartbeatJitterFraction * (2 * Math.random() - 1)
    this.heartbeatTimer = setTimeout(onTick, baseMs + jitterMs)
  }

  const onTick = (): void => {
    // Fire-and-forget: the next tick is scheduled without waiting for the
    // heartbeat request to finish.
    void this.sendHeartbeat()
    // stopHeartbeat() nulls the timer, so a null here means the heartbeat
    // was stopped while the send was being kicked off — do not re-arm.
    if (this.heartbeatTimer !== null) {
      armNextTick()
    }
  }

  armNextTick()
}
/** Stop heartbeat timer. */
private stopHeartbeat(): void {
  const pending = this.heartbeatTimer
  if (!pending) return
  clearTimeout(pending)
  // null is the "stopped" marker that the heartbeat tick checks before
  // re-arming itself.
  this.heartbeatTimer = null
}
/** Send a heartbeat via POST /sessions/{id}/worker/heartbeat. */
private async sendHeartbeat(): Promise<void> {
  // At most one heartbeat request at a time; overlapping ticks are dropped.
  if (this.heartbeatInFlight) return
  this.heartbeatInFlight = true
  try {
    const heartbeat = {
      session_id: this.sessionId,
      worker_epoch: this.workerEpoch,
    }
    // Uses a 5s timeout in place of request()'s 10s default.
    const { ok } = await this.request(
      'post',
      '/worker/heartbeat',
      heartbeat,
      'Heartbeat',
      { timeout: 5_000 },
    )
    if (ok) {
      logForDebugging('CCRClient: Heartbeat sent')
    }
  } finally {
    // Always release the in-flight flag, even if request() throws.
    this.heartbeatInFlight = false
  }
}
/**
* Write a StdoutMessage as a client event via POST /sessions/{id}/worker/events.
* These events are visible to frontend clients via the SSE stream.
* Injects a UUID if missing to ensure server-side idempotency on retry.
*
* stream_event messages are held in a 100ms delay buffer and accumulated
* (text_deltas for the same content block emit a full-so-far snapshot per
* flush). A non-stream_event write flushes the buffer first so downstream
* ordering is preserved.
*/
async writeEvent(message: StdoutMessage): Promise<void> {
  if (message.type !== 'stream_event') {
    // Drain buffered stream_events first so they are enqueued ahead of this
    // message and ordering is preserved.
    await this.flushStreamEventBuffer()
    if (message.type === 'assistant') {
      clearStreamAccumulatorForMessage(this.streamTextAccumulator, message)
    }
    await this.eventUploader.enqueue(this.toClientEvent(message))
    return
  }

  // stream_event: buffer it and make sure exactly one flush timer is armed.
  this.streamEventBuffer.push(message)
  if (this.streamEventTimer) return
  this.streamEventTimer = setTimeout(
    () => void this.flushStreamEventBuffer(),
    STREAM_EVENT_FLUSH_INTERVAL_MS,
  )
}
/** Wrap a StdoutMessage as a ClientEvent, injecting a UUID if missing. */
private toClientEvent(message: StdoutMessage): ClientEvent {
  const fields = message as unknown as Record<string, unknown>
  // Keep a string uuid the message already carries; otherwise mint a fresh
  // one. A non-string uuid is replaced as well.
  const existingUuid = fields.uuid
  const uuid = typeof existingUuid === 'string' ? existingUuid : randomUUID()
  return {
    payload: { ...fields, uuid } as EventPayload,
  }
}
/**
* Drain the stream_event delay buffer: accumulate text_deltas into
* full-so-far snapshots, clear the timer, enqueue the resulting events.
* Called from the timer, from writeEvent on a non-stream message, and from
* flush(). close() drops the buffer — call flush() first if you need
* delivery.
*/
private async flushStreamEventBuffer(): Promise<void> {
  // Cancel the pending timer regardless of whether anything is buffered, so
  // the next stream_event arms a fresh one.
  const pendingTimer = this.streamEventTimer
  if (pendingTimer) {
    clearTimeout(pendingTimer)
    this.streamEventTimer = null
  }

  const drained = this.streamEventBuffer
  if (drained.length === 0) return
  // Swap in a new array before awaiting so events written during the upload
  // land in the next batch rather than this one.
  this.streamEventBuffer = []

  const snapshots = accumulateStreamEvents(drained, this.streamTextAccumulator)
  await this.eventUploader.enqueue(
    snapshots.map(payload => ({ payload, ephemeral: true })),
  )
}
/**
* Write an internal worker event via POST /sessions/{id}/worker/internal-events.
* These events are NOT visible to frontend clients — they store worker-internal
* state (transcript messages, compaction markers) needed for session resume.
*/
async writeInternalEvent(
  eventType: string,
  payload: Record<string, unknown>,
  {
    isCompaction = false,
    agentId,
  }: {
    isCompaction?: boolean
    agentId?: string
  } = {},
): Promise<void> {
  // Reuse a string uuid from the payload when present, otherwise generate one.
  const providedUuid = payload.uuid
  const uuid = typeof providedUuid === 'string' ? providedUuid : randomUUID()

  const event: WorkerEvent = {
    // Spread order matters: a `type` key inside payload overrides eventType,
    // while the resolved uuid always wins.
    payload: { type: eventType, ...payload, uuid } as EventPayload,
    // Optional markers are added only when set (an empty agentId is omitted).
    ...(isCompaction ? { is_compaction: true } : {}),
    ...(agentId ? { agent_id: agentId } : {}),
  }
  await this.internalEventUploader.enqueue(event)
}
/**
* Flush pending internal events. Call between turns and on shutdown
* to ensure transcript entries are persisted.
*/
flushInternalEvents(): Promise<void> {
  // Hands back the uploader's own flush promise unchanged.
  const { internalEventUploader } = this
  return internalEventUploader.flush()
}
/**
* Flush pending client events (writeEvent queue). Call before close()
* when the caller needs delivery confirmation — close() abandons the
* queue. Resolves once the uploader drains or rejects; returns
* regardless of whether individual POSTs succeeded (check server state
* separately if that matters).
*/
async flush(): Promise<void> {
  // Move any delayed stream_events into the uploader queue first so they are
  // included in the drain below.
  await this.flushStreamEventBuffer()
  const drained = this.eventUploader.flush()
  return drained
}
/**
* Read foreground agent internal events from
* GET /sessions/{id}/worker/internal-events.
* Returns transcript entries from the last compaction boundary, or null on failure.
* Used for session resume.
*/
async readInternalEvents(): Promise<InternalEvent[] | null> {
  // No extra query parameters: this requests the default event stream.
  const noFilters: Record<string, string> = {}
  return this.paginatedGet(
    '/worker/internal-events',
    noFilters,
    'internal_events',
  )
}
/**
* Read all subagent internal events from
* GET /sessions/{id}/worker/internal-events?subagents=true.
* Returns a merged stream across all non-foreground agents, each from its
* compaction point. Used for session resume.
*/
async readSubagentInternalEvents(): Promise<InternalEvent[] | null> {
  // Same endpoint as readInternalEvents(), selected with ?subagents=true.
  const subagentFilter: Record<string, string> = { subagents: 'true' }
  return this.paginatedGet(
    '/worker/internal-events',
    subagentFilter,
    'subagent_events',
  )
}
/**
* Paginated GET with retry. Fetches all pages from a list endpoint,
* retrying each page on failure with exponential backoff + jitter.
*/
private async paginatedGet(
  path: string,
  params: Record<string, string>,
  context: string,
): Promise<InternalEvent[] | null> {
  // No auth headers available: nothing can be fetched.
  const authHeaders = this.getAuthHeaders()
  if (Object.keys(authHeaders).length === 0) return null

  // The endpoint and caller-supplied query parameters are the same for every
  // page, so build the URL once; only the cursor changes between pages.
  const url = new URL(`${this.sessionBaseUrl}${path}`)
  for (const [key, value] of Object.entries(params)) {
    url.searchParams.set(key, value)
  }

  const allEvents: InternalEvent[] = []
  let cursor: string | undefined
  do {
    if (cursor) {
      url.searchParams.set('cursor', cursor)
    }
    const page = await this.getWithRetry<ListInternalEventsResponse>(
      url.toString(),
      authHeaders,
      context,
    )
    // A page that failed after all retries aborts the whole read; partial
    // results are never returned.
    if (!page) return null
    // Append one by one instead of push(...page.data): spreading a
    // server-sized array into call arguments can exceed the engine's
    // argument limit and throw a RangeError on a large page.
    for (const event of page.data ?? []) {
      allEvents.push(event)
    }
    cursor = page.next_cursor
  } while (cursor)

  logForDebugging(
    `CCRClient: Read ${allEvents.length} internal events from ${path}${params.subagents ? ' (subagents)' : ''}`,
  )
  return allEvents
}
/**
* Single GET request with retry. Returns the parsed response body
* on success, null if all retries are exhausted.
*/
private async getWithRetry<T>(
  url: string,
  authHeaders: Record<string, string>,
  context: string,
): Promise<T | null> {
  const maxAttempts = 10

  // Exponential backoff (500ms base, doubling, capped at 30s) plus up to
  // 500ms of random jitter. Skipped after the final attempt.
  const pauseBeforeRetry = async (attempt: number): Promise<void> => {
    if (attempt >= maxAttempts) return
    const backoffMs = Math.min(500 * 2 ** (attempt - 1), 30_000)
    await sleep(backoffMs + Math.random() * 500)
  }

  for (let attempt = 1; attempt <= maxAttempts; attempt++) {
    let response
    try {
      response = await this.http.get<T>(url, {
        headers: {
          ...authHeaders,
          'anthropic-version': '2023-06-01',
          'User-Agent': getClaudeCodeUserAgent(),
        },
        validateStatus: alwaysValidStatus,
        timeout: 30_000,
      })
    } catch (error) {
      // The request itself threw (no response object): log and retry.
      logForDebugging(
        `CCRClient: GET ${url} failed (attempt ${attempt}/${maxAttempts}): ${errorMessage(error)}`,
        { level: 'warn' },
      )
      await pauseBeforeRetry(attempt)
      continue
    }

    const { status } = response
    if (status >= 200 && status < 300) {
      return response.data
    }
    // Handled outside the try block so that whatever the epoch-mismatch
    // handler does is never caught and retried.
    if (status === 409) {
      this.handleEpochMismatch()
    }
    // Every other non-2xx status is retried like a transport failure.
    logForDebugging(
      `CCRClient: GET ${url} returned ${status} (attempt ${attempt}/${maxAttempts})`,
      { level: 'warn' },
    )
    await pauseBeforeRetry(attempt)
  }

  logForDebugging('CCRClient: GET retries exhausted', { level: 'error' })
  logForDiagnosticsNoPII('error', 'cli_worker_get_retries_exhausted', {
    context,
  })
  return null
}
/**
* Report delivery status for a client-to-worker event.
* POST /v1/code/sessions/{id}/worker/events/delivery (batch endpoint)
*/
reportDelivery(
  eventId: string,
  status: 'received' | 'processing' | 'processed',
): void {
  // Fire-and-forget: the enqueue promise is deliberately not awaited.
  const report = { eventId, status }
  void this.deliveryUploader.enqueue(report)
}
/** Get the current epoch (for external use). */
getWorkerEpoch(): number {
  // Same epoch value that is sent in the heartbeat body.
  const { workerEpoch } = this
  return workerEpoch
}
/** Internal-event queue depth — shutdown-snapshot backpressure signal. */
get internalEventsPending(): number {
  // Read straight from the internal-event uploader's pendingCount.
  const { pendingCount } = this.internalEventUploader
  return pendingCount
}
/** Clean up uploaders and timers. */
close(): void {
  this.closed = true
  this.stopHeartbeat()
  unregisterSessionActivityCallback()

  // Cancel the delayed stream_event flush and discard whatever was buffered;
  // nothing buffered here is uploaded.
  const pendingFlush = this.streamEventTimer
  if (pendingFlush) {
    clearTimeout(pendingFlush)
    this.streamEventTimer = null
  }
  this.streamEventBuffer = []

  // Reset both maps of the text accumulator.
  const { byMessage, scopeToMessage } = this.streamTextAccumulator
  byMessage.clear()
  scopeToMessage.clear()

  // Close the uploaders: state, client events, internal events, delivery.
  this.workerState.close()
  this.eventUploader.close()
  this.internalEventUploader.close()
  this.deliveryUploader.close()
}
}

View file

@ -0,0 +1,45 @@
import { URL } from 'url'
import { isEnvTruthy } from '../../utils/envUtils.js'
import { HybridTransport } from './HybridTransport.js'
import { SSETransport } from './SSETransport.js'
import type { Transport } from './Transport.js'
import { WebSocketTransport } from './WebSocketTransport.js'
/**
* Helper function to get the appropriate transport for a URL.
*
* Transport selection priority:
* 1. SSETransport (SSE reads + POST writes) when CLAUDE_CODE_USE_CCR_V2 is set
* 2. HybridTransport (WS reads + POST writes) when CLAUDE_CODE_POST_FOR_SESSION_INGRESS_V2 is set
* 3. WebSocketTransport (WS reads + WS writes) — default
*/
export function getTransportForUrl(
  url: URL,
  headers: Record<string, string> = {},
  sessionId?: string,
  refreshHeaders?: () => Record<string, string>,
): Transport {
  const useCcrV2 = isEnvTruthy(process.env.CLAUDE_CODE_USE_CCR_V2)
  if (useCcrV2) {
    // v2 reads over SSE and writes over HTTP POST. The incoming URL is the
    // session URL (.../sessions/{id}); work on a copy so the caller's URL
    // object is left untouched.
    const sseUrl = new URL(url.href)
    // WebSocket schemes map to their HTTP equivalents; others are kept.
    switch (sseUrl.protocol) {
      case 'wss:':
        sseUrl.protocol = 'https:'
        break
      case 'ws:':
        sseUrl.protocol = 'http:'
        break
    }
    // Strip one trailing slash before appending the stream path.
    const sessionPath = sseUrl.pathname.replace(/\/$/, '')
    sseUrl.pathname = sessionPath + '/worker/events/stream'
    return new SSETransport(sseUrl, headers, sessionId, refreshHeaders)
  }

  // Outside v2 only WebSocket URLs are supported.
  const isWebSocketUrl = url.protocol === 'ws:' || url.protocol === 'wss:'
  if (!isWebSocketUrl) {
    throw new Error(`Unsupported protocol: ${url.protocol}`)
  }

  const usePostWrites = isEnvTruthy(
    process.env.CLAUDE_CODE_POST_FOR_SESSION_INGRESS_V2,
  )
  if (usePostWrites) {
    return new HybridTransport(url, headers, sessionId, refreshHeaders)
  }
  return new WebSocketTransport(url, headers, sessionId, refreshHeaders)
}

422
cli/update.ts Normal file
View file

@ -0,0 +1,422 @@
import chalk from 'chalk'
import { logEvent } from 'src/services/analytics/index.js'
import {
getLatestVersion,
type InstallStatus,
installGlobalPackage,
} from 'src/utils/autoUpdater.js'
import { regenerateCompletionCache } from 'src/utils/completionCache.js'
import {
getGlobalConfig,
type InstallMethod,
saveGlobalConfig,
} from 'src/utils/config.js'
import { logForDebugging } from 'src/utils/debug.js'
import { getDoctorDiagnostic } from 'src/utils/doctorDiagnostic.js'
import { gracefulShutdown } from 'src/utils/gracefulShutdown.js'
import {
installOrUpdateClaudePackage,
localInstallationExists,
} from 'src/utils/localInstaller.js'
import {
installLatest as installLatestNative,
removeInstalledSymlink,
} from 'src/utils/nativeInstaller/index.js'
import { getPackageManager } from 'src/utils/nativeInstaller/packageManagers.js'
import { writeToStdout } from 'src/utils/process.js'
import { gte } from 'src/utils/semver.js'
import { getInitialSettings } from 'src/utils/settings/settings.js'
/**
 * Run the interactive update flow.
 *
 * Steps, in order:
 * 1. Print the current version and the update channel being checked.
 * 2. Run the doctor diagnostic; print multiple-installation and other warnings.
 * 3. Record the install method in the global config when it is not set yet
 *    (skipped for package-manager installs).
 * 4. Refuse to update development builds.
 * 5. For package-manager installs, print upgrade instructions only.
 * 6. Reconcile a config/reality mismatch in the recorded install method.
 * 7. For native installs, delegate to the native installer.
 * 8. Otherwise perform the npm-based local or global update.
 *
 * Each terminal path calls gracefulShutdown() with 0 on success/no-op paths
 * and 1 on failure paths. The code after those calls assumes gracefulShutdown
 * does not return — NOTE(review): confirm against its declaration.
 */
export async function update(): Promise<void> {
  logEvent('tengu_update_check', {})
  writeToStdout(`Current version: ${MACRO.VERSION}\n`)
  const channel = getInitialSettings()?.autoUpdatesChannel ?? 'latest'
  writeToStdout(`Checking for updates to ${channel} version...\n`)
  logForDebugging('update: Starting update check')

  // Run diagnostic to detect potential issues
  logForDebugging('update: Running diagnostic')
  const diagnostic = await getDoctorDiagnostic()
  logForDebugging(`update: Installation type: ${diagnostic.installationType}`)
  logForDebugging(
    `update: Config install method: ${diagnostic.configInstallMethod}`,
  )

  // Check for multiple installations
  if (diagnostic.multipleInstallations.length > 1) {
    writeToStdout('\n')
    writeToStdout(chalk.yellow('Warning: Multiple installations found') + '\n')
    for (const install of diagnostic.multipleInstallations) {
      const current =
        diagnostic.installationType === install.type
          ? ' (currently running)'
          : ''
      writeToStdout(`- ${install.type} at ${install.path}${current}\n`)
    }
  }

  // Display warnings if any exist
  if (diagnostic.warnings.length > 0) {
    writeToStdout('\n')
    for (const warning of diagnostic.warnings) {
      logForDebugging(`update: Warning detected: ${warning.issue}`)
      // Don't skip PATH warnings - they're always relevant
      // The user needs to know that 'which claude' points elsewhere
      logForDebugging(`update: Showing warning: ${warning.issue}`)
      writeToStdout(chalk.yellow(`Warning: ${warning.issue}\n`))
      writeToStdout(chalk.bold(`Fix: ${warning.fix}\n`))
    }
  }

  // Update config if installMethod is not set (but skip for package managers).
  // `config` is a snapshot taken here; later checks read this snapshot, not
  // the value saved below.
  const config = getGlobalConfig()
  if (
    !config.installMethod &&
    diagnostic.installationType !== 'package-manager'
  ) {
    writeToStdout('\n')
    writeToStdout('Updating configuration to track installation method...\n')
    let detectedMethod: 'local' | 'native' | 'global' | 'unknown' = 'unknown'
    // Map diagnostic installation type to config install method
    switch (diagnostic.installationType) {
      case 'npm-local':
        detectedMethod = 'local'
        break
      case 'native':
        detectedMethod = 'native'
        break
      case 'npm-global':
        detectedMethod = 'global'
        break
      default:
        detectedMethod = 'unknown'
    }
    saveGlobalConfig(current => ({
      ...current,
      installMethod: detectedMethod,
    }))
    writeToStdout(`Installation method set to: ${detectedMethod}\n`)
  }

  // Check if running from development build
  if (diagnostic.installationType === 'development') {
    writeToStdout('\n')
    writeToStdout(
      chalk.yellow('Warning: Cannot update development build') + '\n',
    )
    await gracefulShutdown(1)
  }

  // Check if running from a package manager
  if (diagnostic.installationType === 'package-manager') {
    const packageManager = await getPackageManager()
    writeToStdout('\n')

    // Shared reporter for package managers that have a single well-known
    // upgrade command: announce the manager, compare the running version to
    // the latest published one, then print either the upgrade command or an
    // up-to-date notice.
    const reportManagedInstall = async (
      managerName: string,
      upgradeCommand: string,
    ): Promise<void> => {
      writeToStdout(`Claude is managed by ${managerName}.\n`)
      const latest = await getLatestVersion(channel)
      if (latest && !gte(MACRO.VERSION, latest)) {
        // Separate the two versions so the message does not read as one
        // run-together string such as "1.0.01.0.1".
        writeToStdout(`Update available: ${MACRO.VERSION} → ${latest}\n`)
        writeToStdout('\n')
        writeToStdout('To update, run:\n')
        writeToStdout(chalk.bold(` ${upgradeCommand}`) + '\n')
      } else {
        writeToStdout('Claude is up to date!\n')
      }
    }

    if (packageManager === 'homebrew') {
      await reportManagedInstall('Homebrew', 'brew upgrade claude-code')
    } else if (packageManager === 'winget') {
      await reportManagedInstall(
        'winget',
        'winget upgrade Anthropic.ClaudeCode',
      )
    } else if (packageManager === 'apk') {
      await reportManagedInstall('apk', 'apk upgrade claude-code')
    } else {
      // pacman, deb, and rpm don't get specific commands because they each have
      // multiple frontends (pacman: yay/paru/makepkg, deb: apt/apt-get/aptitude/nala,
      // rpm: dnf/yum/zypper)
      writeToStdout('Claude is managed by a package manager.\n')
      writeToStdout('Please use your package manager to update.\n')
    }
    await gracefulShutdown(0)
  }

  // Check for config/reality mismatch (skip for package-manager installs)
  if (
    config.installMethod &&
    diagnostic.configInstallMethod !== 'not set' &&
    diagnostic.installationType !== 'package-manager'
  ) {
    const runningType = diagnostic.installationType
    const configExpects = diagnostic.configInstallMethod
    // Map installation types for comparison
    const typeMapping: Record<string, string> = {
      'npm-local': 'local',
      'npm-global': 'global',
      native: 'native',
      development: 'development',
      unknown: 'unknown',
    }
    const normalizedRunningType = typeMapping[runningType] || runningType
    if (
      normalizedRunningType !== configExpects &&
      configExpects !== 'unknown'
    ) {
      writeToStdout('\n')
      writeToStdout(chalk.yellow('Warning: Configuration mismatch') + '\n')
      writeToStdout(`Config expects: ${configExpects} installation\n`)
      writeToStdout(`Currently running: ${runningType}\n`)
      writeToStdout(
        chalk.yellow(
          `Updating the ${runningType} installation you are currently using`,
        ) + '\n',
      )
      // Update config to match reality
      saveGlobalConfig(current => ({
        ...current,
        installMethod: normalizedRunningType as InstallMethod,
      }))
      writeToStdout(
        `Config updated to reflect current installation method: ${normalizedRunningType}\n`,
      )
    }
  }

  // Handle native installation updates first
  if (diagnostic.installationType === 'native') {
    logForDebugging(
      'update: Detected native installation, using native updater',
    )
    try {
      const result = await installLatestNative(channel, true)
      // Handle lock contention gracefully
      if (result.lockFailed) {
        const pidInfo = result.lockHolderPid
          ? ` (PID ${result.lockHolderPid})`
          : ''
        writeToStdout(
          chalk.yellow(
            `Another Claude process${pidInfo} is currently running. Please try again in a moment.`,
          ) + '\n',
        )
        await gracefulShutdown(0)
      }
      if (!result.latestVersion) {
        process.stderr.write('Failed to check for updates\n')
        await gracefulShutdown(1)
      }
      if (result.latestVersion === MACRO.VERSION) {
        writeToStdout(
          chalk.green(`Claude Code is up to date (${MACRO.VERSION})`) + '\n',
        )
      } else {
        writeToStdout(
          chalk.green(
            `Successfully updated from ${MACRO.VERSION} to version ${result.latestVersion}`,
          ) + '\n',
        )
        await regenerateCompletionCache()
      }
      await gracefulShutdown(0)
    } catch (error) {
      process.stderr.write('Error: Failed to install native update\n')
      process.stderr.write(String(error) + '\n')
      process.stderr.write('Try running "claude doctor" for diagnostics\n')
      await gracefulShutdown(1)
    }
  }

  // Fallback to existing JS/npm-based update logic
  // Remove native installer symlink since we're not using native installation
  // But only if user hasn't migrated to native installation
  if (config.installMethod !== 'native') {
    await removeInstalledSymlink()
  }
  logForDebugging('update: Checking npm registry for latest version')
  logForDebugging(`update: Package URL: ${MACRO.PACKAGE_URL}`)
  // The npm command string is built for the debug log only; the lookup
  // itself goes through getLatestVersion().
  const npmTag = channel === 'stable' ? 'stable' : 'latest'
  const npmCommand = `npm view ${MACRO.PACKAGE_URL}@${npmTag} version`
  logForDebugging(`update: Running: ${npmCommand}`)
  const latestVersion = await getLatestVersion(channel)
  logForDebugging(
    `update: Latest version from npm: ${latestVersion || 'FAILED'}`,
  )
  if (!latestVersion) {
    logForDebugging('update: Failed to get latest version from npm registry')
    process.stderr.write(chalk.red('Failed to check for updates') + '\n')
    process.stderr.write('Unable to fetch latest version from npm registry\n')
    process.stderr.write('\n')
    process.stderr.write('Possible causes:\n')
    process.stderr.write(' • Network connectivity issues\n')
    process.stderr.write(' • npm registry is unreachable\n')
    process.stderr.write(' • Corporate proxy/firewall blocking npm\n')
    if (MACRO.PACKAGE_URL && !MACRO.PACKAGE_URL.startsWith('@anthropic')) {
      process.stderr.write(
        ' • Internal/development build not published to npm\n',
      )
    }
    process.stderr.write('\n')
    process.stderr.write('Try:\n')
    process.stderr.write(' • Check your internet connection\n')
    process.stderr.write(' • Run with --debug flag for more details\n')
    const packageName =
      MACRO.PACKAGE_URL ||
      (process.env.USER_TYPE === 'ant'
        ? '@anthropic-ai/claude-cli'
        : '@anthropic-ai/claude-code')
    process.stderr.write(
      ` • Manually check: npm view ${packageName} version\n`,
    )
    process.stderr.write(' • Check if you need to login: npm whoami\n')
    await gracefulShutdown(1)
  }

  // Check if versions match exactly, including any build metadata (like SHA)
  if (latestVersion === MACRO.VERSION) {
    writeToStdout(
      chalk.green(`Claude Code is up to date (${MACRO.VERSION})`) + '\n',
    )
    await gracefulShutdown(0)
  }
  writeToStdout(
    `New version available: ${latestVersion} (current: ${MACRO.VERSION})\n`,
  )
  writeToStdout('Installing update...\n')

  // Determine update method based on what's actually running
  let useLocalUpdate = false
  let updateMethodName = ''
  switch (diagnostic.installationType) {
    case 'npm-local':
      useLocalUpdate = true
      updateMethodName = 'local'
      break
    case 'npm-global':
      useLocalUpdate = false
      updateMethodName = 'global'
      break
    case 'unknown': {
      // Fallback to detection if we can't determine installation type
      const isLocal = await localInstallationExists()
      useLocalUpdate = isLocal
      updateMethodName = isLocal ? 'local' : 'global'
      writeToStdout(
        chalk.yellow('Warning: Could not determine installation type') + '\n',
      )
      writeToStdout(
        `Attempting ${updateMethodName} update based on file detection...\n`,
      )
      break
    }
    default:
      process.stderr.write(
        `Error: Cannot update ${diagnostic.installationType} installation\n`,
      )
      await gracefulShutdown(1)
  }
  writeToStdout(`Using ${updateMethodName} installation update method...\n`)
  logForDebugging(`update: Update method determined: ${updateMethodName}`)
  logForDebugging(`update: useLocalUpdate: ${useLocalUpdate}`)

  let status: InstallStatus
  if (useLocalUpdate) {
    logForDebugging(
      'update: Calling installOrUpdateClaudePackage() for local update',
    )
    status = await installOrUpdateClaudePackage(channel)
  } else {
    logForDebugging('update: Calling installGlobalPackage() for global update')
    status = await installGlobalPackage()
  }
  logForDebugging(`update: Installation status: ${status}`)

  switch (status) {
    case 'success':
      writeToStdout(
        chalk.green(
          `Successfully updated from ${MACRO.VERSION} to version ${latestVersion}`,
        ) + '\n',
      )
      await regenerateCompletionCache()
      break
    case 'no_permissions':
      process.stderr.write(
        'Error: Insufficient permissions to install update\n',
      )
      if (useLocalUpdate) {
        process.stderr.write('Try manually updating with:\n')
        process.stderr.write(
          ` cd ~/.claude/local && npm update ${MACRO.PACKAGE_URL}\n`,
        )
      } else {
        process.stderr.write('Try running with sudo or fix npm permissions\n')
        process.stderr.write(
          'Or consider using native installation with: claude install\n',
        )
      }
      await gracefulShutdown(1)
      break
    case 'install_failed':
      process.stderr.write('Error: Failed to install update\n')
      if (useLocalUpdate) {
        process.stderr.write('Try manually updating with:\n')
        process.stderr.write(
          ` cd ~/.claude/local && npm update ${MACRO.PACKAGE_URL}\n`,
        )
      } else {
        process.stderr.write(
          'Or consider using native installation with: claude install\n',
        )
      }
      await gracefulShutdown(1)
      break
    case 'in_progress':
      process.stderr.write(
        'Error: Another instance is currently performing an update\n',
      )
      process.stderr.write('Please wait and try again later\n')
      await gracefulShutdown(1)
      break
  }
  await gracefulShutdown(0)
}