From 6f54d99b4470b9d685f891eb8958fb9fd4c233bc Mon Sep 17 00:00:00 2001 From: Ehab Younes Date: Thu, 4 Jun 2026 16:26:10 +0300 Subject: [PATCH] feat(telemetry): harden the telemetry export command Follow-up hardening on top of the Coder telemetry export command (#953): - Surface a non-blocking warning when the pre-export flush does not fully succeed, so a successful-looking export no longer hides missing events. - Add a self-describing manifest.json to OTLP/JSON exports: manifest and telemetry schema versions, date range, source file and event counts, and per-signal record counts. - Make JSON exports cancellable and reject non-file save destinations. - Version telemetry rows with schema_version and export span logs as linked OTLP log records. - Harden HTTP route normalization for telemetry. - Assert the exporter resource carries per-event provenance, tighten comments, and remove em-dashes from comments and test names. --- CHANGELOG.md | 5 + src/instrumentation/websocket.ts | 4 +- src/logging/httpRequestsTelemetry.ts | 88 +-------- src/logging/routeNormalization.ts | 130 +++++++++++++ src/telemetry/export/command.ts | 10 +- src/telemetry/export/pipeline.ts | 20 +- src/telemetry/export/prompts.ts | 11 +- src/telemetry/export/writers/index.ts | 15 +- src/telemetry/export/writers/otlp/manifest.ts | 76 ++++++++ src/telemetry/export/writers/otlp/records.ts | 8 + src/telemetry/export/writers/otlp/types.ts | 3 + src/telemetry/export/writers/otlp/writer.ts | 73 +++++++- src/telemetry/export/writers/types.ts | 10 +- src/telemetry/service.ts | 52 ++++-- src/telemetry/sinks/localJsonlSink.ts | 21 ++- src/telemetry/wireFormat.ts | 12 ++ src/util/fs.ts | 2 +- src/websocket/reconnectingWebSocket.ts | 7 +- test/mocks/telemetry.ts | 2 + .../logging/httpRequestsTelemetry.test.ts | 38 +--- test/unit/logging/routeNormalization.test.ts | 76 ++++++++ test/unit/telemetry/export/command.test.ts | 24 ++- test/unit/telemetry/export/pipeline.test.ts | 36 +++- test/unit/telemetry/export/prompts.test.ts | 9 + .../telemetry/export/writers/index.test.ts | 16 +- .../export/writers/otlp/manifest.test.ts | 70 +++++++ .../export/writers/otlp/records.test.ts | 28 ++- .../export/writers/otlp/writer.test.ts | 175 ++++++++++++------ test/unit/telemetry/service.test.ts | 25 ++- .../telemetry/sinks/localJsonlSink.test.ts | 21 ++- test/unit/telemetry/wireFormat.test.ts | 16 ++ 31 files changed, 835 insertions(+), 248 deletions(-) create mode 100644 src/logging/routeNormalization.ts create mode 100644 src/telemetry/export/writers/otlp/manifest.ts create mode 100644 test/unit/logging/routeNormalization.test.ts create mode 100644 test/unit/telemetry/export/writers/otlp/manifest.test.ts diff --git a/CHANGELOG.md b/CHANGELOG.md index b200309d5a..9aabc4a1dc 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -27,6 +27,11 @@ - Local telemetry now records authentication refresh and recovery prompts. - Local telemetry now records workspace and agent state transitions with observed durations. +- The `Coder: Export Telemetry` command writes locally recorded telemetry to + a file you choose, as a JSON array or an OTLP/JSON zip, for a selected date + range. The OTLP zip includes a `manifest.json` summarizing the export: date + range, source file and event counts, per-signal record counts, and the + telemetry schema version. - Path-like settings (`coder.binaryDestination`, `coder.tlsCertFile`, `coder.tlsKeyFile`, `coder.tlsCaFile`, `coder.tlsAltHost`, `coder.proxyLogDirectory`) and items in `coder.globalFlags` now support diff --git a/src/instrumentation/websocket.ts b/src/instrumentation/websocket.ts index 7c9b0efd15..0db3760537 100644 --- a/src/instrumentation/websocket.ts +++ b/src/instrumentation/websocket.ts @@ -1,3 +1,5 @@ +import { normalizeRoute } from "../logging/routeNormalization"; + import type { CallerProperties } from "../telemetry/event"; import type { TelemetryReporter } from "../telemetry/reporter"; import type { ConnectionState } from "../websocket/reconnectingWebSocket"; @@ -82,7 +84,7 @@ export class WebSocketTelemetry { this.#connectStartedAtMs = undefined; this.#telemetry.log( "connection.opened", - { route }, + { route: normalizeRoute(route) }, { connectDurationMs: now - start }, ); this.#finishReconnect({ result: "success" }); diff --git a/src/logging/httpRequestsTelemetry.ts b/src/logging/httpRequestsTelemetry.ts index 6069c8f423..d529e8abf4 100644 --- a/src/logging/httpRequestsTelemetry.ts +++ b/src/logging/httpRequestsTelemetry.ts @@ -6,42 +6,15 @@ import { } from "../telemetry/reporter"; import { formatMethod } from "./formatters"; +import { normalizeRoute } from "./routeNormalization"; import type { Disposable } from "vscode"; import type { RequestConfigWithMeta } from "./types"; const EVENT_NAME = "http.requests"; -const UNKNOWN_ROUTE = ""; const WINDOW_SECONDS = 60; -const ID_PLACEHOLDER = "{id}"; -const NAME_PLACEHOLDER = "{name}"; - -const ROUTE_NORMALIZATION_RULES: ReadonlyArray = [ - "api/v2/users/{name}/workspace/{name}", - "api/v2/users/{name}/keys/{id}", - "api/v2/users/{name}", - "api/v2/tasks/{name}/{id}", - "api/v2/tasks/{name}", - "api/v2/organizations/{id}/templates/{name}/versions/{name}", - "api/v2/organizations/{id}/templates/{name}", - "api/v2/organizations/{id}/groups/{name}", - "api/v2/organizations/{id}/members/{name}", - "api/v2/organizations/{id}", - "api/v2/aibridge/sessions/{id}", - "api/v2/files/{id}", - "api/v2/groups/{id}", - "api/v2/licenses/{id}", - "api/v2/oauth2-provider/apps/{id}", - "api/v2/templates/{id}", - "api/v2/templateversions/{id}", - "api/v2/workspaceagents/{id}", - "api/v2/workspacebuilds/{id}", - "api/v2/workspaces/{id}/builds/{id}", - "api/v2/workspaces/{id}", -].map((rule) => rule.split("/")); - interface HttpRequestBucket { count1xx: number; count2xx: number; @@ -115,7 +88,7 @@ export class HttpRequestsTelemetry implements Disposable { } const method = formatMethod(config.method); - const route = normalizeHttpRoute(config.url, config.baseURL); + const route = normalizeRoute(config.url, config.baseURL); const bucket = this.#getOrCreateBucket(method, route); const durationMs = elapsedMs(config); @@ -206,63 +179,6 @@ export class HttpRequestsTelemetry implements Disposable { } } -export function normalizeHttpRoute( - url: string | undefined, - baseURL?: string, -): string { - if (!url) { - return UNKNOWN_ROUTE; - } - - const segments = parsePathSegments(url, baseURL); - if (segments.length === 0) { - return UNKNOWN_ROUTE; - } - - for (const rule of ROUTE_NORMALIZATION_RULES) { - const normalized = normalizeByRule(segments, rule); - if (normalized) { - return normalized; - } - } - // No matching rule. Pass through; add a rule above if cardinality grows. - return `/${segments.join("/")}`; -} - -function normalizeByRule( - segments: readonly string[], - rule: readonly string[], -): string | undefined { - if (segments.length < rule.length) { - return undefined; - } - - const normalized: string[] = []; - for (const [index, ruleSegment] of rule.entries()) { - if (ruleSegment === ID_PLACEHOLDER || ruleSegment === NAME_PLACEHOLDER) { - normalized.push(ruleSegment); - continue; - } - if (segments[index] !== ruleSegment) { - return undefined; - } - normalized.push(segments[index]); - } - - // Trailing segments pass through. If a tail can hold an ID, add a rule. - return `/${[...normalized, ...segments.slice(rule.length)].join("/")}`; -} - -function parsePathSegments(url: string, baseURL?: string): string[] { - try { - return new URL(url, baseURL ?? "http://coder.invalid").pathname - .split("/") - .filter(Boolean); - } catch { - return []; - } -} - function elapsedMs( config: RequestConfigWithMeta | undefined, ): number | undefined { diff --git a/src/logging/routeNormalization.ts b/src/logging/routeNormalization.ts new file mode 100644 index 0000000000..ed2d2f1c1c --- /dev/null +++ b/src/logging/routeNormalization.ts @@ -0,0 +1,130 @@ +/** + * Normalizes request and websocket routes into low-cardinality telemetry + * labels. Drops the query/fragment (which can carry tokens) and bounds + * cardinality by collapsing ids and bucketing unmatched routes, so even an + * unseen route is safe to emit. + */ + +const UNKNOWN_ROUTE = ""; +const ID_PLACEHOLDER = "{id}"; +const NAME_PLACEHOLDER = "{name}"; +/** Tail marker for routes with no matching template. */ +const BUCKET_PLACEHOLDER = "{*}"; +/** Segments kept verbatim before the bucket marker on unmatched routes. */ +const UNMATCHED_PREFIX_SEGMENTS = 3; + +/** Any-version UUID; version and variant nibbles are unenforced so UUIDv7 ids collapse too. */ +const UUID_PATTERN = + /^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$/i; +const INTEGER_PATTERN = /^\d+$/; + +/** + * Templates refine name segments (usernames, workspace/template names) that + * id collapsing misses. Precision only: a missing rule never risks + * cardinality, since unmatched routes still collapse ids and bucket. + */ +const ROUTE_NORMALIZATION_RULES: ReadonlyArray = [ + "api/v2/users/{name}/workspace/{name}", + "api/v2/users/{name}/keys/{id}", + "api/v2/users/{name}", + "api/v2/tasks/{name}/{id}", + "api/v2/tasks/{name}", + "api/v2/organizations/{id}/templates/{name}/versions/{name}", + "api/v2/organizations/{id}/templates/{name}", + "api/v2/organizations/{id}/groups/{name}", + "api/v2/organizations/{id}/members/{name}", + "api/v2/organizations/{id}", + "api/v2/aibridge/sessions/{id}", + "api/v2/files/{id}", + "api/v2/groups/{id}", + "api/v2/licenses/{id}", + "api/v2/oauth2-provider/apps/{id}", + "api/v2/templates/{id}", + "api/v2/templateversions/{id}", + "api/v2/workspaceagents/{id}", + "api/v2/workspacebuilds/{id}", + "api/v2/workspaces/{id}/builds/{id}", + "api/v2/workspaces/{id}", +].map((rule) => rule.split("/")); + +/** + * Normalizes `url` (optionally resolved against `baseURL`) to a stable route + * label. Returns `` for missing or unparseable input. + */ +export function normalizeRoute( + url: string | undefined, + baseURL?: string, +): string { + if (!url) { + return UNKNOWN_ROUTE; + } + + const segments = parsePathSegments(url, baseURL); + if (segments.length === 0) { + return UNKNOWN_ROUTE; + } + + const collapsed = segments.map(collapseIdSegment); + for (const rule of ROUTE_NORMALIZATION_RULES) { + const normalized = normalizeByRule(collapsed, rule); + if (normalized) { + return normalized; + } + } + return bucketUnmatchedRoute(collapsed); +} + +/** Collapses UUID and integer segments to `{id}` to bound cardinality. */ +function collapseIdSegment(segment: string): string { + return UUID_PATTERN.test(segment) || INTEGER_PATTERN.test(segment) + ? ID_PLACEHOLDER + : segment; +} + +function normalizeByRule( + segments: readonly string[], + rule: readonly string[], +): string | undefined { + if (segments.length < rule.length) { + return undefined; + } + + const normalized: string[] = []; + for (const [index, ruleSegment] of rule.entries()) { + if (ruleSegment === ID_PLACEHOLDER || ruleSegment === NAME_PLACEHOLDER) { + normalized.push(ruleSegment); + continue; + } + if (segments[index] !== ruleSegment) { + return undefined; + } + normalized.push(segments[index]); + } + + // Trailing segments pass through with ids already collapsed; add a rule + // above if a tail can hold a name. + return `/${[...normalized, ...segments.slice(rule.length)].join("/")}`; +} + +/** + * Bucket for unmatched routes: keep a short prefix and collapse the rest, so + * unseen routes stay bounded even when their variable segments are names. + */ +function bucketUnmatchedRoute(segments: readonly string[]): string { + if (segments.length <= UNMATCHED_PREFIX_SEGMENTS) { + return `/${segments.join("/")}`; + } + const prefix = segments.slice(0, UNMATCHED_PREFIX_SEGMENTS).join("/"); + return `/${prefix}/${BUCKET_PLACEHOLDER}`; +} + +/** Path segments only; `pathname` drops the query and fragment so tokens never reach telemetry. */ +function parsePathSegments(url: string, baseURL?: string): string[] { + try { + return new URL(url, baseURL ?? "http://coder.invalid").pathname + .split("/") + .filter(Boolean); + } catch { + return []; + } +} diff --git a/src/telemetry/export/command.ts b/src/telemetry/export/command.ts index 94cb66582d..1186e1bb97 100644 --- a/src/telemetry/export/command.ts +++ b/src/telemetry/export/command.ts @@ -17,6 +17,7 @@ import { createExportWriter } from "./writers"; import type { Logger } from "../../logging/logger"; import type { TelemetryContext } from "../event"; +import type { FlushStatus } from "../service"; const REVEAL_ACTION = "Reveal in File Explorer"; @@ -29,7 +30,7 @@ const PROGRESS_OPTIONS = { export async function runExportTelemetryCommand( telemetryDir: string, logger: Logger, - flushTelemetry: () => Promise, + flushTelemetry: () => Promise, context: TelemetryContext, ): Promise { const choice = await promptForExport(); @@ -58,13 +59,18 @@ export async function runExportTelemetryCommand( /** Wires the pipeline's host hooks to the progress UI and the logger. */ function exportRuntime( { progress, signal }: ProgressContext, - flushTelemetry: () => Promise, + flushTelemetry: () => Promise, logger: Logger, ): ExportRuntime { return { signal, flushTelemetry, report: (message) => progress.report({ message }), + // Warn but keep going: the export still reflects what reached disk. + onFlushIncomplete: () => + void vscode.window.showWarningMessage( + "Some recent telemetry could not be flushed; this export may be missing the latest events.", + ), onCleanupError: (err, target) => logger.warn("Failed to clean up after telemetry export", target, err), }; diff --git a/src/telemetry/export/pipeline.ts b/src/telemetry/export/pipeline.ts index 4d13bc48e2..4bd3e7a974 100644 --- a/src/telemetry/export/pipeline.ts +++ b/src/telemetry/export/pipeline.ts @@ -5,6 +5,7 @@ import { throwIfAborted } from "../../error/errorUtils"; import { listTelemetryFilesForRange, streamTelemetryEvents } from "./files"; import type { TelemetryEvent } from "../event"; +import type { FlushStatus } from "../service"; import type { TelemetryDateRange } from "./range"; import type { ExportWriter } from "./writers/types"; @@ -22,8 +23,10 @@ export interface ExportRequest { */ export interface ExportRuntime { readonly signal: AbortSignal; - readonly flushTelemetry: () => Promise; + readonly flushTelemetry: () => Promise; readonly report: (message: string) => void; + /** The pre-export flush did not fully succeed, so recent events may be missing. */ + readonly onFlushIncomplete: () => void; /** A temp file, staging dir, or empty export could not be removed (caller logs). */ readonly onCleanupError: (err: unknown, target: string) => void; } @@ -38,8 +41,11 @@ export async function collectTelemetryExport( runtime: ExportRuntime, ): Promise { runtime.report("Flushing buffered events..."); - await runtime.flushTelemetry(); + const flushStatus = await runtime.flushTelemetry(); throwIfAborted(runtime.signal); + if (!flushStatus.ok) { + runtime.onFlushIncomplete(); + } runtime.report("Locating telemetry files..."); const filePaths = await listTelemetryFilesForRange( @@ -55,10 +61,12 @@ export async function collectTelemetryExport( streamTelemetryEvents(filePaths, request.range), runtime.signal, ); - const eventCount = await request.writer(request.outputPath, events, { - signal: runtime.signal, - onCleanupError: runtime.onCleanupError, - }); + const eventCount = await request.writer( + request.outputPath, + events, + { range: request.range, sourceFiles: filePaths.length }, + { signal: runtime.signal, onCleanupError: runtime.onCleanupError }, + ); if (eventCount === 0) { await removeEmptyExport(request.outputPath, runtime.onCleanupError); } diff --git a/src/telemetry/export/prompts.ts b/src/telemetry/export/prompts.ts index 249e039f3c..12d86c0ef0 100644 --- a/src/telemetry/export/prompts.ts +++ b/src/telemetry/export/prompts.ts @@ -152,5 +152,14 @@ async function promptSavePath( filters, title: "Save Telemetry Export", }); - return uri?.fsPath; + if (!uri) { + return undefined; + } + if (uri.scheme !== "file") { + vscode.window.showErrorMessage( + "Telemetry can only be exported to a local file. The selected location is not a local file path.", + ); + return undefined; + } + return uri.fsPath; } diff --git a/src/telemetry/export/writers/index.ts b/src/telemetry/export/writers/index.ts index 836ab9ac97..3f6ede1dd2 100644 --- a/src/telemetry/export/writers/index.ts +++ b/src/telemetry/export/writers/index.ts @@ -5,7 +5,12 @@ import type { TelemetryContext } from "../../event"; import type { ExportFormat, ExportWriter } from "./types"; -export type { ExportFormat, ExportWriteOptions, ExportWriter } from "./types"; +export type { + ExportDescriptor, + ExportFormat, + ExportWriteOptions, + ExportWriter, +} from "./types"; /** Picks the writer for `format`, binding the context the OTLP writer needs. */ export function createExportWriter( @@ -13,10 +18,12 @@ export function createExportWriter( context: TelemetryContext, ): ExportWriter { if (format === "json") { - return writeJsonArrayExport; + // JSON has nowhere to record the descriptor, so it is dropped here. + return (outputPath, events, _descriptor, options) => + writeJsonArrayExport(outputPath, events, options); } - return (outputPath, events, options) => - writeOtlpZipExport(outputPath, events, context, options).then( + return (outputPath, events, descriptor, options) => + writeOtlpZipExport(outputPath, events, context, descriptor, options).then( (counts) => counts.logs + counts.traces + counts.metrics, ); } diff --git a/src/telemetry/export/writers/otlp/manifest.ts b/src/telemetry/export/writers/otlp/manifest.ts new file mode 100644 index 0000000000..3fc4cb0c27 --- /dev/null +++ b/src/telemetry/export/writers/otlp/manifest.ts @@ -0,0 +1,76 @@ +import { CURRENT_TELEMETRY_SCHEMA_VERSION } from "../../../wireFormat"; + +import type { TelemetryContext } from "../../../event"; + +/** File name of the manifest packed alongside the export envelopes. */ +export const MANIFEST_FILE = "manifest.json"; + +/** Manifest document format version; bump when this shape changes. */ +export const MANIFEST_SCHEMA_VERSION = 1; + +/** Date range the export covers. */ +export interface ManifestRange { + readonly label: string; + readonly startMs?: number; + readonly endMs?: number; +} + +/** Per-signal record counts (records written, not source events). */ +export interface RecordCounts { + readonly logs: number; + readonly traces: number; + readonly metrics: number; +} + +/** Caller-supplied inputs the writer cannot derive from the event stream. */ +export interface ManifestInput { + readonly range: ManifestRange; + readonly sourceFiles: number; +} + +/** Self-describing metadata written alongside an export. */ +export interface ExportManifest { + readonly schemaVersion: number; + readonly telemetrySchemaVersion: number; + readonly exportedAt: string; + readonly extensionVersion: string; + readonly format: string; + readonly range: { + readonly label: string; + readonly start: string | null; + readonly end: string | null; + }; + readonly sourceFiles: number; + readonly sourceEvents: number; + readonly records: RecordCounts; +} + +export function buildManifest(args: { + readonly format: string; + readonly context: TelemetryContext; + readonly input: ManifestInput; + readonly sourceEvents: number; + readonly records: RecordCounts; + readonly exportedAt: string; +}): ExportManifest { + const { format, context, input, sourceEvents, records, exportedAt } = args; + return { + schemaVersion: MANIFEST_SCHEMA_VERSION, + telemetrySchemaVersion: CURRENT_TELEMETRY_SCHEMA_VERSION, + exportedAt, + extensionVersion: context.extensionVersion, + format, + range: { + label: input.range.label, + start: toIso(input.range.startMs), + end: toIso(input.range.endMs), + }, + sourceFiles: input.sourceFiles, + sourceEvents, + records, + }; +} + +function toIso(ms: number | undefined): string | null { + return ms === undefined ? null : new Date(ms).toISOString(); +} diff --git a/src/telemetry/export/writers/otlp/records.ts b/src/telemetry/export/writers/otlp/records.ts index 6be26c9905..11a4e93865 100644 --- a/src/telemetry/export/writers/otlp/records.ts +++ b/src/telemetry/export/writers/otlp/records.ts @@ -108,6 +108,12 @@ export function otlpScope(version: string) { return { name: "coder.vscode-coder.telemetry.export", version }; } +/** + * Builds an OTLP log record. Span-attached logs (`Span.log()` / + * `Span.logError()`) carry `traceId` and a `parentEventId`, mapped here to + * the OTLP `traceId`/`spanId` so backends nest them under their span rather + * than treat them as standalone events. Plain logs carry neither. + */ export function logRecord(event: TelemetryEvent): OtlpLogRecord { const timeUnixNano = String(toUnixNano(event.timestamp)); const errored = event.error !== undefined; @@ -123,6 +129,8 @@ export function logRecord(event: TelemetryEvent): OtlpLogRecord { ...(event.error && exceptionAttributes(event.error)), ...eventContextAttributes(event), }), + ...(event.traceId !== undefined && { traceId: event.traceId }), + ...(event.parentEventId !== undefined && { spanId: event.parentEventId }), }; } diff --git a/src/telemetry/export/writers/otlp/types.ts b/src/telemetry/export/writers/otlp/types.ts index bcce6133bc..38a5cf636b 100644 --- a/src/telemetry/export/writers/otlp/types.ts +++ b/src/telemetry/export/writers/otlp/types.ts @@ -22,6 +22,9 @@ export interface OtlpLogRecord { readonly severityText: string; readonly body: { readonly stringValue: string }; readonly attributes: readonly OtlpKeyValue[]; + /** Set on span-attached logs to link the record to its parent span. */ + readonly traceId?: string; + readonly spanId?: string; } export interface OtlpStatus { diff --git a/src/telemetry/export/writers/otlp/writer.ts b/src/telemetry/export/writers/otlp/writer.ts index f78da6f524..ed6f4fa7fe 100644 --- a/src/telemetry/export/writers/otlp/writer.ts +++ b/src/telemetry/export/writers/otlp/writer.ts @@ -14,6 +14,7 @@ import { writeAtomically } from "../../../../util/fs"; import { describeMetricEvent } from "../../metrics"; import { openEnvelopeFile, type EnvelopeFile } from "./envelope"; +import { buildManifest, MANIFEST_FILE, type RecordCounts } from "./manifest"; import { ENVELOPES, ENVELOPE_SUFFIX, @@ -29,18 +30,24 @@ import { } from "./records"; import type { TelemetryContext, TelemetryEvent } from "../../../event"; -import type { ExportWriteOptions } from "../types"; +import type { ExportDescriptor, ExportWriteOptions } from "../types"; -/** Event totals by signal — a metric event with all records suppressed still counts as one. */ +/** Event totals by signal; a metric event with all records suppressed still counts as one. */ export interface OtlpExportCounts { readonly logs: number; readonly traces: number; readonly metrics: number; } +/** OTLP/JSON format tag recorded in the manifest. */ +const OTLP_FORMAT = "otlp-json"; + interface Channel { file: EnvelopeFile; + /** Source events routed to this signal. */ count: number; + /** OTLP records written to this signal. */ + records: number; } // Read high-water mark (HWM): bytes buffered per read while streaming a staged @@ -57,6 +64,7 @@ export async function writeOtlpZipExport( outputPath: string, events: AsyncIterable, context: TelemetryContext, + descriptor: ExportDescriptor, options: ExportWriteOptions = {}, ): Promise { throwIfAborted(options.signal); @@ -73,6 +81,7 @@ export async function writeOtlpZipExport( events, context, options.signal, + descriptor, ); await packZip(zipPath, stagingDir, options.signal); } catch (err) { @@ -106,6 +115,7 @@ async function writeStagedFiles( events: AsyncIterable, context: TelemetryContext, signal: AbortSignal | undefined, + descriptor: ExportDescriptor, ): Promise { const resource = JSON.stringify(otlpResource(context)); const scope = JSON.stringify(otlpScope(context.extensionVersion)); @@ -126,11 +136,41 @@ async function writeStagedFiles( await (succeeded ? Promise.all(closes) : Promise.allSettled(closes)); } - return { + const counts: OtlpExportCounts = { logs: channels.logs.count, traces: channels.traces.count, metrics: channels.metrics.count, }; + await writeManifest(dir, descriptor, context, channels); + return counts; +} + +async function writeManifest( + dir: string, + descriptor: ExportDescriptor, + context: TelemetryContext, + channels: Record, +): Promise { + const records: RecordCounts = { + logs: channels.logs.records, + traces: channels.traces.records, + metrics: channels.metrics.records, + }; + const sourceEvents = + channels.logs.count + channels.traces.count + channels.metrics.count; + const manifest = buildManifest({ + format: OTLP_FORMAT, + input: descriptor, + context, + sourceEvents, + records, + exportedAt: new Date().toISOString(), + }); + await fs.writeFile( + path.join(dir, MANIFEST_FILE), + JSON.stringify(manifest, null, 2), + "utf8", + ); } async function openChannels( @@ -145,7 +185,7 @@ async function openChannels( envelopePrefix(envelope, resource, scope), ENVELOPE_SUFFIX, ); - return { file, count: 0 }; + return { file, count: 0, records: 0 }; }; // Promise.allSettled so one failure doesn't orphan its siblings' fds. const settled = await Promise.allSettled([ @@ -168,10 +208,18 @@ async function openChannels( return { logs, traces, metrics }; } -function hasTraceId( +/** + * A completed timed span (`trace()` / `Span.phase()`) always carries a + * framework-set `result` property. Span-attached logs (`Span.log()` / + * `Span.logError()`) share the `traceId` but have no `result`, so they route + * to log records instead of becoming zero-duration spans. + */ +function isTimedSpan( event: TelemetryEvent, ): event is TelemetryEvent & { readonly traceId: string } { - return event.traceId !== undefined; + return ( + event.traceId !== undefined && Object.hasOwn(event.properties, "result") + ); } async function routeEvent( @@ -187,7 +235,7 @@ async function routeEvent( metricRecords(event, metric, state), ); channels.metrics.count += 1; - } else if (hasTraceId(event)) { + } else if (isTimedSpan(event)) { await appendRecords(channels.traces, [spanRecord(event)]); channels.traces.count += 1; } else { @@ -209,6 +257,7 @@ async function appendRecords( ): Promise { for (const record of records) { await channel.file.append(record); + channel.records += 1; } } @@ -276,12 +325,16 @@ async function pumpEnvelopes( signal: AbortSignal | undefined, waitForDrain: () => Promise, ): Promise { - for (const envelope of Object.values(ENVELOPES)) { + const names = [ + ...Object.values(ENVELOPES).map((envelope) => envelope.file), + MANIFEST_FILE, + ]; + for (const name of names) { throwIfAborted(signal); await streamFileIntoZip( zip, - envelope.file, - path.join(sourceDir, envelope.file), + name, + path.join(sourceDir, name), signal, waitForDrain, ); diff --git a/src/telemetry/export/writers/types.ts b/src/telemetry/export/writers/types.ts index 7c8fd9e1d6..134ee26d30 100644 --- a/src/telemetry/export/writers/types.ts +++ b/src/telemetry/export/writers/types.ts @@ -1,4 +1,5 @@ import type { TelemetryEvent } from "../../event"; +import type { TelemetryDateRange } from "../range"; export type ExportFormat = "json" | "otlp"; @@ -9,9 +10,16 @@ export interface ExportWriteOptions { readonly onCleanupError?: (err: unknown, target: string) => void; } +/** What an export covers. The OTLP writer records it in the zip's manifest. */ +export interface ExportDescriptor { + readonly range: TelemetryDateRange; + readonly sourceFiles: number; +} + /** Streams `events` to `outputPath`, returning how many were written. */ export type ExportWriter = ( outputPath: string, events: AsyncIterable, - options: ExportWriteOptions, + descriptor: ExportDescriptor, + options?: ExportWriteOptions, ) => Promise; diff --git a/src/telemetry/service.ts b/src/telemetry/service.ts index 65657e447d..f3c69501e1 100644 --- a/src/telemetry/service.ts +++ b/src/telemetry/service.ts @@ -20,6 +20,7 @@ import { } from "./event"; import { newSpanId, newTraceId } from "./ids"; import { NOOP_SPAN, type Span, type SpanResult } from "./span"; +import { CURRENT_TELEMETRY_SCHEMA_VERSION } from "./wireFormat"; import type { TelemetryReporter } from "./reporter"; @@ -53,6 +54,18 @@ interface EmitOptions extends Partial { error?: unknown; } +/** Per-sink flush outcome. */ +export interface SinkFlushResult { + readonly name: string; + readonly ok: boolean; +} + +/** Structured result of flushing all sinks; `ok` is true only if all flushed. */ +export interface FlushStatus { + readonly ok: boolean; + readonly sinks: readonly SinkFlushResult[]; +} + /** * Emits structured telemetry events to a fan-out of sinks. Sinks are filtered * by `minLevel` and may self-gate. `dispose` flushes are best-effort since @@ -158,19 +171,28 @@ export class TelemetryService implements vscode.Disposable, TelemetryReporter { ); } - public async flush(): Promise { - await Promise.allSettled( - this.sinks.map((sink) => this.#safeCall(sink, "flush")), + public async flush(): Promise { + const sinks = await Promise.all( + this.sinks.map(async (sink) => ({ + name: sink.name, + ok: await this.#flushSink(sink), + })), ); + return { ok: sinks.every((s) => s.ok), sinks }; } public async dispose(): Promise { this.#configWatcher.dispose(); - await Promise.allSettled( - this.sinks.map(async (sink) => { - await this.#safeCall(sink, "flush"); - await this.#safeCall(sink, "dispose"); - }), + await this.flush(); + await Promise.allSettled(this.sinks.map((sink) => this.#disposeSink(sink))); + } + + #flushSink(sink: TelemetrySink): Promise { + // The sink logs its own flush failure with detail; we only need the + // outcome for FlushStatus. + return sink.flush().then( + () => true, + () => false, ); } @@ -364,6 +386,7 @@ export class TelemetryService implements vscode.Disposable, TelemetryReporter { eventName, timestamp: new Date().toISOString(), eventSequence: this.#nextSequence++, + schemaVersion: CURRENT_TELEMETRY_SCHEMA_VERSION, context: this.getContext(), properties: { ...properties }, measurements: { ...measurements }, @@ -393,20 +416,15 @@ export class TelemetryService implements vscode.Disposable, TelemetryReporter { } this.#level = newLevel; if (newLevel === "off") { - await Promise.allSettled( - this.sinks.map((sink) => this.#safeCall(sink, "flush")), - ); + await this.flush(); } } - async #safeCall( - sink: TelemetrySink, - action: "flush" | "dispose", - ): Promise { + async #disposeSink(sink: TelemetrySink): Promise { try { - await sink[action](); + await sink.dispose(); } catch (err) { - this.logger.warn(`Telemetry sink '${sink.name}' ${action} failed`, err); + this.logger.warn(`Telemetry sink '${sink.name}' dispose failed`, err); } } } diff --git a/src/telemetry/sinks/localJsonlSink.ts b/src/telemetry/sinks/localJsonlSink.ts index 66cf639b53..05e29b3697 100644 --- a/src/telemetry/sinks/localJsonlSink.ts +++ b/src/telemetry/sinks/localJsonlSink.ts @@ -36,8 +36,9 @@ interface CurrentFile { * Writes telemetry events as JSON Lines. Each VS Code session writes its * own files (`telemetry-YYYY-MM-DD-{sessionId8}.jsonl` plus `.N.jsonl` size * segments), so concurrent windows cannot race on appends or rotation. - * `write` is sync and never throws; disk I/O happens in `flush` and - * `dispose` and catches errors. Tunables come from `coder.telemetry.local` + * `write` is sync and never throws. Disk I/O happens in `flush`/`dispose`; + * a failed flush rejects so awaited callers (the export) can react, while + * background flushes ignore it. Tunables come from `coder.telemetry.local` * and update reactively. */ export class LocalJsonlSink implements TelemetrySink, vscode.Disposable { @@ -96,7 +97,9 @@ export class LocalJsonlSink implements TelemetrySink, vscode.Disposable { this.#buffer.push(line); this.#enforceBufferLimit(); if (this.#buffer.length >= this.#config.flushBatchSize) { - void this.flush(); + void this.flush().catch(() => { + // Background flush; failures are logged in #doFlush. + }); } } @@ -127,7 +130,9 @@ export class LocalJsonlSink implements TelemetrySink, vscode.Disposable { clearTimeout(this.#flushTimer); this.#flushTimer = null; } - await this.flush(); + await this.flush().catch(() => { + // Failure already logged in #doFlush; dispose is best-effort. + }); } #enforceBufferLimit(): void { @@ -181,7 +186,11 @@ export class LocalJsonlSink implements TelemetrySink, vscode.Disposable { return; } this.#flushTimer = setTimeout(() => { - void this.flush().finally(() => this.#scheduleNextFlush()); + void this.flush() + .catch(() => { + // Background flush; failures are logged in #doFlush. + }) + .finally(() => this.#scheduleNextFlush()); }, this.#config.flushIntervalMs); } @@ -203,6 +212,8 @@ export class LocalJsonlSink implements TelemetrySink, vscode.Disposable { `Telemetry sink '${this.name}' flush failed, ${lines.length} event(s) discarded`, err, ); + // Surface to awaited callers (the export) so they can warn. + throw err; } } diff --git a/src/telemetry/wireFormat.ts b/src/telemetry/wireFormat.ts index 564598cbb5..273a8b2f05 100644 --- a/src/telemetry/wireFormat.ts +++ b/src/telemetry/wireFormat.ts @@ -1,5 +1,14 @@ import { z } from "zod"; +/** + * Wire schema version stamped on every telemetry row. Each row carries its + * own version because rows are self-contained: one row maps to one exported + * record, with no file-level header to anchor a single version to. Bump only + * on a breaking change to the JSONL shape (a renamed, removed, or retyped + * field); additive optional fields do not need a bump. + */ +export const CURRENT_TELEMETRY_SCHEMA_VERSION = 1; + /** Session-stable resource attributes captured once per VS Code session. */ const SessionContextSchema = z.object({ extensionVersion: z.string(), @@ -27,6 +36,8 @@ const TelemetryEventSchema = z.object({ eventName: z.string(), timestamp: z.iso.datetime({ offset: true }), eventSequence: z.number(), + /** Wire schema version of this row. See `CURRENT_TELEMETRY_SCHEMA_VERSION`. */ + schemaVersion: z.number().int().positive(), context: TelemetryContextSchema, properties: z.record(z.string(), z.string()), measurements: z.record(z.string(), z.number()), @@ -80,6 +91,7 @@ export function serializeTelemetryEvent( event_name: event.eventName, timestamp: event.timestamp, event_sequence: event.eventSequence, + schema_version: event.schemaVersion, context: { extension_version: event.context.extensionVersion, machine_id: event.context.machineId, diff --git a/src/util/fs.ts b/src/util/fs.ts index 17b1fd89e4..b44c901364 100644 --- a/src/util/fs.ts +++ b/src/util/fs.ts @@ -48,7 +48,7 @@ export async function renameWithRetry( /** * Generate a temporary file path by appending a suffix with a random component. * The suffix describes the purpose of the temp file (e.g. "temp", "old"). - * Example: tempFilePath("/a/b", "temp") → "/a/b.temp-k7x3f9qw" + * Example: tempFilePath("/a/b", "temp") returns "/a/b.temp-k7x3f9qw" */ export function tempFilePath(basePath: string, suffix: string): string { return `${basePath}.${suffix}-${crypto.randomUUID().substring(0, 8)}`; diff --git a/src/websocket/reconnectingWebSocket.ts b/src/websocket/reconnectingWebSocket.ts index fb9e13074a..6e49e830f5 100644 --- a/src/websocket/reconnectingWebSocket.ts +++ b/src/websocket/reconnectingWebSocket.ts @@ -212,8 +212,9 @@ export class ReconnectingWebSocket< } /** - * Extract the route (pathname + search) from the current socket URL for logging. - * Falls back to the last known route when the socket is closed. + * Route (pathname only) of the current socket, for logging. The query + * string is dropped so connection tokens carried there never reach logs or + * telemetry. Falls back to the last known route when the socket is closed. */ get #route(): string { const socketUrl = this.#currentSocket?.url; @@ -221,7 +222,7 @@ export class ReconnectingWebSocket< return this.#lastRoute; } const url = new URL(socketUrl); - return url.pathname + url.search; + return url.pathname; } public addEventListener( diff --git a/test/mocks/telemetry.ts b/test/mocks/telemetry.ts index f0c46b91f6..24ca08c8d0 100644 --- a/test/mocks/telemetry.ts +++ b/test/mocks/telemetry.ts @@ -7,6 +7,7 @@ import { type TelemetrySink, } from "@/telemetry/event"; import { TelemetryService } from "@/telemetry/service"; +import { CURRENT_TELEMETRY_SCHEMA_VERSION } from "@/telemetry/wireFormat"; import { createMockLogger, MockConfigurationProvider } from "./testHelpers"; @@ -86,6 +87,7 @@ export function createTelemetryEventFactory(): ( eventName: "test.event", timestamp: "2026-05-04T12:00:00.000Z", eventSequence: seq, + schemaVersion: CURRENT_TELEMETRY_SCHEMA_VERSION, context: { extensionVersion: "1.14.5", machineId: "machine-id", diff --git a/test/unit/logging/httpRequestsTelemetry.test.ts b/test/unit/logging/httpRequestsTelemetry.test.ts index d0415c2d15..f39224c93c 100644 --- a/test/unit/logging/httpRequestsTelemetry.test.ts +++ b/test/unit/logging/httpRequestsTelemetry.test.ts @@ -9,10 +9,7 @@ import { vi, } from "vitest"; -import { - HttpRequestsTelemetry, - normalizeHttpRoute, -} from "@/logging/httpRequestsTelemetry"; +import { HttpRequestsTelemetry } from "@/logging/httpRequestsTelemetry"; import { NOOP_TELEMETRY_REPORTER, type TelemetryReporter, @@ -220,39 +217,6 @@ describe("HttpRequestsTelemetry", () => { await advanceOneWindow(); expect(log).not.toHaveBeenCalled(); }); - - it.each([ - ["/api/v2/workspaces/abc-123?foo=bar", "/api/v2/workspaces/{id}"], - [ - "/api/v2/users/danny/workspace/my-workspace?foo=bar", - "/api/v2/users/{name}/workspace/{name}", - ], - ["/api/v2/users/danny/keys/123", "/api/v2/users/{name}/keys/{id}"], - ["/api/v2/tasks/danny/task-123", "/api/v2/tasks/{name}/{id}"], - [ - "/api/v2/organizations/9f0f7f37-dfb7-4f4b-bcb8-c7062c7550fc/templates/base/versions/v1", - "/api/v2/organizations/{id}/templates/{name}/versions/{name}", - ], - [ - "/api/v2/organizations/9f0f7f37-dfb7-4f4b-bcb8-c7062c7550fc/members/danny", - "/api/v2/organizations/{id}/members/{name}", - ], - [ - "/api/v2/workspaceagents/9f0f7f37-dfb7-4f4b-bcb8-c7062c7550fc/logs", - "/api/v2/workspaceagents/{id}/logs", - ], - [ - "/api/v2/workspaces/0196ac60-0cf9-7c6b-ba8e-925c3e83bb9f/builds/42", - "/api/v2/workspaces/{id}/builds/{id}", - ], - // No rule match: route is passed through verbatim. - ["/api/v2/buildinfo", "/api/v2/buildinfo"], - [undefined, ""], - ["", ""], - ["http://%", ""], - ])("normalizes %s", (url, expected) => { - expect(normalizeHttpRoute(url)).toBe(expected); - }); }); async function advanceOneWindow(): Promise { diff --git a/test/unit/logging/routeNormalization.test.ts b/test/unit/logging/routeNormalization.test.ts new file mode 100644 index 0000000000..c438b1b2f0 --- /dev/null +++ b/test/unit/logging/routeNormalization.test.ts @@ -0,0 +1,76 @@ +import { describe, expect, it } from "vitest"; + +import { normalizeRoute } from "@/logging/routeNormalization"; + +describe("normalizeRoute", () => { + it.each<[string | undefined, string]>([ + // Known templates collapse id and name segments. + ["/api/v2/workspaces/abc-123", "/api/v2/workspaces/{id}"], + [ + "/api/v2/users/danny/workspace/my-workspace", + "/api/v2/users/{name}/workspace/{name}", + ], + ["/api/v2/users/danny/keys/123", "/api/v2/users/{name}/keys/{id}"], + ["/api/v2/tasks/danny/task-123", "/api/v2/tasks/{name}/{id}"], + [ + "/api/v2/organizations/9f0f7f37-dfb7-4f4b-bcb8-c7062c7550fc/templates/base/versions/v1", + "/api/v2/organizations/{id}/templates/{name}/versions/{name}", + ], + [ + "/api/v2/organizations/9f0f7f37-dfb7-4f4b-bcb8-c7062c7550fc/members/danny", + "/api/v2/organizations/{id}/members/{name}", + ], + [ + "/api/v2/workspaceagents/9f0f7f37-dfb7-4f4b-bcb8-c7062c7550fc/logs", + "/api/v2/workspaceagents/{id}/logs", + ], + [ + "/api/v2/workspaces/0196ac60-0cf9-7c6b-ba8e-925c3e83bb9f/builds/42", + "/api/v2/workspaces/{id}/builds/{id}", + ], + // Websocket watch routes collapse their id without a dedicated rule. + [ + "/api/v2/workspaceagents/9f0f7f37-dfb7-4f4b-bcb8-c7062c7550fc/watch-metadata-ws", + "/api/v2/workspaceagents/{id}/watch-metadata-ws", + ], + [ + "/api/v2/workspaces/0196ac60-0cf9-7c6b-ba8e-925c3e83bb9f/watch-ws", + "/api/v2/workspaces/{id}/watch-ws", + ], + // Short static routes pass through unchanged. + ["/api/v2/buildinfo", "/api/v2/buildinfo"], + // Query strings and fragments are dropped (they can carry tokens). + ["/api/v2/workspaces/abc-123?foo=bar", "/api/v2/workspaces/{id}"], + [ + "/api/v2/users/danny/workspace/my-workspace?token=secret", + "/api/v2/users/{name}/workspace/{name}", + ], + ["/api/v2/buildinfo?x=1#frag", "/api/v2/buildinfo"], + // Short unknown routes collapse id segments without bucketing. + ["/api/v2/9f0f7f37-dfb7-4f4b-bcb8-c7062c7550fc", "/api/v2/{id}"], + ["/api/v2/12345", "/api/v2/{id}"], + // Deep unknown routes bucket after the resource prefix, whether the + // variable tail is an id or an unrecognizable name. + ["/api/v2/newresource/some-name/details", "/api/v2/newresource/{*}"], + [ + "/api/v2/newresource/9f0f7f37-dfb7-4f4b-bcb8-c7062c7550fc", + "/api/v2/newresource/{*}", + ], + [ + "/api/v2/newresource/9f0f7f37-dfb7-4f4b-bcb8-c7062c7550fc/details", + "/api/v2/newresource/{*}", + ], + // Missing or unparseable input is unknown. + [undefined, ""], + ["", ""], + ["http://%", ""], + ])("normalizes %s", (url, expected) => { + expect(normalizeRoute(url)).toBe(expected); + }); + + it("resolves relative urls against the base url", () => { + expect( + normalizeRoute("api/v2/workspaces/abc-123", "https://coder.example.com/"), + ).toBe("/api/v2/workspaces/{id}"); + }); +}); diff --git a/test/unit/telemetry/export/command.test.ts b/test/unit/telemetry/export/command.test.ts index 69ee2796d2..c660fb22e2 100644 --- a/test/unit/telemetry/export/command.test.ts +++ b/test/unit/telemetry/export/command.test.ts @@ -11,6 +11,8 @@ import { MockProgressReporter, } from "../../../mocks/testHelpers"; +import type { FlushStatus } from "@/telemetry/service"; + // command.ts orchestrates prompts and the pipeline; both are covered in their // own files and mocked here so these tests focus on command.ts: given a choice // and a result count, the right notification fires. @@ -30,6 +32,8 @@ const CHOICE: ExportChoice = { outputPath: OUTPUT_PATH, }; +const OK_FLUSH: FlushStatus = { ok: true, sinks: [] }; + function setup( opts: { choice?: ExportChoice; @@ -57,7 +61,7 @@ function setup( runExportTelemetryCommand( TELEMETRY_DIR, createMockLogger(), - vi.fn(() => Promise.resolve()), + vi.fn(() => Promise.resolve(OK_FLUSH)), context, ), }; @@ -92,6 +96,24 @@ describe("runExportTelemetryCommand", () => { ); }); + it("wires a warning into the runtime for an incomplete flush", async () => { + const { run } = setup(); + let onFlushIncomplete: (() => void) | undefined; + vi.mocked(collectTelemetryExport).mockImplementation( + (_request, runtime) => { + onFlushIncomplete = runtime.onFlushIncomplete; + return Promise.resolve(2); + }, + ); + + await run(); + onFlushIncomplete?.(); + + expect(vscode.window.showWarningMessage).toHaveBeenCalledWith( + expect.stringContaining("could not be flushed"), + ); + }); + describe("successful export", () => { it.each([ [1, "Exported 1 telemetry event to"], diff --git a/test/unit/telemetry/export/pipeline.test.ts b/test/unit/telemetry/export/pipeline.test.ts index f17842064b..bd0abe83fc 100644 --- a/test/unit/telemetry/export/pipeline.test.ts +++ b/test/unit/telemetry/export/pipeline.test.ts @@ -14,6 +14,7 @@ import { createTelemetryEventFactory } from "../../../mocks/telemetry"; import type { TelemetryEvent } from "@/telemetry/event"; import type { TelemetryDateRange } from "@/telemetry/export/range"; import type { ExportWriter } from "@/telemetry/export/writers/types"; +import type { FlushStatus } from "@/telemetry/service"; vi.mock("@/telemetry/export/files", () => ({ listTelemetryFilesForRange: vi.fn(), @@ -28,6 +29,7 @@ const RANGE: TelemetryDateRange = { }; const FILE_PATHS = ["/tmp/telemetry/a.jsonl", "/tmp/telemetry/b.jsonl"]; const OUTPUT_PATH = "/tmp/out.json"; +const OK_FLUSH: FlushStatus = { ok: true, sinks: [] }; function setup( opts: { @@ -35,6 +37,7 @@ function setup( filePaths?: readonly string[]; signal?: AbortSignal; writeCount?: number; + flush?: FlushStatus; } = {}, ) { vi.resetAllMocks(); @@ -49,11 +52,12 @@ function setup( const writer = vi.fn(() => Promise.resolve(opts.writeCount ?? 1), ); - const flushTelemetry = vi.fn(() => Promise.resolve()); + const flushTelemetry = vi.fn(() => Promise.resolve(opts.flush ?? OK_FLUSH)); const runtime: ExportRuntime = { signal: opts.signal ?? new AbortController().signal, flushTelemetry, report: vi.fn(), + onFlushIncomplete: vi.fn(), onCleanupError: vi.fn(), }; const request: ExportRequest = { @@ -82,6 +86,24 @@ describe("collectTelemetryExport", () => { expect(flushOrder).toBeLessThan(listOrder); }); + it("notifies the host when the flush did not fully succeed", async () => { + const { run, runtime } = setup({ + flush: { ok: false, sinks: [{ name: "local-jsonl", ok: false }] }, + }); + + await run(); + + expect(runtime.onFlushIncomplete).toHaveBeenCalled(); + }); + + it("does not notify the host when the flush succeeds", async () => { + const { run, runtime } = setup(); + + await run(); + + expect(runtime.onFlushIncomplete).not.toHaveBeenCalled(); + }); + it("returns 0 and never writes when no files cover the range", async () => { const { run, writer } = setup({ filePaths: [] }); @@ -96,15 +118,17 @@ describe("collectTelemetryExport", () => { await expect(run()).resolves.toBe(5); }); - it("passes the output path, signal, and cleanup handler to the writer", async () => { + it("passes the descriptor, output path, signal, and cleanup handler to the writer", async () => { const { run, runtime, writer } = setup({ writeCount: 2 }); await run(); - expect(writer).toHaveBeenCalledWith(OUTPUT_PATH, expect.anything(), { - signal: runtime.signal, - onCleanupError: runtime.onCleanupError, - }); + expect(writer).toHaveBeenCalledWith( + OUTPUT_PATH, + expect.anything(), + { range: RANGE, sourceFiles: FILE_PATHS.length }, + { signal: runtime.signal, onCleanupError: runtime.onCleanupError }, + ); }); it("removes the empty output file when the writer wrote nothing", async () => { diff --git a/test/unit/telemetry/export/prompts.test.ts b/test/unit/telemetry/export/prompts.test.ts index cf77c29cef..c8f278e50e 100644 --- a/test/unit/telemetry/export/prompts.test.ts +++ b/test/unit/telemetry/export/prompts.test.ts @@ -86,6 +86,15 @@ describe("promptForExport", () => { }, ); + it("rejects a non-file destination with an error and returns undefined", async () => { + answer({ savePath: vscode.Uri.parse("vscode-vfs://host/telemetry.json") }); + + await expect(promptForExport()).resolves.toBeUndefined(); + expect(vscode.window.showErrorMessage).toHaveBeenCalledWith( + expect.stringContaining("local file"), + ); + }); + it("sets ignoreFocusOut on every prompt", async () => { answer({ range: CUSTOM_PICK, diff --git a/test/unit/telemetry/export/writers/index.test.ts b/test/unit/telemetry/export/writers/index.test.ts index 750d592f1b..04bf81f6af 100644 --- a/test/unit/telemetry/export/writers/index.test.ts +++ b/test/unit/telemetry/export/writers/index.test.ts @@ -17,14 +17,23 @@ vi.mock("@/telemetry/export/writers/otlp/writer", () => ({ const { context } = createTelemetryEventFactory()(); const OUTPUT = "/tmp/out"; const EVENTS = asyncIterable([]); +const DESCRIPTOR = { + range: { label: "Last 24 hours", filenamePart: "last-24-hours" }, + sourceFiles: 2, +}; const OPTIONS = { signal: new AbortController().signal, onCleanupError: vi.fn(), }; describe("createExportWriter", () => { - it("uses the JSON writer for the json format", () => { - expect(createExportWriter("json", context)).toBe(writeJsonArrayExport); + it("delegates to the JSON writer, which ignores the descriptor", async () => { + vi.mocked(writeJsonArrayExport).mockResolvedValue(4); + + const writer = createExportWriter("json", context); + + await expect(writer(OUTPUT, EVENTS, DESCRIPTOR, OPTIONS)).resolves.toBe(4); + expect(writeJsonArrayExport).toHaveBeenCalledWith(OUTPUT, EVENTS, OPTIONS); }); it("binds the context and sums signal counts for the otlp format", async () => { @@ -36,11 +45,12 @@ describe("createExportWriter", () => { const writer = createExportWriter("otlp", context); - await expect(writer(OUTPUT, EVENTS, OPTIONS)).resolves.toBe(9); + await expect(writer(OUTPUT, EVENTS, DESCRIPTOR, OPTIONS)).resolves.toBe(9); expect(writeOtlpZipExport).toHaveBeenCalledWith( OUTPUT, EVENTS, context, + DESCRIPTOR, OPTIONS, ); }); diff --git a/test/unit/telemetry/export/writers/otlp/manifest.test.ts b/test/unit/telemetry/export/writers/otlp/manifest.test.ts new file mode 100644 index 0000000000..64f6b4d63f --- /dev/null +++ b/test/unit/telemetry/export/writers/otlp/manifest.test.ts @@ -0,0 +1,70 @@ +import { describe, expect, it } from "vitest"; + +import { + buildManifest, + MANIFEST_SCHEMA_VERSION, + type ManifestInput, +} from "@/telemetry/export/writers/otlp/manifest"; +import { CURRENT_TELEMETRY_SCHEMA_VERSION } from "@/telemetry/wireFormat"; + +import { createTelemetryEventFactory } from "../../../../../mocks/telemetry"; + +const makeEvent = createTelemetryEventFactory(); +const { context } = makeEvent(); + +const INPUT: ManifestInput = { + range: { label: "Last 24 hours", startMs: 0, endMs: 86_400_000 }, + sourceFiles: 2, +}; + +/** `buildManifest` with representative defaults; overrides win. */ +function build(overrides: Partial[0]> = {}) { + return buildManifest({ + format: "otlp-json", + context, + input: INPUT, + sourceEvents: 9, + records: { logs: 5, traces: 3, metrics: 1 }, + exportedAt: "2026-05-04T18:00:00.000Z", + ...overrides, + }); +} + +describe("buildManifest", () => { + it("captures the manifest and telemetry schema versions separately", () => { + const manifest = build(); + + expect(manifest.schemaVersion).toBe(MANIFEST_SCHEMA_VERSION); + expect(manifest.telemetrySchemaVersion).toBe( + CURRENT_TELEMETRY_SCHEMA_VERSION, + ); + }); + + it("threads format, range, counts, and source metadata into the document", () => { + expect(build()).toMatchObject({ + format: "otlp-json", + exportedAt: "2026-05-04T18:00:00.000Z", + extensionVersion: context.extensionVersion, + sourceFiles: 2, + sourceEvents: 9, + records: { logs: 5, traces: 3, metrics: 1 }, + range: { + label: "Last 24 hours", + start: "1970-01-01T00:00:00.000Z", + end: "1970-01-02T00:00:00.000Z", + }, + }); + }); + + it("renders unset range bounds as null", () => { + const manifest = build({ + input: { range: { label: "All time" }, sourceFiles: 0 }, + }); + + expect(manifest.range).toEqual({ + label: "All time", + start: null, + end: null, + }); + }); +}); diff --git a/test/unit/telemetry/export/writers/otlp/records.test.ts b/test/unit/telemetry/export/writers/otlp/records.test.ts index d24cfe34e3..58cdebfa82 100644 --- a/test/unit/telemetry/export/writers/otlp/records.test.ts +++ b/test/unit/telemetry/export/writers/otlp/records.test.ts @@ -133,6 +133,28 @@ describe("logRecord", () => { "coder.event.deployment_url": "https://prev.coder.example.com", }); }); + + it("links span-attached logs to their parent span", () => { + const record = logRecord( + makeEvent({ + eventName: "remote.setup.checkpoint", + traceId: TRACE_ID, + parentEventId: "parent-span-id", + }), + ); + + expect(record).toMatchObject({ + traceId: TRACE_ID, + spanId: "parent-span-id", + }); + }); + + it("omits trace correlation on plain logs", () => { + const record = logRecord(makeEvent()); + + expect(record).not.toHaveProperty("traceId"); + expect(record).not.toHaveProperty("spanId"); + }); }); describe("spanRecord", () => { @@ -251,7 +273,7 @@ describe("metricRecords", () => { { windowSeconds: 60, measurements: [counter("count.2xx", 3)] }, state, ); - // 2026-05-04T12:00:00.000Z (window start = first event time − 60s) in ns: + // 2026-05-04T12:00:00.000Z (window start = first event time minus 60s) in ns: const expectedStart = String( BigInt(Date.parse("2026-05-04T12:00:00.000Z")) * 1_000_000n, ); @@ -268,7 +290,7 @@ describe("metricRecords", () => { it("clamps startTimeUnixNano <= timeUnixNano for events that arrive before the anchor", () => { const state = newCumulativeState(); - // First event lands at T=12:03 with a 60s window → anchor = 12:02. + // First event lands at T=12:03 with a 60s window, so anchor = 12:02. metricRecords( makeEvent({ eventName: "http.requests", @@ -336,7 +358,7 @@ describe("metricRecords", () => { state, ); - // Infinity counter coerces to 0 → suppressed. + // Infinity counter coerces to 0, so it is suppressed. expect(records).toEqual([]); }); diff --git a/test/unit/telemetry/export/writers/otlp/writer.test.ts b/test/unit/telemetry/export/writers/otlp/writer.test.ts index 4160d5935e..b1377bdd81 100644 --- a/test/unit/telemetry/export/writers/otlp/writer.test.ts +++ b/test/unit/telemetry/export/writers/otlp/writer.test.ts @@ -4,7 +4,10 @@ import * as path from "node:path"; import { afterEach, beforeEach, describe, expect, it, vi } from "vitest"; import { ENVELOPES } from "@/telemetry/export/writers/otlp/records"; -import { writeOtlpZipExport } from "@/telemetry/export/writers/otlp/writer"; +import { + type OtlpExportCounts, + writeOtlpZipExport, +} from "@/telemetry/export/writers/otlp/writer"; import { asyncIterable } from "../../../../../mocks/asyncIterable"; import { createTelemetryEventFactory } from "../../../../../mocks/telemetry"; @@ -17,7 +20,11 @@ import { parseEnvelope, } from "./helpers"; -import type { TelemetryEvent } from "@/telemetry/event"; +import type { TelemetryContext, TelemetryEvent } from "@/telemetry/event"; +import type { + ExportDescriptor, + ExportWriteOptions, +} from "@/telemetry/export/writers/types"; vi.mock("node:fs", async () => (await import("memfs")).fs); vi.mock("node:fs/promises", async () => (await import("memfs")).fs.promises); @@ -30,6 +37,16 @@ const OUT = "/exports/telemetry.otlp.zip"; /** Matches what writer.ts passes to fs.mkdtemp on every platform. */ const STAGING_PREFIX = path.join("/tmp", "coder-telemetry-otlp-"); +const DESCRIPTOR: ExportDescriptor = { + range: { + label: "Last 24 hours", + filenamePart: "last-24-hours", + startMs: 0, + endMs: 86_400_000, + }, + sourceFiles: 2, +}; + const makeEvent = createTelemetryEventFactory(); const { context } = makeEvent(); @@ -41,9 +58,20 @@ beforeEach(() => { afterEach(() => vol.reset()); +/** Exports to OUT with the default descriptor; tests override events/options/context. */ +function writeZip( + events: AsyncIterable | readonly TelemetryEvent[], + options: ExportWriteOptions = {}, + ctx: TelemetryContext = context, +): Promise { + const stream = + Symbol.asyncIterator in events ? events : asyncIterable(events); + return writeOtlpZipExport(OUT, stream, ctx, DESCRIPTOR, options); +} + /** Reads the zip and returns the parsed envelope for each signal. */ async function exportAndRead(events: readonly TelemetryEvent[]) { - const counts = await writeOtlpZipExport(OUT, asyncIterable(events), context); + const counts = await writeZip(events); const files = unzipSync(vol.readFileSync(OUT) as Uint8Array); return { counts, @@ -54,24 +82,18 @@ async function exportAndRead(events: readonly TelemetryEvent[]) { } describe("writeOtlpZipExport", () => { - it("packs logs.json, traces.json, and metrics.json into the zip", async () => { - await writeOtlpZipExport(OUT, asyncIterable([makeEvent()]), context); + it("packs the three signal envelopes plus the manifest into the zip", async () => { + await writeZip([makeEvent()]); const files = unzipSync(vol.readFileSync(OUT) as Uint8Array); expect(Object.keys(files).sort()).toEqual( - Object.values(ENVELOPES) - .map((e) => e.file) - .sort(), + [...Object.values(ENVELOPES).map((e) => e.file), "manifest.json"].sort(), ); }); // Golden files capture the full serialized envelope per signal so schema // changes show up as a reviewable JSON diff. Regenerate with `pnpm test ... -u`. it("matches the golden OTLP envelopes for a representative export", async () => { - await writeOtlpZipExport( - OUT, - asyncIterable(Object.values(GOLDEN_EVENTS)), - GOLDEN_CONTEXT, - ); + await writeZip(Object.values(GOLDEN_EVENTS), {}, GOLDEN_CONTEXT); const files = unzipSync(vol.readFileSync(OUT) as Uint8Array); for (const { file } of Object.values(ENVELOPES)) { @@ -111,7 +133,11 @@ describe("writeOtlpZipExport", () => { const { counts, logs, traces, metrics } = await exportAndRead([ makeEvent({ eventName: "log.info" }), makeEvent({ eventName: "log.warn" }), - makeEvent({ eventName: "trace.x", traceId: TRACE_ID }), + makeEvent({ + eventName: "trace.x", + traceId: TRACE_ID, + properties: { result: "success" }, + }), makeEvent({ eventName: "http.requests", measurements: { @@ -133,7 +159,11 @@ describe("writeOtlpZipExport", () => { it("writes identical resource, scope, and schemaUrl into every envelope file", async () => { const { logs, traces, metrics } = await exportAndRead([ makeEvent({ eventName: "log.info" }), - makeEvent({ eventName: "trace.x", traceId: TRACE_ID }), + makeEvent({ + eventName: "trace.x", + traceId: TRACE_ID, + properties: { result: "success" }, + }), makeEvent({ eventName: "http.requests", measurements: { window_seconds: 60, "count.2xx": 1 }, @@ -160,20 +190,12 @@ describe("writeOtlpZipExport", () => { throw new Error("boom"); })(); - await expect(writeOtlpZipExport(OUT, failing, context)).rejects.toThrow( - /boom/, - ); + await expect(writeZip(failing)).rejects.toThrow(/boom/); }); it("wraps per-event mapping failures with the event identity", async () => { await expect( - writeOtlpZipExport( - OUT, - asyncIterable([ - makeEvent({ eventId: "id-bad", timestamp: "not-a-date" }), - ]), - context, - ), + writeZip([makeEvent({ eventId: "id-bad", timestamp: "not-a-date" })]), ).rejects.toThrow( /Failed to export event id-bad .*Invalid telemetry timestamp/, ); @@ -184,7 +206,7 @@ describe("writeOtlpZipExport", () => { const mkdtempSpy = vi.spyOn(fsPromises, "mkdtemp"); try { - await writeOtlpZipExport(OUT, asyncIterable([makeEvent()]), context); + await writeZip([makeEvent()]); expect(vol.readdirSync("/exports")).toEqual(["telemetry.otlp.zip"]); expect(mkdtempSpy).toHaveBeenCalledWith(STAGING_PREFIX); @@ -217,14 +239,9 @@ describe("writeOtlpZipExport", () => { ); try { - const counts = await writeOtlpZipExport( - OUT, - asyncIterable([makeEvent()]), - context, - { - onCleanupError: (err, dir) => cleanupErrors.push({ err, dir }), - }, - ); + const counts = await writeZip([makeEvent()], { + onCleanupError: (err, dir) => cleanupErrors.push({ err, dir }), + }); expect(counts.logs).toBe(1); expect(cleanupErrors).toHaveLength(1); @@ -242,16 +259,11 @@ describe("writeOtlpZipExport", () => { .mockRejectedValueOnce(new Error("EBUSY")); try { - const counts = await writeOtlpZipExport( - OUT, - asyncIterable([makeEvent()]), - context, - { - onCleanupError: () => { - throw new Error("logger blew up"); - }, + const counts = await writeZip([makeEvent()], { + onCleanupError: () => { + throw new Error("logger blew up"); }, - ); + }); expect(counts.logs).toBe(1); } finally { spy.mockRestore(); @@ -267,9 +279,9 @@ describe("writeOtlpZipExport", () => { yield makeEvent(); })(); - await expect( - writeOtlpZipExport(OUT, events, context, { signal: ac.signal }), - ).rejects.toMatchObject({ name: "AbortError" }); + await expect(writeZip(events, { signal: ac.signal })).rejects.toMatchObject( + { name: "AbortError" }, + ); }); it("preserves AbortError name when cancellation fires during zip packing", async () => { @@ -282,9 +294,9 @@ describe("writeOtlpZipExport", () => { ac.abort(); })(); - await expect( - writeOtlpZipExport(OUT, events, context, { signal: ac.signal }), - ).rejects.toMatchObject({ name: "AbortError" }); + await expect(writeZip(events, { signal: ac.signal })).rejects.toMatchObject( + { name: "AbortError" }, + ); }); it("coerces non-Error abort reasons into a named AbortError", async () => { @@ -292,9 +304,68 @@ describe("writeOtlpZipExport", () => { ac.abort("user cancelled"); await expect( - writeOtlpZipExport(OUT, asyncIterable([makeEvent()]), context, { - signal: ac.signal, - }), + writeZip([makeEvent()], { signal: ac.signal }), ).rejects.toMatchObject({ name: "AbortError", message: "Aborted" }); }); }); + +describe("writeOtlpZipExport manifest", () => { + interface Manifest { + schemaVersion: number; + telemetrySchemaVersion: number; + format: string; + sourceFiles: number; + sourceEvents: number; + records: { logs: number; traces: number; metrics: number }; + range: { label: string; start: string | null; end: string | null }; + } + + async function exportWithManifest( + events: readonly TelemetryEvent[], + ): Promise<{ files: Record; manifest: Manifest }> { + await writeZip(events); + const files = unzipSync(vol.readFileSync(OUT) as Uint8Array); + const manifest = JSON.parse( + new TextDecoder().decode(files["manifest.json"]), + ) as Manifest; + return { files, manifest }; + } + + it("reports record counts, source totals, and range in the manifest", async () => { + const { manifest } = await exportWithManifest([ + makeEvent({ eventName: "log.info" }), + makeEvent({ + eventName: "trace.x", + traceId: TRACE_ID, + properties: { result: "success" }, + }), + makeEvent({ + eventName: "http.requests", + measurements: { + window_seconds: 60, + "count.2xx": 1, + "duration.p95_ms": 5, + }, + }), + ]); + + expect(manifest).toMatchObject({ + format: "otlp-json", + sourceFiles: 2, + sourceEvents: 3, + records: { logs: 1, traces: 1, metrics: 2 }, + range: { + label: "Last 24 hours", + start: "1970-01-01T00:00:00.000Z", + end: "1970-01-02T00:00:00.000Z", + }, + }); + }); + + it("stamps both the manifest and telemetry schema versions", async () => { + const { manifest } = await exportWithManifest([makeEvent()]); + + expect(manifest.schemaVersion).toBeGreaterThanOrEqual(1); + expect(manifest.telemetrySchemaVersion).toBeGreaterThanOrEqual(1); + }); +}); diff --git a/test/unit/telemetry/service.test.ts b/test/unit/telemetry/service.test.ts index 669b118b64..ca5264b56e 100644 --- a/test/unit/telemetry/service.test.ts +++ b/test/unit/telemetry/service.test.ts @@ -603,7 +603,7 @@ describe("TelemetryService", () => { }); describe("reactive level", () => { - it("local → off flushes sinks and suppresses; off → local resumes", () => { + it("local to off flushes sinks and suppresses; off to local resumes", () => { h.service.log("first"); expect(h.sink.events).toHaveLength(1); @@ -618,7 +618,7 @@ describe("TelemetryService", () => { expect(h.sink.events).toHaveLength(2); }); - it("a mid-trace level → off does not orphan child phases or drop the parent", async () => { + it("a mid-trace level to off does not orphan child phases or drop the parent", async () => { let toggled = false; await h.service.trace("op", async (span) => { await span.phase("p1", () => Promise.resolve()); @@ -785,17 +785,24 @@ describe("TelemetryService", () => { }); describe("flush", () => { - it("flushes every sink", async () => { + it("flushes every sink and reports success", async () => { const second = new TestSink("second"); const service = makeService([h.sink, second]); - await service.flush(); + const status = await service.flush(); expect(h.sink.flush).toHaveBeenCalledTimes(1); expect(second.flush).toHaveBeenCalledTimes(1); + expect(status).toEqual({ + ok: true, + sinks: [ + { name: h.sink.name, ok: true }, + { name: "second", ok: true }, + ], + }); }); - it("resolves even when a sink rejects", async () => { + it("reports per-sink failure without rejecting", async () => { const bad: TelemetrySink = { name: "bad", minLevel: "local", @@ -806,8 +813,14 @@ describe("TelemetryService", () => { const good = new TestSink("good"); const service = makeService([bad, good]); - await expect(service.flush()).resolves.toBeUndefined(); + const status = await service.flush(); + expect(good.flush).toHaveBeenCalled(); + expect(status.ok).toBe(false); + expect(status.sinks).toEqual([ + { name: "bad", ok: false }, + { name: "good", ok: true }, + ]); }); it("does not dispose sinks", async () => { diff --git a/test/unit/telemetry/sinks/localJsonlSink.test.ts b/test/unit/telemetry/sinks/localJsonlSink.test.ts index 7a2abb331b..eb037daa3e 100644 --- a/test/unit/telemetry/sinks/localJsonlSink.test.ts +++ b/test/unit/telemetry/sinks/localJsonlSink.test.ts @@ -135,7 +135,7 @@ describe("LocalJsonlSink", () => { vi.spyOn(fsPromises, "appendFile").mockRejectedValueOnce(new Error("boom")); overflowBuffer(); - await sink.flush(); + await expect(sink.flush()).rejects.toThrow("boom"); overflowBuffer(); await sink.flush(); @@ -309,18 +309,33 @@ describe("LocalJsonlSink", () => { }); }); - it("does not throw when fs.appendFile rejects, and recovers on the next flush", async () => { + it("rejects when fs.appendFile fails, and recovers on the next flush", async () => { const { sink, makeEvent } = setup(); vi.spyOn(fsPromises, "appendFile").mockRejectedValueOnce(new Error("boom")); sink.write(makeEvent()); - await expect(sink.flush()).resolves.toBeUndefined(); + await expect(sink.flush()).rejects.toThrow("boom"); sink.write(makeEvent()); await sink.flush(); expect(readJsonl(todaysFile())).toHaveLength(1); }); + it("drops the oldest events once the buffer overflows", async () => { + const { sink, makeEvent } = setup({ + bufferLimit: 10, + flushBatchSize: 10_000, + }); + + for (let i = 0; i < 13; i++) { + sink.write(makeEvent()); + } + await sink.flush(); + + // 13 written, 3 oldest dropped to honor bufferLimit. + expect(readJsonl(todaysFile())).toHaveLength(10); + }); + it("two sinks with different sessions write to disjoint files", async () => { const a = setup(); const b = setup({}, "ffeeddcc-1111-2222-3333-444444444444"); diff --git a/test/unit/telemetry/wireFormat.test.ts b/test/unit/telemetry/wireFormat.test.ts index 98cdc1300c..2db2818995 100644 --- a/test/unit/telemetry/wireFormat.test.ts +++ b/test/unit/telemetry/wireFormat.test.ts @@ -1,6 +1,7 @@ import { describe, expect, it } from "vitest"; import { + CURRENT_TELEMETRY_SCHEMA_VERSION, parseTelemetryEventLine, serializeTelemetryEvent, TelemetryFileParseError, @@ -51,6 +52,12 @@ describe("serializeTelemetryEvent", () => { expect(wire.properties).toEqual({ user_id: "alice", camelCase: "kept" }); expect(wire.measurements).toEqual({ duration_ms: 42 }); }); + + it("stamps the current schema version on every row", () => { + const wire = serializeTelemetryEvent(makeEvent()); + + expect(wire.schema_version).toBe(CURRENT_TELEMETRY_SCHEMA_VERSION); + }); }); describe("parseTelemetryEventLine", () => { @@ -102,6 +109,15 @@ describe("parseTelemetryEventLine", () => { ).toThrow(TelemetryFileParseError); }); + it("rejects rows without a schema version", () => { + const wire = serializeTelemetryEvent(makeEvent()); + delete wire.schema_version; + + expect(() => + parseTelemetryEventLine(JSON.stringify(wire), "", 1), + ).toThrow(TelemetryFileParseError); + }); + it("preserves arbitrary keys in properties and measurements", () => { const event: TelemetryEvent = makeEvent({ properties: { user_id: "alice", camelCase: "kept" },