Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
81 changes: 64 additions & 17 deletions src/vs/platform/telemetry/common/telemetryUtils.ts
Original file line number Diff line number Diff line change
Expand Up @@ -356,9 +356,45 @@ function anonymizeFilePaths(stack: string, cleanupPatterns: RegExp[]): string {
return updatedStack;
}

const userDataRegexes = [
{ label: 'URL', regex: /[a-zA-Z][a-zA-Z0-9+.-]*:\/\/[^\s]*/ },
{ label: 'Google API Key', regex: /AIza[A-Za-z0-9_\\\-]{35}/ },
{ label: 'JWT', regex: /eyJ[0eXAiOiJKV1Qi|hbGci|a-zA-Z0-9\-_]+\.[a-zA-Z0-9\-_]+\.[a-zA-Z0-9\-_]+/ },
{ label: 'Slack Token', regex: /xox[pbar]\-[A-Za-z0-9]/ },
{ label: 'GitHub Token', regex: /(gh[psuro]_[a-zA-Z0-9]{36}|github_pat_[a-zA-Z0-9]{22}_[a-zA-Z0-9]{59})/ },
{ label: 'Generic Secret', regex: /(key|token|sig|secret|signature|password|passwd|pwd|android:value)[^a-zA-Z0-9]/i },
{ label: 'CLI Credentials', regex: /((login|psexec|(certutil|psexec)\.exe).{1,50}(\s-u(ser(name)?)?\s+.{3,100})?\s-(admin|user|vm|root)?p(ass(word)?)?\s+["']?[^$\-\/\s]|(^|[\s\r\n\\])net(\.exe)?.{1,5}(user\s+|share\s+\/user:| user -? secrets ? set) \s + [^ $\s \/])/ },
{ label: 'Microsoft Entra ID', regex: /eyJ(?:0eXAiOiJKV1Qi|hbGci|[a-zA-Z0-9\-_]+\.[a-zA-Z0-9\-_]+\.)/ },
{ label: 'Email', regex: /[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}/ }
];

/**
* Redacts a value if it contains commonly leaked PII.
* @param value The value returned (as-is) when no PII is detected
* @param probe The string actually matched against the PII heuristics. Defaults
* to `value`; callers may pass a value that includes a trailing delimiter (e.g. a
* newline) so that heuristics relying on a non-alphanumeric boundary match the
* same way they would against the original whole string.
* @returns A `<REDACTED: ...>` marker if the probe matched, otherwise `value`
*/
Comment thread
bryanchen-d marked this conversation as resolved.
function redactIfPossibleUserInfo(value: string, probe: string = value): string {
for (const secretRegex of userDataRegexes) {
if (secretRegex.regex.test(probe)) {
return `<REDACTED: ${secretRegex.label}>`;
}
}
return value;
}

/**
* Attempts to remove commonly leaked PII
* @param property The property which will be removed if it contains user data
* Attempts to remove commonly leaked PII.
*
* When a match is found the check is applied per line so that a single suspicious
* frame (e.g. a stack frame containing a function name such as `getStorageKey`
* which matches the broad `Generic Secret` heuristic) only redacts that line —
* replacing it with a `<REDACTED: ...>` marker — instead of wiping the entire
* multi-line value such as a whole callstack.
* @param property The property whose offending lines will be replaced with a redaction marker if they contain user data
* @returns The new value for the property
*/
function removePropertiesWithPossibleUserInfo(property: string): string {
Expand All @@ -367,26 +403,37 @@ function removePropertiesWithPossibleUserInfo(property: string): string {
return property;
}

const userDataRegexes = [
{ label: 'URL', regex: /[a-zA-Z][a-zA-Z0-9+.-]*:\/\/[^\s]*/ },
{ label: 'Google API Key', regex: /AIza[A-Za-z0-9_\\\-]{35}/ },
{ label: 'JWT', regex: /eyJ[0eXAiOiJKV1Qi|hbGci|a-zA-Z0-9\-_]+\.[a-zA-Z0-9\-_]+\.[a-zA-Z0-9\-_]+/ },
{ label: 'Slack Token', regex: /xox[pbar]\-[A-Za-z0-9]/ },
{ label: 'GitHub Token', regex: /(gh[psuro]_[a-zA-Z0-9]{36}|github_pat_[a-zA-Z0-9]{22}_[a-zA-Z0-9]{59})/ },
{ label: 'Generic Secret', regex: /(key|token|sig|secret|signature|password|passwd|pwd|android:value)[^a-zA-Z0-9]/i },
{ label: 'CLI Credentials', regex: /((login|psexec|(certutil|psexec)\.exe).{1,50}(\s-u(ser(name)?)?\s+.{3,100})?\s-(admin|user|vm|root)?p(ass(word)?)?\s+["']?[^$\-\/\s]|(^|[\s\r\n\\])net(\.exe)?.{1,5}(user\s+|share\s+\/user:| user -? secrets ? set) \s + [^ $\s \/])/ },
{ label: 'Microsoft Entra ID', regex: /eyJ(?:0eXAiOiJKV1Qi|hbGci|[a-zA-Z0-9\-_]+\.[a-zA-Z0-9\-_]+\.)/ },
{ label: 'Email', regex: /[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}/ }
];

// Check for common user data in the telemetry events
// Fast path: if nothing matches we return the value untouched without
// allocating. This keeps the common (no-PII) case as cheap as the previous
// implementation and avoids splitting potentially large callstacks.
let hasMatch = false;
for (const secretRegex of userDataRegexes) {
if (secretRegex.regex.test(property)) {
return `<REDACTED: ${secretRegex.label}>`;
hasMatch = true;
break;
}
}
if (!hasMatch) {
return property;
}

// Single line values keep the original behavior of redacting the whole value.
if (!property.includes('\n')) {
return redactIfPossibleUserInfo(property);
}

return property;
// Multi-line values (e.g. callstacks) are redacted line-by-line so we only
// drop the offending lines and preserve the rest of the information. The
// newline delimiter stripped by `split` is re-appended (for every line but
// the last) when matching so heuristics that rely on a trailing
// non-alphanumeric boundary behave identically to the previous whole-string
// check and don't under-redact the last token of a line.
const lines = property.split('\n');
for (let i = 0; i < lines.length; i++) {
const probe = i < lines.length - 1 ? lines[i] + '\n' : lines[i];
lines[i] = redactIfPossibleUserInfo(lines[i], probe);
}
return lines.join('\n');
}


Expand Down
78 changes: 78 additions & 0 deletions src/vs/platform/telemetry/test/browser/telemetryService.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -415,6 +415,84 @@ suite('TelemetryService', () => {
}
}));

test('Unexpected Error Telemetry redacts only offending frames and preserves the rest of the callstack', sinonTestFn(function (this: any) {
const origErrorHandler = Errors.errorHandler.getUnexpectedErrorHandler();
Errors.setUnexpectedErrorHandler(() => { });
try {
const testAppender = new TestTelemetryAppender();
const service = new TestErrorTelemetryService({ appenders: [testAppender] });
const errorTelemetry = new ErrorTelemetry(service);

// A frame whose function name matches the broad `Generic Secret` heuristic
// (`getStorageKey` contains `key(`) previously caused the entire callstack
// to be redacted. See https://github.com/microsoft/vscode/issues/301200.
const stack = [
'Error: Something failed',
' at StorageService.getStorageKey (out/vs/platform/storage/storage.js:1:200)',
' at Foo.run (out/vs/workbench/foo.js:3:40)',
' at Bar.baz (out/vs/workbench/bar.js:5:60)',
];
Comment thread
bryanchen-d marked this conversation as resolved.

const error: any = new Error('Something failed');
error.stack = stack.join('\n');
Errors.onUnexpectedError(error);
this.clock.tick(ErrorTelemetry.ERROR_FLUSH_TIMEOUT);

assert.strictEqual(testAppender.getEventsCount(), 1);
const cs: string = testAppender.events[0].data.callstack;
// The whole stack must not collapse into a single redaction marker.
assert.notStrictEqual(cs, '<REDACTED: Generic Secret>', 'Entire callstack should not be redacted');
assert.strictEqual(cs.split('\n').length, stack.length, 'All frames should be preserved');
// Only the offending frame is redacted, the others remain intact.
assert.notStrictEqual(cs.indexOf('Foo.run'), -1, 'Non-offending frames should be preserved');
assert.notStrictEqual(cs.indexOf('Bar.baz'), -1, 'Non-offending frames should be preserved');
assert.strictEqual(cs.indexOf('getStorageKey'), -1, 'Offending frame should be redacted');

errorTelemetry.dispose();
service.dispose();
}
finally {
Errors.setUnexpectedErrorHandler(origErrorHandler);
}
}));

test('Unexpected Error Telemetry still redacts a frame whose trailing token relies on the newline delimiter', sinonTestFn(function (this: any) {
const origErrorHandler = Errors.errorHandler.getUnexpectedErrorHandler();
Errors.setUnexpectedErrorHandler(() => { });
try {
const testAppender = new TestTelemetryAppender();
const service = new TestErrorTelemetryService({ appenders: [testAppender] });
const errorTelemetry = new ErrorTelemetry(service);

// `getApiKey` ends the line, so the `Generic Secret` heuristic only
// matches because of the following newline. Per-line redaction must
// re-append that delimiter so this frame is still redacted, matching
// the previous whole-string behavior.
const stack = [
'Error: boom',
' at Service.getApiKey',
' at Foo.run (out/vs/workbench/foo.js:3:40)',
];

const error: any = new Error('boom');
error.stack = stack.join('\n');
Errors.onUnexpectedError(error);
this.clock.tick(ErrorTelemetry.ERROR_FLUSH_TIMEOUT);

assert.strictEqual(testAppender.getEventsCount(), 1);
const cs: string = testAppender.events[0].data.callstack;
assert.strictEqual(cs.indexOf('getApiKey'), -1, 'Trailing-token frame should still be redacted');
assert.notStrictEqual(cs.indexOf('Foo.run'), -1, 'Other frames should be preserved');
assert.strictEqual(cs.split('\n').length, stack.length, 'All frames should be preserved');

errorTelemetry.dispose();
service.dispose();
}
finally {
Errors.setUnexpectedErrorHandler(origErrorHandler);
}
}));

test('Uncaught Error Telemetry removes PII', sinonTestFn(function (this: any) {
const errorStub = sinon.stub();
mainWindow.onerror = errorStub;
Expand Down
Loading