From 928bd911f38ff096101012ce8076f5220388ef07 Mon Sep 17 00:00:00 2001 From: Theodore Li Date: Wed, 3 Jun 2026 00:08:42 -0700 Subject: [PATCH 01/11] fix(background): recategorize user/recovery failures as errors, not trigger faults (#4860) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * fix(webhook): don't fault trigger run on user/workflow execution errors Webhook-triggered executions re-threw every error, so trigger.dev marked the run failed and fired #eng-errors alerts. The vast majority of these are user-caused workflow failures (missing required fields, invalid field references, bad URLs, provider 4xx, expired models, low credit) that are already recorded in the execution logs. Distinguish fault vs error in executeWebhookJobInternal: when the failure was finalized by core (the workflow ran and its failure is logged), complete the run with { success: false } instead of throwing. Errors that were not finalized came from the webhook pipeline itself and still re-throw to fault the run. Await waitForPostExecution first so the finalized flag is reliable. The error is still recorded on the run's OTel span via recordException (no ERROR status, so the run isn't faulted) and remains in the execution logs, so these stay investigable in Tempo/Loki without false alerts. Co-Authored-By: Claude Opus 4.8 (1M context) * fix(schedule): don't fault trigger run on error-recovery failures The schedule task already treats workflow-execution failures as recorded errors rather than trigger faults, but the outermost catch's own recovery code (the infra-retry and releaseClaim calls) was unguarded. A secondary DB blip while releasing the claim re-threw and escaped run(), faulting the trigger.dev run and firing an alert — a double-fault during cleanup. Wrap the recovery path in a try/catch: log and record the exception on the span without re-throwing. The claim expires on its TTL and the next tick re-claims the schedule, so swallowing the cleanup failure is safe. Co-Authored-By: Claude Opus 4.8 (1M context) * test(webhook): assert waitForPostExecution runs on the non-finalized path Guards the race fix on the infra-error path so a future refactor can't silently drop the await. Addresses Greptile review feedback. Co-Authored-By: Claude Opus 4.8 (1M context) --------- Co-authored-by: Claude Opus 4.8 (1M context) --- apps/sim/background/schedule-execution.ts | 31 +++- apps/sim/background/webhook-execution.test.ts | 158 +++++++++++++++++- apps/sim/background/webhook-execution.ts | 22 ++- 3 files changed, 199 insertions(+), 12 deletions(-) diff --git a/apps/sim/background/schedule-execution.ts b/apps/sim/background/schedule-execution.ts index b90886d7f71..4c6b1a9cba8 100644 --- a/apps/sim/background/schedule-execution.ts +++ b/apps/sim/background/schedule-execution.ts @@ -1,3 +1,4 @@ +import { trace } from '@opentelemetry/api' import { db, jobExecutionLogs, @@ -943,16 +944,28 @@ export async function executeScheduleJob(payload: ScheduleExecutionPayload) { ) } } catch (error: unknown) { - if (isRetryableInfrastructureError(error)) { - await retryScheduleAfterInfraFailure({ payload, requestId, claimedAt, error }) - return - } + try { + if (isRetryableInfrastructureError(error)) { + await retryScheduleAfterInfraFailure({ payload, requestId, claimedAt, error }) + return + } - logger.error(`[${requestId}] Error processing schedule ${payload.scheduleId}`, error) - await releaseClaim( - now, - `Failed to release schedule ${payload.scheduleId} after unhandled error` - ) + logger.error(`[${requestId}] Error processing schedule ${payload.scheduleId}`, error) + await releaseClaim( + now, + `Failed to release schedule ${payload.scheduleId} after unhandled error` + ) + } catch (recoveryError: unknown) { + // A secondary failure during error recovery (e.g. a transient DB blip while + // releasing the claim or scheduling an infra retry) must not fault the run. The + // claim expires on its TTL and the next tick re-claims the schedule. Record the + // exception on the span so it stays visible in traces without faulting the run. + logger.error( + `[${requestId}] Failed to recover schedule ${payload.scheduleId} after error`, + recoveryError + ) + trace.getActiveSpan()?.recordException(toError(recoveryError)) + } } }) } diff --git a/apps/sim/background/webhook-execution.test.ts b/apps/sim/background/webhook-execution.test.ts index 620c073ac0d..7a75dc3e4f9 100644 --- a/apps/sim/background/webhook-execution.test.ts +++ b/apps/sim/background/webhook-execution.test.ts @@ -2,17 +2,110 @@ * @vitest-environment node */ +import { + dbChainMock, + dbChainMockFns, + executionPreprocessingMock, + executionPreprocessingMockFns, + loggingSessionMock, + loggingSessionMockFns, +} from '@sim/testing' import { beforeEach, describe, expect, it, vi } from 'vitest' -const { mockResolveWebhookRecordProviderConfig } = vi.hoisted(() => ({ +const { + mockResolveWebhookRecordProviderConfig, + mockExecuteWorkflowCore, + mockWasExecutionFinalizedByCore, + mockRecordException, + mockGetActiveSpan, +} = vi.hoisted(() => ({ mockResolveWebhookRecordProviderConfig: vi.fn(), + mockExecuteWorkflowCore: vi.fn(), + mockWasExecutionFinalizedByCore: vi.fn(), + mockRecordException: vi.fn(), + mockGetActiveSpan: vi.fn(), })) +vi.mock('@opentelemetry/api', () => ({ + trace: { getActiveSpan: mockGetActiveSpan }, +})) + +vi.mock('@sim/db', () => dbChainMock) +vi.mock('@/lib/execution/preprocessing', () => executionPreprocessingMock) +vi.mock('@/lib/logs/execution/logging-session', () => loggingSessionMock) + vi.mock('@/lib/webhooks/env-resolver', () => ({ resolveWebhookRecordProviderConfig: mockResolveWebhookRecordProviderConfig, })) -import { resolveWebhookExecutionProviderConfig } from './webhook-execution' +vi.mock('@/lib/workflows/executor/execution-core', () => ({ + executeWorkflowCore: mockExecuteWorkflowCore, + wasExecutionFinalizedByCore: mockWasExecutionFinalizedByCore, +})) + +vi.mock('@/lib/core/idempotency', () => ({ + IdempotencyService: { createWebhookIdempotencyKey: vi.fn(() => 'idempotency-key') }, + webhookIdempotency: { + executeWithIdempotency: vi.fn( + (_provider: string, _key: string, operation: () => Promise) => operation() + ), + }, +})) + +vi.mock('@/lib/workflows/persistence/utils', () => ({ + loadDeployedWorkflowState: vi.fn(async () => ({ + blocks: {}, + edges: [], + loops: {}, + parallels: {}, + deploymentVersionId: 'deployment-1', + })), +})) + +vi.mock('@/lib/webhooks/providers', () => ({ + getProviderHandler: vi.fn(() => ({})), +})) + +vi.mock('@/lib/logs/execution/trace-spans/trace-spans', () => ({ + buildTraceSpans: vi.fn(() => ({ traceSpans: [] })), +})) + +vi.mock('@/lib/core/execution-limits', () => ({ + createTimeoutAbortController: vi.fn(() => ({ + signal: new AbortController().signal, + cleanup: vi.fn(), + isTimedOut: () => false, + timeoutMs: 120_000, + })), + getTimeoutErrorMessage: vi.fn(() => 'timed out'), +})) + +vi.mock('@/lib/workflows/executor/pause-persistence', () => ({ + handlePostExecutionPauseState: vi.fn(), +})) + +vi.mock('@/lib/webhooks/attachment-processor', () => ({ + WebhookAttachmentProcessor: class {}, +})) + +vi.mock('@/app/api/auth/oauth/utils', () => ({ + resolveOAuthAccountId: vi.fn(), +})) + +vi.mock('@/executor/execution/snapshot', () => ({ + ExecutionSnapshot: class {}, +})) + +vi.mock('@/tools/safe-assign', () => ({ safeAssign: vi.fn() })) + +vi.mock('@/blocks', () => ({ getBlock: vi.fn(() => null) })) + +vi.mock('@/triggers', () => ({ + getTrigger: vi.fn(), + isTriggerValid: vi.fn(() => false), +})) + +import { executeWebhookJob, resolveWebhookExecutionProviderConfig } from './webhook-execution' describe('resolveWebhookExecutionProviderConfig', () => { beforeEach(() => { @@ -66,3 +159,64 @@ describe('resolveWebhookExecutionProviderConfig', () => { ) }) }) + +describe('executeWebhookJob fault vs error handling', () => { + const payload = { + webhookId: 'webhook-1', + workflowId: 'workflow-1', + userId: 'user-1', + executionId: 'execution-1', + requestId: 'request-1', + provider: 'gmail', + body: { message: 'hello' }, + headers: {}, + path: '/webhook', + workspaceId: 'workspace-1', + } + + beforeEach(() => { + vi.clearAllMocks() + executionPreprocessingMockFns.mockPreprocessExecution.mockResolvedValue({ + success: true, + workflowRecord: { workspaceId: 'workspace-1', userId: 'user-1', variables: {} }, + executionTimeout: { async: 120_000 }, + }) + mockResolveWebhookRecordProviderConfig.mockImplementation(async (record) => record) + dbChainMockFns.limit.mockResolvedValue([{ id: 'webhook-1' }]) + mockGetActiveSpan.mockReturnValue({ recordException: mockRecordException }) + }) + + it('completes the run (does not throw) when the failure was finalized by core', async () => { + mockExecuteWorkflowCore.mockRejectedValue( + new Error('Gmail 2 is missing required fields: Label') + ) + mockWasExecutionFinalizedByCore.mockReturnValue(true) + + const result = await executeWebhookJob(payload) + + expect(result).toMatchObject({ + success: false, + workflowId: 'workflow-1', + executionId: 'execution-1', + provider: 'gmail', + }) + expect(loggingSessionMockFns.mockWaitForPostExecution).toHaveBeenCalled() + // User/workflow errors are already recorded by core — the catch must not re-log them. + expect(loggingSessionMockFns.mockSafeCompleteWithError).not.toHaveBeenCalled() + // The error is still recorded on the run span so it stays visible in traces. + expect(mockRecordException).toHaveBeenCalledWith( + expect.objectContaining({ message: 'Gmail 2 is missing required fields: Label' }) + ) + }) + + it('faults the run (re-throws) when the failure was not finalized by core', async () => { + mockExecuteWorkflowCore.mockRejectedValue(new Error('Workflow state not found')) + mockWasExecutionFinalizedByCore.mockReturnValue(false) + + await expect(executeWebhookJob(payload)).rejects.toThrow('Workflow state not found') + // waitForPostExecution must run on every path so the finalized-by-core signal is always reliable. + expect(loggingSessionMockFns.mockWaitForPostExecution).toHaveBeenCalled() + // Pipeline/infra errors are recorded here before re-throwing to fault the trigger.dev run. + expect(loggingSessionMockFns.mockSafeCompleteWithError).toHaveBeenCalled() + }) +}) diff --git a/apps/sim/background/webhook-execution.ts b/apps/sim/background/webhook-execution.ts index 1753813d849..41048f9208b 100644 --- a/apps/sim/background/webhook-execution.ts +++ b/apps/sim/background/webhook-execution.ts @@ -1,3 +1,4 @@ +import { trace } from '@opentelemetry/api' import { db } from '@sim/db' import { account, webhook } from '@sim/db/schema' import { createLogger, runWithRequestContext } from '@sim/logger' @@ -616,8 +617,27 @@ async function executeWebhookJobInternal( provider: payload.provider, }) + // The finalized flag is set inside a fire-and-forget post-execution promise; await it so the + // signal is reliable and the failure is fully persisted before we decide fault vs error. + await loggingSession.waitForPostExecution() + + // A failure inside workflow execution (block error, provider 4xx, missing required field, etc.) + // is finalized by core and already recorded in the execution logs. That is a user/workflow error, + // not a trigger.dev job fault — complete the run normally so we don't fire a false alert. Errors + // that were not finalized came from the webhook pipeline itself, so we re-throw to fault below. if (wasExecutionFinalizedByCore(error, executionId)) { - throw error + // Record the exception on the run span so it stays visible in traces without + // marking the span as ERROR — that status is what faults the trigger.dev run. + trace.getActiveSpan()?.recordException(toError(error)) + + return { + success: false, + workflowId: payload.workflowId, + executionId, + output: hasExecutionResult(error) ? error.executionResult.output : {}, + executedAt: new Date().toISOString(), + provider: payload.provider, + } } try { From 1933fc4f11572bd10b69781f65f15fc5a00cfb8b Mon Sep 17 00:00:00 2001 From: Vikhyath Mondreti Date: Wed, 3 Jun 2026 10:21:47 -0700 Subject: [PATCH 02/11] fix(env): schema treatment of empty string (#4862) --- apps/realtime/src/env.ts | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/apps/realtime/src/env.ts b/apps/realtime/src/env.ts index 083126a60d7..40f5abd898f 100644 --- a/apps/realtime/src/env.ts +++ b/apps/realtime/src/env.ts @@ -3,7 +3,10 @@ import { z } from 'zod' const EnvSchema = z.object({ NODE_ENV: z.enum(['development', 'test', 'production']).default('development'), DATABASE_URL: z.string().url(), - REDIS_URL: z.string().url().optional(), + REDIS_URL: z.preprocess( + (value) => (typeof value === 'string' && value.trim() === '' ? undefined : value), + z.string().url().optional() + ), BETTER_AUTH_URL: z.string().url(), BETTER_AUTH_SECRET: z.string().min(32), INTERNAL_API_SECRET: z.string().min(32), From 648a5a117df01917c6675be2161db3f54e3df81b Mon Sep 17 00:00:00 2001 From: Waleed Date: Wed, 3 Jun 2026 11:37:29 -0700 Subject: [PATCH 03/11] feat(storage): support S3-compatible endpoints (R2, MinIO, B2) for file storage (#4865) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * feat(storage): support S3-compatible endpoints (R2, MinIO, B2) for file storage Add S3_ENDPOINT and S3_FORCE_PATH_STYLE env vars, wired into the shared upload S3 client so Cloudflare R2, MinIO, Backblaze B2, and other S3-compatible stores work for self-hosted file storage. The endpoint is trusted operator config (no SSRF/HTTPS gate). Makes the multipart Location fallback endpoint-aware, extends the S3 client unit tests, and documents the new vars in Helm values, .env.example, and the English self-hosting docs (incl. browser-reachability + CORS guidance). * docs(storage): add RustFS as an S3-compatible provider example * fix(storage): address review feedback and fix env mock for CI - Add envBoolean to the shared env test mock (createEnvMock) so config.ts's forcePathStyle coercion resolves — fixes failing knowledge/utils.test.ts - Declare S3_FORCE_PATH_STYLE as z.string() (every other env var's pattern); it's coerced via envBoolean at the consumption site, avoiding a boolean type that never matches the string process.env value - Log path-style from S3_CONFIG.forcePathStyle (envBoolean) instead of a separate isTruthy call, so the startup log can't disagree with the client - Make buildObjectFallbackUrl honor forcePathStyle: virtual-hosted-style URL (bucket as subdomain) for R2, path-style only when forcePathStyle is set * docs(storage): add backlinks to S3-compatible providers (R2, MinIO, Ceph, B2, RustFS) and backends --- .../en/self-hosting/environment-variables.mdx | 13 + .../content/docs/en/self-hosting/meta.json | 1 + .../docs/en/self-hosting/object-storage.mdx | 289 ++++++++++++++++++ apps/sim/.env.example | 15 + apps/sim/lib/core/config/env.ts | 4 +- apps/sim/lib/uploads/config.ts | 11 +- apps/sim/lib/uploads/core/setup.server.ts | 13 +- .../lib/uploads/providers/s3/client.test.ts | 43 ++- apps/sim/lib/uploads/providers/s3/client.ts | 25 +- helm/sim/examples/values-aws.yaml | 3 + helm/sim/values.yaml | 2 + packages/testing/src/mocks/env.mock.ts | 8 + 12 files changed, 418 insertions(+), 9 deletions(-) create mode 100644 apps/docs/content/docs/en/self-hosting/object-storage.mdx diff --git a/apps/docs/content/docs/en/self-hosting/environment-variables.mdx b/apps/docs/content/docs/en/self-hosting/environment-variables.mdx index 8d481b0d62f..0a11464a8f1 100644 --- a/apps/docs/content/docs/en/self-hosting/environment-variables.mdx +++ b/apps/docs/content/docs/en/self-hosting/environment-variables.mdx @@ -70,6 +70,19 @@ import { Callout } from 'fumadocs-ui/components/callout' | `ALLOWED_LOGIN_EMAILS` | Restrict signups to specific emails (comma-separated) | | `DISABLE_REGISTRATION` | Set to `true` to disable new user signups | +## File Storage + +By default Sim writes uploads to local disk. For production, point it at AWS S3 or Azure Blob. See [Object Storage](/self-hosting/object-storage) for the full setup, bucket layout, and IAM policy. + +| Variable | Description | +|----------|-------------| +| `AWS_REGION` | AWS region — set with `S3_BUCKET_NAME` to enable S3 | +| `AWS_ACCESS_KEY_ID` | AWS access key. Omit to use the instance/IRSA credential chain | +| `AWS_SECRET_ACCESS_KEY` | AWS secret key. Omit to use the instance/IRSA credential chain | +| `S3_BUCKET_NAME` | General workspace files bucket — set with `AWS_REGION` to enable S3 | +| `AZURE_STORAGE_CONTAINER_NAME` | General files container — set with Azure credentials to enable Blob (takes precedence over S3) | +| `AZURE_CONNECTION_STRING` | Azure connection string, or use `AZURE_ACCOUNT_NAME` + `AZURE_ACCOUNT_KEY` | + ## Email Providers Configure one provider — the mailer auto-detects in priority order: **Resend → AWS SES → SMTP → Azure Communication Services**. If none are configured, emails are logged to the console instead. diff --git a/apps/docs/content/docs/en/self-hosting/meta.json b/apps/docs/content/docs/en/self-hosting/meta.json index 805cfb659a1..8ec1af87ec8 100644 --- a/apps/docs/content/docs/en/self-hosting/meta.json +++ b/apps/docs/content/docs/en/self-hosting/meta.json @@ -5,6 +5,7 @@ "docker", "kubernetes", "platforms", + "object-storage", "environment-variables", "troubleshooting" ], diff --git a/apps/docs/content/docs/en/self-hosting/object-storage.mdx b/apps/docs/content/docs/en/self-hosting/object-storage.mdx new file mode 100644 index 00000000000..edeee7b87d5 --- /dev/null +++ b/apps/docs/content/docs/en/self-hosting/object-storage.mdx @@ -0,0 +1,289 @@ +--- +title: Object Storage +description: Configure where Sim stores uploaded files — local disk, AWS S3, or Azure Blob +--- + +import { Tab, Tabs } from 'fumadocs-ui/components/tabs' +import { Callout } from 'fumadocs-ui/components/callout' +import { Step, Steps } from 'fumadocs-ui/components/steps' +import { FAQ } from '@/components/ui/faq' + +Sim stores every uploaded file — knowledge base documents, chat attachments, execution outputs, profile pictures, and more — in object storage. Three backends are supported: + +| Backend | When to use | +|---------|-------------| +| **Local disk** | Single-node Docker, local development, evaluation | +| **[AWS S3](https://aws.amazon.com/s3/)** | Production, especially when running more than one app replica | +| **[Azure Blob](https://learn.microsoft.com/azure/storage/blobs/)** | Production on Azure | + + + Local disk writes to the container's `/uploads` directory. Files are lost when the container is recreated unless that path is on a persistent volume, and they are **not** shared across replicas. For any multi-replica or production deployment, use S3 or Azure Blob. + + +## How the backend is selected + +Sim picks the backend automatically from environment variables — there is no explicit "provider" flag. The logic, in order of precedence: + +1. **Azure Blob** — used if `AZURE_STORAGE_CONTAINER_NAME` is set **and** either (`AZURE_ACCOUNT_NAME` + `AZURE_ACCOUNT_KEY`) or `AZURE_CONNECTION_STRING` is set. +2. **AWS S3** — used if `S3_BUCKET_NAME` **and** `AWS_REGION` are set (and Azure is not configured). +3. **Local disk** — the fallback when neither is configured. + +If both Azure and S3 are configured, **Azure wins**. Set only the variables for the backend you intend to use. + +## Set up AWS S3 + + + + + +### Create the buckets + +Sim separates files into purpose-specific buckets. At minimum you need the general workspace bucket; the rest are created on demand based on which env vars you set. A bucket that isn't configured falls back to the general bucket where the code allows it, but the recommended setup is one bucket per purpose. + +```bash +# Set your region once +export AWS_REGION=us-east-1 + +# Create buckets (names must be globally unique — prefix with your org) +for name in workspace-files knowledge-base execution-files chat-files \ + copilot-files profile-pictures og-images workspace-logos; do + aws s3api create-bucket \ + --bucket "myorg-sim-$name" \ + --region "$AWS_REGION" \ + --create-bucket-configuration LocationConstraint="$AWS_REGION" +done +``` + + + In `us-east-1`, omit the `--create-bucket-configuration` flag — that region rejects an explicit `LocationConstraint`. + + +Keep all buckets **private** (block public access). Sim serves files through short-lived presigned URLs, so the buckets never need public read access. + + + + + +### Grant access with an IAM policy + +Create an IAM policy scoped to your buckets and attach it to the user (or role) Sim runs as: + +```json +{ + "Version": "2012-10-17", + "Statement": [ + { + "Effect": "Allow", + "Action": [ + "s3:GetObject", + "s3:PutObject", + "s3:DeleteObject", + "s3:ListBucket" + ], + "Resource": [ + "arn:aws:s3:::myorg-sim-*", + "arn:aws:s3:::myorg-sim-*/*" + ] + } + ] +} +``` + +You then have two ways to supply credentials: + +- **Static keys** — create an IAM user with this policy and set `AWS_ACCESS_KEY_ID` / `AWS_SECRET_ACCESS_KEY`. +- **Instance/role credentials (recommended)** — attach the policy to the EC2 instance role, ECS task role, or EKS IRSA role. Leave `AWS_ACCESS_KEY_ID` / `AWS_SECRET_ACCESS_KEY` unset and Sim falls back to the default AWS credential chain automatically. + + + + + +### Configure environment variables + +Set the region, optionally the credentials, and the bucket names: + +```bash +# Region + credentials +AWS_REGION=us-east-1 +AWS_ACCESS_KEY_ID=AKIA... # omit when using an instance/IRSA role +AWS_SECRET_ACCESS_KEY=... # omit when using an instance/IRSA role + +# Buckets (per purpose) +S3_BUCKET_NAME=myorg-sim-workspace-files +S3_KB_BUCKET_NAME=myorg-sim-knowledge-base +S3_EXECUTION_FILES_BUCKET_NAME=myorg-sim-execution-files +S3_CHAT_BUCKET_NAME=myorg-sim-chat-files +S3_COPILOT_BUCKET_NAME=myorg-sim-copilot-files +S3_PROFILE_PICTURES_BUCKET_NAME=myorg-sim-profile-pictures +S3_OG_IMAGES_BUCKET_NAME=myorg-sim-og-images +S3_WORKSPACE_LOGOS_BUCKET_NAME=myorg-sim-workspace-logos +``` + +Only `AWS_REGION` and `S3_BUCKET_NAME` are strictly required to switch Sim into S3 mode. Add the others so each file type lands in its own bucket. + + + + + +### S3 bucket reference + +| Variable | Stores | Required | +|----------|--------|----------| +| `AWS_REGION` | Region for all buckets | **Yes** (enables S3) | +| `AWS_ACCESS_KEY_ID` | Access key | No (uses credential chain if unset) | +| `AWS_SECRET_ACCESS_KEY` | Secret key | No (uses credential chain if unset) | +| `S3_BUCKET_NAME` | General workspace files | **Yes** (enables S3) | +| `S3_KB_BUCKET_NAME` | Knowledge base documents | Recommended | +| `S3_EXECUTION_FILES_BUCKET_NAME` | Workflow execution files (default: `sim-execution-files`) | Recommended | +| `S3_CHAT_BUCKET_NAME` | Deployed chat assets | Recommended | +| `S3_COPILOT_BUCKET_NAME` | Copilot attachments | Recommended | +| `S3_PROFILE_PICTURES_BUCKET_NAME` | User avatars | Recommended | +| `S3_OG_IMAGES_BUCKET_NAME` | OpenGraph preview images (falls back to `S3_BUCKET_NAME`) | Optional | +| `S3_WORKSPACE_LOGOS_BUCKET_NAME` | Workspace logos (falls back to `S3_BUCKET_NAME`) | Optional | +| `S3_LOGS_BUCKET_NAME` | Stored logs | Optional | +| `S3_ENDPOINT` | Custom endpoint for S3-compatible storage (R2, MinIO, B2) | Optional (AWS S3 if unset) | +| `S3_FORCE_PATH_STYLE` | `true` for path-style addressing (MinIO/Ceph) | Optional (defaults `false`) | + +## Apply the configuration + + + + +Add the storage variables to the `.env` file used by `docker-compose.prod.yml`, then restart: + +```bash +docker compose -f docker-compose.prod.yml up -d +``` + +Because files now live in S3, you no longer depend on a local `/uploads` volume for durability. + + + + +Set the variables under `app.env` (non-secret, e.g. region and bucket names) and supply credentials through a secret. The chart ships a complete example at `helm/sim/examples/values-aws.yaml`: + +```yaml +app: + env: + AWS_REGION: "us-east-1" + S3_BUCKET_NAME: "myorg-sim-workspace-files" + S3_KB_BUCKET_NAME: "myorg-sim-knowledge-base" + S3_EXECUTION_FILES_BUCKET_NAME: "myorg-sim-execution-files" + # ...remaining buckets +``` + +On EKS, prefer **IRSA**: attach the IAM policy to the service account's role and leave the access-key variables unset. + + + + +## Set up Azure Blob + +Azure Blob uses one container per purpose, mirroring the S3 layout. Authenticate with either a connection string or an account name + key. + +```bash +# Credentials — provide ONE of these forms +AZURE_ACCOUNT_NAME=mystorageaccount +AZURE_ACCOUNT_KEY=... +# or +AZURE_CONNECTION_STRING=DefaultEndpointsProtocol=https;AccountName=...;AccountKey=...;EndpointSuffix=core.windows.net + +# Containers (per purpose) +AZURE_STORAGE_CONTAINER_NAME=workspace-files +AZURE_STORAGE_KB_CONTAINER_NAME=knowledge-base +AZURE_STORAGE_EXECUTION_FILES_CONTAINER_NAME=execution-files +AZURE_STORAGE_CHAT_CONTAINER_NAME=chat-files +AZURE_STORAGE_COPILOT_CONTAINER_NAME=copilot-files +AZURE_STORAGE_PROFILE_PICTURES_CONTAINER_NAME=profile-pictures +AZURE_STORAGE_OG_IMAGES_CONTAINER_NAME=og-images +AZURE_STORAGE_WORKSPACE_LOGOS_CONTAINER_NAME=workspace-logos +``` + +A full Helm example lives at `helm/sim/examples/values-azure.yaml`. + +## Set up an S3-compatible provider (R2, MinIO, B2) + +Sim works with any S3-compatible store by pointing the S3 client at a custom endpoint. Configure it exactly like AWS S3 (buckets, access key, secret), then add `S3_ENDPOINT` — and `S3_FORCE_PATH_STYLE` where the provider requires path-style addressing. Verified with [Cloudflare R2](https://developers.cloudflare.com/r2/), [MinIO](https://min.io/), [Backblaze B2](https://www.backblaze.com/cloud-storage), and [RustFS](https://rustfs.com/). + + + `S3_ENDPOINT` is trusted operator configuration, so it is used as-is — `http://` and private hosts are accepted (no SSRF/HTTPS gate). Don't wire it to untrusted input. + + + + **The endpoint must be reachable from your users' browsers, and the bucket needs CORS.** Uploads use presigned `PUT` requests sent **directly from the browser** to `S3_ENDPOINT` (downloads are proxied back through the app, so they only need server-side reachability). This means: + + - A purely internal endpoint (e.g. `https://minio.internal:9000` that only the app pods can resolve) will let the server start cleanly but **uploads will fail in the browser**. Use an endpoint your users can reach. + - Configure a **CORS policy** on the bucket that allows your Sim origin (`PUT`, `GET`, and the `Authorization` / `Content-Type` / `x-amz-*` headers). This applies to AWS S3 too — R2 and MinIO are no different. + + + + + +[Cloudflare R2](https://developers.cloudflare.com/r2/api/s3/) uses virtual-hosted style (the default) and the region `auto`: + +```bash +AWS_REGION=auto +S3_ENDPOINT=https://.r2.cloudflarestorage.com +AWS_ACCESS_KEY_ID= +AWS_SECRET_ACCESS_KEY= +S3_BUCKET_NAME=myorg-sim-workspace-files +# ...remaining S3_*_BUCKET_NAME vars, one R2 bucket each +``` + +Leave `S3_FORCE_PATH_STYLE` unset — R2 supports the default virtual-hosted addressing. + + + + +[MinIO](https://min.io/docs/minio/linux/index.html) (and [Ceph RGW](https://docs.ceph.com/en/latest/radosgw/)) need path-style addressing and accept any region string: + +```bash +AWS_REGION=us-east-1 +S3_ENDPOINT=https://minio.example.com # must be reachable from users' browsers, not app-pods-only +S3_FORCE_PATH_STYLE=true +AWS_ACCESS_KEY_ID= +AWS_SECRET_ACCESS_KEY= +S3_BUCKET_NAME=myorg-sim-workspace-files +# ...remaining S3_*_BUCKET_NAME vars, one bucket each +``` + +`http://` works server-side, but since the browser uploads directly to this endpoint, prefer a TLS endpoint your users can reach (a mixed-content `http://` target will be blocked on an `https://` Sim origin). + + + + +[RustFS](https://rustfs.com/) is a Rust-based, S3-compatible store (a MinIO drop-in). Configure it exactly like MinIO — path-style, any region string, SigV4 access key/secret: + +```bash +AWS_REGION=us-east-1 +S3_ENDPOINT=https://rustfs.example.com # must be reachable from users' browsers +S3_FORCE_PATH_STYLE=true +AWS_ACCESS_KEY_ID= +AWS_SECRET_ACCESS_KEY= +S3_BUCKET_NAME=myorg-sim-workspace-files +# ...remaining S3_*_BUCKET_NAME vars, one bucket each +``` + +The same browser-reachability and CORS requirements apply. + + + + +## Verify it works + +After restarting with the new configuration: + +1. Open the app and upload a document to a knowledge base (or set a profile picture). +2. Confirm an object appears in the corresponding bucket/container. +3. Reload the page — the file should still render (downloads stream back through the app at `/api/files/serve`). + +If uploads fail, check the app logs for credential or permission errors (see [Troubleshooting](/self-hosting/troubleshooting)). + + diff --git a/apps/sim/.env.example b/apps/sim/.env.example index ca6012c7bb1..180e9b56e98 100644 --- a/apps/sim/.env.example +++ b/apps/sim/.env.example @@ -71,6 +71,21 @@ API_ENCRYPTION_KEY=your_api_encryption_key # Use `openssl rand -hex 32` to gener # PEOPLEDATALABS_API_KEY_1= # People Data Labs API key #1 # PEOPLEDATALABS_API_KEY_2= # People Data Labs API key #2 +# File Storage (Optional - defaults to local disk; use S3 or Azure Blob for production) +# AWS_REGION=us-east-1 # Required with S3_BUCKET_NAME to enable S3. Use "auto" for Cloudflare R2 +# AWS_ACCESS_KEY_ID= # Omit to use the instance/IRSA credential chain +# AWS_SECRET_ACCESS_KEY= # Omit to use the instance/IRSA credential chain +# S3_BUCKET_NAME= # General workspace files bucket (required with AWS_REGION to enable S3) +# S3_KB_BUCKET_NAME= # Knowledge base documents +# S3_EXECUTION_FILES_BUCKET_NAME= # Workflow execution files +# S3_CHAT_BUCKET_NAME= # Deployed chat assets +# S3_COPILOT_BUCKET_NAME= # Copilot attachments +# S3_PROFILE_PICTURES_BUCKET_NAME= # User profile pictures +# S3_OG_IMAGES_BUCKET_NAME= # OpenGraph preview images (falls back to S3_BUCKET_NAME) +# S3_WORKSPACE_LOGOS_BUCKET_NAME= # Workspace logos (falls back to S3_BUCKET_NAME) +# S3_ENDPOINT= # Custom endpoint for S3-compatible storage (Cloudflare R2, MinIO, Backblaze B2). Leave unset for AWS S3 +# S3_FORCE_PATH_STYLE=true # Required for MinIO/Ceph RGW. Leave unset for AWS S3 and R2 + # Admin API (Optional - for self-hosted GitOps) # ADMIN_API_KEY= # Use `openssl rand -hex 32` to generate. Enables admin API for workflow export/import. # Usage: curl -H "x-admin-key: your_key" https://your-instance/api/v1/admin/workspaces diff --git a/apps/sim/lib/core/config/env.ts b/apps/sim/lib/core/config/env.ts index 223eb519524..e47ca481d02 100644 --- a/apps/sim/lib/core/config/env.ts +++ b/apps/sim/lib/core/config/env.ts @@ -218,8 +218,10 @@ export const env = createEnv({ S3_PROFILE_PICTURES_BUCKET_NAME: z.string().optional(), // S3 bucket for profile pictures S3_OG_IMAGES_BUCKET_NAME: z.string().optional(), // S3 bucket for OpenGraph images S3_WORKSPACE_LOGOS_BUCKET_NAME: z.string().optional(), // S3 bucket for workspace logos + S3_ENDPOINT: z.string().optional(), // Custom endpoint for S3-compatible storage (Cloudflare R2, MinIO, Backblaze B2). Leave unset for AWS S3 + S3_FORCE_PATH_STYLE: z.string().optional(), // Force path-style addressing (MinIO/Ceph RGW). Defaults to false (AWS S3, R2). Coerced via envBoolean at the consumption site - // Cloud Storage - Azure Blob + // Cloud Storage - Azure Blob AZURE_ACCOUNT_NAME: z.string().optional(), // Azure storage account name AZURE_ACCOUNT_KEY: z.string().optional(), // Azure storage account key AZURE_CONNECTION_STRING: z.string().optional(), // Azure storage connection string diff --git a/apps/sim/lib/uploads/config.ts b/apps/sim/lib/uploads/config.ts index f38d147db07..d833d19c9f8 100644 --- a/apps/sim/lib/uploads/config.ts +++ b/apps/sim/lib/uploads/config.ts @@ -1,4 +1,4 @@ -import { env } from '@/lib/core/config/env' +import { env, envBoolean } from '@/lib/core/config/env' import type { StorageConfig, StorageContext } from '@/lib/uploads/shared/types' export type { StorageConfig, StorageContext } from '@/lib/uploads/shared/types' @@ -17,6 +17,15 @@ export const USE_S3_STORAGE = hasS3Config && !USE_BLOB_STORAGE export const S3_CONFIG = { bucket: env.S3_BUCKET_NAME || '', region: env.AWS_REGION || '', + /** + * Custom endpoint for S3-compatible providers (Cloudflare R2, MinIO, Backblaze B2). + * Unset means the AWS SDK derives the host from `region`, targeting AWS S3. + * This is trusted operator configuration (not user input), so it is passed + * through verbatim — `http://` and private hosts are allowed for on-prem MinIO. + */ + endpoint: env.S3_ENDPOINT || undefined, + /** Path-style addressing — required by MinIO/Ceph RGW; AWS S3 and R2 use the default `false`. */ + forcePathStyle: envBoolean(env.S3_FORCE_PATH_STYLE) ?? false, } export const BLOB_CONFIG = { diff --git a/apps/sim/lib/uploads/core/setup.server.ts b/apps/sim/lib/uploads/core/setup.server.ts index 87801a29f96..70a03f43976 100644 --- a/apps/sim/lib/uploads/core/setup.server.ts +++ b/apps/sim/lib/uploads/core/setup.server.ts @@ -3,7 +3,12 @@ import { mkdir } from 'fs/promises' import path, { join } from 'path' import { createLogger } from '@sim/logger' import { env } from '@/lib/core/config/env' -import { getStorageProvider, USE_BLOB_STORAGE, USE_S3_STORAGE } from '@/lib/uploads/config' +import { + getStorageProvider, + S3_CONFIG, + USE_BLOB_STORAGE, + USE_S3_STORAGE, +} from '@/lib/uploads/config' const logger = createLogger('UploadsSetup') @@ -79,6 +84,12 @@ if (typeof process !== 'undefined') { } else { logger.info('AWS S3 credentials found in environment variables') } + + if (env.S3_ENDPOINT) { + logger.info( + `Using S3-compatible endpoint: ${env.S3_ENDPOINT} (path-style: ${S3_CONFIG.forcePathStyle})` + ) + } } else { // Local storage mode logger.info('Using local file storage') diff --git a/apps/sim/lib/uploads/providers/s3/client.test.ts b/apps/sim/lib/uploads/providers/s3/client.test.ts index 4e62109a5d8..48c017d2cb6 100644 --- a/apps/sim/lib/uploads/providers/s3/client.test.ts +++ b/apps/sim/lib/uploads/providers/s3/client.test.ts @@ -14,6 +14,7 @@ const { mockDeleteObjectCommand, mockGetSignedUrl, mockEnv, + mockS3Config, } = vi.hoisted(() => { const mockSend = vi.fn() const mockS3Client = { send: mockSend } @@ -24,9 +25,21 @@ const { AWS_ACCESS_KEY_ID: 'test-access-key', AWS_SECRET_ACCESS_KEY: 'test-secret-key', } + const mockS3Config: { + bucket: string + region: string + endpoint: string | undefined + forcePathStyle: boolean + } = { + bucket: 'test-bucket', + region: 'test-region', + endpoint: undefined, + forcePathStyle: false, + } return { mockSend, mockS3Client, + mockS3Config, mockS3ClientConstructor: vi.fn().mockImplementation( class { constructor() { @@ -71,10 +84,7 @@ vi.mock('@/lib/uploads/setup', () => ({ })) vi.mock('@/lib/uploads/config', () => ({ - S3_CONFIG: { - bucket: 'test-bucket', - region: 'test-region', - }, + S3_CONFIG: mockS3Config, S3_KB_CONFIG: { bucket: 'test-kb-bucket', region: 'test-region', @@ -97,6 +107,8 @@ describe('S3 Client', () => { vi.spyOn(Date.prototype, 'toISOString').mockReturnValue('2025-06-16T01:13:10.765Z') mockEnv.AWS_ACCESS_KEY_ID = 'test-access-key' mockEnv.AWS_SECRET_ACCESS_KEY = 'test-secret-key' + mockS3Config.endpoint = undefined + mockS3Config.forcePathStyle = false resetS3ClientForTesting() }) @@ -342,6 +354,8 @@ describe('S3 Client', () => { expect(client).toBeDefined() expect(mockS3ClientConstructor).toHaveBeenCalledWith({ region: 'test-region', + endpoint: undefined, + forcePathStyle: false, credentials: { accessKeyId: 'test-access-key', secretAccessKey: 'test-secret-key', @@ -359,8 +373,29 @@ describe('S3 Client', () => { expect(client).toBeDefined() expect(mockS3ClientConstructor).toHaveBeenCalledWith({ region: 'test-region', + endpoint: undefined, + forcePathStyle: false, credentials: undefined, }) }) + + it('should pass a custom endpoint and path-style flag for S3-compatible providers', () => { + mockS3Config.endpoint = 'https://account.r2.cloudflarestorage.com' + mockS3Config.forcePathStyle = true + resetS3ClientForTesting() + + const client = getS3Client() + + expect(client).toBeDefined() + expect(mockS3ClientConstructor).toHaveBeenCalledWith({ + region: 'test-region', + endpoint: 'https://account.r2.cloudflarestorage.com', + forcePathStyle: true, + credentials: { + accessKeyId: 'test-access-key', + secretAccessKey: 'test-secret-key', + }, + }) + }) }) }) diff --git a/apps/sim/lib/uploads/providers/s3/client.ts b/apps/sim/lib/uploads/providers/s3/client.ts index fe939cb506f..42b0c62fac8 100644 --- a/apps/sim/lib/uploads/providers/s3/client.ts +++ b/apps/sim/lib/uploads/providers/s3/client.ts @@ -54,6 +54,8 @@ export function getS3Client(): S3Client { _s3Client = new S3Client({ region, + endpoint: S3_CONFIG.endpoint, + forcePathStyle: S3_CONFIG.forcePathStyle, credentials: env.AWS_ACCESS_KEY_ID && env.AWS_SECRET_ACCESS_KEY ? { @@ -386,6 +388,26 @@ export async function getS3MultipartPartUrls( return presignedUrls } +/** + * Build a fallback object URL for when the SDK omits `Location` on multipart + * completion. For a custom `S3_CONFIG.endpoint` it matches the configured + * addressing mode — path-style for MinIO/Ceph (`forcePathStyle`), virtual-hosted + * (bucket as a subdomain) for R2 and friends. Falls back to the AWS + * virtual-hosted host when no custom endpoint is set. + */ +function buildObjectFallbackUrl(bucket: string, region: string, key: string): string { + if (S3_CONFIG.endpoint) { + const base = S3_CONFIG.endpoint.replace(/\/+$/, '') + if (S3_CONFIG.forcePathStyle) { + return `${base}/${bucket}/${key}` + } + const url = new URL(base) + url.hostname = `${bucket}.${url.hostname}` + return `${url.origin}/${key}` + } + return `https://${bucket}.s3.${region}.amazonaws.com/${key}` +} + /** * Complete multipart upload for S3 */ @@ -408,8 +430,7 @@ export async function completeS3MultipartUpload( }) const response = await s3Client.send(command) - const location = - response.Location || `https://${config.bucket}.s3.${config.region}.amazonaws.com/${key}` + const location = response.Location || buildObjectFallbackUrl(config.bucket, config.region, key) const path = `/api/files/serve/${encodeURIComponent(key)}` return { diff --git a/helm/sim/examples/values-aws.yaml b/helm/sim/examples/values-aws.yaml index c8795a8e976..a0837b2672b 100644 --- a/helm/sim/examples/values-aws.yaml +++ b/helm/sim/examples/values-aws.yaml @@ -103,6 +103,9 @@ app: S3_PROFILE_PICTURES_BUCKET_NAME: "profile-pictures" # User avatars S3_OG_IMAGES_BUCKET_NAME: "og-images" # OpenGraph preview images S3_WORKSPACE_LOGOS_BUCKET_NAME: "workspace-logos" # Workspace logos + # For S3-compatible storage (Cloudflare R2, MinIO, Backblaze B2) instead of AWS S3: + # S3_ENDPOINT: "https://.r2.cloudflarestorage.com" # custom endpoint; set AWS_REGION: "auto" for R2 + # S3_FORCE_PATH_STYLE: "true" # required for MinIO/Ceph; omit for AWS S3 and R2 # Realtime service realtime: diff --git a/helm/sim/values.yaml b/helm/sim/values.yaml index 6b48a957bd3..5fad7baa674 100644 --- a/helm/sim/values.yaml +++ b/helm/sim/values.yaml @@ -260,6 +260,8 @@ app: S3_PROFILE_PICTURES_BUCKET_NAME: "" # S3 bucket for user profile pictures S3_OG_IMAGES_BUCKET_NAME: "" # S3 bucket for OpenGraph preview images S3_WORKSPACE_LOGOS_BUCKET_NAME: "" # S3 bucket for workspace logos + S3_ENDPOINT: "" # Custom endpoint for S3-compatible storage (Cloudflare R2, MinIO, Backblaze B2). Leave empty for AWS S3 + S3_FORCE_PATH_STYLE: "" # Set to "true" for path-style addressing (MinIO/Ceph RGW). Leave empty for AWS S3 and R2 # Azure Blob Storage Configuration (optional - for file storage) # If configured, files will be stored in Azure Blob instead of local storage diff --git a/packages/testing/src/mocks/env.mock.ts b/packages/testing/src/mocks/env.mock.ts index 61f733c1ec2..e6bbd09f7b4 100644 --- a/packages/testing/src/mocks/env.mock.ts +++ b/packages/testing/src/mocks/env.mock.ts @@ -53,6 +53,14 @@ export function createEnvMock(overrides: Record = {} typeof value === 'string' ? value.toLowerCase() === 'false' || value === '0' : value === false, + envBoolean: (value: boolean | string | undefined | null): boolean | undefined => { + if (typeof value === 'boolean') return value + if (value === undefined || value === null || value === '') return undefined + const normalized = String(value).trim().toLowerCase() + return ( + normalized === 'true' || normalized === '1' || normalized === 'yes' || normalized === 'on' + ) + }, envNumber: ( value: number | string | undefined | null, fallback: number, From b98fbfebef2d4adc394ed344d2a6a27af698a176 Mon Sep 17 00:00:00 2001 From: Waleed Date: Wed, 3 Jun 2026 11:46:13 -0700 Subject: [PATCH 04/11] docs(slack): remove archival reference from Download files per Slack Marketplace guidelines (#4867) --- apps/docs/content/docs/de/tools/slack.mdx | 2 +- apps/docs/content/docs/en/tools/slack.mdx | 2 +- apps/docs/content/docs/es/tools/slack.mdx | 2 +- apps/docs/content/docs/fr/tools/slack.mdx | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/apps/docs/content/docs/de/tools/slack.mdx b/apps/docs/content/docs/de/tools/slack.mdx index 6de7714fd2b..01b8b58df63 100644 --- a/apps/docs/content/docs/de/tools/slack.mdx +++ b/apps/docs/content/docs/de/tools/slack.mdx @@ -41,7 +41,7 @@ In Sim ermöglicht die Slack-Integration Ihren Agenten, programmatisch mit Slack - **Reaktionen hinzufügen**: Drücken Sie Stimmungen oder Bestätigungen aus, indem Sie Emoji-Reaktionen zu jeder Nachricht hinzufügen - **Canvases erstellen**: Erstellen und teilen Sie Slack-Canvases (kollaborative Dokumente) direkt in Kanälen, um reichhaltigere Inhalte zu teilen und zu dokumentieren - **Nachrichten lesen**: Lesen Sie aktuelle Nachrichten aus Kanälen, um Überwachung, Berichterstattung oder das Auslösen weiterer Aktionen basierend auf Kanalaktivitäten zu ermöglichen -- **Dateien herunterladen**: Rufen Sie in Slack-Kanälen geteilte Dateien zur Verarbeitung oder Archivierung ab +- **Dateien herunterladen**: Rufen Sie in Slack-Kanälen geteilte Dateien zur Verarbeitung in einem Workflow ab Dies ermöglicht leistungsstarke Automatisierungsszenarien wie das Senden von Benachrichtigungen mit dynamischen Updates, das Verwalten von Gesprächsabläufen mit bearbeitbaren Statusnachrichten, das Bestätigen wichtiger Nachrichten mit Reaktionen und das Sauberhalten von Kanälen durch Entfernen veralteter Bot-Nachrichten. Ihre Agenten können zeitnahe Informationen liefern, Nachrichten aktualisieren, während Workflows fortschreiten, kollaborative Dokumente erstellen oder Teammitglieder benachrichtigen, wenn Aufmerksamkeit benötigt wird. Diese Integration überbrückt die Lücke zwischen Ihren KI-Workflows und der Kommunikation Ihres Teams und stellt sicher, dass jeder mit genauen, aktuellen Informationen auf dem Laufenden bleibt. Durch die Verbindung von Sim mit Slack können Sie Agenten erstellen, die Ihr Team mit relevanten Informationen zur richtigen Zeit auf dem Laufenden halten, die Zusammenarbeit verbessern, indem sie Erkenntnisse automatisch teilen und aktualisieren, und die Notwendigkeit manueller Statusaktualisierungen reduzieren – alles während Sie Ihren bestehenden Slack-Workspace nutzen, in dem Ihr Team bereits kommuniziert. {/* MANUAL-CONTENT-END */} diff --git a/apps/docs/content/docs/en/tools/slack.mdx b/apps/docs/content/docs/en/tools/slack.mdx index 54065a58200..c231b547b62 100644 --- a/apps/docs/content/docs/en/tools/slack.mdx +++ b/apps/docs/content/docs/en/tools/slack.mdx @@ -23,7 +23,7 @@ With the Slack integration in Sim, you can: - **Create canvases**: Create and share Slack canvases (collaborative documents) directly in channels - **Read messages**: Retrieve recent messages from channels or DMs, with filtering by time range - **Manage channels and users**: List channels, members, and users in your Slack workspace -- **Download files**: Retrieve files shared in Slack channels for processing or archival +- **Download files**: Retrieve files shared in Slack channels for processing within a workflow In Sim, the Slack integration enables your agents to programmatically interact with Slack as part of their workflows. This allows for automation scenarios such as sending notifications with dynamic updates, managing conversational flows with editable status messages, acknowledging important messages with reactions, and maintaining clean channels by removing outdated bot messages. The integration can also be used in trigger mode to start a workflow when a message is sent to a channel. diff --git a/apps/docs/content/docs/es/tools/slack.mdx b/apps/docs/content/docs/es/tools/slack.mdx index 67028cb518c..bf23941f9dc 100644 --- a/apps/docs/content/docs/es/tools/slack.mdx +++ b/apps/docs/content/docs/es/tools/slack.mdx @@ -41,7 +41,7 @@ En Sim, la integración con Slack permite a tus agentes interactuar programátic - **Añadir reacciones**: Expresar sentimiento o reconocimiento añadiendo reacciones con emojis a cualquier mensaje - **Crear lienzos**: Crear y compartir lienzos de Slack (documentos colaborativos) directamente en canales, permitiendo compartir contenido más rico y documentación - **Leer mensajes**: Leer mensajes recientes de canales, permitiendo monitoreo, informes o activación de acciones adicionales basadas en la actividad del canal -- **Descargar archivos**: Recuperar archivos compartidos en canales de Slack para procesamiento o archivo +- **Descargar archivos**: Recuperar archivos compartidos en canales de Slack para procesamiento en un flujo de trabajo Esto permite escenarios de automatización potentes como enviar notificaciones con actualizaciones dinámicas, gestionar flujos conversacionales con mensajes de estado editables, reconocer mensajes importantes con reacciones y mantener canales limpios eliminando mensajes de bot obsoletos. Tus agentes pueden entregar información oportuna, actualizar mensajes a medida que avanzan los flujos de trabajo, crear documentos colaborativos o alertar a miembros del equipo cuando se necesita atención. Esta integración cierra la brecha entre tus flujos de trabajo de IA y la comunicación de tu equipo, asegurando que todos se mantengan informados con información precisa y actualizada. Al conectar Sim con Slack, puedes crear agentes que mantengan a tu equipo actualizado con información relevante en el momento adecuado, mejorar la colaboración compartiendo y actualizando información automáticamente, y reducir la necesidad de actualizaciones manuales de estado, todo mientras aprovechas tu espacio de trabajo de Slack existente donde tu equipo ya se comunica. {/* MANUAL-CONTENT-END */} diff --git a/apps/docs/content/docs/fr/tools/slack.mdx b/apps/docs/content/docs/fr/tools/slack.mdx index 00440dce96f..4a1f0995b54 100644 --- a/apps/docs/content/docs/fr/tools/slack.mdx +++ b/apps/docs/content/docs/fr/tools/slack.mdx @@ -41,7 +41,7 @@ Dans Sim, l'intégration Slack permet à vos agents d'interagir programmatiqueme - **Ajoutez des réactions** : Exprimez un sentiment ou une reconnaissance en ajoutant des réactions emoji à n'importe quel message - **Créez des canevas** : Créez et partagez des canevas Slack (documents collaboratifs) directement dans les canaux, permettant un partage de contenu et une documentation plus riches - **Lisez des messages** : Lisez les messages récents des canaux, permettant la surveillance, le reporting ou le déclenchement d'actions supplémentaires basées sur l'activité du canal -- **Téléchargez des fichiers** : Récupérez les fichiers partagés dans les canaux Slack pour traitement ou archivage +- **Téléchargez des fichiers** : Récupérez les fichiers partagés dans les canaux Slack pour traitement dans un workflow Cela permet des scénarios d'automatisation puissants tels que l'envoi de notifications avec des mises à jour dynamiques, la gestion des flux conversationnels avec des messages de statut modifiables, la reconnaissance de messages importants avec des réactions, et le maintien de canaux propres en supprimant les messages de bot obsolètes. Vos agents peuvent fournir des informations opportunes, mettre à jour des messages au fur et à mesure que les workflows progressent, créer des documents collaboratifs, ou alerter les membres de l'équipe lorsqu'une attention est nécessaire. Cette intégration comble le fossé entre vos workflows d'IA et la communication de votre équipe, garantissant que tout le monde reste informé avec des informations précises et à jour. En connectant Sim avec Slack, vous pouvez créer des agents qui tiennent votre équipe informée avec des informations pertinentes au bon moment, améliorent la collaboration en partageant et en mettant à jour automatiquement des insights, et réduisent le besoin de mises à jour manuelles de statut—tout en tirant parti de votre espace de travail Slack existant où votre équipe communique déjà. {/* MANUAL-CONTENT-END */} From b329c36b1a849f1e6469ed41f9aaa025151c597a Mon Sep 17 00:00:00 2001 From: Waleed Date: Wed, 3 Jun 2026 12:00:04 -0700 Subject: [PATCH 05/11] fix(auth): link SSO sign-in to existing same-email accounts (#4866) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * fix(auth): link SSO sign-in to existing same-email accounts SSO sign-ins failed with "account not linked" (then a cascading "Invalid callbackURL") when an account with the same email already existed. Better Auth's `@better-auth/sso` plugin hardcodes the provisioned user's `emailVerified: options?.trustEmailVerified ? : false`, so with the option unset every SSO login arrived unverified and tripped the account linking gate `(!isTrustedProvider && !userInfo.emailVerified)` whenever the provider was not in `accountLinking.trustedProviders`. - Set `trustEmailVerified: true` on the SSO plugin so the IdP's verified-email claim is honored (Okta, Entra ID, Google Workspace, Auth0 all assert it). - Trust the operator's configured provider for linking: merge `SSO_PROVIDER_ID` (when present in the app env) plus a new `SSO_TRUSTED_PROVIDER_IDS` list into `trustedProviders`. Empty/unset => no-op, so existing deployments are unchanged. - Invite callback URL: return a clean `/invite/` (token already persists in sessionStorage) so an appended `?error=` cannot produce a malformed URL. - Document `SSO_TRUSTED_PROVIDER_IDS` in SSO docs, Helm values, and schema. Co-Authored-By: Claude Opus 4.8 * fix(auth): address review — guard trusted SSO providers, revert invite callback - Only compute additionalTrustedSsoProviders when SSO_ENABLED, so trustedProviders is exactly unchanged for non-SSO deployments. - Revert the invite getCallbackUrl change: keep the token in the callback URL (with sessionStorage/searchParams fallback) so the token survives when sessionStorage is unavailable. The account-linking fix removes the "account not linked" error that caused the malformed callback URL, so the callback cleanup is unnecessary. Co-Authored-By: Claude Opus 4.8 * fix(auth): guard trusted SSO providers with isSsoEnabled (isTruthy) env.SSO_ENABLED can be the string "false" (t3-env returns strings for booleans), which is truthy in JS. Use the canonical isSsoEnabled flag (isTruthy(env.SSO_ENABLED)) so SSO_ENABLED="false"/"0" correctly yields an empty trusted-provider list, matching how SSO is gated elsewhere. Co-Authored-By: Claude Opus 4.8 --------- Co-authored-by: Claude Opus 4.8 --- apps/docs/content/docs/en/enterprise/sso.mdx | 21 +++++++++++++++++++ apps/sim/lib/auth/auth.ts | 22 ++++++++++++++++++++ apps/sim/lib/core/config/env.ts | 1 + helm/sim/values.schema.json | 4 ++++ helm/sim/values.yaml | 4 ++++ 5 files changed, 52 insertions(+) diff --git a/apps/docs/content/docs/en/enterprise/sso.mdx b/apps/docs/content/docs/en/enterprise/sso.mdx index bfccc204122..ee3d53be3ec 100644 --- a/apps/docs/content/docs/en/enterprise/sso.mdx +++ b/apps/docs/content/docs/en/enterprise/sso.mdx @@ -250,6 +250,10 @@ SSO provisioning creates internal organization members. External workspace membe question: "Can I still use email/password login after enabling SSO?", answer: "Yes. Enabling SSO does not disable password-based login. Users can still sign in with their email and password if they have one. Forced SSO (requiring all users on the domain to use SSO) is not yet supported." }, + { + question: "A user already has an account with the same email — what happens when they sign in with SSO?", + answer: "Sim links the SSO identity to the existing account automatically, as long as your identity provider reports the email as verified (email_verified) or the provider is trusted. Most OIDC providers (Okta, Google Workspace, Auth0) assert email_verified, so linking just works. If sign-in fails with 'account not linked' — common with SAML providers that omit the claim — add the provider's ID to SSO_TRUSTED_PROVIDER_IDS on self-hosted and restart." + }, { question: "Who can configure SSO on Sim Cloud?", answer: "Organization owners and admins can configure SSO. You must be on the Enterprise plan." @@ -280,8 +284,25 @@ NEXT_PUBLIC_SSO_ENABLED=true # Required if you want users auto-added to your organization on first SSO sign-in ORGANIZATIONS_ENABLED=true NEXT_PUBLIC_ORGANIZATIONS_ENABLED=true + +# Optional: comma-separated SSO provider IDs to trust for automatic account linking +# (links an SSO sign-in to an existing account with the same email). Needed when your +# IdP does not assert email_verified — typically SAML providers, or OIDC providers that +# omit the claim. Set it to the Provider ID you registered, then restart. +# (If you also keep SSO_PROVIDER_ID in the app's environment, that provider is trusted +# without listing it here.) +SSO_TRUSTED_PROVIDER_IDS=custom-oidc,partner-saml ``` + + When someone signs in with SSO and an account with the same email already exists + (for example, they previously signed up with email/password), Sim links the SSO + identity to that account automatically as long as your IdP reports the email as + verified, or the provider is trusted. If you hit an `account not linked` error, + either confirm your IdP sends `email_verified`, or add the provider's ID to + `SSO_TRUSTED_PROVIDER_IDS` and restart. + + You can register providers through the **Settings UI** (same as cloud) or by running the registration script directly against your database. ### Script-based registration diff --git a/apps/sim/lib/auth/auth.ts b/apps/sim/lib/auth/auth.ts index 76465b45bec..cd27bbd5e2d 100644 --- a/apps/sim/lib/auth/auth.ts +++ b/apps/sim/lib/auth/auth.ts @@ -71,6 +71,7 @@ import { isRegistrationDisabled, isSignupEmailValidationEnabled, isSignupMxValidationEnabled, + isSsoEnabled, } from '@/lib/core/config/feature-flags' import { PlatformEvents } from '@/lib/core/telemetry' import { getBaseUrl, isLocalhostUrl, parseOriginList } from '@/lib/core/utils/urls' @@ -164,6 +165,20 @@ const additionalTrustedOrigins = parseOriginList(env.TRUSTED_ORIGINS, (value) => logger.warn('Ignoring invalid entry in TRUSTED_ORIGINS', { value }) ) +/** + * SSO provider IDs to trust for automatic account linking when an SSO sign-in + * matches an existing account's email. Includes `SSO_PROVIDER_ID` when it is set + * in the app environment, plus any IDs from `SSO_TRUSTED_PROVIDER_IDS`. Empty when + * SSO is disabled, so `trustedProviders` is unchanged for non-SSO deployments. + * Resolved once at startup; `trustEmailVerified` on the SSO plugin handles IdPs + * that assert `email_verified` live, so this is only needed for IdPs that omit it. + */ +const additionalTrustedSsoProviders = isSsoEnabled + ? [env.SSO_PROVIDER_ID, ...(env.SSO_TRUSTED_PROVIDER_IDS?.split(',') ?? [])] + .map((id) => id?.trim()) + .filter((id): id is string => Boolean(id)) + : [] + if (env.NODE_ENV === 'production') { const baseUrl = getBaseUrl() if (isLocalhostUrl(baseUrl)) { @@ -685,6 +700,7 @@ export const auth = betterAuth({ 'calcom', 'docusign', ...SSO_TRUSTED_PROVIDERS, + ...additionalTrustedSsoProviders, ], }, }, @@ -2916,6 +2932,12 @@ export const auth = betterAuth({ ...(env.SSO_ENABLED ? [ sso({ + /** + * Honor the IdP's verified-email claim. Without this the SSO plugin + * forces `emailVerified: false`, blocking automatic linking of an SSO + * login to an existing same-email account (Better Auth "account not linked"). + */ + trustEmailVerified: true, organizationProvisioning: { disabled: false, defaultRole: 'member', diff --git a/apps/sim/lib/core/config/env.ts b/apps/sim/lib/core/config/env.ts index e47ca481d02..0a863112f10 100644 --- a/apps/sim/lib/core/config/env.ts +++ b/apps/sim/lib/core/config/env.ts @@ -406,6 +406,7 @@ export const env = createEnv({ SSO_DOMAIN: z.string().optional(), // [REQUIRED] SSO email domain SSO_USER_EMAIL: z.string().optional(), // [REQUIRED] User email for SSO registration SSO_ORGANIZATION_ID: z.string().optional(), // Organization ID for SSO registration (optional) + SSO_TRUSTED_PROVIDER_IDS: z.string().optional(), // Comma-separated SSO provider IDs to trust for automatic account linking when an existing account shares the same email. Use for IdPs that do not assert email_verified. Merged into Better Auth accountLinking.trustedProviders. // SSO Mapping Configuration (optional - sensible defaults provided) SSO_MAPPING_ID: z.string().optional(), // Custom ID claim mapping (default: sub for OIDC, nameidentifier for SAML) diff --git a/helm/sim/values.schema.json b/helm/sim/values.schema.json index 724f58161e5..b950e5bdd86 100644 --- a/helm/sim/values.schema.json +++ b/helm/sim/values.schema.json @@ -157,6 +157,10 @@ "type": "string", "description": "Comma-separated additional public origins to trust for auth (e.g. 'https://app.example.com,https://www.example.com'). Merged into Better Auth trustedOrigins." }, + "SSO_TRUSTED_PROVIDER_IDS": { + "type": "string", + "description": "Comma-separated SSO provider IDs to trust for automatic account linking when an SSO sign-in matches an existing account's email. Only needed for IdPs that do not assert email_verified. Merged into Better Auth accountLinking.trustedProviders." + }, "NODE_ENV": { "type": "string", "enum": ["development", "test", "production"], diff --git a/helm/sim/values.yaml b/helm/sim/values.yaml index 5fad7baa674..562a82d5798 100644 --- a/helm/sim/values.yaml +++ b/helm/sim/values.yaml @@ -216,6 +216,10 @@ app: # Set to "true" AFTER running the SSO registration script SSO_ENABLED: "" # Enable SSO authentication ("true" to enable) NEXT_PUBLIC_SSO_ENABLED: "" # Show SSO login button in UI ("true" to enable) + # SSO_TRUSTED_PROVIDER_IDS: comma-separated SSO provider IDs to trust for automatic account linking when a + # user signs in via SSO and an account with the same email already exists. Only needed for IdPs that do NOT + # assert email_verified (trustEmailVerified already handles those that do). Resolved at startup — restart after editing. + SSO_TRUSTED_PROVIDER_IDS: "" # Enterprise Feature Overrides (self-hosted) CREDENTIAL_SETS_ENABLED: "" # Enable credential sets (email polling) on self-hosted ("true" to enable) From cd667749604d111916659e0dedb8c49e79a2b46b Mon Sep 17 00:00:00 2001 From: Waleed Date: Wed, 3 Jun 2026 12:23:03 -0700 Subject: [PATCH 06/11] feat(gitlab): sync repository files (code/docs) (#4864) * feat(gitlab): sync repository files (code/docs) alongside wiki and issues * fix(gitlab): follow full keyset next-link for repo tree + skip disabled wiki gracefully in all/both * fix(gitlab): error on bad user branch (tree 404), warn on resolveRef fallback, normalize pathPrefix to directory boundary * fix(gitlab): preserve slashes in branch ref for file source URLs (GitFlow branches) * fix(gitlab): never abort sync on repo-tree 404 (empty repo); validate user branch exists at setup instead * fix(gitlab): validate ref via commits endpoint so tags and commit SHAs are accepted, not just branches * fix(gitlab): skip repo phase on tree 403 (missing read_repository) so wiki/issues still sync under all * fix(byok): add Fal icon and repair corrupted Ollama icon path The Ollama BYOK icon rendered blank because its SVG path had spaces stripped between arc-command flags (e.g. `a5.05 5.05 0 12.05-.636`), producing invalid tokens. Replaced with the canonical Ollama path. Also added a dedicated FalIcon (was falling back to the generic ImageIcon) and wired it into the BYOK provider list. Co-Authored-By: Claude Opus 4.8 * fix(icons): repair corrupted Fireworks icon arc command The leftmost spark of the Fireworks icon never rendered because its third subpath used a corrupted arc command (`a34.59 34.59 0 17.15 37.65`) with collapsed flags, yielding an invalid sweep-flag of 7 that aborts the path parse. Replaced with the canonical lobehub Fireworks source. Co-Authored-By: Claude Opus 4.8 --------- Co-authored-by: Claude Opus 4.8 --- .../settings/components/byok/byok.tsx | 4 +- apps/sim/components/icons.tsx | 23 +- apps/sim/connectors/gitlab/gitlab.ts | 512 ++++++++++++++++-- 3 files changed, 499 insertions(+), 40 deletions(-) diff --git a/apps/sim/app/workspace/[workspaceId]/settings/components/byok/byok.tsx b/apps/sim/app/workspace/[workspaceId]/settings/components/byok/byok.tsx index d289eea58dd..9d28fc7d365 100644 --- a/apps/sim/app/workspace/[workspaceId]/settings/components/byok/byok.tsx +++ b/apps/sim/app/workspace/[workspaceId]/settings/components/byok/byok.tsx @@ -20,13 +20,13 @@ import { BasetenIcon, BrandfetchIcon, ExaAIIcon, + FalIcon, FindymailIcon, FirecrawlIcon, FireworksIcon, GeminiIcon, GoogleIcon, HunterIOIcon, - ImageIcon, JinaAIIcon, LinkupIcon, MistralIcon, @@ -118,7 +118,7 @@ const PROVIDERS: { { id: 'falai', name: 'Fal.ai', - icon: ImageIcon, + icon: FalIcon, description: 'Image and video generation', placeholder: 'Enter your Fal.ai API key', }, diff --git a/apps/sim/components/icons.tsx b/apps/sim/components/icons.tsx index fcdab73224d..49d1d7ddf38 100644 --- a/apps/sim/components/icons.tsx +++ b/apps/sim/components/icons.tsx @@ -3365,7 +3365,18 @@ export const OllamaIcon = (props: SVGProps) => ( xmlns='http://www.w3.org/2000/svg' > Ollama - + + +) +export const FalIcon = (props: SVGProps) => ( + + Fal + ) export function ShieldCheckIcon(props: SVGProps) { @@ -3982,16 +3993,16 @@ export function FireworksIcon(props: SVGProps) { return ( ) diff --git a/apps/sim/connectors/gitlab/gitlab.ts b/apps/sim/connectors/gitlab/gitlab.ts index f3fe3829585..a7f0917c7e9 100644 --- a/apps/sim/connectors/gitlab/gitlab.ts +++ b/apps/sim/connectors/gitlab/gitlab.ts @@ -9,18 +9,120 @@ const logger = createLogger('GitLabConnector') const DEFAULT_HOST = 'gitlab.com' const PAGE_SIZE = 100 +/** Max repository file size to index. Larger blobs are skipped. */ +const MAX_FILE_SIZE = 10 * 1024 * 1024 +/** Bytes sniffed for NUL when detecting binary files (matches git's heuristic). */ +const BINARY_SNIFF_BYTES = 8000 /** * Prefix encoded into each document's externalId so getDocument can route to the - * correct GitLab resource. Wiki pages are addressed by slug, issues by iid. + * correct GitLab resource. Wiki pages are addressed by slug, issues by iid, and + * repository files by their repo-relative path. */ const WIKI_PREFIX = 'wiki:' const ISSUE_PREFIX = 'issue:' +const FILE_PREFIX = 'file:' /** - * Selects which GitLab resources to sync. + * Selects which GitLab resources to sync. `repo` = repository files (code/docs), + * `all` = repo + wiki + issues. `both` is retained for backward compatibility and + * means wiki + issues (no repository files). */ -type ContentTypeChoice = 'wiki' | 'issues' | 'both' +type ContentTypeChoice = 'repo' | 'wiki' | 'issues' | 'both' | 'all' + +/** Listing phases, walked in order: repository files ➜ wiki ➜ issues. */ +type SyncPhase = 'repo' | 'wiki' | 'issues' + +interface GitLabTreeEntry { + id: string + name: string + type: 'blob' | 'tree' + path: string + mode?: string +} + +interface GitLabFile { + file_path?: string + blob_id?: string + content?: string + encoding?: string + size?: number +} + +/** + * Heuristic binary detection: a NUL byte in the first 8 KB marks the file as + * binary, matching `git diff` / `git grep` semantics. + */ +function isBinaryBuffer(buf: Buffer): boolean { + const len = Math.min(buf.length, BINARY_SNIFF_BYTES) + for (let i = 0; i < len; i++) { + if (buf[i] === 0) return true + } + return false +} + +/** + * Parses a comma-separated extension filter into a normalized set (leading dot, + * lowercased). Returns null when no filter is configured (accept all files). + */ +function parseExtensions(raw: unknown): Set | null { + const trimmed = typeof raw === 'string' ? raw.trim() : '' + if (!trimmed) return null + const exts = trimmed + .split(',') + .map((e) => e.trim().toLowerCase()) + .filter(Boolean) + .map((e) => (e.startsWith('.') ? e : `.${e}`)) + return exts.length > 0 ? new Set(exts) : null +} + +/** + * Returns true when the file path matches the extension filter (or no filter set). + */ +function matchesExtension(filePath: string, extSet: Set | null): boolean { + if (!extSet) return true + const lastDot = filePath.lastIndexOf('.') + if (lastDot === -1) return false + return extSet.has(filePath.slice(lastDot).toLowerCase()) +} + +/** + * Extracts the full `rel="next"` URL from a keyset-pagination `Link` response + * header. GitLab's guidance is to follow this link verbatim rather than rebuild + * the URL, so the connector stores and re-fetches it as-is — this is robust to + * whichever continuation parameter the endpoint uses (`page_token`, `cursor`, + * `id_after`, …). Returns undefined when there is no next page. + */ +function parseNextLink(linkHeader: string | null): string | undefined { + if (!linkHeader) return undefined + for (const part of linkHeader.split(',')) { + if (!/rel="?next"?/i.test(part)) continue + const urlMatch = part.match(/<([^>]+)>/) + if (urlMatch) return urlMatch[1] + } + return undefined +} + +/** + * Returns the ordered list of active sync phases for a content-type choice. + */ +function activePhases(choice: ContentTypeChoice): SyncPhase[] { + const phases: SyncPhase[] = [] + if (choice === 'repo' || choice === 'all') phases.push('repo') + if (choice === 'wiki' || choice === 'both' || choice === 'all') phases.push('wiki') + if (choice === 'issues' || choice === 'both' || choice === 'all') phases.push('issues') + return phases +} + +/** + * Returns the phase following `current` for a choice, or undefined when `current` + * is the last active phase. + */ +function nextPhase(current: SyncPhase, choice: ContentTypeChoice): SyncPhase | undefined { + const phases = activePhases(choice) + const idx = phases.indexOf(current) + return idx >= 0 && idx + 1 < phases.length ? phases[idx + 1] : undefined +} interface GitLabWikiPage { slug: string @@ -57,6 +159,7 @@ interface GitLabProject { id: number path_with_namespace?: string web_url?: string + default_branch?: string wiki_access_level?: string wiki_enabled?: boolean } @@ -94,7 +197,15 @@ function encodeProjectId(project: unknown): string { */ function getContentTypeChoice(sourceConfig: Record): ContentTypeChoice { const value = typeof sourceConfig.contentTypes === 'string' ? sourceConfig.contentTypes : 'both' - if (value === 'wiki' || value === 'issues') return value + if ( + value === 'repo' || + value === 'wiki' || + value === 'issues' || + value === 'both' || + value === 'all' + ) { + return value + } return 'both' } @@ -136,6 +247,118 @@ function buildIssueContentHash(projectId: string, iid: number, updatedAt: string return `gitlab:issue:${projectId}:${iid}:${updatedAt}` } +/** + * Builds the change-detection hash for a repository file. The git blob SHA is + * content-addressable, so it changes exactly when the file content changes — and + * it is available both on the tree listing (`tree entry.id`) and the file fetch + * (`blob_id`), so the stub and hydrated document hash identically without a + * content fetch during listing. + */ +function buildFileContentHash(projectId: string, path: string, blobSha: string): string { + return `gitlab:file:${projectId}:${path}:${blobSha}` +} + +/** + * Builds the web UI URL for a repository file at a given ref. + */ +function buildFileSourceUrl( + apiBase: string, + encodedProject: string, + host: string, + projectPath: string, + ref: string, + path: string +): string { + const encodedPath = path.split('/').map(encodeURIComponent).join('/') + if (projectPath) { + const encodedRef = ref.split('/').map(encodeURIComponent).join('/') + return `https://${host}/${projectPath}/-/blob/${encodedRef}/${encodedPath}` + } + return `${apiBase}/projects/${encodedProject}/repository/files/${encodeURIComponent(path)}/raw?ref=${encodeURIComponent(ref)}` +} + +/** + * Builds a deferred stub for a repository file from a tree entry. Content is empty + * and fetched lazily via getDocument for new/changed files only. + */ +function treeEntryToStub( + apiBase: string, + encodedProject: string, + host: string, + projectPath: string, + ref: string, + entry: GitLabTreeEntry +): ExternalDocument { + return { + externalId: `${FILE_PREFIX}${entry.path}`, + title: entry.name || entry.path, + content: '', + contentDeferred: true, + mimeType: 'text/plain', + sourceUrl: buildFileSourceUrl(apiBase, encodedProject, host, projectPath, ref, entry.path), + contentHash: buildFileContentHash(encodedProject, entry.path, entry.id), + metadata: { + contentType: 'file', + title: entry.name || entry.path, + path: entry.path, + }, + } +} + +/** + * Builds a repository-file document from a fetched (non-raw) file response. Returns + * null for binary, oversized, or empty files so they are not indexed. + */ +function fileToDocument( + apiBase: string, + encodedProject: string, + host: string, + projectPath: string, + ref: string, + path: string, + file: GitLabFile +): ExternalDocument | null { + const blobSha = file.blob_id?.trim() + if (!blobSha) return null + + if (typeof file.size === 'number' && file.size > MAX_FILE_SIZE) { + logger.info('Skipping oversized GitLab file', { path, size: file.size }) + return null + } + + const raw = typeof file.content === 'string' ? file.content : '' + const buffer = file.encoding === 'base64' ? Buffer.from(raw, 'base64') : Buffer.from(raw, 'utf8') + if (isBinaryBuffer(buffer)) { + logger.info('Skipping binary GitLab file', { path }) + return null + } + if (buffer.byteLength > MAX_FILE_SIZE) { + logger.info('Skipping oversized GitLab file', { path, size: buffer.byteLength }) + return null + } + + const content = buffer.toString('utf8') + const title = path.split('/').pop() || path + const body = composeBody(title, content) + if (!body.trim()) return null + + return { + externalId: `${FILE_PREFIX}${path}`, + title, + content: body, + contentDeferred: false, + mimeType: 'text/plain', + sourceUrl: buildFileSourceUrl(apiBase, encodedProject, host, projectPath, ref, path), + contentHash: buildFileContentHash(encodedProject, path, blobSha), + metadata: { + contentType: 'file', + title, + path, + size: buffer.byteLength, + }, + } +} + /** * Composes the document body as "Title\n\n". */ @@ -251,30 +474,73 @@ async function fetchProject( * issues via the X-Next-Page header. */ interface CursorState { - phase: 'wiki' | 'issues' + phase: SyncPhase issuePage: number + /** Full `rel="next"` URL for the repository-tree keyset page to fetch next. */ + fileNextUrl?: string } function encodeCursor(state: CursorState): string { return Buffer.from(JSON.stringify(state), 'utf8').toString('base64url') } -function decodeCursor(cursor: string | undefined, initialPhase: 'wiki' | 'issues'): CursorState { +function decodeCursor(cursor: string | undefined, initialPhase: SyncPhase): CursorState { if (!cursor) return { phase: initialPhase, issuePage: 1 } try { const parsed = JSON.parse(Buffer.from(cursor, 'base64url').toString('utf8')) as Partial<{ - phase: 'wiki' | 'issues' + phase: SyncPhase issuePage: number + fileNextUrl: string }> + const phase: SyncPhase = + parsed.phase === 'repo' || parsed.phase === 'issues' || parsed.phase === 'wiki' + ? parsed.phase + : initialPhase return { - phase: parsed.phase === 'issues' ? 'issues' : 'wiki', + phase, issuePage: Number(parsed.issuePage) > 0 ? Number(parsed.issuePage) : 1, + fileNextUrl: typeof parsed.fileNextUrl === 'string' ? parsed.fileNextUrl : undefined, } } catch { return { phase: initialPhase, issuePage: 1 } } } +/** + * Resolves the git ref (branch/tag) to sync repository files from. Uses the + * user-configured `ref` when set, otherwise the project's default branch, which + * is cached on syncContext to avoid repeat lookups across pages and getDocument. + */ +async function resolveRef( + sourceConfig: Record, + syncContext: Record | undefined, + apiBase: string, + encodedProject: string, + accessToken: string +): Promise { + const configured = typeof sourceConfig.ref === 'string' ? sourceConfig.ref.trim() : '' + if (configured) return configured + + const cached = syncContext?.defaultBranch as string | undefined + if (cached) return cached + + const response = await fetchProject(apiBase, encodedProject, accessToken) + if (response.ok) { + const project = (await response.json()) as GitLabProject + const branch = project.default_branch?.trim() || 'main' + if (syncContext) { + syncContext.defaultBranch = branch + if (project.path_with_namespace) syncContext.projectPath = project.path_with_namespace + } + return branch + } + logger.warn('Failed to fetch GitLab project for default branch; falling back to "main"', { + project: encodedProject, + status: response.status, + }) + return 'main' +} + /** * Applies the optional maxItems cap to a batch, tracking the running total in * syncContext and flagging `listingCapped` when the cap is hit. @@ -298,7 +564,8 @@ function applyMaxItemsCap( export const gitlabConnector: ConnectorConfig = { id: 'gitlab', name: 'GitLab', - description: 'Sync wiki pages and issues from a GitLab project into your knowledge base', + description: + 'Sync repository files, wiki pages, and issues from a GitLab project into your knowledge base', version: '1.0.0', icon: GitLabIcon, @@ -310,8 +577,9 @@ export const gitlabConnector: ConnectorConfig = { /** * Incremental sync applies to issues only (via the `updated_after` filter - * derived from lastSyncAt). Wikis lack a change timestamp, so they are always - * re-listed in full and reconciled by content hash. + * derived from lastSyncAt). Wikis and repository files lack a change timestamp + * on listing, so they are always re-listed in full and reconciled by content + * hash (wiki: content digest, file: git blob SHA) — unchanged docs are skipped. */ supportsIncrementalSync: true, @@ -338,10 +606,42 @@ export const gitlabConnector: ConnectorConfig = { type: 'dropdown', required: false, options: [ + { label: 'Code, Wiki & Issues', id: 'all' }, + { label: 'Code (repository files) only', id: 'repo' }, { label: 'Wiki only', id: 'wiki' }, { label: 'Issues only', id: 'issues' }, - { label: 'Both', id: 'both' }, + { label: 'Wiki & Issues', id: 'both' }, ], + description: 'Which content to index. "Code" syncs repository files (READMEs, docs, source).', + }, + { + id: 'ref', + title: 'Branch', + type: 'short-input', + required: false, + mode: 'advanced', + placeholder: 'Default branch', + description: 'Branch or tag to sync repository files from. Applies only when syncing Code.', + }, + { + id: 'pathPrefix', + title: 'Path Filter', + type: 'short-input', + required: false, + mode: 'advanced', + placeholder: 'e.g. docs/', + description: + 'Only sync repository files under this path prefix. Applies only when syncing Code.', + }, + { + id: 'fileExtensions', + title: 'File Extensions', + type: 'short-input', + required: false, + mode: 'advanced', + placeholder: 'e.g. .md, .txt, .mdx', + description: + 'Only sync repository files with these extensions (comma-separated). Leave blank for all text files. Applies only when syncing Code.', }, { id: 'issueState', @@ -398,13 +698,13 @@ export const gitlabConnector: ConnectorConfig = { const choice = getContentTypeChoice(sourceConfig) const maxItems = sourceConfig.maxItems ? Number(sourceConfig.maxItems) : 0 - const wantsWiki = choice === 'wiki' || choice === 'both' - const wantsIssues = choice === 'issues' || choice === 'both' - if (!encodedProject) { throw new Error('Project is required') } + const phases = activePhases(choice) + if (phases.length === 0) return { documents: [], hasMore: false } + let projectPath = (syncContext?.projectPath as string) ?? '' if (!projectPath && syncContext) { const projectResponse = await fetchProject(apiBase, encodedProject, accessToken) @@ -412,13 +712,98 @@ export const gitlabConnector: ConnectorConfig = { const project = (await projectResponse.json()) as GitLabProject projectPath = project.path_with_namespace ?? '' syncContext.projectPath = projectPath + if (project.default_branch && !syncContext.defaultBranch) { + syncContext.defaultBranch = project.default_branch + } } } - const initialPhase: 'wiki' | 'issues' = wantsWiki ? 'wiki' : 'issues' - const state = decodeCursor(cursor, initialPhase) + let state = decodeCursor(cursor, phases[0]) + if (!phases.includes(state.phase)) state = { phase: phases[0], issuePage: 1 } + + /** Cursor that advances to the first page of the phase after `current`, if any. */ + const advance = (current: SyncPhase): { nextCursor?: string; hasMore: boolean } => { + const next = nextPhase(current, choice) + if (!next) return { hasMore: false } + return { nextCursor: encodeCursor({ phase: next, issuePage: 1 }), hasMore: true } + } + + if (state.phase === 'repo') { + const ref = await resolveRef(sourceConfig, syncContext, apiBase, encodedProject, accessToken) + const extSet = parseExtensions(sourceConfig.fileExtensions) + const rawPrefix = + typeof sourceConfig.pathPrefix === 'string' ? sourceConfig.pathPrefix.trim() : '' + const pathPrefix = rawPrefix && !rawPrefix.endsWith('/') ? `${rawPrefix}/` : rawPrefix + + const treeParams = new URLSearchParams({ + ref, + recursive: 'true', + per_page: String(PAGE_SIZE), + pagination: 'keyset', + }) + const url = + state.fileNextUrl ?? + `${apiBase}/projects/${encodedProject}/repository/tree?${treeParams.toString()}` + logger.info('Listing GitLab repository files', { + host, + project: encodedProject, + ref, + continued: Boolean(state.fileNextUrl), + }) + + const response = await fetchWithRetry(url, { + method: 'GET', + headers: authHeaders(accessToken), + }) - if (state.phase === 'wiki' && wantsWiki) { + if (!response.ok) { + if (response.status === 404 || response.status === 403) { + logger.warn('GitLab repository tree unavailable; skipping files', { + host, + project: encodedProject, + ref, + status: response.status, + }) + const adv = advance('repo') + return { documents: [], nextCursor: adv.nextCursor, hasMore: adv.hasMore } + } + const errorText = await response.text().catch(() => '') + logger.error('Failed to list GitLab repository tree', { + status: response.status, + error: errorText.slice(0, 500), + }) + throw new Error(`Failed to list GitLab repository tree: ${response.status}`) + } + + const entries = (await response.json()) as GitLabTreeEntry[] + const documents: ExternalDocument[] = [] + for (const entry of entries) { + if (entry.type !== 'blob' || !entry.path) continue + if (pathPrefix && !entry.path.startsWith(pathPrefix)) continue + if (!matchesExtension(entry.path, extSet)) continue + documents.push(treeEntryToStub(apiBase, encodedProject, host, projectPath, ref, entry)) + } + + const { documents: capped, capped: hitLimit } = applyMaxItemsCap( + documents, + maxItems, + syncContext + ) + if (hitLimit) return { documents: capped, hasMore: false } + + const nextLink = parseNextLink(response.headers.get('link')) + if (nextLink) { + return { + documents: capped, + nextCursor: encodeCursor({ phase: 'repo', issuePage: 1, fileNextUrl: nextLink }), + hasMore: true, + } + } + const adv = advance('repo') + return { documents: capped, nextCursor: adv.nextCursor, hasMore: adv.hasMore } + } + + if (state.phase === 'wiki') { const url = `${apiBase}/projects/${encodedProject}/wikis?with_content=1` logger.info('Listing GitLab wiki pages', { host, project: encodedProject }) @@ -428,6 +813,15 @@ export const gitlabConnector: ConnectorConfig = { }) if (!response.ok) { + if (response.status === 403 || response.status === 404) { + logger.warn('GitLab wiki unavailable; skipping wiki phase', { + host, + project: encodedProject, + status: response.status, + }) + const adv = advance('wiki') + return { documents: [], nextCursor: adv.nextCursor, hasMore: adv.hasMore } + } const errorText = await response.text().catch(() => '') logger.error('Failed to list GitLab wiki pages', { status: response.status, @@ -450,18 +844,15 @@ export const gitlabConnector: ConnectorConfig = { syncContext ) - if (hitLimit || !wantsIssues) { + if (hitLimit) { return { documents: capped, hasMore: false } } - return { - documents: capped, - nextCursor: encodeCursor({ phase: 'issues', issuePage: 1 }), - hasMore: true, - } + const adv = advance('wiki') + return { documents: capped, nextCursor: adv.nextCursor, hasMore: adv.hasMore } } - if (wantsIssues) { + if (state.phase === 'issues') { const params = new URLSearchParams({ per_page: String(PAGE_SIZE), page: String(state.issuePage), @@ -586,6 +977,32 @@ export const gitlabConnector: ConnectorConfig = { return issueToDocument(encodedProject, host, projectPath, issue) } + if (externalId.startsWith(FILE_PREFIX)) { + const path = externalId.slice(FILE_PREFIX.length) + if (!path) return null + + const ref = await resolveRef( + sourceConfig, + syncContext, + apiBase, + encodedProject, + accessToken + ) + const url = `${apiBase}/projects/${encodedProject}/repository/files/${encodeURIComponent(path)}?ref=${encodeURIComponent(ref)}` + const response = await fetchWithRetry(url, { + method: 'GET', + headers: authHeaders(accessToken), + }) + + if (!response.ok) { + if (response.status === 404) return null + throw new Error(`Failed to fetch GitLab file: ${response.status}`) + } + + const file = (await response.json()) as GitLabFile + return fileToDocument(apiBase, encodedProject, host, projectPath, ref, path, file) + } + return null } catch (error) { logger.warn(`Failed to fetch GitLab document ${externalId}`, { @@ -634,7 +1051,7 @@ export const gitlabConnector: ConnectorConfig = { const projectRecord = (await response.json()) as GitLabProject - if (choice === 'wiki' || choice === 'both') { + if (activePhases(choice).includes('wiki')) { const accessLevel = projectRecord.wiki_access_level const enabled = accessLevel != null ? accessLevel !== 'disabled' : projectRecord.wiki_enabled !== false @@ -642,7 +1059,28 @@ export const gitlabConnector: ConnectorConfig = { if (choice === 'wiki') { return { valid: false, error: 'The wiki feature is disabled for this project' } } - logger.warn('Wiki feature disabled; only issues will sync', { project }) + logger.warn('Wiki feature disabled; it will be skipped', { project }) + } + } + + const userRef = typeof sourceConfig.ref === 'string' ? sourceConfig.ref.trim() : '' + if (userRef && activePhases(choice).includes('repo')) { + const refResponse = await fetchWithRetry( + `${apiBase}/projects/${encodedProject}/repository/commits/${encodeURIComponent(userRef)}`, + { method: 'GET', headers: authHeaders(accessToken) }, + VALIDATE_RETRY_OPTIONS + ) + if (refResponse.status === 404) { + return { + valid: false, + error: `Branch, tag, or commit "${userRef}" not found in project "${project}"`, + } + } + if (!refResponse.ok) { + return { + valid: false, + error: `Cannot verify ref "${userRef}": ${refResponse.status}`, + } } } @@ -659,16 +1097,17 @@ export const gitlabConnector: ConnectorConfig = { { id: 'author', displayName: 'Author', fieldType: 'text' }, { id: 'labels', displayName: 'Labels', fieldType: 'text' }, { id: 'milestone', displayName: 'Milestone', fieldType: 'text' }, + { id: 'path', displayName: 'File Path', fieldType: 'text' }, + { id: 'size', displayName: 'File Size (bytes)', fieldType: 'number' }, { id: 'createdAt', displayName: 'Created At', fieldType: 'date' }, { id: 'updatedAt', displayName: 'Updated At', fieldType: 'date' }, ], /** - * Maps document metadata to tag slots. The `contentType` and `title` tags - * apply to both wikis and issues. The remaining tags (state, author, labels, - * milestone, createdAt, updatedAt) are issue-only — wiki pages expose none of - * them in the REST API, so wiki documents leave those metadata fields empty - * and the type/empty guards below skip them. + * Maps document metadata to tag slots. `contentType` and `title` apply to every + * document type. `state`/`author`/`labels`/`milestone`/`createdAt`/`updatedAt` + * are issue-only and `path`/`size` are repository-file-only; each document type + * leaves the others' fields empty and the type/empty guards below skip them. */ mapTags: (metadata: Record): Record => { const result: Record = {} @@ -693,6 +1132,15 @@ export const gitlabConnector: ConnectorConfig = { result.milestone = metadata.milestone } + if (typeof metadata.path === 'string' && metadata.path.trim()) { + result.path = metadata.path + } + + if (metadata.size != null) { + const num = Number(metadata.size) + if (!Number.isNaN(num)) result.size = num + } + const createdAt = parseTagDate(metadata.createdAt) if (createdAt) result.createdAt = createdAt From 4b84f06108f12dcf6bfbb719cdf267b77114520a Mon Sep 17 00:00:00 2001 From: Theodore Li Date: Wed, 3 Jun 2026 13:58:04 -0700 Subject: [PATCH 07/11] fix(mothership): run client-routed workflow tools server-side in headless execution (#4870) * fix(mothership): run client-routed workflow tools server-side in headless execution Headless Mothership (Mothership block, no browser) could not run workflows. The run_workflow/run_workflow_until_block/run_block/run_from_block tools are registered with route 'client', so the executor gate (isSimExecuted) skipped their registered server handlers and fell through to executeAppTool, throwing 'Tool not found'. Interactive runs delegate these to the browser before reaching the executor, so only the headless path broke. Allow a client-routed tool to use its registered server handler when one exists, which only affects the four run tools (the only client-routed tools, all of which have server handlers). * test(mothership): clear handler registry between executor tests Add clearHandlers() helper and reset the module-level handler registry in beforeEach so handlers registered in one test do not leak into the next. --- .../copilot/tool-executor/executor.test.ts | 37 ++++++++++++++++++- .../sim/lib/copilot/tool-executor/executor.ts | 14 ++++++- apps/sim/lib/copilot/tool-executor/router.ts | 4 ++ 3 files changed, 51 insertions(+), 4 deletions(-) diff --git a/apps/sim/lib/copilot/tool-executor/executor.test.ts b/apps/sim/lib/copilot/tool-executor/executor.test.ts index adeb6ce48da..61733f43a95 100644 --- a/apps/sim/lib/copilot/tool-executor/executor.test.ts +++ b/apps/sim/lib/copilot/tool-executor/executor.test.ts @@ -5,9 +5,10 @@ import { beforeEach, describe, expect, it, vi } from 'vitest' import { DEFAULT_EXECUTION_TIMEOUT_MS } from '@/lib/execution/constants' -const { isKnownTool, isSimExecuted } = vi.hoisted(() => ({ +const { isKnownTool, isSimExecuted, isClientExecuted } = vi.hoisted(() => ({ isKnownTool: vi.fn(), isSimExecuted: vi.fn(), + isClientExecuted: vi.fn(), })) const { executeAppTool } = vi.hoisted(() => ({ @@ -17,17 +18,19 @@ const { executeAppTool } = vi.hoisted(() => ({ vi.mock('./router', () => ({ isKnownTool, isSimExecuted, + isClientExecuted, })) vi.mock('@/tools', () => ({ executeTool: executeAppTool, })) -import { executeTool } from './executor' +import { clearHandlers, executeTool, registerHandler } from './executor' describe('copilot tool executor fallback', () => { beforeEach(() => { vi.clearAllMocks() + clearHandlers() }) it('falls back to app tool executor for dynamic sim tools', async () => { @@ -59,6 +62,36 @@ describe('copilot tool executor fallback', () => { expect(result).toEqual({ success: true, output: { emails: [] } }) }) + it('uses the registered handler for client-routed tools when running headless (Mothership block)', async () => { + isKnownTool.mockReturnValue(true) + isSimExecuted.mockReturnValue(false) + isClientExecuted.mockReturnValue(true) + + const runWorkflowHandler = vi.fn().mockResolvedValue({ success: true, output: { ran: true } }) + registerHandler('run_workflow', runWorkflowHandler) + + const context = { userId: 'user-1', workflowId: 'workflow-1', workspaceId: 'ws-1' } + const result = await executeTool('run_workflow', { workflow_input: {} }, context) + + expect(runWorkflowHandler).toHaveBeenCalledWith({ workflow_input: {} }, context) + expect(executeAppTool).not.toHaveBeenCalled() + expect(result).toEqual({ success: true, output: { ran: true } }) + }) + + it('falls back to app tool executor for client-routed tools with no registered handler', async () => { + isKnownTool.mockReturnValue(true) + isSimExecuted.mockReturnValue(false) + isClientExecuted.mockReturnValue(true) + executeAppTool.mockResolvedValue({ + success: false, + error: 'Tool not found: unknown_client_tool', + }) + + await executeTool('unknown_client_tool', {}, { userId: 'user-1' }) + + expect(executeAppTool).toHaveBeenCalledWith('unknown_client_tool', expect.any(Object)) + }) + it('converts function_execute timeout from seconds to milliseconds for copilot calls', async () => { isKnownTool.mockReturnValue(false) isSimExecuted.mockReturnValue(false) diff --git a/apps/sim/lib/copilot/tool-executor/executor.ts b/apps/sim/lib/copilot/tool-executor/executor.ts index 084d046c027..d6f7d5caae8 100644 --- a/apps/sim/lib/copilot/tool-executor/executor.ts +++ b/apps/sim/lib/copilot/tool-executor/executor.ts @@ -2,7 +2,7 @@ import { createLogger } from '@sim/logger' import { toError } from '@sim/utils/errors' import { DEFAULT_EXECUTION_TIMEOUT_MS } from '@/lib/execution/constants' import { executeTool as executeAppTool } from '@/tools' -import { isKnownTool, isSimExecuted } from './router' +import { isClientExecuted, isKnownTool, isSimExecuted } from './router' import type { ToolCallDescriptor, ToolExecutionContext, @@ -35,12 +35,22 @@ export function hasHandler(toolId: string): boolean { return handlerRegistry.has(toolId) } +export function clearHandlers(): void { + handlerRegistry.clear() +} + export async function executeTool( toolId: string, params: Record, context: ToolExecutionContext ): Promise { - const canUseRegisteredHandler = isKnownTool(toolId) && isSimExecuted(toolId) + // Client-routed tools (e.g. run_workflow) are normally executed in the browser and never + // reach this point in interactive mode. In headless mode (Mothership block, no browser) there + // is no client to delegate to, so fall back to the registered server-side handler when one + // exists — otherwise the call would route to executeAppTool and throw "Tool not found". + const canUseRegisteredHandler = + isKnownTool(toolId) && + (isSimExecuted(toolId) || (isClientExecuted(toolId) && hasHandler(toolId))) if (!canUseRegisteredHandler) { const appParams = buildAppToolParams(toolId, params, context) return executeAppTool(toolId, appParams) diff --git a/apps/sim/lib/copilot/tool-executor/router.ts b/apps/sim/lib/copilot/tool-executor/router.ts index 7c64490cb4c..46a6815cfd7 100644 --- a/apps/sim/lib/copilot/tool-executor/router.ts +++ b/apps/sim/lib/copilot/tool-executor/router.ts @@ -31,6 +31,10 @@ export function isGoExecuted(toolId: string): boolean { return getToolEntry(toolId)?.route === 'go' } +export function isClientExecuted(toolId: string): boolean { + return getToolEntry(toolId)?.route === 'client' +} + export function isKnownTool(toolId: string): boolean { return isToolInCatalog(toolId) } From 85942a5301b9baa1147183c15708c31aa24908c3 Mon Sep 17 00:00:00 2001 From: Waleed Date: Wed, 3 Jun 2026 13:58:14 -0700 Subject: [PATCH 08/11] fix(dev): use globalThis for singleton state to prevent HMR memory leaks (#4869) * fix(dev): use globalThis for singleton state to prevent HMR memory leaks * fix(dev): apply globalThis guard to rate-limiter storage factory to prevent listener accumulation * fix(types): resolve McpConnectionManager globalThis undefined type error --- .../copilot/persistence/tool-confirm/index.ts | 18 ++- apps/sim/lib/copilot/tasks.ts | 20 +++- apps/sim/lib/core/config/redis.ts | 108 ++++++++++-------- .../lib/core/rate-limiter/storage/factory.ts | 34 +++--- apps/sim/lib/execution/cancellation.ts | 12 +- apps/sim/lib/mcp/connection-manager.ts | 13 ++- apps/sim/lib/mcp/pubsub.ts | 44 ++++--- 7 files changed, 155 insertions(+), 94 deletions(-) diff --git a/apps/sim/lib/copilot/persistence/tool-confirm/index.ts b/apps/sim/lib/copilot/persistence/tool-confirm/index.ts index cd13ee31057..09f5fd2ee93 100644 --- a/apps/sim/lib/copilot/persistence/tool-confirm/index.ts +++ b/apps/sim/lib/copilot/persistence/tool-confirm/index.ts @@ -9,16 +9,24 @@ import { import { getAsyncToolCalls } from '@/lib/copilot/async-runs/repository' import { MothershipStreamV1ToolOutcome } from '@/lib/copilot/generated/mothership-stream-v1' import { getRedisClient } from '@/lib/core/config/redis' -import { createPubSubChannel } from '@/lib/events/pubsub' +import { createPubSubChannel, type PubSubChannel } from '@/lib/events/pubsub' const logger = createLogger('CopilotOrchestratorPersistence') const TOOL_CONFIRMATION_TTL_SECONDS = 60 * 10 const toolConfirmationKey = (toolCallId: string) => `copilot:tool-confirmation:${toolCallId}` -const toolConfirmationChannel = createPubSubChannel({ - channel: 'copilot:tool-confirmation', - label: 'CopilotToolConfirmation', -}) +type ToolConfirmGlobal = typeof globalThis & { + _toolConfirmationChannel?: PubSubChannel +} + +const _g = globalThis as ToolConfirmGlobal +if (!_g._toolConfirmationChannel) { + _g._toolConfirmationChannel = createPubSubChannel({ + channel: 'copilot:tool-confirmation', + label: 'CopilotToolConfirmation', + }) +} +const toolConfirmationChannel = _g._toolConfirmationChannel /** * Get a tool call confirmation state from the durable async tool row. diff --git a/apps/sim/lib/copilot/tasks.ts b/apps/sim/lib/copilot/tasks.ts index db6594ebf28..252b82a61ae 100644 --- a/apps/sim/lib/copilot/tasks.ts +++ b/apps/sim/lib/copilot/tasks.ts @@ -7,7 +7,7 @@ * Channel: `task:status_changed` */ -import { createPubSubChannel } from '@/lib/events/pubsub' +import { createPubSubChannel, type PubSubChannel } from '@/lib/events/pubsub' interface TaskStatusEvent { workspaceId: string @@ -16,10 +16,20 @@ interface TaskStatusEvent { streamId?: string } -const channel = - typeof window !== 'undefined' - ? null - : createPubSubChannel({ channel: 'task:status_changed', label: 'task' }) +type TaskPubSubGlobal = typeof globalThis & { + _taskStatusChannel?: PubSubChannel | null +} + +const g = globalThis as TaskPubSubGlobal + +if (!('_taskStatusChannel' in g)) { + g._taskStatusChannel = + typeof window !== 'undefined' + ? null + : createPubSubChannel({ channel: 'task:status_changed', label: 'task' }) +} + +const channel = g._taskStatusChannel export const taskPubSub = channel ? { diff --git a/apps/sim/lib/core/config/redis.ts b/apps/sim/lib/core/config/redis.ts index 04f976b44ae..820c0215c1e 100644 --- a/apps/sim/lib/core/config/redis.ts +++ b/apps/sim/lib/core/config/redis.ts @@ -54,55 +54,65 @@ export function getRedisConnectionDefaults( } } -let globalRedisClient: Redis | null = null -let pingFailures = 0 -let pingInterval: NodeJS.Timeout | null = null -let pingInFlight = false +interface RedisState { + client: Redis | null + pingFailures: number + pingInterval: NodeJS.Timeout | null + pingInFlight: boolean + reconnectListeners: Array<() => void> +} + +const g = globalThis as typeof globalThis & { _redisState?: RedisState } +if (!g._redisState) { + g._redisState = { + client: null, + pingFailures: 0, + pingInterval: null, + pingInFlight: false, + reconnectListeners: [], + } +} +const state = g._redisState const PING_INTERVAL_MS = 15_000 const MAX_PING_FAILURES = 2 -/** Callbacks invoked when the PING health check forces a reconnect. */ -const reconnectListeners: Array<() => void> = [] - /** * Register a callback that fires when the PING health check forces a reconnect. * Useful for resetting cached adapters that hold a stale Redis reference. */ export function onRedisReconnect(cb: () => void): void { - reconnectListeners.push(cb) + state.reconnectListeners.push(cb) } function startPingHealthCheck(redis: Redis): void { - if (pingInterval) return + if (state.pingInterval) return - pingInterval = setInterval(async () => { - if (pingInFlight) return - pingInFlight = true + state.pingInterval = setInterval(async () => { + if (state.pingInFlight) return + state.pingInFlight = true try { await redis.ping() - pingFailures = 0 + state.pingFailures = 0 } catch (error) { - pingFailures++ + state.pingFailures++ logger.warn('Redis PING failed', { - consecutiveFailures: pingFailures, + consecutiveFailures: state.pingFailures, error: toError(error).message, }) - if (pingFailures >= MAX_PING_FAILURES) { + if (state.pingFailures >= MAX_PING_FAILURES) { logger.error('Redis PING failed consecutive times — forcing reconnect', { - consecutiveFailures: pingFailures, + consecutiveFailures: state.pingFailures, }) - pingFailures = 0 - // Drop the cached client and stop this health check before disconnecting, - // so the next getRedisClient() builds a fresh client and a fresh PING loop. - // Listeners may call getRedisClient() and must observe the cleared global. - globalRedisClient = null - if (pingInterval) { - clearInterval(pingInterval) - pingInterval = null + state.pingFailures = 0 + // Clear before notifying listeners — they may call getRedisClient() and must see the reset state. + state.client = null + if (state.pingInterval) { + clearInterval(state.pingInterval) + state.pingInterval = null } - for (const cb of reconnectListeners) { + for (const cb of state.reconnectListeners) { try { cb() } catch (cbError) { @@ -116,7 +126,7 @@ function startPingHealthCheck(redis: Redis): void { } } } finally { - pingInFlight = false + state.pingInFlight = false } }, PING_INTERVAL_MS) } @@ -131,7 +141,7 @@ function startPingHealthCheck(redis: Redis): void { export function getRedisClient(): Redis | null { if (typeof window !== 'undefined') return null if (!redisUrl) return null - if (globalRedisClient) return globalRedisClient + if (state.client) return state.client // Outside the try/catch so config errors aren't silently swallowed. const defaults = getRedisConnectionDefaults(redisUrl) @@ -139,7 +149,7 @@ export function getRedisClient(): Redis | null { try { logger.info('Initializing Redis client') - globalRedisClient = new Redis(redisUrl, { + state.client = new Redis(redisUrl, { ...defaults, commandTimeout: 5000, maxRetriesPerRequest: 5, @@ -162,17 +172,17 @@ export function getRedisClient(): Redis | null { }, }) - globalRedisClient.on('connect', () => logger.info('Redis connected')) - globalRedisClient.on('ready', () => logger.info('Redis ready')) - globalRedisClient.on('error', (err: Error) => { + state.client.on('connect', () => logger.info('Redis connected')) + state.client.on('ready', () => logger.info('Redis ready')) + state.client.on('error', (err: Error) => { logger.error('Redis error', { error: err.message, code: (err as any).code }) }) - globalRedisClient.on('close', () => logger.warn('Redis connection closed')) - globalRedisClient.on('end', () => logger.error('Redis connection ended')) + state.client.on('close', () => logger.warn('Redis connection closed')) + state.client.on('end', () => logger.error('Redis connection ended')) - startPingHealthCheck(globalRedisClient) + startPingHealthCheck(state.client) - return globalRedisClient + return state.client } catch (error) { logger.error('Failed to initialize Redis client', { error }) return null @@ -274,18 +284,18 @@ export async function extendLock( * Use for graceful shutdown. */ export async function closeRedisConnection(): Promise { - if (pingInterval) { - clearInterval(pingInterval) - pingInterval = null + if (state.pingInterval) { + clearInterval(state.pingInterval) + state.pingInterval = null } - if (globalRedisClient) { + if (state.client) { try { - await globalRedisClient.quit() + await state.client.quit() } catch (error) { logger.error('Error closing Redis connection', { error }) } finally { - globalRedisClient = null + state.client = null } } } @@ -294,12 +304,12 @@ export async function closeRedisConnection(): Promise { * Reset all module-level state. Only intended for use in tests. */ export function resetForTesting(): void { - if (pingInterval) { - clearInterval(pingInterval) - pingInterval = null + if (state.pingInterval) { + clearInterval(state.pingInterval) + state.pingInterval = null } - globalRedisClient = null - pingFailures = 0 - pingInFlight = false - reconnectListeners.length = 0 + state.client = null + state.pingFailures = 0 + state.pingInFlight = false + state.reconnectListeners.length = 0 } diff --git a/apps/sim/lib/core/rate-limiter/storage/factory.ts b/apps/sim/lib/core/rate-limiter/storage/factory.ts index 948e51ad907..9c712ad91f2 100644 --- a/apps/sim/lib/core/rate-limiter/storage/factory.ts +++ b/apps/sim/lib/core/rate-limiter/storage/factory.ts @@ -7,19 +7,27 @@ import { RedisTokenBucket } from './redis-token-bucket' const logger = createLogger('RateLimitStorage') -let cachedAdapter: RateLimitStorageAdapter | null = null -let reconnectListenerRegistered = false +type FactoryGlobal = typeof globalThis & { + _rlCachedAdapter?: RateLimitStorageAdapter | null + _rlReconnectListenerRegistered?: boolean +} + +const g = globalThis as FactoryGlobal +if (!('_rlCachedAdapter' in g)) { + g._rlCachedAdapter = null + g._rlReconnectListenerRegistered = false +} export function createStorageAdapter(): RateLimitStorageAdapter { - if (cachedAdapter) { - return cachedAdapter + if (g._rlCachedAdapter) { + return g._rlCachedAdapter } - if (!reconnectListenerRegistered) { + if (!g._rlReconnectListenerRegistered) { onRedisReconnect(() => { - cachedAdapter = null + g._rlCachedAdapter = null }) - reconnectListenerRegistered = true + g._rlReconnectListenerRegistered = true } const storageMethod = getStorageMethod() @@ -30,17 +38,17 @@ export function createStorageAdapter(): RateLimitStorageAdapter { logger.warn( 'Redis configured but client unavailable - falling back to PostgreSQL for rate limiting' ) - cachedAdapter = new DbTokenBucket() + g._rlCachedAdapter = new DbTokenBucket() } else { logger.info('Rate limiting: Using Redis') - cachedAdapter = new RedisTokenBucket(redis) + g._rlCachedAdapter = new RedisTokenBucket(redis) } } else { logger.info('Rate limiting: Using PostgreSQL') - cachedAdapter = new DbTokenBucket() + g._rlCachedAdapter = new DbTokenBucket() } - return cachedAdapter + return g._rlCachedAdapter! } export function getAdapterType(): StorageMethod { @@ -48,9 +56,9 @@ export function getAdapterType(): StorageMethod { } export function resetStorageAdapter(): void { - cachedAdapter = null + g._rlCachedAdapter = null } export function setStorageAdapter(adapter: RateLimitStorageAdapter): void { - cachedAdapter = adapter + g._rlCachedAdapter = adapter } diff --git a/apps/sim/lib/execution/cancellation.ts b/apps/sim/lib/execution/cancellation.ts index ffa8ad9d444..a08ea280ed4 100644 --- a/apps/sim/lib/execution/cancellation.ts +++ b/apps/sim/lib/execution/cancellation.ts @@ -19,16 +19,20 @@ export type ExecutionCancellationRecordResult = reason: 'redis_unavailable' | 'redis_write_failed' } -let sharedChannel: PubSubChannel | null = null +type CancellationGlobal = typeof globalThis & { + _executionCancelChannel?: PubSubChannel +} + +const _g = globalThis as CancellationGlobal export function getCancellationChannel(): PubSubChannel { - if (!sharedChannel) { - sharedChannel = createPubSubChannel({ + if (!_g._executionCancelChannel) { + _g._executionCancelChannel = createPubSubChannel({ channel: EXECUTION_CANCEL_CHANNEL, label: 'execution-cancel', }) } - return sharedChannel + return _g._executionCancelChannel } export function isRedisCancellationEnabled(): boolean { diff --git a/apps/sim/lib/mcp/connection-manager.ts b/apps/sim/lib/mcp/connection-manager.ts index 178a3f54564..158983739b2 100644 --- a/apps/sim/lib/mcp/connection-manager.ts +++ b/apps/sim/lib/mcp/connection-manager.ts @@ -461,6 +461,13 @@ export class McpConnectionManager { } } -export const mcpConnectionManager: McpConnectionManager | null = isTest - ? null - : new McpConnectionManager() +type McpManagerGlobal = typeof globalThis & { + _mcpConnectionManager?: McpConnectionManager | null +} + +const _g = globalThis as McpManagerGlobal +if (!('_mcpConnectionManager' in _g)) { + _g._mcpConnectionManager = isTest ? null : new McpConnectionManager() +} + +export const mcpConnectionManager: McpConnectionManager | null = _g._mcpConnectionManager ?? null diff --git a/apps/sim/lib/mcp/pubsub.ts b/apps/sim/lib/mcp/pubsub.ts index 725857ae9c7..43bae170979 100644 --- a/apps/sim/lib/mcp/pubsub.ts +++ b/apps/sim/lib/mcp/pubsub.ts @@ -11,7 +11,7 @@ * (published by serve route, consumed by serve route on other processes to push to local SSE clients) */ -import { createPubSubChannel } from '@/lib/events/pubsub' +import { createPubSubChannel, type PubSubChannel } from '@/lib/events/pubsub' import type { ToolsChangedEvent, WorkflowToolsChangedEvent } from '@/lib/mcp/types' interface McpPubSubAdapter { @@ -22,21 +22,35 @@ interface McpPubSubAdapter { dispose(): void } -const toolsChannel = - typeof window !== 'undefined' - ? null - : createPubSubChannel({ - channel: 'mcp:tools_changed', - label: 'mcp-tools', - }) +type McpPubSubGlobal = typeof globalThis & { + _mcpToolsChannel?: PubSubChannel | null + _mcpWorkflowToolsChannel?: PubSubChannel | null +} -const workflowToolsChannel = - typeof window !== 'undefined' - ? null - : createPubSubChannel({ - channel: 'mcp:workflow_tools_changed', - label: 'mcp-workflow-tools', - }) +const g = globalThis as McpPubSubGlobal + +if (!('_mcpToolsChannel' in g)) { + g._mcpToolsChannel = + typeof window !== 'undefined' + ? null + : createPubSubChannel({ + channel: 'mcp:tools_changed', + label: 'mcp-tools', + }) +} + +if (!('_mcpWorkflowToolsChannel' in g)) { + g._mcpWorkflowToolsChannel = + typeof window !== 'undefined' + ? null + : createPubSubChannel({ + channel: 'mcp:workflow_tools_changed', + label: 'mcp-workflow-tools', + }) +} + +const toolsChannel = g._mcpToolsChannel +const workflowToolsChannel = g._mcpWorkflowToolsChannel export const mcpPubSub: McpPubSubAdapter | null = typeof window !== 'undefined' || !toolsChannel || !workflowToolsChannel From 80d966d3363be071ebe9c51aff6e7c1a1c453474 Mon Sep 17 00:00:00 2001 From: Waleed Date: Wed, 3 Jun 2026 14:42:54 -0700 Subject: [PATCH 09/11] fix(gitlab): pin pagination cursor to configured host + consolidate isSameOrigin (#4873) * fix(gitlab): pin pagination cursor to configured host before following it The repository-tree keyset cursor stores GitLab's verbatim rel="next" URL and re-fetches it with an Authorization: Bearer header. Assert the cursor's origin matches the configured apiBase before following it, so a tampered or corrupted fileNextUrl cannot exfiltrate the access token to an attacker-controlled host. Fails closed on mismatch. Co-Authored-By: Claude Opus 4.8 * improvement(validation): generalize isSameOrigin and reuse across connectors/tools Add an optional base argument to the shared isSameOrigin (defaulting to the app base URL) so callers can pin a URL to any trusted origin. The GitLab connector's cursor host-check and the tools self-origin check now consume the shared helper instead of their own URL-parsing. --------- Co-authored-by: Claude Opus 4.8 --- apps/sim/connectors/gitlab/gitlab.ts | 4 ++++ apps/sim/lib/core/utils/validation.ts | 13 +++++++------ apps/sim/tools/index.ts | 13 ++----------- 3 files changed, 13 insertions(+), 17 deletions(-) diff --git a/apps/sim/connectors/gitlab/gitlab.ts b/apps/sim/connectors/gitlab/gitlab.ts index a7f0917c7e9..66e71796d60 100644 --- a/apps/sim/connectors/gitlab/gitlab.ts +++ b/apps/sim/connectors/gitlab/gitlab.ts @@ -1,6 +1,7 @@ import { createLogger } from '@sim/logger' import { getErrorMessage, toError } from '@sim/utils/errors' import { GitLabIcon } from '@/components/icons' +import { isSameOrigin } from '@/lib/core/utils/validation' import { fetchWithRetry, VALIDATE_RETRY_OPTIONS } from '@/lib/knowledge/documents/utils' import type { ConnectorConfig, ExternalDocument, ExternalDocumentList } from '@/connectors/types' import { computeContentHash, joinTagArray, parseTagDate } from '@/connectors/utils' @@ -741,6 +742,9 @@ export const gitlabConnector: ConnectorConfig = { per_page: String(PAGE_SIZE), pagination: 'keyset', }) + if (state.fileNextUrl && !isSameOrigin(state.fileNextUrl, apiBase)) { + throw new Error('GitLab pagination cursor points to an unexpected host') + } const url = state.fileNextUrl ?? `${apiBase}/projects/${encodedProject}/repository/tree?${treeParams.toString()}` diff --git a/apps/sim/lib/core/utils/validation.ts b/apps/sim/lib/core/utils/validation.ts index 5fcfc4d3578..ddb3136c6fa 100644 --- a/apps/sim/lib/core/utils/validation.ts +++ b/apps/sim/lib/core/utils/validation.ts @@ -1,17 +1,18 @@ import { getBaseUrl } from './urls' /** - * Checks if a URL is same-origin with the application's base URL. - * Used to prevent open redirect vulnerabilities. + * Checks if a URL is same-origin with a base URL. Defaults to the application's + * base URL, used to prevent open redirect vulnerabilities; pass an explicit + * `base` to pin a URL to another trusted origin (e.g. a configured API host) + * before following it with credentials. * * @param url - The URL to validate + * @param base - The origin to compare against (defaults to the app base URL) * @returns True if the URL is same-origin, false otherwise (secure default) */ -export function isSameOrigin(url: string): boolean { +export function isSameOrigin(url: string, base: string = getBaseUrl()): boolean { try { - const targetUrl = new URL(url) - const appUrl = new URL(getBaseUrl()) - return targetUrl.origin === appUrl.origin + return new URL(url).origin === new URL(base).origin } catch { return false } diff --git a/apps/sim/tools/index.ts b/apps/sim/tools/index.ts index d98c6bd0ce3..3c83d634f01 100644 --- a/apps/sim/tools/index.ts +++ b/apps/sim/tools/index.ts @@ -19,6 +19,7 @@ import { } from '@/lib/core/utils/stream-limits' import { getBaseUrl, getInternalApiBaseUrl } from '@/lib/core/utils/urls' import { isUserFile } from '@/lib/core/utils/user-file' +import { isSameOrigin } from '@/lib/core/utils/validation' import { SIM_VIA_HEADER, serializeCallChain } from '@/lib/execution/call-chain' import { parseMcpToolId } from '@/lib/mcp/utils' import { resolveWorkspaceFileReference } from '@/lib/uploads/contexts/workspace/workspace-file-manager' @@ -1364,17 +1365,7 @@ function isErrorResponse( * the platform's own workflow execution endpoints via absolute URL. */ function isSelfOriginUrl(url: string): boolean { - try { - const targetOrigin = new URL(url).origin - const publicOrigin = new URL(getBaseUrl()).origin - if (targetOrigin === publicOrigin) return true - - const internalOrigin = new URL(getInternalApiBaseUrl()).origin - if (targetOrigin === internalOrigin) return true - } catch { - return false - } - return false + return isSameOrigin(url, getBaseUrl()) || isSameOrigin(url, getInternalApiBaseUrl()) } /** From 5efb47e8c588d4dc5571557c213b2cdadedc5085 Mon Sep 17 00:00:00 2001 From: Waleed Date: Wed, 3 Jun 2026 14:46:56 -0700 Subject: [PATCH 10/11] fix(storage): percent-encode object key in multipart fallback URL (#4872) * fix(storage): percent-encode object key in multipart fallback URL buildObjectFallbackUrl built the object URL from a raw key. Keys with spaces or reserved characters (and the pre-existing AWS branch) would produce a structurally invalid location. Encode the key per path segment (preserving '/' separators) across all branches (AWS, custom path-style, virtual-hosted). * refactor(storage): clearer per-segment key encoding in fallback URL * test(storage): cover multipart fallback URL (AWS, R2 virtual-hosted, MinIO path-style, key encoding) --- .../lib/uploads/providers/s3/client.test.ts | 70 +++++++++++++++++++ apps/sim/lib/uploads/providers/s3/client.ts | 13 +++- 2 files changed, 80 insertions(+), 3 deletions(-) diff --git a/apps/sim/lib/uploads/providers/s3/client.test.ts b/apps/sim/lib/uploads/providers/s3/client.test.ts index 48c017d2cb6..75eea9a3dde 100644 --- a/apps/sim/lib/uploads/providers/s3/client.test.ts +++ b/apps/sim/lib/uploads/providers/s3/client.test.ts @@ -12,6 +12,7 @@ const { mockPutObjectCommand, mockGetObjectCommand, mockDeleteObjectCommand, + mockCompleteMultipartUploadCommand, mockGetSignedUrl, mockEnv, mockS3Config, @@ -51,6 +52,7 @@ const { mockPutObjectCommand: vi.fn().mockImplementation(class {}), mockGetObjectCommand: vi.fn().mockImplementation(class {}), mockDeleteObjectCommand: vi.fn().mockImplementation(class {}), + mockCompleteMultipartUploadCommand: vi.fn().mockImplementation(class {}), mockGetSignedUrl: vi.fn(), mockEnv, } @@ -61,6 +63,7 @@ vi.mock('@aws-sdk/client-s3', () => ({ PutObjectCommand: mockPutObjectCommand, GetObjectCommand: mockGetObjectCommand, DeleteObjectCommand: mockDeleteObjectCommand, + CompleteMultipartUploadCommand: mockCompleteMultipartUploadCommand, })) vi.mock('@aws-sdk/s3-request-presigner', () => ({ @@ -92,6 +95,7 @@ vi.mock('@/lib/uploads/config', () => ({ })) import { + completeS3MultipartUpload, deleteFromS3, downloadFromS3, getPresignedUrl, @@ -398,4 +402,70 @@ describe('S3 Client', () => { }) }) }) + + describe('completeS3MultipartUpload fallback location', () => { + const parts = [{ ETag: 'etag-1', PartNumber: 1 }] + + it('uses the SDK-provided Location when present', async () => { + mockSend.mockResolvedValueOnce({ Location: 'https://provided.example.com/object' }) + + const result = await completeS3MultipartUpload('kb/uuid-file.txt', 'upload-1', parts) + + expect(result.location).toBe('https://provided.example.com/object') + expect(result.key).toBe('kb/uuid-file.txt') + expect(result.path).toBe('/api/files/serve/kb%2Fuuid-file.txt') + }) + + it('falls back to an AWS virtual-hosted URL when Location is absent', async () => { + mockSend.mockResolvedValueOnce({}) + + const result = await completeS3MultipartUpload('kb/uuid-file.txt', 'upload-1', parts) + + expect(result.location).toBe( + 'https://test-kb-bucket.s3.test-region.amazonaws.com/kb/uuid-file.txt' + ) + }) + + it('builds a path-style fallback URL for a custom endpoint with forcePathStyle', async () => { + mockS3Config.endpoint = 'https://minio.example.com' + mockS3Config.forcePathStyle = true + mockSend.mockResolvedValueOnce({}) + + const result = await completeS3MultipartUpload('kb/uuid-file.txt', 'upload-1', parts) + + expect(result.location).toBe('https://minio.example.com/test-kb-bucket/kb/uuid-file.txt') + }) + + it('builds a virtual-hosted fallback URL for a custom endpoint without forcePathStyle', async () => { + mockS3Config.endpoint = 'https://account.r2.cloudflarestorage.com' + mockS3Config.forcePathStyle = false + mockSend.mockResolvedValueOnce({}) + + const result = await completeS3MultipartUpload('kb/uuid-file.txt', 'upload-1', parts) + + expect(result.location).toBe( + 'https://test-kb-bucket.account.r2.cloudflarestorage.com/kb/uuid-file.txt' + ) + }) + + it('strips a trailing slash from the custom endpoint before appending the key', async () => { + mockS3Config.endpoint = 'https://minio.example.com/' + mockS3Config.forcePathStyle = true + mockSend.mockResolvedValueOnce({}) + + const result = await completeS3MultipartUpload('kb/uuid-file.txt', 'upload-1', parts) + + expect(result.location).toBe('https://minio.example.com/test-kb-bucket/kb/uuid-file.txt') + }) + + it('percent-encodes special characters per path segment, preserving slashes', async () => { + mockSend.mockResolvedValueOnce({}) + + const result = await completeS3MultipartUpload('kb/uuid-my file.txt', 'upload-1', parts) + + expect(result.location).toBe( + 'https://test-kb-bucket.s3.test-region.amazonaws.com/kb/uuid-my%20file.txt' + ) + }) + }) }) diff --git a/apps/sim/lib/uploads/providers/s3/client.ts b/apps/sim/lib/uploads/providers/s3/client.ts index 42b0c62fac8..7ddae7bbf87 100644 --- a/apps/sim/lib/uploads/providers/s3/client.ts +++ b/apps/sim/lib/uploads/providers/s3/client.ts @@ -394,18 +394,25 @@ export async function getS3MultipartPartUrls( * addressing mode — path-style for MinIO/Ceph (`forcePathStyle`), virtual-hosted * (bucket as a subdomain) for R2 and friends. Falls back to the AWS * virtual-hosted host when no custom endpoint is set. + * + * The key is percent-encoded per path segment (preserving `/` separators) so + * keys containing spaces or reserved characters still yield a valid URL. */ function buildObjectFallbackUrl(bucket: string, region: string, key: string): string { + const encodedKey = key + .split('/') + .map((segment) => encodeURIComponent(segment)) + .join('/') if (S3_CONFIG.endpoint) { const base = S3_CONFIG.endpoint.replace(/\/+$/, '') if (S3_CONFIG.forcePathStyle) { - return `${base}/${bucket}/${key}` + return `${base}/${bucket}/${encodedKey}` } const url = new URL(base) url.hostname = `${bucket}.${url.hostname}` - return `${url.origin}/${key}` + return `${url.origin}/${encodedKey}` } - return `https://${bucket}.s3.${region}.amazonaws.com/${key}` + return `https://${bucket}.s3.${region}.amazonaws.com/${encodedKey}` } /** From aed44024d4b1a7c10baf3d0f849244e0ff0dc9da Mon Sep 17 00:00:00 2001 From: Theodore Li Date: Wed, 3 Jun 2026 14:57:06 -0700 Subject: [PATCH 11/11] fix(tables): surface real error causes on cell-execution failures (diagnostics) (#4868) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * fix(tables): retry transient DB/Redis failures in cell execution and surface error causes Workflow-group-cell runs intermittently failed on trivial DB reads/writes under heavy fan-out, stranding cells in `running`. Investigation showed the PlanetScale and ElastiCache backends were healthy at the time — the failures are transient connection-level faults that the cell (maxAttempts: 1) had no tolerance for, and the real cause was never logged (Drizzle wraps it as "Failed query: ..." and the driver cause lives in error.cause). Resilience: - Add retryTransient (lib/table/retry-transient.ts): retries only transient infra errors (reuses isRetryableInfrastructureError; adds an ioredis command-timeout match) with jittered backoff, then rethrows. Fail-fast for everything else. - Wrap the cell's getTableById/getRowById reads, the terminal write (cell-write updateRow — idempotent via the executionId guard), and the Redis cascade-lock acquire. Diagnostics: - Add describeError (lib/core/errors/retryable-infrastructure.ts): walks the .cause chain and always returns the underlying driver cause (code/errno/ syscall + causeChain), including for unclassified errors like AbortError. - Log `cause` + a `retryable` flag (and aborted/timedOut in the cell's main catch) across the cell + finalization error paths, mirroring the existing schedule-execution pattern. Logging-only; no behavior change. This lets the next recurrence reveal the real cause and whether the retry applies. Co-Authored-By: Claude Opus 4.8 (1M context) * fix(tables): address review feedback on cell retry resilience - retryTransient: re-check the abort signal after the backoff sleep so a cancellation during sleep stops the next attempt (don't run/return work for an already-cancelled task). - isRetryableRedisError: walk the .cause chain (mirroring the infra classifier) so wrapped Redis timeouts are recognized; drop "Connection is in subscriber mode" — that's a connection-state programming error, not a transient drop, and would just fail identically every retry. - cascade-lock: stop wrapping acquireLock in retryTransient. acquireLock is a non-idempotent SET NX, so retrying after a timed-out-but-applied first SET returns false (key already ours) and yields a false `contended` that skips the cascade. A transient Redis blip here just fails the run before pickup (no stranded cell); the dispatcher re-drives it. - Tests: cause-chain Redis match, subscriber-mode exclusion, abort-during-sleep. Co-Authored-By: Claude Opus 4.8 (1M context) * fix(tables): drop out-of-scope abort/timeout fields from cell catch The main catch logged `aborted`/`timedOut` from `abortSignal`/`timeoutController`, but those are declared inside the outer try block (the inner try around executeWorkflow is try/finally, so this catch belongs to the outer try) and are not in scope in the catch — `next build`'s type-check failed with "Cannot find name 'abortSignal'". Local incremental `tsc --noEmit` had skipped the file and falsely passed; the Cursor/Greptile reviewers flagged this correctly. Removed the two fields. Abort/timeout is still surfaced via `cause: describeError(err)` (an aborted run shows `name: 'AbortError'` / the timeout message), so no diagnostic signal is lost. Co-Authored-By: Claude Opus 4.8 (1M context) * refactor(tables): drop in-process retry, keep cause diagnostics only In-process retry is the wrong layer for this path: the cell task is maxAttempts:1 by design, retrying on a possibly-degraded worker may not help, and it masks the very transient-failure signal we're trying to capture before we understand the root cause. Removed retryTransient entirely (file + all wrapping in cell-write, the cascade reads, and the lock acquire) and kept only the diagnostic logging. - Deleted lib/table/retry-transient.ts (+ test); cell-write and the cascade reads call getTableById/getRowById/updateRow directly again, fail-fast. - Kept describeError + `cause`/`retryable` fields across the cell + finalization catch blocks; the cell-path `retryable` flag now sources from isRetryableInfrastructureError (the canonical classifier) for consistency. Diagnostics-first: surface the real driver cause on the next recurrence, then decide the actual fix (e.g. task-level maxAttempts, or addressing the worker- side cause) from evidence rather than a speculative in-process retry. Co-Authored-By: Claude Opus 4.8 (1M context) * fix(schedules): log error cause on scheduled-execution failure paths The scheduled-job failure paths logged the raw error (.message/stack only) — its `.cause` (the real driver error behind a Drizzle "Failed query: ..." wrapper) was never recorded, and the classified-only `describeRetryableInfrastructureError` returns undefined for unrecognized errors. A real failed run (same incident window as the cell failures) failed in `applyScheduleUpdate` with exactly this unrecorded cause. Added `cause: describeError(error)` (always-on, walks the cause chain) to the applyScheduleUpdate catch, the early-failure catch, and the unhandled-error catch — passed as a second arg so the existing message+stack still emit. Co-Authored-By: Claude Opus 4.8 (1M context) * refactor(errors): move describeError to @sim/utils/errors `describeError` is a general-purpose error/cause-chain helper — it didn't belong in `lib/core/errors/retryable-infrastructure.ts` (that module is specifically about classifying retryable infra errors, and the name read wrong for a generic diagnostic). Moved it to `@sim/utils/errors` alongside `toError`/ `getErrorMessage`/`getPostgresErrorCode`, with its own cycle-safe cause walk. - Added describeError + DescribedError + tests to packages/utils/src/errors.ts. - Reverted the describeError addition from retryable-infrastructure.ts (it keeps only isRetryableInfrastructureError / describeRetryableInfrastructureError, which are accurately named and still used by the schedule retry path). - Re-pointed all consumers (cell, logging-session, pause-persistence, schedule) to import describeError from @sim/utils/errors. The `retryable` classification flag still sources from isRetryableInfrastructureError where used. Co-Authored-By: Claude Opus 4.8 (1M context) --------- Co-authored-by: Claude Opus 4.8 (1M context) --- apps/sim/background/schedule-execution.ts | 16 ++++-- .../background/workflow-column-execution.ts | 20 +++++-- .../sim/lib/logs/execution/logging-session.ts | 15 +++++- .../workflows/executor/pause-persistence.ts | 7 ++- packages/utils/src/errors.test.ts | 53 +++++++++++++++++- packages/utils/src/errors.ts | 54 +++++++++++++++++++ 6 files changed, 152 insertions(+), 13 deletions(-) diff --git a/apps/sim/background/schedule-execution.ts b/apps/sim/background/schedule-execution.ts index 4c6b1a9cba8..a786cefd14e 100644 --- a/apps/sim/background/schedule-execution.ts +++ b/apps/sim/background/schedule-execution.ts @@ -7,7 +7,7 @@ import { workflowSchedule, } from '@sim/db' import { createLogger, runWithRequestContext } from '@sim/logger' -import { toError } from '@sim/utils/errors' +import { describeError, toError } from '@sim/utils/errors' import { generateId } from '@sim/utils/id' import { backoffWithJitter } from '@sim/utils/retry' import { task } from '@trigger.dev/sdk' @@ -156,7 +156,7 @@ async function applyScheduleUpdate( return updatedRows.length > 0 } catch (error) { - logger.error(`[${requestId}] ${context}`, error) + logger.error(`[${requestId}] ${context}`, error, { cause: describeError(error) }) throw error } } @@ -530,7 +530,13 @@ async function runWorkflowExecution({ } } - logger.error(`[${requestId}] Early failure in scheduled workflow ${payload.workflowId}`, error) + logger.error( + `[${requestId}] Early failure in scheduled workflow ${payload.workflowId}`, + error, + { + cause: describeError(error), + } + ) if (wasExecutionFinalizedByCore(error, executionId)) { throw error @@ -950,7 +956,9 @@ export async function executeScheduleJob(payload: ScheduleExecutionPayload) { return } - logger.error(`[${requestId}] Error processing schedule ${payload.scheduleId}`, error) + logger.error(`[${requestId}] Error processing schedule ${payload.scheduleId}`, error, { + cause: describeError(error), + }) await releaseClaim( now, `Failed to release schedule ${payload.scheduleId} after unhandled error` diff --git a/apps/sim/background/workflow-column-execution.ts b/apps/sim/background/workflow-column-execution.ts index 53c337842fa..9f617cd5144 100644 --- a/apps/sim/background/workflow-column-execution.ts +++ b/apps/sim/background/workflow-column-execution.ts @@ -1,12 +1,13 @@ import { db } from '@sim/db' import { workflow as workflowTable } from '@sim/db/schema' import { createLogger, runWithRequestContext } from '@sim/logger' -import { toError } from '@sim/utils/errors' +import { describeError, toError } from '@sim/utils/errors' import { sleep } from '@sim/utils/helpers' import { generateId } from '@sim/utils/id' import { backoffWithJitter } from '@sim/utils/retry' import { task } from '@trigger.dev/sdk' import { eq } from 'drizzle-orm' +import { isRetryableInfrastructureError } from '@/lib/core/errors/retryable-infrastructure' import { createTimeoutAbortController } from '@/lib/core/execution-limits' import { RateLimiter } from '@/lib/core/rate-limiter/rate-limiter' import { preprocessExecution } from '@/lib/execution/preprocessing' @@ -597,8 +598,8 @@ async function runWorkflowAndWriteTerminal( }) .catch((err) => { logger.warn( - `Per-block partial write failed (table=${tableId} row=${rowId} group=${groupId}):`, - err + `Per-block partial write failed (table=${tableId} row=${rowId} group=${groupId})`, + { cause: describeError(err), retryable: isRetryableInfrastructureError(err) } ) }) } @@ -720,7 +721,12 @@ async function runWorkflowAndWriteTerminal( const message = toError(err).message logger.error( `Workflow group cell execution failed (table=${tableId} row=${rowId} group=${groupId})`, - { error: message, executionId } + { + error: message, + executionId, + cause: describeError(err), + retryable: isRetryableInfrastructureError(err), + } ) terminalWritten = true await writeChain.catch(() => {}) @@ -735,7 +741,11 @@ async function runWorkflowAndWriteTerminal( blockErrors, }) } catch (writeErr) { - logger.error('Also failed to write error state', { error: toError(writeErr).message }) + logger.error('Also failed to write error state', { + error: toError(writeErr).message, + cause: describeError(writeErr), + retryable: isRetryableInfrastructureError(writeErr), + }) } return 'error' } diff --git a/apps/sim/lib/logs/execution/logging-session.ts b/apps/sim/lib/logs/execution/logging-session.ts index 09bfd2348ca..a0fd011dc7d 100644 --- a/apps/sim/lib/logs/execution/logging-session.ts +++ b/apps/sim/lib/logs/execution/logging-session.ts @@ -1,8 +1,9 @@ import { db } from '@sim/db' import { workflowExecutionLogs } from '@sim/db/schema' import { createLogger } from '@sim/logger' -import { toError } from '@sim/utils/errors' +import { describeError, toError } from '@sim/utils/errors' import { and, eq, sql } from 'drizzle-orm' +import { isRetryableInfrastructureError } from '@/lib/core/errors/retryable-infrastructure' import { executionLogger } from '@/lib/logs/execution/logger' import { calculateCostSummary, @@ -177,6 +178,8 @@ export class LoggingSession { } catch (error) { logger.error(`Failed to persist last started block for execution ${this.executionId}:`, { error: toError(error).message, + cause: describeError(error), + retryable: isRetryableInfrastructureError(error), }) } } @@ -193,6 +196,8 @@ export class LoggingSession { } catch (error) { logger.error(`Failed to persist last completed block for execution ${this.executionId}:`, { error: toError(error).message, + cause: describeError(error), + retryable: isRetryableInfrastructureError(error), }) } } @@ -411,6 +416,8 @@ export class LoggingSession { executionId: this.executionId, error: toError(error).message, stack: error instanceof Error ? error.stack : undefined, + cause: describeError(error), + retryable: isRetryableInfrastructureError(error), }) throw error } @@ -1057,7 +1064,11 @@ export class LoggingSession { this.completionAttemptFailed = true logger.error( `[${this.requestId || 'unknown'}] Cost-only fallback also failed for execution ${this.executionId}:`, - { error: toError(fallbackError).message } + { + error: toError(fallbackError).message, + cause: describeError(fallbackError), + retryable: isRetryableInfrastructureError(fallbackError), + } ) } } diff --git a/apps/sim/lib/workflows/executor/pause-persistence.ts b/apps/sim/lib/workflows/executor/pause-persistence.ts index 2080668cccd..954329ad4fa 100644 --- a/apps/sim/lib/workflows/executor/pause-persistence.ts +++ b/apps/sim/lib/workflows/executor/pause-persistence.ts @@ -1,5 +1,6 @@ import { createLogger } from '@sim/logger' -import { toError } from '@sim/utils/errors' +import { describeError, toError } from '@sim/utils/errors' +import { isRetryableInfrastructureError } from '@/lib/core/errors/retryable-infrastructure' import type { LoggingSession } from '@/lib/logs/execution/logging-session' import { PauseResumeManager } from '@/lib/workflows/executor/human-in-the-loop-manager' import type { ExecutionResult } from '@/executor/types' @@ -46,6 +47,8 @@ export async function handlePostExecutionPauseState({ logger.error('Failed to persist pause result', { executionId, error: toError(pauseError).message, + cause: describeError(pauseError), + retryable: isRetryableInfrastructureError(pauseError), }) await loggingSession.markAsFailed( `Failed to persist pause state: ${toError(pauseError).message}` @@ -59,6 +62,8 @@ export async function handlePostExecutionPauseState({ logger.error('Failed to process queued resumes', { executionId, error: toError(resumeError).message, + cause: describeError(resumeError), + retryable: isRetryableInfrastructureError(resumeError), }) } } diff --git a/packages/utils/src/errors.test.ts b/packages/utils/src/errors.test.ts index fa11dc191f9..272c85e53a4 100644 --- a/packages/utils/src/errors.test.ts +++ b/packages/utils/src/errors.test.ts @@ -2,7 +2,7 @@ * @vitest-environment node */ import { describe, expect, it } from 'vitest' -import { getPostgresErrorCode, toError } from './errors.js' +import { describeError, getPostgresErrorCode, toError } from './errors.js' describe('toError', () => { it('returns the same Error when given an Error', () => { @@ -76,3 +76,54 @@ describe('getPostgresErrorCode', () => { expect(getPostgresErrorCode(err1)).toBeUndefined() }) }) + +describe('describeError', () => { + it('reports name and message for a plain error, omitting causeChain', () => { + const described = describeError(new Error('boom')) + expect(described).toEqual({ name: 'Error', message: 'boom' }) + expect(described.causeChain).toBeUndefined() + }) + + it('surfaces the deepest cause for a wrapped driver error', () => { + const driver = Object.assign(new Error('read ECONNRESET'), { + code: 'ECONNRESET', + errno: 'ECONNRESET', + syscall: 'read', + }) + const wrapped = new Error('Failed query: select ...', { cause: driver }) + const described = describeError(wrapped) + expect(described.message).toBe('read ECONNRESET') + expect(described.code).toBe('ECONNRESET') + expect(described.errno).toBe('ECONNRESET') + expect(described.syscall).toBe('read') + expect(described.causeChain).toEqual([ + 'Error: Failed query: select ...', + 'Error: read ECONNRESET', + ]) + }) + + it('always returns the cause for unclassified errors (AbortError)', () => { + const aborted = Object.assign(new Error('The operation was aborted'), { name: 'AbortError' }) + expect(describeError(aborted)).toEqual({ + name: 'AbortError', + message: 'The operation was aborted', + }) + }) + + it('falls back to a populated description for non-Error input without throwing', () => { + expect(describeError('just a string')).toEqual({ name: 'Error', message: 'just a string' }) + expect(() => describeError({ weird: true })).not.toThrow() + }) + + it('stops at depth 10 and does not loop on a cyclic cause', () => { + const a = new Error('a') + const b = new Error('b') + ;(a as { cause?: unknown }).cause = b + ;(b as { cause?: unknown }).cause = a + let described: ReturnType | undefined + expect(() => { + described = describeError(a) + }).not.toThrow() + expect(described?.causeChain?.length).toBeLessThanOrEqual(10) + }) +}) diff --git a/packages/utils/src/errors.ts b/packages/utils/src/errors.ts index 48fcee083c3..dc21d57b995 100644 --- a/packages/utils/src/errors.ts +++ b/packages/utils/src/errors.ts @@ -39,6 +39,60 @@ export function getPostgresConstraintName(error: unknown): string | undefined { return readPgErrorField(error, 'constraint_name') ?? readPgErrorField(error, 'constraint') } +export interface DescribedError { + name: string + message: string + code?: string + errno?: string + syscall?: string + /** `"Name: message"` per link in the `.cause` chain, outermost first. Present only when the chain has more than one link. */ + causeChain?: string[] +} + +/** + * Always-on diagnostic view of an error and its `.cause` chain. + * + * Reports the fields of the DEEPEST `.cause` link, because a wrapped driver + * error (e.g. Drizzle's `"Failed query: ..."` wrapping an `ECONNRESET`) carries + * the real reason there, not on the outer wrapper. Always returns a populated + * object — including for non-`Error` throws and unclassified errors like + * `AbortError`. Cycle-safe and depth-bounded. + * + * Loggers do not serialize the non-enumerable `Error.prototype.cause`, so pass + * the result as an explicit structured field rather than the raw error. + */ +export function describeError(error: unknown): DescribedError { + const chain: Error[] = [] + const seen = new Set() + let current: unknown = error + while (current instanceof Error && !seen.has(current) && chain.length < 10) { + seen.add(current) + chain.push(current) + current = current.cause + } + + if (chain.length === 0) { + const normalized = toError(error) + return { name: normalized.name, message: normalized.message } + } + + const deepest = chain[chain.length - 1] as Error & Record + const asString = (value: unknown): string | undefined => + typeof value === 'string' ? value : undefined + const code = asString(deepest.code) + const errno = asString(deepest.errno) + const syscall = asString(deepest.syscall) + + return { + name: deepest.name, + message: deepest.message, + ...(code ? { code } : {}), + ...(errno ? { errno } : {}), + ...(syscall ? { syscall } : {}), + ...(chain.length > 1 ? { causeChain: chain.map((e) => `${e.name}: ${e.message}`) } : {}), + } +} + function readPgErrorField(error: unknown, field: string): string | undefined { const seen = new Set() let current: unknown = error