Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
46 changes: 46 additions & 0 deletions lib/sea/SeaInputValidation.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
// Copyright (c) 2026 Databricks, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

import Int64 from 'node-int64';
import { DBSQLParameter, DBSQLParameterValue } from '../DBSQLParameter';
import ParameterError from '../errors/ParameterError';

/**
 * Bind-time shape gate: throws when `value` cannot be bound as a scalar
 * parameter, and returns normally otherwise.
 *
 * Accepted: `DBSQLParameter` instances, `null` / `undefined`, JS primitives,
 * `Date`, and `Int64`. Rejected: arrays and every other object. Such values
 * stringify to garbage (e.g. `[1,2,3]` → `"1,2,3"`) that the server fails to
 * coerce — on the Thrift path the operation never returns to FINISHED (a DoS
 * hazard), and on SEA it surfaces an opaque server error — so the driver fails
 * fast here instead, mirroring the kernel's compound-type rejection.
 *
 * Parameter-marker counting and arity validation are deliberately out of
 * scope: the kernel's `statement::params` codec owns that check and binds
 * exactly one placeholder style per statement, so repeating the SQL walk on
 * the JS side would only risk drift. This function is just the cheap
 * type-shape check performed before the value crosses the napi boundary.
 *
 * @param value - Candidate parameter value (already wrapped or raw).
 * @param label - Description of the slot being bound; used verbatim as the
 *   prefix of the error message.
 * @throws ParameterError if `value` is an array, or an object that is neither
 *   a `Date` nor an `Int64`.
 */
export default function assertBindableValue(value: DBSQLParameter | DBSQLParameterValue, label: string): void {
  // Pre-wrapped parameters and SQL NULL need no shape inspection.
  if (value instanceof DBSQLParameter || value === null || value === undefined) {
    return;
  }

  // Arrays get their own message because they are the most common mistake.
  if (Array.isArray(value)) {
    throw new ParameterError(
      `${label} is an array; compound types (ARRAY/MAP/STRUCT) are not bindable as a parameter value`,
    );
  }

  // Anything that is not an object is a primitive and therefore bindable.
  if (typeof value !== 'object') {
    return;
  }

  // Only two object kinds have a scalar representation.
  const isScalarObject = value instanceof Date || value instanceof Int64;
  if (!isScalarObject) {
    throw new ParameterError(
      `${label} is an object; only scalar values (string/number/bigint/boolean), Date, and Int64 are bindable`,
    );
  }
}
13 changes: 13 additions & 0 deletions lib/sea/SeaNativeLoader.ts
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,9 @@ import type {
ConnectionOptions as NativeConnectionOptions,
ArrowBatch as NativeArrowBatch,
ArrowSchema as NativeArrowSchema,
ExecuteOptions as NativeExecuteOptions,
TypedValueInput as NativeTypedValueInput,
NamedTypedValueInput as NativeNamedTypedValueInput,
} from '../../native/sea';

// SEA-prefixed re-exports. The kernel-generated `.d.ts` keeps the
Expand All @@ -46,6 +49,16 @@ export type SeaArrowSchema = NativeArrowSchema;
export type SeaConnection = NativeConnection;
export type SeaStatement = NativeStatement;

// Per-statement execution options and bound-parameter inputs are kernel
// concerns: the napi binding generates the canonical shapes (`positionalParams`
// / `namedParams` as `TypedValueInput` / `NamedTypedValueInput`, plus
// `rowLimit`, `queryTimeoutSecs`, `statementConf`, `queryTags`). We re-export
// rather than re-declare so the driver-side param codec can never drift from
// the kernel contract.
//
// Each alias below is a pure type re-export under the SEA-prefixed name; no
// runtime code is emitted for them.
// - SeaNativeExecuteOptions: the per-statement options object.
// - SeaNativeTypedValueInput: one bound value (`{ sqlType, value? }`).
// - SeaNativeNamedTypedValueInput: a bound value that also carries its `name`.
export type SeaNativeExecuteOptions = NativeExecuteOptions;
export type SeaNativeTypedValueInput = NativeTypedValueInput;
export type SeaNativeNamedTypedValueInput = NativeNamedTypedValueInput;

/**
* The full native binding surface, derived from the generated module
* so it can never drift from the `.d.ts` contract: when the kernel
Expand Down
124 changes: 124 additions & 0 deletions lib/sea/SeaPositionalParams.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,124 @@
// Copyright (c) 2026 Databricks, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

import { DBSQLParameter, DBSQLParameterValue } from '../DBSQLParameter';
import ParameterError from '../errors/ParameterError';
import { SeaNativeTypedValueInput, SeaNativeNamedTypedValueInput } from './SeaNativeLoader';
import assertBindableValue from './SeaInputValidation';

/**
 * Compute the `"precision,scale"` pair for a decimal literal so the caller can
 * build the parenthesised `DECIMAL(p,s)` type name the kernel param codec
 * requires (plain `"DECIMAL"` is rejected).
 *
 * Examples: `"99.99"` ⇒ `"4,2"`; `"-123"` ⇒ `"3,0"`; `"0.00001"` ⇒ `"5,5"`.
 *
 * - Scale is the number of digits after the decimal point.
 * - Precision is the number of integer digits once insignificant leading
 *   zeros are removed (matching Spark's decimal-literal rule) plus the scale,
 *   with a floor of 1.
 *
 * Nothing is clamped: an input that would need more than 38 digits is
 * rejected rather than paired with an inconsistent `DECIMAL(38,…)` type, which
 * the kernel would reject or silently truncate.
 *
 * @param v - Decimal value as text; surrounding whitespace is ignored.
 * @returns The string `"<precision>,<scale>"`.
 * @throws ParameterError if `v` is not of the form `[+-]?digits[.digits]`
 *   (exponent notation, empty and non-numeric input included), or if the
 *   derived precision exceeds 38.
 */
function decimalPrecisionScale(v: string): string {
  // Optional sign followed by a plain numeral. Exponential form ("1e+21")
  // never matches; a bare sign or empty string matches but captures no
  // digits, which the check below catches.
  const parsed = v.trim().match(/^[+-]?(\d*)(?:\.(\d+))?$/);
  const groups: ReadonlyArray<string | undefined> = parsed ?? [];
  const wholeDigits = groups[1] ?? '';
  const fractionDigits = groups[2] ?? '';

  if (parsed === null || wholeDigits.length + fractionDigits.length === 0) {
    throw new ParameterError(
      `DECIMAL parameter value "${v}" is not a plain decimal numeral (expected [+-]?digits[.digits]).`,
    );
  }

  const scale = fractionDigits.length;

  // Count integer digits from the first non-zero one: "007" → 1, "0" / "" → 0,
  // so that 0.00001 becomes DECIMAL(5,5) rather than DECIMAL(6,5).
  const firstNonZero = wholeDigits.search(/[^0]/);
  const significantWhole = firstNonZero === -1 ? 0 : wholeDigits.length - firstNonZero;

  // A value made only of zeros still needs one digit of precision.
  const totalDigits = significantWhole + scale;
  const precision = totalDigits > 0 ? totalDigits : 1;

  if (precision > 38) {
    throw new ParameterError(
      `DECIMAL parameter value "${v}" needs precision ${precision}, exceeding the Databricks maximum of 38. ` +
        'Round/scale the value or bind it as a STRING.',
    );
  }

  return `${precision},${scale}`;
}

/**
 * Translate one parameter (wrapped or raw) into the napi `TypedValueInput`
 * shape (`{ sqlType, value? }`) consumed by the kernel's positional-param
 * codec (`parse_typed_value`).
 *
 * Type inference and value stringification are delegated to
 * `DBSQLParameter.toSparkParameter` — the same routine the Thrift backend
 * uses. The resulting type name is then adapted to what the codec expects:
 * - no string value      → `{ sqlType: 'VOID' }` with no `value` (SQL NULL)
 * - `DECIMAL`            → `DECIMAL(p,s)`, derived from the value text
 * - anything `INTERVAL…` → `INTERVAL` (the codec's single interval type name)
 * - missing type         → `STRING`
 * - everything else      → passed through unchanged
 *
 * Type-name matching is case-insensitive.
 *
 * @param value - Parameter to convert; raw values are wrapped in a
 *   `DBSQLParameter` first.
 * @returns The typed value input for the native binding.
 * @throws ParameterError propagated from `decimalPrecisionScale` when a
 *   DECIMAL value is malformed or too wide.
 */
function toTypedValueInput(value: DBSQLParameter | DBSQLParameterValue): SeaNativeTypedValueInput {
  const wrapped = value instanceof DBSQLParameter ? value : new DBSQLParameter({ value });
  const sparkParam = wrapped.toSparkParameter();
  const text = sparkParam.value?.stringValue;

  // null/undefined come back from toSparkParameter without a string value;
  // `VOID` with no `value` is how the codec spells SQL NULL.
  if (text === undefined || text === null) {
    return { sqlType: 'VOID' };
  }

  const declaredType = sparkParam.type ?? 'STRING';
  const normalized = declaredType.toUpperCase();

  if (normalized === 'DECIMAL') {
    return { sqlType: `DECIMAL(${decimalPrecisionScale(text)})`, value: text };
  }
  if (normalized.startsWith('INTERVAL')) {
    return { sqlType: 'INTERVAL', value: text };
  }
  return { sqlType: declaredType, value: text };
}

/**
 * Turn the public `ordinalParameters` option into the napi `positionalParams`
 * array (values for the 1-based `?` placeholders).
 *
 * Each element is first checked with `assertBindableValue` and then converted
 * with `toTypedValueInput`, one element at a time and in order, so the first
 * offending element is the one reported.
 *
 * @param ordinalParameters - Positional values supplied by the caller.
 * @returns The converted array, or `undefined` when the option is absent or
 *   empty so the caller can keep the minimal no-options call shape.
 * @throws ParameterError if an element is not bindable or fails conversion.
 */
export function buildSeaPositionalParams(
  ordinalParameters?: Array<DBSQLParameter | DBSQLParameterValue>,
): Array<SeaNativeTypedValueInput> | undefined {
  if (ordinalParameters === undefined) {
    return undefined;
  }
  if (ordinalParameters.length === 0) {
    return undefined;
  }

  // Validate-then-convert for a single slot; the index only feeds the label.
  const bindSlot = (candidate: DBSQLParameter | DBSQLParameterValue, position: number): SeaNativeTypedValueInput => {
    assertBindableValue(candidate, `ordinalParameters[${position}]`);
    return toTypedValueInput(candidate);
  };

  return ordinalParameters.map(bindSlot);
}

/**
 * Turn the public `namedParameters` option (`Record<name, value>`) into the
 * napi `namedParams` array (values for `:name` placeholders).
 *
 * Every own enumerable entry is checked with `assertBindableValue`, converted
 * with the same `toTypedValueInput` mapping used for positional parameters
 * (DECIMAL → DECIMAL(p,s), NULL → VOID, …), and emitted together with its
 * name, in key order.
 *
 * @param namedParameters - Name → value map supplied by the caller.
 * @returns The converted array, or `undefined` when the option is absent or
 *   has no keys.
 * @throws ParameterError if a value is not bindable or fails conversion.
 */
export function buildSeaNamedParams(
  namedParameters?: Record<string, DBSQLParameter | DBSQLParameterValue>,
): Array<SeaNativeNamedTypedValueInput> | undefined {
  if (namedParameters === undefined) {
    return undefined;
  }

  const entries = Object.entries(namedParameters);
  if (entries.length === 0) {
    return undefined;
  }

  return entries.map(([name, candidate]) => {
    assertBindableValue(candidate, `namedParameters[${name}]`);
    return { name, ...toTypedValueInput(candidate) };
  });
}
74 changes: 74 additions & 0 deletions lib/sea/SeaServerInfo.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
// Copyright (c) 2026 Databricks, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

import { TGetInfoType, TGetInfoValue } from '../../thrift/TCLIService_types';

/**
* `getInfo` (JDBC `DatabaseMetaData` / ODBC `SQLGetInfo`) is a Thrift-protocol
* concept: the Thrift backend forwards `TGetInfoReq` to the server's `getInfo`
* RPC. The SEA REST protocol and the Rust kernel have **no** equivalent
* endpoint, so — exactly as JDBC does for `DatabaseMetaData` — we synthesize
* the values client-side.
*
* The Databricks Thrift server itself answers only three `TGetInfoType`s and
* rejects every other value; we mirror that surface so the SEA path is a
* drop-in equivalent:
*
* | TGetInfoType | Thrift server | SEA (here) |
* |---------------------|---------------|-------------------|
* | CLI_SERVER_NAME (13)| "Spark SQL" | "Spark SQL" |
* | CLI_DBMS_NAME (17)| "Spark SQL" | "Spark SQL" |
* | CLI_DBMS_VER (18)| "3.1.1" | "3.1.1" |
* | (any other) | error | undefined → error |
*
* Empirically re-verified against a live warehouse over the Thrift path: the
* three values above are byte-identical to the server's, and the server
* returns an error for every other `TGetInfoType` (probed
* CLI_MAX_DRIVER_CONNECTIONS, CLI_DATA_SOURCE_NAME, CLI_FETCH_DIRECTION, … —
* all errored). So the SEA "undefined → throw" path matches Thrift's effective
* behaviour rather than narrowing it.
*/

/**
 * Canonical DBMS product name — identical to the Thrift server's value.
 * Returned by `seaServerInfoValue` for `TGetInfoType.CLI_DBMS_NAME`.
 */
export const SEA_DBMS_NAME = 'Spark SQL';

/**
 * Server-name answer — identical to the Thrift server's value.
 * Returned by `seaServerInfoValue` for `TGetInfoType.CLI_SERVER_NAME`.
 */
export const SEA_SERVER_NAME = 'Spark SQL';

/**
 * DBMS version string, returned by `seaServerInfoValue` for
 * `TGetInfoType.CLI_DBMS_VER`. Mirrors the constant the Databricks Thrift
 * server reports for `CLI_DBMS_VER` (the HiveServer2-compat Spark SQL version,
 * not the DBR release). Kept in lock-step with Thrift for parity; if the
 * server ever changes it the comparator's GET_INFO suite flags the drift.
 */
export const SEA_DBMS_VERSION = '3.1.1';

/**
 * Build the client-side `TGetInfoValue` answer for a `getInfo` request on the
 * SEA path.
 *
 * Only three info types are answered, each with a fixed string:
 * `CLI_SERVER_NAME`, `CLI_DBMS_NAME` and `CLI_DBMS_VER`.
 *
 * @param infoType - Numeric `TGetInfoType` being requested.
 * @returns A `TGetInfoValue` whose `stringValue` holds the answer, or
 *   `undefined` for any other info type — the caller is expected to surface
 *   that as an error, matching Thrift's reject-unsupported-info-type
 *   behaviour.
 */
export function seaServerInfoValue(infoType: number): TGetInfoValue | undefined {
  // Resolve the answer text first; wrap it in the Thrift value type once.
  let answer: string | undefined;
  if (infoType === TGetInfoType.CLI_SERVER_NAME) {
    answer = SEA_SERVER_NAME;
  } else if (infoType === TGetInfoType.CLI_DBMS_NAME) {
    answer = SEA_DBMS_NAME;
  } else if (infoType === TGetInfoType.CLI_DBMS_VER) {
    answer = SEA_DBMS_VERSION;
  }

  if (answer === undefined) {
    return undefined;
  }
  return new TGetInfoValue({ stringValue: answer });
}
Loading
Loading