From c712b646aa835ca7c2a4fb172933b4f3f60302be Mon Sep 17 00:00:00 2001
From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com>
Date: Mon, 18 May 2026 19:33:03 +0000
Subject: [PATCH 1/4] [Autoloop: perf-comparison] Iteration 321: Add readHtml
benchmark pair
Run: https://github.com/githubnext/tsb/actions/runs/26055399596
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
---
benchmarks/pandas/bench_read_html.py | 52 ++++++++++++++++++++++++++++
benchmarks/tsb/bench_read_html.ts | 43 +++++++++++++++++++++++
2 files changed, 95 insertions(+)
create mode 100644 benchmarks/pandas/bench_read_html.py
create mode 100644 benchmarks/tsb/bench_read_html.ts
diff --git a/benchmarks/pandas/bench_read_html.py b/benchmarks/pandas/bench_read_html.py
new file mode 100644
index 00000000..03dd0199
--- /dev/null
+++ b/benchmarks/pandas/bench_read_html.py
@@ -0,0 +1,52 @@
+"""
+Benchmark: pd.read_html — parse HTML tables into DataFrames.
+Outputs JSON: {"function": "read_html", "mean_ms": ..., "iterations": ..., "total_ms": ...}
+"""
+import json
+import time
+import math
+
+try:
+ import pandas as pd
+except ImportError:
+ import subprocess, sys
+ subprocess.check_call([sys.executable, "-m", "pip", "install", "pandas", "--quiet"])
+ import pandas as pd
+
+try:
+ import lxml # noqa: F401
+except ImportError:
+ import subprocess, sys
+ subprocess.check_call([sys.executable, "-m", "pip", "install", "lxml", "--quiet"])
+
+ROWS = 1_000
+WARMUP = 3
+ITERATIONS = 20
+
+
+def build_html(rows: int) -> str:
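+ header = "<table><thead><tr><th>id</th><th>name</th><th>value</th><th>score</th></tr></thead><tbody>"
+ body_rows = [
+ f"<tr><td>{i}</td><td>item_{i % 100}</td><td>{i * 1.5:.2f}</td><td>{math.sin(i * 0.01):.6f}</td></tr>"
+ for i in range(rows)
+ ]
+ return f"{header}{''.join(body_rows)}</tbody></table>"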
+
+
+html = build_html(ROWS)
+
+# Warm-up
+for _ in range(WARMUP):
+ pd.read_html(html)
+
+start = time.perf_counter()
+for _ in range(ITERATIONS):
+ pd.read_html(html)
+total_ms = (time.perf_counter() - start) * 1000
+
+print(json.dumps({
+ "function": "read_html",
+ "mean_ms": total_ms / ITERATIONS,
+ "iterations": ITERATIONS,
+ "total_ms": total_ms,
+}))
diff --git a/benchmarks/tsb/bench_read_html.ts b/benchmarks/tsb/bench_read_html.ts
new file mode 100644
index 00000000..3cbc7149
--- /dev/null
+++ b/benchmarks/tsb/bench_read_html.ts
@@ -0,0 +1,43 @@
+/**
+ * Benchmark: readHtml — parse HTML tables into DataFrames.
+ * Outputs JSON: {"function": "read_html", "mean_ms": ..., "iterations": ..., "total_ms": ...}
+ */
+import { readHtml } from "../../src/index.js";
+
+const ROWS = 1_000;
+const WARMUP = 3;
+const ITERATIONS = 20;
+
+// Build a realistic HTML string with a 1000-row table.
+function buildHtml(rows: number): string {
+ const header = "<table><thead><tr><th>id</th><th>name</th><th>value</th><th>score</th></tr></thead><tbody>";
+ const bodyRows: string[] = [];
+ for (let i = 0; i < rows; i++) {
+ bodyRows.push(
+ `<tr><td>${i}</td><td>item_${i % 100}</td><td>${(i * 1.5).toFixed(2)}</td><td>${Math.sin(i * 0.01).toFixed(6)}</td></tr>`,
+ );
+ }
+ return `${header}${bodyRows.join("")}</tbody></table>`;
+}
+
+const html = buildHtml(ROWS);
+
+// Warm-up
+for (let i = 0; i < WARMUP; i++) {
+ readHtml(html);
+}
+
+const start = performance.now();
+for (let i = 0; i < ITERATIONS; i++) {
+ readHtml(html);
+}
+const total = performance.now() - start;
+
+console.log(
+ JSON.stringify({
+ function: "read_html",
+ mean_ms: total / ITERATIONS,
+ iterations: ITERATIONS,
+ total_ms: total,
+ }),
+);
From 1e97d5a52197e29f26efd4f809c6dd0032fd018a Mon Sep 17 00:00:00 2001
From: Russell Horton
Date: Mon, 18 May 2026 12:40:55 -0700
Subject: [PATCH 2/4] chore: trigger CI [evergreen]
From 05090de1136083acdab66614faf7f63651200815 Mon Sep 17 00:00:00 2001
From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com>
Date: Tue, 26 May 2026 01:32:10 +0000
Subject: [PATCH 3/4] [Autoloop: perf-comparison] Iteration 330: Add 7
benchmark pairs (notna_boolean, window_extended, na_ops, reduce_ops,
rename_ops, math_ops, value_counts_full)
Run: https://github.com/githubnext/tsb/actions/runs/26427055861
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
---
benchmarks/pandas/bench_math_ops.py | 35 +++++++++++++++
benchmarks/pandas/bench_na_ops.py | 42 +++++++++++++++++
benchmarks/pandas/bench_notna_boolean.py | 36 +++++++++++++++
benchmarks/pandas/bench_reduce_ops.py | 37 +++++++++++++++
benchmarks/pandas/bench_rename_ops.py | 36 +++++++++++++++
benchmarks/pandas/bench_value_counts_full.py | 28 ++++++++++++
benchmarks/pandas/bench_window_extended.py | 32 +++++++++++++
benchmarks/tsb/bench_math_ops.ts | 40 +++++++++++++++++
benchmarks/tsb/bench_na_ops.ts | 47 ++++++++++++++++++++
benchmarks/tsb/bench_notna_boolean.ts | 41 +++++++++++++++++
benchmarks/tsb/bench_reduce_ops.ts | 42 +++++++++++++++++
benchmarks/tsb/bench_rename_ops.ts | 41 +++++++++++++++++
benchmarks/tsb/bench_value_counts_full.ts | 32 +++++++++++++
benchmarks/tsb/bench_window_extended.ts | 37 +++++++++++++++
14 files changed, 526 insertions(+)
create mode 100644 benchmarks/pandas/bench_math_ops.py
create mode 100644 benchmarks/pandas/bench_na_ops.py
create mode 100644 benchmarks/pandas/bench_notna_boolean.py
create mode 100644 benchmarks/pandas/bench_reduce_ops.py
create mode 100644 benchmarks/pandas/bench_rename_ops.py
create mode 100644 benchmarks/pandas/bench_value_counts_full.py
create mode 100644 benchmarks/pandas/bench_window_extended.py
create mode 100644 benchmarks/tsb/bench_math_ops.ts
create mode 100644 benchmarks/tsb/bench_na_ops.ts
create mode 100644 benchmarks/tsb/bench_notna_boolean.ts
create mode 100644 benchmarks/tsb/bench_reduce_ops.ts
create mode 100644 benchmarks/tsb/bench_rename_ops.ts
create mode 100644 benchmarks/tsb/bench_value_counts_full.ts
create mode 100644 benchmarks/tsb/bench_window_extended.ts
diff --git a/benchmarks/pandas/bench_math_ops.py b/benchmarks/pandas/bench_math_ops.py
new file mode 100644
index 00000000..1159ec02
--- /dev/null
+++ b/benchmarks/pandas/bench_math_ops.py
@@ -0,0 +1,35 @@
+"""Benchmark: math_ops — abs / round on Series and DataFrame of 100k rows."""
+import json, time
+import numpy as np
+import pandas as pd
+
+SIZE = 100_000
+WARMUP = 5
+ITERATIONS = 50
+
+s = pd.Series(np.where(np.arange(SIZE) % 2 == 0, -(np.arange(SIZE) + 0.567), np.arange(SIZE) + 0.567))
+df = pd.DataFrame({
+ "a": -(np.arange(SIZE) + 0.123),
+ "b": np.arange(SIZE) + 0.456,
+})
+
+for _ in range(WARMUP):
+ s.abs()
+ df.abs()
+ s.round(1)
+ df.round(1)
+
+start = time.perf_counter()
+for _ in range(ITERATIONS):
+ s.abs()
+ df.abs()
+ s.round(1)
+ df.round(1)
+total = (time.perf_counter() - start) * 1000
+
+print(json.dumps({
+ "function": "math_ops",
+ "mean_ms": total / ITERATIONS,
+ "iterations": ITERATIONS,
+ "total_ms": total,
+}))
diff --git a/benchmarks/pandas/bench_na_ops.py b/benchmarks/pandas/bench_na_ops.py
new file mode 100644
index 00000000..b7d0adf0
--- /dev/null
+++ b/benchmarks/pandas/bench_na_ops.py
@@ -0,0 +1,42 @@
+"""Benchmark: na_ops — isna / notna / ffill / bfill on 100k rows."""
+import json, time
+import numpy as np
+import pandas as pd
+
+SIZE = 100_000
+WARMUP = 5
+ITERATIONS = 50
+
+data = pd.array([i if i % 5 != 0 else pd.NA for i in range(SIZE)], dtype="Int64")
+s = pd.Series(data, dtype="float64")
+s[np.arange(SIZE) % 5 == 0] = np.nan
+
+df = pd.DataFrame({
+ "a": s,
+ "b": pd.Series([float(i * 2) if i % 7 != 0 else np.nan for i in range(SIZE)]),
+})
+
+for _ in range(WARMUP):
+ pd.isna(s)
+ pd.notna(s)
+ s.ffill()
+ s.bfill()
+ df.ffill()
+ df.bfill()
+
+start = time.perf_counter()
+for _ in range(ITERATIONS):
+ pd.isna(s)
+ pd.notna(s)
+ s.ffill()
+ s.bfill()
+ df.ffill()
+ df.bfill()
+total = (time.perf_counter() - start) * 1000
+
+print(json.dumps({
+ "function": "na_ops",
+ "mean_ms": total / ITERATIONS,
+ "iterations": ITERATIONS,
+ "total_ms": total,
+}))
diff --git a/benchmarks/pandas/bench_notna_boolean.py b/benchmarks/pandas/bench_notna_boolean.py
new file mode 100644
index 00000000..96c0a59d
--- /dev/null
+++ b/benchmarks/pandas/bench_notna_boolean.py
@@ -0,0 +1,36 @@
+"""Benchmark: notna_boolean — boolean-mask indexing on 100k rows."""
+import json, time
+import numpy as np
+import pandas as pd
+
+SIZE = 100_000
+WARMUP = 5
+ITERATIONS = 50
+
+s = pd.Series(np.arange(SIZE))
+mask = pd.Series(np.arange(SIZE) % 2 == 0)
+bool_arr = np.arange(SIZE) % 3 != 0
+
+df = pd.DataFrame({
+ "a": np.arange(SIZE),
+ "b": np.arange(SIZE) * 2,
+})
+
+for _ in range(WARMUP):
+ s[mask]
+ s[~mask]
+ df[bool_arr]
+
+start = time.perf_counter()
+for _ in range(ITERATIONS):
+ s[mask]
+ s[~mask]
+ df[bool_arr]
+total = (time.perf_counter() - start) * 1000
+
+print(json.dumps({
+ "function": "notna_boolean",
+ "mean_ms": total / ITERATIONS,
+ "iterations": ITERATIONS,
+ "total_ms": total,
+}))
diff --git a/benchmarks/pandas/bench_reduce_ops.py b/benchmarks/pandas/bench_reduce_ops.py
new file mode 100644
index 00000000..2be36963
--- /dev/null
+++ b/benchmarks/pandas/bench_reduce_ops.py
@@ -0,0 +1,37 @@
+"""Benchmark: reduce_ops — nunique / any / all on Series and DataFrame of 100k rows."""
+import json, time
+import numpy as np
+import pandas as pd
+
+SIZE = 100_000
+WARMUP = 5
+ITERATIONS = 50
+
+s = pd.Series(np.arange(SIZE) % 1000)
+bool_s = pd.Series(np.arange(SIZE) > 0)
+df = pd.DataFrame({
+ "a": np.arange(SIZE) % 500,
+ "b": np.arange(SIZE) % 200,
+ "c": np.arange(SIZE) % 100,
+})
+
+for _ in range(WARMUP):
+ s.nunique()
+ bool_s.any()
+ bool_s.all()
+ df.nunique()
+
+start = time.perf_counter()
+for _ in range(ITERATIONS):
+ s.nunique()
+ bool_s.any()
+ bool_s.all()
+ df.nunique()
+total = (time.perf_counter() - start) * 1000
+
+print(json.dumps({
+ "function": "reduce_ops",
+ "mean_ms": total / ITERATIONS,
+ "iterations": ITERATIONS,
+ "total_ms": total,
+}))
diff --git a/benchmarks/pandas/bench_rename_ops.py b/benchmarks/pandas/bench_rename_ops.py
new file mode 100644
index 00000000..897f520b
--- /dev/null
+++ b/benchmarks/pandas/bench_rename_ops.py
@@ -0,0 +1,36 @@
+"""Benchmark: rename_ops — rename / add_prefix / add_suffix on Series/DataFrame of 100k rows."""
+import json, time
+import numpy as np
+import pandas as pd
+
+SIZE = 100_000
+WARMUP = 5
+ITERATIONS = 50
+
+s = pd.Series(np.arange(SIZE), index=[f"row_{i}" for i in range(SIZE)])
+df = pd.DataFrame({
+ "col_a": np.arange(SIZE),
+ "col_b": np.arange(SIZE) * 2,
+ "col_c": np.arange(SIZE) * 3,
+})
+
+for _ in range(WARMUP):
+ s.rename(lambda lbl: f"new_{lbl}")
+ df.rename(columns={"col_a": "a", "col_b": "b"})
+ df.add_prefix("pre_")
+ df.add_suffix("_suf")
+
+start = time.perf_counter()
+for _ in range(ITERATIONS):
+ s.rename(lambda lbl: f"new_{lbl}")
+ df.rename(columns={"col_a": "a", "col_b": "b"})
+ df.add_prefix("pre_")
+ df.add_suffix("_suf")
+total = (time.perf_counter() - start) * 1000
+
+print(json.dumps({
+ "function": "rename_ops",
+ "mean_ms": total / ITERATIONS,
+ "iterations": ITERATIONS,
+ "total_ms": total,
+}))
diff --git a/benchmarks/pandas/bench_value_counts_full.py b/benchmarks/pandas/bench_value_counts_full.py
new file mode 100644
index 00000000..284bb8ed
--- /dev/null
+++ b/benchmarks/pandas/bench_value_counts_full.py
@@ -0,0 +1,28 @@
+"""Benchmark: value_counts_full — value_counts(bins=N) on Series of 100k rows."""
+import json, time
+import numpy as np
+import pandas as pd
+
+SIZE = 100_000
+WARMUP = 5
+ITERATIONS = 50
+
+rng = np.random.default_rng(42)
+s = pd.Series(rng.random(SIZE) * 100)
+
+for _ in range(WARMUP):
+ s.value_counts(bins=10)
+ s.value_counts(bins=20)
+
+start = time.perf_counter()
+for _ in range(ITERATIONS):
+ s.value_counts(bins=10)
+ s.value_counts(bins=20)
+total = (time.perf_counter() - start) * 1000
+
+print(json.dumps({
+ "function": "value_counts_full",
+ "mean_ms": total / ITERATIONS,
+ "iterations": ITERATIONS,
+ "total_ms": total,
+}))
diff --git a/benchmarks/pandas/bench_window_extended.py b/benchmarks/pandas/bench_window_extended.py
new file mode 100644
index 00000000..ddafc28a
--- /dev/null
+++ b/benchmarks/pandas/bench_window_extended.py
@@ -0,0 +1,32 @@
+"""Benchmark: window_extended — rolling sem/skew/kurt/quantile on 100k rows."""
+import json, time
+import numpy as np
+import pandas as pd
+
+SIZE = 100_000
+WARMUP = 3
+ITERATIONS = 20
+WINDOW = 10
+
+s = pd.Series(np.sin(np.arange(SIZE) / 100) * 100 + np.arange(SIZE) * 0.001)
+
+for _ in range(WARMUP):
+ s.rolling(WINDOW).sem()
+ s.rolling(WINDOW).skew()
+ s.rolling(WINDOW).kurt()
+ s.rolling(WINDOW).quantile(0.5)
+
+start = time.perf_counter()
+for _ in range(ITERATIONS):
+ s.rolling(WINDOW).sem()
+ s.rolling(WINDOW).skew()
+ s.rolling(WINDOW).kurt()
+ s.rolling(WINDOW).quantile(0.5)
+total = (time.perf_counter() - start) * 1000
+
+print(json.dumps({
+ "function": "window_extended",
+ "mean_ms": total / ITERATIONS,
+ "iterations": ITERATIONS,
+ "total_ms": total,
+}))
diff --git a/benchmarks/tsb/bench_math_ops.ts b/benchmarks/tsb/bench_math_ops.ts
new file mode 100644
index 00000000..5559bde5
--- /dev/null
+++ b/benchmarks/tsb/bench_math_ops.ts
@@ -0,0 +1,40 @@
+/**
+ * Benchmark: math_ops — absSeries / absDataFrame / roundSeries / roundDataFrame on 100k rows.
+ * Outputs JSON: {"function": "math_ops", "mean_ms": ..., "iterations": ..., "total_ms": ...}
+ */
+import { Series, DataFrame, absSeries, absDataFrame, roundSeries, roundDataFrame } from "../../src/index.ts";
+
+const SIZE = 100_000;
+const WARMUP = 5;
+const ITERATIONS = 50;
+
+const s = new Series({ data: Array.from({ length: SIZE }, (_, i) => (i % 2 === 0 ? -(i + 0.567) : i + 0.567)) });
+const df = DataFrame.fromColumns({
+ a: Array.from({ length: SIZE }, (_, i) => -(i + 0.123)),
+ b: Array.from({ length: SIZE }, (_, i) => i + 0.456),
+});
+
+for (let i = 0; i < WARMUP; i++) {
+ absSeries(s);
+ absDataFrame(df);
+ roundSeries(s, 1);
+ roundDataFrame(df, 1);
+}
+
+const start = performance.now();
+for (let i = 0; i < ITERATIONS; i++) {
+ absSeries(s);
+ absDataFrame(df);
+ roundSeries(s, 1);
+ roundDataFrame(df, 1);
+}
+const total = performance.now() - start;
+
+console.log(
+ JSON.stringify({
+ function: "math_ops",
+ mean_ms: total / ITERATIONS,
+ iterations: ITERATIONS,
+ total_ms: total,
+ }),
+);
diff --git a/benchmarks/tsb/bench_na_ops.ts b/benchmarks/tsb/bench_na_ops.ts
new file mode 100644
index 00000000..31990d0c
--- /dev/null
+++ b/benchmarks/tsb/bench_na_ops.ts
@@ -0,0 +1,47 @@
+/**
+ * Benchmark: na_ops — isna / notna / ffillSeries / bfillSeries on 100k rows.
+ * Outputs JSON: {"function": "na_ops", "mean_ms": ..., "iterations": ..., "total_ms": ...}
+ */
+import { Series, DataFrame, isna, notna, ffillSeries, bfillSeries, dataFrameFfill, dataFrameBfill } from "../../src/index.ts";
+
+const SIZE = 100_000;
+const WARMUP = 5;
+const ITERATIONS = 50;
+
+const data: (number | null)[] = Array.from({ length: SIZE }, (_, i) =>
+ i % 5 === 0 ? null : i,
+);
+const s = new Series({ data });
+const df = DataFrame.fromColumns({
+ a: data,
+ b: Array.from({ length: SIZE }, (_, i) => (i % 7 === 0 ? null : i * 2)),
+});
+
+for (let i = 0; i < WARMUP; i++) {
+ isna(s);
+ notna(s);
+ ffillSeries(s);
+ bfillSeries(s);
+ dataFrameFfill(df);
+ dataFrameBfill(df);
+}
+
+const start = performance.now();
+for (let i = 0; i < ITERATIONS; i++) {
+ isna(s);
+ notna(s);
+ ffillSeries(s);
+ bfillSeries(s);
+ dataFrameFfill(df);
+ dataFrameBfill(df);
+}
+const total = performance.now() - start;
+
+console.log(
+ JSON.stringify({
+ function: "na_ops",
+ mean_ms: total / ITERATIONS,
+ iterations: ITERATIONS,
+ total_ms: total,
+ }),
+);
diff --git a/benchmarks/tsb/bench_notna_boolean.ts b/benchmarks/tsb/bench_notna_boolean.ts
new file mode 100644
index 00000000..ecd113db
--- /dev/null
+++ b/benchmarks/tsb/bench_notna_boolean.ts
@@ -0,0 +1,41 @@
+/**
+ * Benchmark: notna_boolean — keepTrue / keepFalse / filterBy on 100k rows.
+ * Outputs JSON: {"function": "notna_boolean", "mean_ms": ..., "iterations": ..., "total_ms": ...}
+ */
+import { Series, DataFrame, keepTrue, keepFalse, filterBy } from "../../src/index.ts";
+
+const SIZE = 100_000;
+const WARMUP = 5;
+const ITERATIONS = 50;
+
+const s = new Series({ data: Array.from({ length: SIZE }, (_, i) => i) });
+const mask = new Series({ data: Array.from({ length: SIZE }, (_, i) => i % 2 === 0) });
+const boolArr = Array.from({ length: SIZE }, (_, i) => i % 3 !== 0);
+
+const df = DataFrame.fromColumns({
+ a: Array.from({ length: SIZE }, (_, i) => i),
+ b: Array.from({ length: SIZE }, (_, i) => i * 2),
+});
+
+for (let i = 0; i < WARMUP; i++) {
+ keepTrue(s, mask);
+ keepFalse(s, mask);
+ filterBy(df, boolArr);
+}
+
+const start = performance.now();
+for (let i = 0; i < ITERATIONS; i++) {
+ keepTrue(s, mask);
+ keepFalse(s, mask);
+ filterBy(df, boolArr);
+}
+const total = performance.now() - start;
+
+console.log(
+ JSON.stringify({
+ function: "notna_boolean",
+ mean_ms: total / ITERATIONS,
+ iterations: ITERATIONS,
+ total_ms: total,
+ }),
+);
diff --git a/benchmarks/tsb/bench_reduce_ops.ts b/benchmarks/tsb/bench_reduce_ops.ts
new file mode 100644
index 00000000..f2e524f7
--- /dev/null
+++ b/benchmarks/tsb/bench_reduce_ops.ts
@@ -0,0 +1,42 @@
+/**
+ * Benchmark: reduce_ops — nuniqueSeries / anySeries / allSeries / nunique(df) on 100k rows.
+ * Outputs JSON: {"function": "reduce_ops", "mean_ms": ..., "iterations": ..., "total_ms": ...}
+ */
+import { Series, DataFrame, nuniqueSeries, anySeries, allSeries, nunique } from "../../src/index.ts";
+
+const SIZE = 100_000;
+const WARMUP = 5;
+const ITERATIONS = 50;
+
+const s = new Series({ data: Array.from({ length: SIZE }, (_, i) => i % 1000) });
+const boolSeries = new Series({ data: Array.from({ length: SIZE }, (_, i) => i > 0) });
+const df = DataFrame.fromColumns({
+ a: Array.from({ length: SIZE }, (_, i) => i % 500),
+ b: Array.from({ length: SIZE }, (_, i) => i % 200),
+ c: Array.from({ length: SIZE }, (_, i) => i % 100),
+});
+
+for (let i = 0; i < WARMUP; i++) {
+ nuniqueSeries(s);
+ anySeries(boolSeries);
+ allSeries(boolSeries);
+ nunique(df);
+}
+
+const start = performance.now();
+for (let i = 0; i < ITERATIONS; i++) {
+ nuniqueSeries(s);
+ anySeries(boolSeries);
+ allSeries(boolSeries);
+ nunique(df);
+}
+const total = performance.now() - start;
+
+console.log(
+ JSON.stringify({
+ function: "reduce_ops",
+ mean_ms: total / ITERATIONS,
+ iterations: ITERATIONS,
+ total_ms: total,
+ }),
+);
diff --git a/benchmarks/tsb/bench_rename_ops.ts b/benchmarks/tsb/bench_rename_ops.ts
new file mode 100644
index 00000000..9277e6e6
--- /dev/null
+++ b/benchmarks/tsb/bench_rename_ops.ts
@@ -0,0 +1,41 @@
+/**
+ * Benchmark: rename_ops — renameSeriesIndex / renameDataFrame / addPrefixDataFrame / addSuffixDataFrame on 100k rows.
+ * Outputs JSON: {"function": "rename_ops", "mean_ms": ..., "iterations": ..., "total_ms": ...}
+ */
+import { Series, DataFrame, renameSeriesIndex, renameDataFrame, addPrefixDataFrame, addSuffixDataFrame } from "../../src/index.ts";
+
+const SIZE = 100_000;
+const WARMUP = 5;
+const ITERATIONS = 50;
+
+const s = new Series({ data: Array.from({ length: SIZE }, (_, i) => i), index: Array.from({ length: SIZE }, (_, i) => `row_${i}`) });
+const df = DataFrame.fromColumns({
+ col_a: Array.from({ length: SIZE }, (_, i) => i),
+ col_b: Array.from({ length: SIZE }, (_, i) => i * 2),
+ col_c: Array.from({ length: SIZE }, (_, i) => i * 3),
+});
+
+for (let i = 0; i < WARMUP; i++) {
+ renameSeriesIndex(s, (lbl) => `new_${String(lbl)}`);
+ renameDataFrame(df, { columns: { col_a: "a", col_b: "b" } });
+ addPrefixDataFrame(df, "pre_");
+ addSuffixDataFrame(df, "_suf");
+}
+
+const start = performance.now();
+for (let i = 0; i < ITERATIONS; i++) {
+ renameSeriesIndex(s, (lbl) => `new_${String(lbl)}`);
+ renameDataFrame(df, { columns: { col_a: "a", col_b: "b" } });
+ addPrefixDataFrame(df, "pre_");
+ addSuffixDataFrame(df, "_suf");
+}
+const total = performance.now() - start;
+
+console.log(
+ JSON.stringify({
+ function: "rename_ops",
+ mean_ms: total / ITERATIONS,
+ iterations: ITERATIONS,
+ total_ms: total,
+ }),
+);
diff --git a/benchmarks/tsb/bench_value_counts_full.ts b/benchmarks/tsb/bench_value_counts_full.ts
new file mode 100644
index 00000000..d55b5b72
--- /dev/null
+++ b/benchmarks/tsb/bench_value_counts_full.ts
@@ -0,0 +1,32 @@
+/**
+ * Benchmark: value_counts_full — valueCountsBinned on 100k rows.
+ * Outputs JSON: {"function": "value_counts_full", "mean_ms": ..., "iterations": ..., "total_ms": ...}
+ */
+import { Series, valueCountsBinned } from "../../src/index.ts";
+
+const SIZE = 100_000;
+const WARMUP = 5;
+const ITERATIONS = 50;
+
+const s = new Series({ data: Array.from({ length: SIZE }, () => Math.random() * 100) });
+
+for (let i = 0; i < WARMUP; i++) {
+ valueCountsBinned(s, { bins: 10 });
+ valueCountsBinned(s, { bins: 20 });
+}
+
+const start = performance.now();
+for (let i = 0; i < ITERATIONS; i++) {
+ valueCountsBinned(s, { bins: 10 });
+ valueCountsBinned(s, { bins: 20 });
+}
+const total = performance.now() - start;
+
+console.log(
+ JSON.stringify({
+ function: "value_counts_full",
+ mean_ms: total / ITERATIONS,
+ iterations: ITERATIONS,
+ total_ms: total,
+ }),
+);
diff --git a/benchmarks/tsb/bench_window_extended.ts b/benchmarks/tsb/bench_window_extended.ts
new file mode 100644
index 00000000..a4b933cb
--- /dev/null
+++ b/benchmarks/tsb/bench_window_extended.ts
@@ -0,0 +1,37 @@
+/**
+ * Benchmark: window_extended — rollingSem / rollingSkew / rollingKurt / rollingQuantile on 100k rows.
+ * Outputs JSON: {"function": "window_extended", "mean_ms": ..., "iterations": ..., "total_ms": ...}
+ */
+import { Series, rollingSem, rollingSkew, rollingKurt, rollingQuantile } from "../../src/index.ts";
+
+const SIZE = 100_000;
+const WARMUP = 3;
+const ITERATIONS = 20;
+const WINDOW = 10;
+
+const s = new Series({ data: Array.from({ length: SIZE }, (_, i) => Math.sin(i / 100) * 100 + i * 0.001) });
+
+for (let i = 0; i < WARMUP; i++) {
+ rollingSem(s, WINDOW);
+ rollingSkew(s, WINDOW);
+ rollingKurt(s, WINDOW);
+ rollingQuantile(s, WINDOW, 0.5);
+}
+
+const start = performance.now();
+for (let i = 0; i < ITERATIONS; i++) {
+ rollingSem(s, WINDOW);
+ rollingSkew(s, WINDOW);
+ rollingKurt(s, WINDOW);
+ rollingQuantile(s, WINDOW, 0.5);
+}
+const total = performance.now() - start;
+
+console.log(
+ JSON.stringify({
+ function: "window_extended",
+ mean_ms: total / ITERATIONS,
+ iterations: ITERATIONS,
+ total_ms: total,
+ }),
+);
From 46d46aad29b8323cd5b53034d11c62df2e4ae828 Mon Sep 17 00:00:00 2001
From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com>
Date: Thu, 4 Jun 2026 19:40:52 +0000
Subject: [PATCH 4/4] [Autoloop: perf-comparison] Iteration 342: Add 2
benchmark pairs (to_json_denormalize, cut_bins_to_frame)
Run: https://github.com/githubnext/tsb/actions/runs/26974794187
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
---
benchmarks/pandas/bench_cut_bins_to_frame.py | 56 +++++++++++++++++++
.../pandas/bench_to_json_denormalize.py | 41 ++++++++++++++
benchmarks/tsb/bench_cut_bins_to_frame.ts | 36 ++++++++++++
benchmarks/tsb/bench_to_json_denormalize.ts | 42 ++++++++++++++
4 files changed, 175 insertions(+)
create mode 100644 benchmarks/pandas/bench_cut_bins_to_frame.py
create mode 100644 benchmarks/pandas/bench_to_json_denormalize.py
create mode 100644 benchmarks/tsb/bench_cut_bins_to_frame.ts
create mode 100644 benchmarks/tsb/bench_to_json_denormalize.ts
diff --git a/benchmarks/pandas/bench_cut_bins_to_frame.py b/benchmarks/pandas/bench_cut_bins_to_frame.py
new file mode 100644
index 00000000..5ae5908c
--- /dev/null
+++ b/benchmarks/pandas/bench_cut_bins_to_frame.py
@@ -0,0 +1,56 @@
+"""Benchmark: cut_bins_to_frame — pd.cut with value_counts and bin summary on 100k rows."""
+import json, time
+import numpy as np
+import pandas as pd
+
+SIZE = 100_000
+NUM_BINS = 20
+WARMUP = 5
+ITERATIONS = 50
+
+data = np.array([(i % 1000) * 0.1 for i in range(SIZE)])
+
+for _ in range(WARMUP):
+ # pandas equivalent of cutBinsToFrame: cut + value_counts on the categorical result
+ cut_result = pd.cut(data, NUM_BINS)
+ # Summary DataFrame equivalent to cutBinsToFrame
+ counts = cut_result.value_counts(sort=False)
+ summary = pd.DataFrame({
+ "bin": counts.index.astype(str),
+ "left": [iv.left for iv in counts.index],
+ "right": [iv.right for iv in counts.index],
+ "count": counts.values,
+ "frequency": counts.values / len(data),
+ })
+ # cutBinCounts equivalent: counts dict
+ count_dict = dict(zip(counts.index.astype(str), counts.values))
+ # binEdges equivalent: DataFrame of interval edges
+ edges = pd.DataFrame({
+ "left": [iv.left for iv in counts.index],
+ "right": [iv.right for iv in counts.index],
+ })
+
+start = time.perf_counter()
+for _ in range(ITERATIONS):
+ cut_result = pd.cut(data, NUM_BINS)
+ counts = cut_result.value_counts(sort=False)
+ summary = pd.DataFrame({
+ "bin": counts.index.astype(str),
+ "left": [iv.left for iv in counts.index],
+ "right": [iv.right for iv in counts.index],
+ "count": counts.values,
+ "frequency": counts.values / len(data),
+ })
+ count_dict = dict(zip(counts.index.astype(str), counts.values))
+ edges = pd.DataFrame({
+ "left": [iv.left for iv in counts.index],
+ "right": [iv.right for iv in counts.index],
+ })
+total = (time.perf_counter() - start) * 1000
+
+print(json.dumps({
+ "function": "cut_bins_to_frame",
+ "mean_ms": total / ITERATIONS,
+ "iterations": ITERATIONS,
+ "total_ms": total,
+}))
diff --git a/benchmarks/pandas/bench_to_json_denormalize.py b/benchmarks/pandas/bench_to_json_denormalize.py
new file mode 100644
index 00000000..ae51decf
--- /dev/null
+++ b/benchmarks/pandas/bench_to_json_denormalize.py
@@ -0,0 +1,41 @@
+"""Benchmark: to_json_denormalize — json orient variants on 10k-row DataFrame."""
+import json, time
+import numpy as np
+import pandas as pd
+
+ROWS = 10_000
+WARMUP = 5
+ITERATIONS = 30
+
+# DataFrame matching the tsb benchmark (nested-structure-like columns)
+df = pd.DataFrame({
+ "name": [f"user_{i}" for i in range(ROWS)],
+ "address.city": [f"city_{i % 100}" for i in range(ROWS)],
+ "address.zip": [str(10000 + (i % 9000)) for i in range(ROWS)],
+ "score": np.arange(ROWS) * 0.01,
+})
+
+for _ in range(WARMUP):
+ # pandas equivalent of toJsonDenormalize: to_dict("records") then reconstruct nesting
+ recs = df.to_dict("records")
+ # pandas equivalent of toJsonRecords: orient="records"
+ df.to_json(orient="records")
+ # pandas equivalent of toJsonSplit: orient="split"
+ df.to_json(orient="split")
+ # pandas equivalent of toJsonIndex: orient="index"
+ df.to_json(orient="index")
+
+start = time.perf_counter()
+for _ in range(ITERATIONS):
+ recs = df.to_dict("records")
+ df.to_json(orient="records")
+ df.to_json(orient="split")
+ df.to_json(orient="index")
+total = (time.perf_counter() - start) * 1000
+
+print(json.dumps({
+ "function": "to_json_denormalize",
+ "mean_ms": total / ITERATIONS,
+ "iterations": ITERATIONS,
+ "total_ms": total,
+}))
diff --git a/benchmarks/tsb/bench_cut_bins_to_frame.ts b/benchmarks/tsb/bench_cut_bins_to_frame.ts
new file mode 100644
index 00000000..135fcd91
--- /dev/null
+++ b/benchmarks/tsb/bench_cut_bins_to_frame.ts
@@ -0,0 +1,36 @@
+/**
+ * Benchmark: cut_bins_to_frame — cutBinsToFrame / cutBinCounts / binEdges on 100k data points.
+ * Outputs JSON: {"function": "cut_bins_to_frame", "mean_ms": ..., "iterations": ..., "total_ms": ...}
+ */
+import { cut, cutBinsToFrame, cutBinCounts, binEdges } from "../../src/index.ts";
+
+const SIZE = 100_000;
+const NUM_BINS = 20;
+const WARMUP = 5;
+const ITERATIONS = 50;
+
+const data = Array.from({ length: SIZE }, (_, i) => (i % 1000) * 0.1);
+const binResult = cut(data, NUM_BINS);
+
+for (let i = 0; i < WARMUP; i++) {
+ cutBinsToFrame(binResult, { data });
+ cutBinCounts(binResult);
+ binEdges(binResult);
+}
+
+const start = performance.now();
+for (let i = 0; i < ITERATIONS; i++) {
+ cutBinsToFrame(binResult, { data });
+ cutBinCounts(binResult);
+ binEdges(binResult);
+}
+const total = performance.now() - start;
+
+console.log(
+ JSON.stringify({
+ function: "cut_bins_to_frame",
+ mean_ms: total / ITERATIONS,
+ iterations: ITERATIONS,
+ total_ms: total,
+ }),
+);
diff --git a/benchmarks/tsb/bench_to_json_denormalize.ts b/benchmarks/tsb/bench_to_json_denormalize.ts
new file mode 100644
index 00000000..07a42f5f
--- /dev/null
+++ b/benchmarks/tsb/bench_to_json_denormalize.ts
@@ -0,0 +1,42 @@
+/**
+ * Benchmark: to_json_denormalize — toJsonDenormalize / toJsonRecords / toJsonSplit / toJsonIndex
+ * Outputs JSON: {"function": "to_json_denormalize", "mean_ms": ..., "iterations": ..., "total_ms": ...}
+ */
+import { DataFrame, toJsonDenormalize, toJsonRecords, toJsonSplit, toJsonIndex } from "../../src/index.ts";
+
+const ROWS = 10_000;
+const WARMUP = 5;
+const ITERATIONS = 30;
+
+// Create a nested-structure-like DataFrame (address.city, address.zip pattern)
+const df = DataFrame.fromColumns({
+ "name": Array.from({ length: ROWS }, (_, i) => `user_${i}`),
+ "address.city": Array.from({ length: ROWS }, (_, i) => `city_${i % 100}`),
+ "address.zip": Array.from({ length: ROWS }, (_, i) => `${10000 + (i % 9000)}`),
+ "score": Float64Array.from({ length: ROWS }, (_, i) => i * 0.01),
+});
+
+for (let i = 0; i < WARMUP; i++) {
+ toJsonDenormalize(df);
+ toJsonRecords(df);
+ toJsonSplit(df);
+ toJsonIndex(df);
+}
+
+const start = performance.now();
+for (let i = 0; i < ITERATIONS; i++) {
+ toJsonDenormalize(df);
+ toJsonRecords(df);
+ toJsonSplit(df);
+ toJsonIndex(df);
+}
+const total = performance.now() - start;
+
+console.log(
+ JSON.stringify({
+ function: "to_json_denormalize",
+ mean_ms: total / ITERATIONS,
+ iterations: ITERATIONS,
+ total_ms: total,
+ }),
+);