Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
84 changes: 81 additions & 3 deletions .github/workflows/build-wheel.yml
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,8 @@ jobs:
- "3.13"
- "3.14"
- "3.14t"
- "3.15"
- "3.15t"
name: py${{ matrix.python-version }}
runs-on: ${{ (inputs.host-platform == 'linux-64' && 'linux-amd64-cpu8') ||
(inputs.host-platform == 'linux-aarch64' && 'linux-arm64-cpu8') ||
Expand Down Expand Up @@ -162,12 +164,16 @@ jobs:
cuda-version: ${{ inputs.cuda-version }}

- name: Build cuda.bindings wheel
uses: pypa/cibuildwheel@8d2b08b68458a16aeb24b64e68a09ab1c8e82084 # v3.4.1
uses: pypa/cibuildwheel@54327ab9d35de03b359ac25c97de9417d94639c0 # v4.0.0rc1
with:
package-dir: ./cuda_bindings/
output-dir: ${{ env.CUDA_BINDINGS_ARTIFACTS_DIR }}
env:
CIBW_BUILD: ${{ env.CIBW_BUILD }}
# Allow CPython pre-release builds (currently 3.15 / 3.15t). This is a
# no-op for stable Python versions because CIBW_BUILD still filters
# the target version.
CIBW_ENABLE: cpython-prerelease
# CIBW mounts the host filesystem under /host
CIBW_ENVIRONMENT_LINUX: >
CUDA_PATH=/host/${{ env.CUDA_PATH }}
Expand Down Expand Up @@ -226,12 +232,16 @@ jobs:
if-no-files-found: error

- name: Build cuda.core wheel
uses: pypa/cibuildwheel@8d2b08b68458a16aeb24b64e68a09ab1c8e82084 # v3.4.1
uses: pypa/cibuildwheel@54327ab9d35de03b359ac25c97de9417d94639c0 # v4.0.0rc1
with:
package-dir: ./cuda_core/
output-dir: ${{ env.CUDA_CORE_ARTIFACTS_DIR }}
env:
CIBW_BUILD: ${{ env.CIBW_BUILD }}
# Allow CPython pre-release builds (currently 3.15 / 3.15t). This is a
# no-op for stable Python versions because CIBW_BUILD still filters
# the target version.
CIBW_ENABLE: cpython-prerelease
# CIBW mounts the host filesystem under /host
CIBW_ENVIRONMENT_LINUX: >
CUDA_PATH=/host/${{ env.CUDA_PATH }}
Expand Down Expand Up @@ -327,6 +337,7 @@ jobs:
uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0
with:
python-version: ${{ matrix.python-version }}
allow-prereleases: ${{ startsWith(matrix.python-version, '3.15') }}

- name: verify free-threaded build
if: endsWith(matrix.python-version, 't')
Expand All @@ -346,6 +357,69 @@ jobs:
run: |
pip install cuda_pathfinder/*.whl

- name: Hide GNU link.exe so Meson finds MSVC link.exe
  if: startsWith(inputs.host-platform, 'win')
  run: |
    # Git for Windows ships its own link.exe; rename it (when present) so it
    # no longer shadows the MSVC linker during the numpy build below.
    GNU_LINK="/c/Program Files/Git/usr/bin/link.exe"
    if [ -f "${GNU_LINK}" ]; then
      mv "${GNU_LINK}" "${GNU_LINK}.bak"
    fi

- name: Download and patch numpy sdist (pre-release Python)
  if: ${{ startsWith(matrix.python-version, '3.15') }}
  run: |
    # Fetch only the numpy source distribution (no wheels, no dependencies)
    # and unpack it inside numpy-sdist/.
    pip download --no-binary numpy --no-deps "numpy>=1.21.1" -d numpy-sdist/
    cd numpy-sdist && tar xf numpy-*.tar.gz && rm numpy-*.tar.gz
    # WAR (workaround): numpy 2.4.x ships [tool.cibuildwheel] config that is
    # incompatible with cibuildwheel v4.0 (cpython-freethreading enable
    # group, OpenBLAS before-build scripts, etc.). Strip the cibuildwheel
    # sections but preserve [tool.meson-python] (vendored meson path).
    # The inline script is wrapped in double quotes for the shell, so it must
    # only use single-quoted Python strings.
    python -c "
    import glob
    for f in glob.glob('numpy-*/pyproject.toml'):
        # Close the read handle before the same file is reopened for writing.
        with open(f, encoding='utf-8') as src:
            lines = src.readlines()
        out, skip = [], False
        for line in lines:
            hdr = line.strip()
            # A cibuildwheel table (or array-of-tables) header starts a dropped section.
            if hdr.startswith('[tool.cibuildwheel') or hdr.startswith('[[tool.cibuildwheel'):
                skip = True
                continue
            # Any other table header ends the dropped section.
            if skip and hdr.startswith('[') and 'cibuildwheel' not in hdr:
                skip = False
            if not skip:
                out.append(line)
        with open(f, 'w', encoding='utf-8') as dst:
            dst.writelines(out)
    "
    # Export the extracted source directory for the cibuildwheel step.
    # NOTE(review): the path is workspace-relative rather than built from
    # $(pwd), because under Git Bash $(pwd) is a POSIX-style /c/... path that
    # native Windows tools presumably cannot resolve. This assumes steps run
    # from the workspace root - confirm no working-directory default is set.
    numpy_dir="$(ls -d numpy-*/)"
    echo "NUMPY_SRC_DIR=numpy-sdist/${numpy_dir}" >> "$GITHUB_ENV"

- name: Build numpy wheel (pre-release Python)
  if: startsWith(matrix.python-version, '3.15')
  uses: pypa/cibuildwheel@54327ab9d35de03b359ac25c97de9417d94639c0 # v4.0.0rc1
  with:
    # Patched numpy source tree exported by the previous step.
    package-dir: ${{ env.NUMPY_SRC_DIR }}
    output-dir: numpy-wheel/
  env:
    # Target selection: same CIBW_BUILD filter as the project wheels, with
    # CPython pre-release builds enabled.
    CIBW_BUILD: ${{ env.CIBW_BUILD }}
    CIBW_ENABLE: 'cpython-prerelease'
    CIBW_SKIP: '*-musllinux* *-win32'
    CIBW_ARCHS_LINUX: 'native'
    CIBW_BUILD_VERBOSITY: 1
    # Build-backend settings; the Windows variant additionally passes --vsenv.
    CIBW_CONFIG_SETTINGS: 'setup-args=-Dallow-noblas=true'
    CIBW_CONFIG_SETTINGS_WINDOWS: 'setup-args=--vsenv setup-args=-Dallow-noblas=true'
    # Windows wheels are repaired with delvewheel, installed before the build.
    CIBW_BEFORE_BUILD_WINDOWS: 'pip install delvewheel'
    CIBW_REPAIR_WHEEL_COMMAND_WINDOWS: 'delvewheel repair -w {dest_dir} {wheel}'

- name: Upload numpy wheel
  if: startsWith(matrix.python-version, '3.15')
  uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1
  with:
    # Artifact name is keyed by Python version and host platform.
    path: numpy-wheel/*.whl
    name: numpy-python${{ env.PYTHON_VERSION_FORMATTED }}-${{ inputs.host-platform }}
    # Fail the step if the numpy build produced no wheel.
    if-no-files-found: error

- name: Install numpy wheel
  if: startsWith(matrix.python-version, '3.15')
  # Install the wheel built by the cibuildwheel step above.
  run: |
    pip install numpy-wheel/*.whl

- name: Build cuda.bindings Cython tests
run: |
pip install ${{ env.CUDA_BINDINGS_ARTIFACTS_DIR }}/*.whl --group ./cuda_bindings/pyproject.toml:test
Expand Down Expand Up @@ -411,12 +485,16 @@ jobs:
rmdir $OLD_BASENAME

- name: Build cuda.core wheel
uses: pypa/cibuildwheel@8d2b08b68458a16aeb24b64e68a09ab1c8e82084 # v3.4.1
uses: pypa/cibuildwheel@54327ab9d35de03b359ac25c97de9417d94639c0 # v4.0.0rc1
with:
package-dir: ./cuda_core/
output-dir: ${{ env.CUDA_CORE_ARTIFACTS_DIR }}
env:
CIBW_BUILD: ${{ env.CIBW_BUILD }}
# Allow CPython pre-release builds (currently 3.15 / 3.15t). This is a
# no-op for stable Python versions because CIBW_BUILD still filters
# the target version.
CIBW_ENABLE: cpython-prerelease
# CIBW mounts the host filesystem under /host
CIBW_ENVIRONMENT_LINUX: >
CUDA_PATH=/host/${{ env.CUDA_PATH }}
Expand Down
13 changes: 13 additions & 0 deletions .github/workflows/test-wheel-linux.yml
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,7 @@ jobs:
fail-fast: false
matrix: ${{ fromJSON(needs.compute-matrix.outputs.MATRIX) }}
runs-on: "${{ matrix.FLAVOR || 'linux' }}-${{ matrix.ARCH }}-gpu-${{ matrix.GPU }}-${{ matrix.DRIVER }}-${{ matrix.GPU_COUNT }}"
continue-on-error: ${{ startsWith(matrix.PY_VER, '3.15') }}
# The build stage could fail but we want the CI to keep moving.
if: ${{ github.repository_owner == 'nvidia' && !cancelled() }}
# Our self-hosted runners require a container
Expand Down Expand Up @@ -258,6 +259,7 @@ jobs:
uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0
with:
python-version: ${{ matrix.PY_VER }}
allow-prereleases: ${{ startsWith(matrix.PY_VER, '3.15') }}
env:
# we use self-hosted runners on which setup-python behaves weirdly (Python include can't be found)...
AGENT_TOOLSDIRECTORY: "/opt/hostedtoolcache"
Expand All @@ -270,6 +272,17 @@ jobs:
host-platform: ${{ inputs.host-platform }}
cuda-version: ${{ matrix.CUDA_VER }}

- name: Download numpy wheel (pre-release Python)
  if: startsWith(matrix.PY_VER, '3.15')
  uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8.0.1
  with:
    # Destination directory read by the install step that follows.
    path: numpy-wheel
    # Artifact name is keyed by Python version and host platform.
    name: numpy-python${{ env.PYTHON_VERSION_FORMATTED }}-${{ inputs.host-platform }}

- name: Install numpy wheel (pre-release Python)
  if: startsWith(matrix.PY_VER, '3.15')
  # Install the wheel fetched into numpy-wheel/ by the download step.
  run: |
    pip install numpy-wheel/*.whl

- name: Set up latest cuda_sanitizer_api
if: ${{ env.SETUP_SANITIZER == '1' }}
uses: ./.github/actions/fetch_ctk
Expand Down
13 changes: 13 additions & 0 deletions .github/workflows/test-wheel-windows.yml
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,7 @@ jobs:
fail-fast: false
matrix: ${{ fromJSON(needs.compute-matrix.outputs.MATRIX) }}
if: ${{ github.repository_owner == 'nvidia' && !cancelled() }}
continue-on-error: ${{ startsWith(matrix.PY_VER, '3.15') }}
runs-on: "windows-${{ matrix.ARCH }}-gpu-${{ matrix.GPU }}-${{ matrix.DRIVER }}-${{ matrix.GPU_COUNT }}"
steps:
- name: Checkout ${{ github.event.repository.name }}
Expand Down Expand Up @@ -262,6 +263,18 @@ jobs:
host-platform: ${{ inputs.host-platform }}
cuda-version: ${{ matrix.CUDA_VER }}

- name: Download numpy wheel (pre-release Python)
  if: startsWith(matrix.PY_VER, '3.15')
  uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8.0.1
  with:
    # Destination directory read by the install step that follows.
    path: numpy-wheel
    # Artifact name is keyed by Python version and host platform.
    name: numpy-python${{ env.PYTHON_VERSION_FORMATTED }}-${{ inputs.host-platform }}

- name: Install numpy wheel (pre-release Python)
  if: startsWith(matrix.PY_VER, '3.15')
  # Explicit bash shell, as in the other run steps of this workflow, so the
  # *.whl glob is expanded by the shell.
  shell: bash --noprofile --norc -xeuo pipefail {0}
  run: |
    pip install numpy-wheel/*.whl

- name: Set up test repetition on nightly runs
shell: bash --noprofile --norc -xeuo pipefail {0}
run: echo "PYTEST_ADDOPTS=\"--count=${{ inputs.nruns }}\"" >> "$GITHUB_ENV"
Expand Down
2 changes: 2 additions & 0 deletions ci/test-matrix.yml
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,8 @@ linux:
- { ARCH: 'amd64', PY_VER: '3.14t', CUDA_VER: '12.9.1', LOCAL_CTK: '1', GPU: 't4', GPU_COUNT: '1', DRIVER: 'latest' }
- { ARCH: 'amd64', PY_VER: '3.14t', CUDA_VER: '13.0.2', LOCAL_CTK: '1', GPU: 'l4', GPU_COUNT: '1', DRIVER: 'latest' }
- { ARCH: 'amd64', PY_VER: '3.14t', CUDA_VER: '13.3.0', LOCAL_CTK: '1', GPU: 'l4', GPU_COUNT: '1', DRIVER: 'latest' }
- { ARCH: 'amd64', PY_VER: '3.15', CUDA_VER: '13.3.0', LOCAL_CTK: '0', GPU: 'l4', GPU_COUNT: '1', DRIVER: 'latest' }
- { ARCH: 'amd64', PY_VER: '3.15t', CUDA_VER: '13.3.0', LOCAL_CTK: '0', GPU: 'l4', GPU_COUNT: '1', DRIVER: 'latest' }
# linux-aarch64
- { ARCH: 'arm64', PY_VER: '3.10', CUDA_VER: '12.9.1', LOCAL_CTK: '1', GPU: 'a100', GPU_COUNT: '1', DRIVER: 'latest' }
- { ARCH: 'arm64', PY_VER: '3.10', CUDA_VER: '13.0.2', LOCAL_CTK: '0', GPU: 'l4', GPU_COUNT: '1', DRIVER: 'latest' }
Expand Down
6 changes: 6 additions & 0 deletions ci/tools/download-wheels
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,12 @@ do
continue
fi

# Exclude pre-release Python (3.15) artifacts from the release download
if [[ "${p}" == *python315* ]]; then
echo "Skipping pre-release Python artifact: $p"
continue
fi

# If we're not downloading "all", only process matching component
if [[ "$COMPONENT" != "all" && "$p" != ${COMPONENT}* ]]; then
continue
Expand Down
2 changes: 1 addition & 1 deletion cuda_bindings/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ all = [
test = [
"cython>=3.2,<3.3",
"setuptools>=77.0.0",
"matplotlib>=3.5.0", # Required by isoFDModelling_test.py
"matplotlib>=3.5.0; python_version < '3.15'",
"numpy>=1.21.1",
"pytest>=6.2.4",
"pytest-benchmark>=3.4.1",
Expand Down
2 changes: 1 addition & 1 deletion cuda_core/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ cu12 = ["cuda-bindings[all]==12.*", "cuda-toolkit==12.*"]
cu13 = ["cuda-bindings[all]==13.*", "cuda-toolkit==13.*"]

[dependency-groups]
test = ["cython>=3.2,<3.3", "setuptools", "pytest>=6.2.4", "pytest-benchmark", "pytest-randomly", "pytest-repeat", "pytest-rerunfailures", "pytest-timeout", "cloudpickle", "psutil", "cffi"]
test = ["cython>=3.2,<3.3", "setuptools", "pytest>=6.2.4", "pytest-benchmark", "pytest-randomly", "pytest-repeat", "pytest-rerunfailures", "pytest-timeout", "cloudpickle", "psutil", "cffi; python_version < '3.15'"]
ml-dtypes = ["ml-dtypes>=0.5.4,<0.6.0"]
test-cu12 = [ {include-group = "ml-dtypes" }, {include-group = "test" }, "cupy-cuda12x; python_version < '3.14'", "cuda-toolkit[cudart]==12.*"] # runtime headers needed by CuPy
test-cu13 = [ {include-group = "ml-dtypes" }, {include-group = "test" }, "cupy-cuda13x; python_version < '3.14'", "cuda-toolkit[cudart]==13.*"] # runtime headers needed by CuPy
Expand Down
1 change: 1 addition & 0 deletions cuda_core/tests/test_rlcompleter_patch.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,7 @@ def test_patched_completion_succeeds_on_non_ipc_resource():
assert "allocation_handle: True" in result.stdout, result.stdout


@pytest.mark.skipif(sys.version_info >= (3, 15), reason="Python 3.15 fixed the rlcompleter bug upstream")
def test_opt_out_env_var_disables_patch_even_when_interactive():
"""`CUDA_CORE_DONT_FIX_TAB_COMPLETION=1` must short-circuit before the
interactive check, so the bug reproduces again even under PYTHONINSPECT."""
Expand Down
2 changes: 1 addition & 1 deletion cuda_pathfinder/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ cu13 = [
"nvidia-nvshmem-cu13; sys_platform != 'win32'",
]
host = [
"nvidia-cutlass",
"nvidia-cutlass; python_version < '3.15'",
"nvpl-fft; platform_system == 'Linux' and platform_machine == 'aarch64'",
]

Expand Down