# Manually triggered workflow: builds one Linux CUDA 13.1 wheel per Python
# version in the matrix inside an NVIDIA CUDA devel container, then uploads
# the wheel to a GitHub release.
name: Build Wheels (CU131) for Linux

on:
  workflow_dispatch:

# The release step uploads assets, which requires write access to contents.
permissions:
  contents: write

jobs:
  build_wheels:
    name: Build Wheel ${{ matrix.os }} py${{ matrix.pyver }} cu131
    runs-on: ubuntu-22.04
    container: nvidia/cuda:13.1.2-cudnn-devel-ubuntu22.04

    strategy:
      fail-fast: false
      matrix:
        os: ["ubuntu-22.04"]
        pyver: ["3.10", "3.11", "3.12", "3.13", "3.14"]  # Python versions
        cuda: ["13.1.2"]
        cudaarch: ["75-real;80-real;86-real;87-real;89-real;90-real;100-real;120-real;121-real"]

    defaults:
      run:
        shell: bash

    env:
      CUDAVER: ${{ matrix.cuda }}
      CUDAARCHVER: ${{ matrix.cudaarch }}
      MAX_JOBS: "12"

    steps:
      - name: Install dependencies
        env:
          # Keep package configuration from blocking on interactive prompts.
          DEBIAN_FRONTEND: noninteractive
        run: |
          apt-get update
          apt-get install -y \
            build-essential \
            ccache \
            cmake \
            curl \
            git \
            libgomp1 \
            libjpeg-dev \
            libssl-dev \
            ninja-build

      - name: Checkout
        uses: actions/checkout@v6
        with:
          submodules: recursive

      - name: Install uv and Python ${{ matrix.pyver }}
        uses: astral-sh/setup-uv@v7
        with:
          python-version: ${{ matrix.pyver }}
          activate-environment: true
          enable-cache: true

      - name: Show CUDA version
        run: nvcc -V

      - name: Build wheel
        env:
          VERBOSE: "1"
          CUDA_HOME: "/usr/local/cuda"
          CUDA_PATH: "/usr/local/cuda"
          CUDA_TOOLKIT_ROOT_DIR: "/usr/local/cuda"
        run: |
          set -euo pipefail

          # Prepend the CUDA library directories to the existing search path.
          # This is done in the shell rather than in the step's `env:` map
          # because workflow env values are not shell-expanded: a
          # "${LD_LIBRARY_PATH}" written there would be passed through as
          # literal text instead of the container's current value.
          cuda_lib_dirs="/usr/local/cuda/lib64:/usr/local/cuda/compat:/usr/lib/x86_64-linux-gnu"
          export LD_LIBRARY_PATH="${cuda_lib_dirs}${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}}"

          echo "LD_LIBRARY_PATH=${LD_LIBRARY_PATH}"
          find /usr/ -name 'libcuda.so.*' || true
          find /usr/ -name 'libcudart.so.*' || true

          # "13.1.2" -> "131": major and minor version with the dot removed.
          cuda_ver_short=$(echo "${CUDAVER}" | cut -d'.' -f 1,2 | sed 's/\.//g')

          # Build one CUDA wheel with dynamic GGML backends:
          # - GGML_BACKEND_DL enables runtime-loadable backend shared libraries.
          # - GGML_CPU_ALL_VARIANTS builds CPU variant backends when supported.
          # - GGML_NATIVE=OFF avoids binding the wheel to the CI runner CPU.
          CMAKE_ARGS_ARRAY=(
            "-G Ninja"

            # Disable non-wheel targets.
            "-DLLAMA_BUILD_EXAMPLES=OFF"
            "-DLLAMA_BUILD_TESTS=OFF"
            "-DLLAMA_BUILD_TOOLS=OFF"
            "-DLLAMA_BUILD_SERVER=OFF"
            "-DLLAMA_BUILD_UI=OFF"
            "-DLLAMA_USE_PREBUILT_UI=OFF"
            "-DLLAMA_CURL=OFF"
            "-DLLAMA_OPENSSL=ON"

            # GGML dynamic backend layout.
            "-DGGML_CPU=ON"
            "-DGGML_CUDA=ON"
            "-DGGML_NATIVE=OFF"
            "-DGGML_BACKEND_DL=ON"
            "-DGGML_CPU_ALL_VARIANTS=ON"
            "-DGGML_OPENMP=ON"

            # CUDA backend.
            "-DCMAKE_CUDA_ARCHITECTURES=${CUDAARCHVER}"
            "-DGGML_CUDA_FORCE_MMQ=ON"
            "-DCUDA_SEPARABLE_COMPILATION=ON"
            "-DCMAKE_CUDA_FLAGS=--diag-suppress=177,221,550"

            # Build behavior.
            # NOTE(review): CMake documents CMAKE_BUILD_PARALLEL_LEVEL as an
            # environment variable, so passing it as -D is likely a no-op.
            # Confirm that 12 jobs fit the runner's memory before exporting
            # it instead.
            "-DCMAKE_BUILD_PARALLEL_LEVEL=${MAX_JOBS}"
            "-DGGML_CCACHE=ON"
            "-DENABLE_CCACHE=ON"
          )

          # Flatten the array into the space-separated string handed to the
          # build through the CMAKE_ARGS environment variable.
          CMAKE_ARGS="${CMAKE_ARGS_ARRAY[*]}"
          echo "CMAKE_ARGS=${CMAKE_ARGS}"

          uv pip install --upgrade build setuptools wheel packaging
          CMAKE_ARGS="${CMAKE_ARGS}" uv build --wheel

          # Collect built wheels; nullglob makes an unmatched pattern expand
          # to an empty array instead of the literal pattern text.
          shopt -s nullglob
          built_wheels=(dist/*.whl)
          shopt -u nullglob

          if (( ${#built_wheels[@]} == 0 )); then
            echo "No wheel built in dist/ directory"
            exit 1
          fi

          wheel_path="${built_wheels[0]}"
          filename=$(basename "$wheel_path")

          # Wheel filename format:
          # name-version-python_tag-abi_tag-platform_tag.whl
          IFS='-' read -r dist_name version py_tag abi_tag plat_tag <<< "$filename"

          if [[ -z "${plat_tag}" ]]; then
            echo "Unexpected wheel filename: ${filename}"
            exit 1
          fi

          # CPU all-variants is now an internal runtime layout detail, so only
          # the CUDA version is encoded in the local version label. A version
          # that already carries a local segment is extended with '.' because
          # a second '+' would not form a valid version.
          if [[ "${version}" == *+* ]]; then
            new_version="${version}.cu${cuda_ver_short}"
          else
            new_version="${version}+cu${cuda_ver_short}"
          fi
          new_filename="${dist_name}-${new_version}-${py_tag}-${abi_tag}-${plat_tag}"

          # NOTE(review): only the file name changes here; the metadata inside
          # the wheel still carries the original version. Confirm that the
          # installers you target accept that mismatch.
          mv "$wheel_path" "dist/$new_filename"
          echo "Renamed wheel to: $new_filename"

          # Exported last, so these are only set when the whole build step
          # succeeded; the release step keys off TAG_VERSION.
          echo "CUDA_VERSION=$cuda_ver_short" >> "$GITHUB_ENV"
          echo "TAG_VERSION=$version" >> "$GITHUB_ENV"

      - name: Get current date
        id: get-date
        run: |
          currentDate=$(date +%Y%m%d)
          echo "BUILD_DATE=$currentDate" >> "$GITHUB_ENV"

      - name: Create release
        # always() keeps this step eligible after a failure; the TAG_VERSION
        # check limits it to runs where the build step ran to completion.
        if: always() && env.TAG_VERSION != ''
        uses: softprops/action-gh-release@v3
        with:
          files: dist/*
          tag_name: v${{ env.TAG_VERSION }}-cu${{ env.CUDA_VERSION }}-linux-${{ env.BUILD_DATE }}
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}