# Manually dispatched workflow that builds the binaries defined under `jobs`
# and, when requested through the `create_release` input, publishes them.
name: Release (Prism)

on:
  workflow_dispatch:
    inputs:
      # Boolean toggle; the release job compares it against the string 'true'.
      create_release:
        description: 'Create new release'
        required: true
        type: boolean

# The group key is the workflow name plus either the ref (when
# `github.head_ref` is set) or the run id, which is unique per run.
concurrency:
  group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }}
  cancel-in-progress: true

env:
  # `github.head_ref` when it is set, otherwise the short ref name.
  BRANCH_NAME: ${{ github.head_ref || github.ref_name }}
  # CMake options appended to every configure command in this workflow.
  # Folded scalar: the lines below are joined with single spaces.
  CMAKE_ARGS: >-
    -DLLAMA_BUILD_EXAMPLES=OFF
    -DLLAMA_BUILD_TESTS=OFF
    -DLLAMA_BUILD_TOOLS=ON
    -DLLAMA_BUILD_SERVER=ON
    -DGGML_RPC=ON
jobs:
  # Builds on a macOS 14 runner with the Metal options enabled and uploads
  # the contents of build/bin as a tarball artifact.
  macOS-arm64:
    runs-on: macos-14

    steps:
      - name: Clone
        uses: actions/checkout@v6
        with:
          fetch-depth: 0

      - name: ccache
        uses: ggml-org/ccache-action@v1.2.16
        with:
          key: macOS-latest-cmake-arm64
          evict-old-files: 1d

      # -DGGML_RPC=ON is not repeated here: it is already part of CMAKE_ARGS.
      - name: Build
        run: |
          cmake -B build \
            -DCMAKE_INSTALL_RPATH='@loader_path' \
            -DCMAKE_BUILD_WITH_INSTALL_RPATH=ON \
            -DLLAMA_FATAL_WARNINGS=ON \
            -DGGML_METAL_USE_BF16=ON \
            -DGGML_METAL_EMBED_LIBRARY=ON \
            ${{ env.CMAKE_ARGS }}
          cmake --build build --config Release -j $(sysctl -n hw.logicalcpu)

      - name: Determine tag name
        id: tag
        uses: ./.github/actions/get-tag-name

      # `-s` rewrites the leading "./" of every member into a
      # "llama-<tag>/" directory prefix.
      - name: Pack artifacts
        run: |
          cp LICENSE ./build/bin/
          tar -czvf llama-${{ steps.tag.outputs.name }}-bin-macos-arm64.tar.gz -s ",./,llama-${{ steps.tag.outputs.name }}/," -C ./build/bin .

      - name: Upload artifacts
        uses: actions/upload-artifact@v6
        with:
          path: llama-${{ steps.tag.outputs.name }}-bin-macos-arm64.tar.gz
          name: llama-bin-macos-arm64.tar.gz

  # Builds the CUDA backend on Ubuntu 22.04, once per CUDA toolkit version
  # listed in the matrix.
  linux-cuda:
    runs-on: ubuntu-22.04

    strategy:
      matrix:
        include:
          # `cuda` is used in paths and file names, `cuda_pkg` in the apt
          # package name.
          - cuda: '12.4'
            cuda_pkg: '12-4'
          - cuda: '12.8'
            cuda_pkg: '12-8'
          - cuda: '13.1'
            cuda_pkg: '13-1'

    steps:
      - name: Clone
        uses: actions/checkout@v6
        with:
          fetch-depth: 0

      - name: ccache
        uses: ggml-org/ccache-action@v1.2.16
        with:
          key: ubuntu-22-cmake-cuda-${{ matrix.cuda }}
          evict-old-files: 1d

      - name: Install CUDA toolkit
        run: |
          wget -q https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.1-1_all.deb
          sudo dpkg -i cuda-keyring_1.1-1_all.deb
          sudo apt-get update
          sudo apt-get -y install cuda-toolkit-${{ matrix.cuda_pkg }}
          echo "/usr/local/cuda-${{ matrix.cuda }}/bin" >> $GITHUB_PATH
          echo "CUDA_PATH=/usr/local/cuda-${{ matrix.cuda }}" >> $GITHUB_ENV
          echo "LD_LIBRARY_PATH=/usr/local/cuda-${{ matrix.cuda }}/lib64:$LD_LIBRARY_PATH" >> $GITHUB_ENV

      # The build output is piped through grep to drop "nvcc warning" lines.
      # The implicit default shell runs without pipefail, so the step would
      # report grep's exit status and a failed build would pass. pipefail
      # surfaces cmake's status; `|| true` keeps grep's "no lines selected"
      # status from failing the step.
      - name: Build
        run: |
          set -o pipefail
          cmake -B build \
            -DCMAKE_INSTALL_RPATH='$ORIGIN' \
            -DCMAKE_BUILD_WITH_INSTALL_RPATH=ON \
            -DGGML_NATIVE=OFF \
            -DGGML_CUDA=ON \
            ${{ env.CMAKE_ARGS }}
          cmake --build build --config Release -j $(nproc) 2>&1 | { grep -v "^nvcc warning" || true; }

      - name: Determine tag name
        id: tag
        uses: ./.github/actions/get-tag-name

      # `--transform` rewrites the leading "./" of every member into a
      # "llama-<tag>/" directory prefix.
      - name: Pack artifacts
        run: |
          cp LICENSE ./build/bin/
          tar -czvf llama-${{ steps.tag.outputs.name }}-bin-linux-cuda-${{ matrix.cuda }}-x64.tar.gz --transform "s,./,llama-${{ steps.tag.outputs.name }}/," -C ./build/bin .

      - name: Upload artifacts
        uses: actions/upload-artifact@v6
        with:
          path: llama-${{ steps.tag.outputs.name }}-bin-linux-cuda-${{ matrix.cuda }}-x64.tar.gz
          name: llama-bin-linux-cuda-${{ matrix.cuda }}-x64.tar.gz

  # Builds the CUDA backend on Windows with Ninja and additionally packs the
  # CUDA runtime DLLs into a separate archive.
  windows-cuda:
    runs-on: windows-2022

    strategy:
      matrix:
        cuda: ['12.4', '13.1']

    steps:
      - name: Clone
        uses: actions/checkout@v6
        with:
          # Same checkout depth as every other job that runs get-tag-name, so
          # all jobs see the same history.
          # NOTE(review): assumes the action derives the tag from git
          # history; confirm against ./.github/actions/get-tag-name.
          fetch-depth: 0

      - name: Install ccache
        uses: ggml-org/ccache-action@v1.2.16
        with:
          key: windows-cuda-${{ matrix.cuda }}
          variant: ccache
          evict-old-files: 1d

      - name: Install Cuda Toolkit
        uses: ./.github/actions/windows-setup-cuda
        with:
          cuda_version: ${{ matrix.cuda }}

      - name: Install Ninja
        run: choco install ninja

      # Runs under cmd so vcvarsall.bat can set up the MSVC environment for
      # the cmake calls that follow. The build uses one job fewer than the
      # number of processors.
      - name: Build
        shell: cmd
        run: |
          call "C:\Program Files\Microsoft Visual Studio\2022\Enterprise\VC\Auxiliary\Build\vcvarsall.bat" x64
          cmake -S . -B build -G "Ninja Multi-Config" ^
            -DGGML_NATIVE=OFF ^
            -DGGML_CUDA=ON ^
            -DLLAMA_BUILD_BORINGSSL=ON ^
            -DCMAKE_CUDA_FLAGS="-diag-suppress=221" ^
            ${{ env.CMAKE_ARGS }}
          set /A NINJA_JOBS=%NUMBER_OF_PROCESSORS%-1
          cmake --build build --config Release -j %NINJA_JOBS%

      - name: Determine tag name
        id: tag
        uses: ./.github/actions/get-tag-name

      - name: Pack artifacts
        run: |
          7z a -snl llama-${{ steps.tag.outputs.name }}-bin-win-cuda-${{ matrix.cuda }}-x64.zip .\build\bin\Release\*

      - name: Upload artifacts
        uses: actions/upload-artifact@v6
        with:
          path: llama-${{ steps.tag.outputs.name }}-bin-win-cuda-${{ matrix.cuda }}-x64.zip
          name: llama-bin-win-cuda-${{ matrix.cuda }}-x64.zip

      # Collects the cudart/cublas/cublasLt DLLs from three candidate
      # directories under CUDA_PATH and zips whatever was copied.
      - name: Copy and pack Cuda runtime
        run: |
          echo "Cuda install location: ${{ env.CUDA_PATH }}"
          $dst='.\build\bin\cudart\'
          robocopy "${{env.CUDA_PATH}}\bin" $dst cudart64_*.dll cublas64_*.dll cublasLt64_*.dll
          robocopy "${{env.CUDA_PATH}}\lib" $dst cudart64_*.dll cublas64_*.dll cublasLt64_*.dll
          robocopy "${{env.CUDA_PATH}}\bin\x64" $dst cudart64_*.dll cublas64_*.dll cublasLt64_*.dll
          7z a cudart-llama-bin-win-cuda-${{ matrix.cuda }}-x64.zip $dst\*

      - name: Upload Cuda runtime
        uses: actions/upload-artifact@v6
        with:
          path: cudart-llama-bin-win-cuda-${{ matrix.cuda }}-x64.zip
          name: cudart-llama-bin-win-cuda-${{ matrix.cuda }}-x64.zip

  # Publishes a release with every artifact of the build jobs attached.
  # Runs only when the `create_release` input was ticked; the value is
  # compared against the string 'true'.
  release:
    if: ${{ github.event.inputs.create_release == 'true' }}

    permissions:
      contents: write

    runs-on: ubuntu-latest

    needs:
      - macOS-arm64
      - linux-cuda
      - windows-cuda

    steps:
      - name: Clone
        uses: actions/checkout@v6
        with:
          fetch-depth: 0

      - name: Determine tag name
        id: tag
        uses: ./.github/actions/get-tag-name

      - name: Download artifacts
        uses: actions/download-artifact@v7
        with:
          path: ./artifact
          merge-multiple: true

      # Best effort: a missing archive type is tolerated here; the listing
      # shows what will actually be uploaded.
      - name: Move artifacts
        run: |
          mkdir -p release
          mv -v artifact/*.tar.gz release/ 2>/dev/null || true
          mv -v artifact/*.zip release/ 2>/dev/null || true
          ls -lh release/

      # Blank lines separate the sections of the body so that each bold
      # heading starts its own paragraph instead of continuing the bullet
      # above it.
      - name: Create release
        id: create_release
        uses: ggml-org/action-create-release@v1
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        with:
          tag_name: ${{ steps.tag.outputs.name }}
          body: |
            Pre-built binaries (PrismML fork with Q1_0 1-bit quantization support).

            **macOS:**
            - [macOS Apple Silicon (arm64)](https://github.com/${{ github.repository }}/releases/download/${{ steps.tag.outputs.name }}/llama-${{ steps.tag.outputs.name }}-bin-macos-arm64.tar.gz)

            **Linux:**
            - [Linux x64 (CUDA 12.4)](https://github.com/${{ github.repository }}/releases/download/${{ steps.tag.outputs.name }}/llama-${{ steps.tag.outputs.name }}-bin-linux-cuda-12.4-x64.tar.gz)
            - [Linux x64 (CUDA 12.8)](https://github.com/${{ github.repository }}/releases/download/${{ steps.tag.outputs.name }}/llama-${{ steps.tag.outputs.name }}-bin-linux-cuda-12.8-x64.tar.gz)
            - [Linux x64 (CUDA 13.1)](https://github.com/${{ github.repository }}/releases/download/${{ steps.tag.outputs.name }}/llama-${{ steps.tag.outputs.name }}-bin-linux-cuda-13.1-x64.tar.gz)

            **Windows:**
            - [Windows x64 (CUDA 12.4)](https://github.com/${{ github.repository }}/releases/download/${{ steps.tag.outputs.name }}/llama-${{ steps.tag.outputs.name }}-bin-win-cuda-12.4-x64.zip) - [CUDA 12.4 DLLs](https://github.com/${{ github.repository }}/releases/download/${{ steps.tag.outputs.name }}/cudart-llama-bin-win-cuda-12.4-x64.zip)
            - [Windows x64 (CUDA 13.1)](https://github.com/${{ github.repository }}/releases/download/${{ steps.tag.outputs.name }}/llama-${{ steps.tag.outputs.name }}-bin-win-cuda-13.1-x64.zip) - [CUDA 13.1 DLLs](https://github.com/${{ github.repository }}/releases/download/${{ steps.tag.outputs.name }}/cudart-llama-bin-win-cuda-13.1-x64.zip)

      # The tag is handed to the script through the environment rather than
      # being interpolated into the shell source, and every expansion is
      # quoted so unusual file names cannot split into several words.
      - name: Upload release
        env:
          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
          TAG_NAME: ${{ steps.tag.outputs.name }}
        run: |
          for file in release/*; do
            echo "Uploading $(basename "$file")..."
            gh release upload "$TAG_NAME" "$file" --clobber
          done