# Release workflow (PrismML fork): builds macOS/Linux/Windows binaries on manual
# dispatch and, when requested, publishes them as a GitHub release.
name: Release (Prism)

on:
  workflow_dispatch:
    inputs:
      create_release:
        description: 'Create new release'
        required: true
        type: boolean

# Cancel an in-flight run of this workflow for the same ref.
concurrency:
  group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }}
  cancel-in-progress: true

env:
  BRANCH_NAME: ${{ github.head_ref || github.ref_name }}
  # Shared CMake flags appended to every job's configure step.
  CMAKE_ARGS: "-DLLAMA_BUILD_EXAMPLES=OFF -DLLAMA_BUILD_TESTS=OFF -DLLAMA_BUILD_TOOLS=ON -DLLAMA_BUILD_SERVER=ON -DGGML_RPC=ON"

jobs:
  # Apple Silicon build (Metal backend).
  macOS-arm64:
    runs-on: macos-14

    steps:
      - name: Clone
        uses: actions/checkout@v6
        with:
          fetch-depth: 0

      - name: ccache
        uses: ggml-org/ccache-action@v1.2.16
        with:
          key: macOS-latest-cmake-arm64
          evict-old-files: 1d

      - name: Build
        run: |
          # NOTE: -DGGML_RPC=ON is already supplied by ${CMAKE_ARGS}; not repeated here.
          cmake -B build \
            -DCMAKE_INSTALL_RPATH='@loader_path' \
            -DCMAKE_BUILD_WITH_INSTALL_RPATH=ON \
            -DLLAMA_FATAL_WARNINGS=ON \
            -DGGML_METAL_USE_BF16=ON \
            -DGGML_METAL_EMBED_LIBRARY=ON \
            ${{ env.CMAKE_ARGS }}
          cmake --build build --config Release -j $(sysctl -n hw.logicalcpu)

      - name: Determine tag name
        id: tag
        uses: ./.github/actions/get-tag-name

      - name: Pack artifacts
        run: |
          cp LICENSE ./build/bin/
          # BSD tar: -s rewrites entry paths so the tarball extracts into a versioned dir.
          tar -czvf llama-${{ steps.tag.outputs.name }}-bin-macos-arm64.tar.gz -s ",./,llama-${{ steps.tag.outputs.name }}/," -C ./build/bin .

      - name: Upload artifacts
        uses: actions/upload-artifact@v6
        with:
          path: llama-${{ steps.tag.outputs.name }}-bin-macos-arm64.tar.gz
          name: llama-bin-macos-arm64.tar.gz

  # Linux x64 CUDA builds, one per toolkit version.
  linux-cuda:
    runs-on: ubuntu-22.04

    strategy:
      matrix:
        include:
          - cuda: '12.4'
            cuda_pkg: '12-4'
          - cuda: '12.8'
            cuda_pkg: '12-8'
          - cuda: '13.1'
            cuda_pkg: '13-1'

    steps:
      - name: Clone
        uses: actions/checkout@v6
        with:
          fetch-depth: 0

      - name: ccache
        uses: ggml-org/ccache-action@v1.2.16
        with:
          key: ubuntu-22-cmake-cuda-${{ matrix.cuda }}
          evict-old-files: 1d

      - name: Install CUDA toolkit
        run: |
          wget -q https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.1-1_all.deb
          sudo dpkg -i cuda-keyring_1.1-1_all.deb
          sudo apt-get update
          sudo apt-get -y install cuda-toolkit-${{ matrix.cuda_pkg }}
          echo "/usr/local/cuda-${{ matrix.cuda }}/bin" >> $GITHUB_PATH
          echo "CUDA_PATH=/usr/local/cuda-${{ matrix.cuda }}" >> $GITHUB_ENV
          echo "LD_LIBRARY_PATH=/usr/local/cuda-${{ matrix.cuda }}/lib64:$LD_LIBRARY_PATH" >> $GITHUB_ENV

      - name: Build
        run: |
          cmake -B build \
            -DCMAKE_INSTALL_RPATH='$ORIGIN' \
            -DCMAKE_BUILD_WITH_INSTALL_RPATH=ON \
            -DGGML_NATIVE=OFF \
            -DGGML_CUDA=ON \
            ${{ env.CMAKE_ARGS }}
          # pipefail: without it the grep filter would mask a failed cmake build
          # (GitHub's default Linux shell is `bash -e` with no pipefail).
          set -o pipefail
          cmake --build build --config Release -j $(nproc) 2>&1 | grep -v "^nvcc warning"

      - name: Determine tag name
        id: tag
        uses: ./.github/actions/get-tag-name

      - name: Pack artifacts
        run: |
          cp LICENSE ./build/bin/
          # GNU tar: --transform rewrites entry paths into a versioned dir.
          tar -czvf llama-${{ steps.tag.outputs.name }}-bin-linux-cuda-${{ matrix.cuda }}-x64.tar.gz --transform "s,./,llama-${{ steps.tag.outputs.name }}/," -C ./build/bin .

      - name: Upload artifacts
        uses: actions/upload-artifact@v6
        with:
          path: llama-${{ steps.tag.outputs.name }}-bin-linux-cuda-${{ matrix.cuda }}-x64.tar.gz
          name: llama-bin-linux-cuda-${{ matrix.cuda }}-x64.tar.gz

  # Windows x64 CUDA builds; also ships the matching CUDA runtime DLLs.
  windows-cuda:
    runs-on: windows-2022

    strategy:
      matrix:
        cuda: ['12.4', '13.1']

    steps:
      - name: Clone
        uses: actions/checkout@v6

      - name: Install ccache
        uses: ggml-org/ccache-action@v1.2.16
        with:
          key: windows-cuda-${{ matrix.cuda }}
          variant: ccache
          evict-old-files: 1d

      - name: Install Cuda Toolkit
        uses: ./.github/actions/windows-setup-cuda
        with:
          cuda_version: ${{ matrix.cuda }}

      - name: Install Ninja
        run: choco install ninja

      - name: Build
        shell: cmd
        run: |
          call "C:\Program Files\Microsoft Visual Studio\2022\Enterprise\VC\Auxiliary\Build\vcvarsall.bat" x64
          cmake -S . -B build -G "Ninja Multi-Config" ^
            -DGGML_NATIVE=OFF ^
            -DGGML_CUDA=ON ^
            -DLLAMA_BUILD_BORINGSSL=ON ^
            -DCMAKE_CUDA_FLAGS="-diag-suppress=221" ^
            ${{ env.CMAKE_ARGS }}
          set /A NINJA_JOBS=%NUMBER_OF_PROCESSORS%-1
          cmake --build build --config Release -j %NINJA_JOBS%

      - name: Determine tag name
        id: tag
        uses: ./.github/actions/get-tag-name

      - name: Pack artifacts
        run: |
          7z a -snl llama-${{ steps.tag.outputs.name }}-bin-win-cuda-${{ matrix.cuda }}-x64.zip .\build\bin\Release\*

      - name: Upload artifacts
        uses: actions/upload-artifact@v6
        with:
          path: llama-${{ steps.tag.outputs.name }}-bin-win-cuda-${{ matrix.cuda }}-x64.zip
          name: llama-bin-win-cuda-${{ matrix.cuda }}-x64.zip

      - name: Copy and pack Cuda runtime
        run: |
          echo "Cuda install location: ${{ env.CUDA_PATH }}"
          $dst='.\build\bin\cudart\'
          # DLL locations differ between CUDA 12 and 13 layouts, so try all three dirs;
          # robocopy's non-zero "files copied" exit codes do not fail the step because
          # pwsh only propagates the LAST command's exit code (the 7z below).
          robocopy "${{env.CUDA_PATH}}\bin" $dst cudart64_*.dll cublas64_*.dll cublasLt64_*.dll
          robocopy "${{env.CUDA_PATH}}\lib" $dst cudart64_*.dll cublas64_*.dll cublasLt64_*.dll
          robocopy "${{env.CUDA_PATH}}\bin\x64" $dst cudart64_*.dll cublas64_*.dll cublasLt64_*.dll
          7z a cudart-llama-bin-win-cuda-${{ matrix.cuda }}-x64.zip $dst\*

      - name: Upload Cuda runtime
        uses: actions/upload-artifact@v6
        with:
          path: cudart-llama-bin-win-cuda-${{ matrix.cuda }}-x64.zip
          name: cudart-llama-bin-win-cuda-${{ matrix.cuda }}-x64.zip

  # Publishes the release with all built artifacts; only runs when requested.
  release:
    if: ${{ github.event.inputs.create_release == 'true' }}

    permissions:
      contents: write

    runs-on: ubuntu-latest

    needs:
      - macOS-arm64
      - linux-cuda
      - windows-cuda

    steps:
      - name: Clone
        uses: actions/checkout@v6
        with:
          fetch-depth: 0

      - name: Determine tag name
        id: tag
        uses: ./.github/actions/get-tag-name

      - name: Download artifacts
        uses: actions/download-artifact@v7
        with:
          path: ./artifact
          merge-multiple: true

      - name: Move artifacts
        run: |
          mkdir -p release
          mv -v artifact/*.tar.gz release/ 2>/dev/null || true
          mv -v artifact/*.zip release/ 2>/dev/null || true
          ls -lh release/

      - name: Create release
        id: create_release
        uses: ggml-org/action-create-release@v1
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        with:
          tag_name: ${{ steps.tag.outputs.name }}
          body: |
            Pre-built binaries (PrismML fork with Q1_0 1-bit quantization support).

            **macOS:**
            - [macOS Apple Silicon (arm64)](https://github.com/${{ github.repository }}/releases/download/${{ steps.tag.outputs.name }}/llama-${{ steps.tag.outputs.name }}-bin-macos-arm64.tar.gz)

            **Linux:**
            - [Linux x64 (CUDA 12.4)](https://github.com/${{ github.repository }}/releases/download/${{ steps.tag.outputs.name }}/llama-${{ steps.tag.outputs.name }}-bin-linux-cuda-12.4-x64.tar.gz)
            - [Linux x64 (CUDA 12.8)](https://github.com/${{ github.repository }}/releases/download/${{ steps.tag.outputs.name }}/llama-${{ steps.tag.outputs.name }}-bin-linux-cuda-12.8-x64.tar.gz)
            - [Linux x64 (CUDA 13.1)](https://github.com/${{ github.repository }}/releases/download/${{ steps.tag.outputs.name }}/llama-${{ steps.tag.outputs.name }}-bin-linux-cuda-13.1-x64.tar.gz)

            **Windows:**
            - [Windows x64 (CUDA 12.4)](https://github.com/${{ github.repository }}/releases/download/${{ steps.tag.outputs.name }}/llama-${{ steps.tag.outputs.name }}-bin-win-cuda-12.4-x64.zip)
            - [CUDA 12.4 DLLs](https://github.com/${{ github.repository }}/releases/download/${{ steps.tag.outputs.name }}/cudart-llama-bin-win-cuda-12.4-x64.zip)
            - [Windows x64 (CUDA 13.1)](https://github.com/${{ github.repository }}/releases/download/${{ steps.tag.outputs.name }}/llama-${{ steps.tag.outputs.name }}-bin-win-cuda-13.1-x64.zip)
            - [CUDA 13.1 DLLs](https://github.com/${{ github.repository }}/releases/download/${{ steps.tag.outputs.name }}/cudart-llama-bin-win-cuda-13.1-x64.zip)

      - name: Upload release
        env:
          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        run: |
          for file in release/*; do
            echo "Uploading $(basename $file)..."
            gh release upload ${{ steps.tag.outputs.name }} "$file" --clobber
          done