# Push CI for AMD GPUs, exposed as a reusable workflow (`workflow_call`).
#
# Job chain (see each job's `needs`):
#   check_runner_status -> check_runners -> setup_gpu -> run_models_gpu
#   send_results runs last, whatever the outcome of the jobs above.
name: Self-hosted runner AMD GPU (push)

on:
  workflow_call:
    inputs:
      # Used as an extra runner label in every `runs-on` below and echoed in
      # the `CI_EVENT` title of the Slack report.
      gpu_flavor:
        required: true
        type: string

env:
  HF_HOME: /mnt/cache
  # Boolean-looking values are quoted so that every YAML loader reads them as
  # the strings the processes will actually see in their environment.
  TRANSFORMERS_IS_CI: "yes"
  OMP_NUM_THREADS: 8
  MKL_NUM_THREADS: 8
  PYTEST_TIMEOUT: 60
  TF_FORCE_GPU_ALLOW_GROWTH: "true"
  HF_HUB_READ_TOKEN: ${{ secrets.HF_HUB_READ_TOKEN }}

jobs:
  # Runs `utils/check_self_hosted_runner.py` from a GitHub-hosted machine.
  # NOTE(review): the target runner name is hard-coded and does not depend on
  # `inputs.gpu_flavor` - confirm this is intended for every caller.
  check_runner_status:
    name: Check Runner Status
    runs-on: ubuntu-22.04
    steps:
      - name: Checkout transformers
        uses: actions/checkout@v4
        with:
          fetch-depth: 2

      - name: Check Runner Status
        run: python utils/check_self_hosted_runner.py --target_runners amd-mi210-single-gpu-ci-runner-docker --token ${{ secrets.ACCESS_REPO_INFO_TOKEN }}

  # Starts the CI container on each runner type and prints GPU diagnostics.
  check_runners:
    name: Check Runners
    needs: check_runner_status
    strategy:
      matrix:
        machine_type: [single-gpu, multi-gpu]
    runs-on: [self-hosted, amd-gpu, '${{ matrix.machine_type }}', '${{ inputs.gpu_flavor }}']
    container:
      image: huggingface/transformers-pytorch-amd-gpu-push-ci
      options: --device /dev/kfd --device /dev/dri --env ROCR_VISIBLE_DEVICES --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
    steps:
      - name: ROCM-SMI
        run: |
          rocm-smi
      - name: ROCM-INFO
        run: |
          rocminfo | grep "Agent" -A 14
      - name: Show ROCR environment
        run: |
          echo "ROCR: $ROCR_VISIBLE_DEVICES"

  # Works out which tests to run and publishes them as job outputs:
  #   `matrix`   - list of keys used as the `folders` matrix of `run_models_gpu`
  #   `test_map` - mapping from each key to the tests to run for it
  setup_gpu:
    name: Setup
    needs: check_runners
    strategy:
      matrix:
        machine_type: [single-gpu, multi-gpu]
    runs-on: [self-hosted, amd-gpu, '${{ matrix.machine_type }}', '${{ inputs.gpu_flavor }}']
    container:
      image: huggingface/transformers-pytorch-amd-gpu-push-ci
      options: --device /dev/kfd --device /dev/dri --env ROCR_VISIBLE_DEVICES --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
    outputs:
      matrix: ${{ steps.set-matrix.outputs.matrix }}
      test_map: ${{ steps.set-matrix.outputs.test_map }}
    env:
      # `CI_BRANCH_PUSH` / `CI_SHA_PUSH`: ref and commit id of a `push` event.
      # `CI_BRANCH_WORKFLOW_RUN` / `CI_SHA_WORKFLOW_RUN`: head branch and head
      # SHA of a `workflow_run` event.
      # Fields that do not exist on the triggering event expand to an empty
      # string.
      CI_BRANCH_PUSH: ${{ github.event.ref }}
      CI_BRANCH_WORKFLOW_RUN: ${{ github.event.workflow_run.head_branch }}
      CI_SHA_PUSH: ${{ github.event.head_commit.id }}
      CI_SHA_WORKFLOW_RUN: ${{ github.event.workflow_run.head_sha }}
    steps:
      # Exports `CI_BRANCH` and `CI_SHA` for the following steps: the `push`
      # value when it is non-empty, otherwise the `workflow_run` value.
      - name: Prepare custom environment variables
        shell: bash
        run: |
          CI_BRANCH_PUSH=${CI_BRANCH_PUSH/'refs/heads/'/''}
          echo $CI_BRANCH_PUSH
          echo $CI_BRANCH_WORKFLOW_RUN
          echo $CI_SHA_PUSH
          echo $CI_SHA_WORKFLOW_RUN
          echo "CI_BRANCH=${CI_BRANCH_PUSH:-$CI_BRANCH_WORKFLOW_RUN}" >> "$GITHUB_ENV"
          echo "CI_SHA=${CI_SHA_PUSH:-$CI_SHA_WORKFLOW_RUN}" >> "$GITHUB_ENV"

      # `CI_BRANCH` / `CI_SHA` are read as shell variables (exported through
      # `$GITHUB_ENV` above) instead of being interpolated with `${{ }}`, so a
      # crafted branch name cannot inject commands into the script.
      - name: print environment variables
        run: |
          echo "env.CI_BRANCH = $CI_BRANCH"
          echo "env.CI_SHA = $CI_SHA"

      - name: Update clone using environment variables
        working-directory: /transformers
        run: |
          echo "original branch = $(git branch --show-current)"
          git fetch && git checkout "$CI_BRANCH"
          echo "updated branch = $(git branch --show-current)"
          git checkout "$CI_SHA"
          echo "log = $(git log -n 1)"

      - name: Cleanup
        working-directory: /transformers
        run: |
          rm -rf tests/__pycache__
          rm -rf tests/models/__pycache__
          rm -rf reports

      - name: Show installed libraries and their versions
        working-directory: /transformers
        run: pip freeze

      # The fetcher's console output is kept in `test_preparation.txt` and
      # uploaded by the next step.
      - name: Fetch the tests to run
        working-directory: /transformers
        run: |
          pip install --upgrade git-python
          python3 utils/tests_fetcher.py --diff_with_last_commit | tee test_preparation.txt

      - name: Report fetched tests
        uses: actions/upload-artifact@v4
        with:
          name: test_fetched
          path: /transformers/test_preparation.txt

      # When `test_map.json` exists, its keys become the `matrix` output and its
      # content the `test_map` output. Otherwise a single `dummy` key is
      # emitted; `run_models_gpu` is skipped when it sees that key.
      - id: set-matrix
        name: Organize tests into models
        working-directory: /transformers
        run: |
          if [ -f test_map.json ]; then
              keys=$(python3 -c 'import json; fp = open("test_map.json"); test_map = json.load(fp); fp.close(); d = list(test_map.keys()); print(d)')
              test_map=$(python3 -c 'import json; fp = open("test_map.json"); test_map = json.load(fp); fp.close(); print(test_map)')
          else
              keys=$(python3 -c 'keys = ["dummy"]; print(keys)')
              test_map=$(python3 -c 'test_map = {"dummy": []}; print(test_map)')
          fi
          echo $keys
          echo $test_map
          echo "matrix=$keys" >> $GITHUB_OUTPUT
          echo "test_map=$test_map" >> $GITHUB_OUTPUT

  # Runs the selected tests: one job per (folder, machine type) pair.
  run_models_gpu:
    name: Model tests
    needs: setup_gpu
    # The `dummy` key is what `setup_gpu` emits when there is nothing to run.
    if: contains(fromJson(needs.setup_gpu.outputs.matrix), 'dummy') != true
    strategy:
      fail-fast: false
      matrix:
        folders: ${{ fromJson(needs.setup_gpu.outputs.matrix) }}
        machine_type: [single-gpu, multi-gpu]
    runs-on: [self-hosted, amd-gpu, '${{ matrix.machine_type }}', '${{ inputs.gpu_flavor }}']
    container:
      image: huggingface/transformers-pytorch-amd-gpu-push-ci
      options: --device /dev/kfd --device /dev/dri --env ROCR_VISIBLE_DEVICES --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
    env:
      # Same four event-derived variables as in `setup_gpu`.
      CI_BRANCH_PUSH: ${{ github.event.ref }}
      CI_BRANCH_WORKFLOW_RUN: ${{ github.event.workflow_run.head_branch }}
      CI_SHA_PUSH: ${{ github.event.head_commit.id }}
      CI_SHA_WORKFLOW_RUN: ${{ github.event.workflow_run.head_sha }}
    steps:
      # Exports `CI_BRANCH` and `CI_SHA`: the `push` value when it is
      # non-empty, otherwise the `workflow_run` value.
      - name: Prepare custom environment variables
        shell: bash
        run: |
          CI_BRANCH_PUSH=${CI_BRANCH_PUSH/'refs/heads/'/''}
          echo $CI_BRANCH_PUSH
          echo $CI_BRANCH_WORKFLOW_RUN
          echo $CI_SHA_PUSH
          echo $CI_SHA_WORKFLOW_RUN
          echo "CI_BRANCH=${CI_BRANCH_PUSH:-$CI_BRANCH_WORKFLOW_RUN}" >> "$GITHUB_ENV"
          echo "CI_SHA=${CI_SHA_PUSH:-$CI_SHA_WORKFLOW_RUN}" >> "$GITHUB_ENV"

      # Shell variables rather than `${{ }}` interpolation: the branch name is
      # event-controlled data and must not become part of the script text.
      - name: print environment variables
        run: |
          echo "env.CI_BRANCH = $CI_BRANCH"
          echo "env.CI_SHA = $CI_SHA"

      - name: Update clone using environment variables
        working-directory: /transformers
        run: |
          echo "original branch = $(git branch --show-current)"
          git fetch && git checkout "$CI_BRANCH"
          echo "updated branch = $(git branch --show-current)"
          git checkout "$CI_SHA"
          echo "log = $(git log -n 1)"

      - name: Reinstall transformers in edit mode (remove the one installed during docker image build)
        working-directory: /transformers
        run: python3 -m pip uninstall -y transformers && python3 -m pip install -e .

      # Exports `matrix_folders`, i.e. the folder key with `models/` replaced
      # by `models_`. It is used for the artifact name below, which may not
      # contain `/`.
      - name: Echo folder ${{ matrix.folders }}
        shell: bash
        run: |
          echo "${{ matrix.folders }}"
          echo "${{ fromJson(needs.setup_gpu.outputs.test_map)[matrix.folders] }}"
          matrix_folders=${{ matrix.folders }}
          matrix_folders=${matrix_folders/'models/'/'models_'}
          echo "$matrix_folders"
          echo "matrix_folders=$matrix_folders" >> $GITHUB_ENV

      - name: ROCM-SMI
        run: |
          rocm-smi
      - name: ROCM-INFO
        run: |
          rocminfo | grep "Agent" -A 14
      - name: Show ROCR environment
        run: |
          echo "ROCR: $ROCR_VISIBLE_DEVICES"

      - name: Environment
        working-directory: /transformers
        run: |
          python3 utils/print_env.py

      - name: Show installed libraries and their versions
        working-directory: /transformers
        run: pip freeze

      # The test targets are the `test_map` entry for this job's folder key.
      - name: Run all non-slow selected tests on GPU
        working-directory: /transformers
        run: |
          python3 -m pytest -n 2 --dist=loadfile -v --make-reports=${{ matrix.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports ${{ fromJson(needs.setup_gpu.outputs.test_map)[matrix.folders] }} -m "not not_device_test"

      - name: Failure short reports
        if: ${{ failure() }}
        continue-on-error: true
        run: cat /transformers/reports/${{ matrix.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports/failures_short.txt

      # The artifact name uses `env.matrix_folders` (no `/`), while the path
      # uses the raw `matrix.folders` value passed to `--make-reports` above.
      - name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_models_gpu_${{ env.matrix_folders }}_test_reports"
        if: ${{ always() }}
        uses: actions/upload-artifact@v4
        with:
          name: ${{ matrix.machine_type }}_run_models_gpu_${{ env.matrix_folders }}_test_reports
          path: /transformers/reports/${{ matrix.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports

  # Collects the results of all previous jobs and reports them to Slack.
  # `if: always()` keeps this job running when an upstream job fails or is
  # skipped.
  send_results:
    name: Send results to webhook
    runs-on: ubuntu-22.04
    if: always()
    needs:
      - check_runner_status
      - check_runners
      - setup_gpu
      - run_models_gpu
    env:
      # Same four event-derived variables as in `setup_gpu`.
      CI_BRANCH_PUSH: ${{ github.event.ref }}
      CI_BRANCH_WORKFLOW_RUN: ${{ github.event.workflow_run.head_branch }}
      CI_SHA_PUSH: ${{ github.event.head_commit.id }}
      CI_SHA_WORKFLOW_RUN: ${{ github.event.workflow_run.head_sha }}
    steps:
      - name: Preliminary job status
        shell: bash
        run: |
          echo "Runner availability: ${{ needs.check_runner_status.result }}"
          echo "Setup status: ${{ needs.setup_gpu.result }}"
          echo "Runner status: ${{ needs.check_runners.result }}"

      # Exports `CI_BRANCH` and `CI_SHA`: the `push` value when it is
      # non-empty, otherwise the `workflow_run` value.
      - name: Prepare custom environment variables
        shell: bash
        run: |
          CI_BRANCH_PUSH=${CI_BRANCH_PUSH/'refs/heads/'/''}
          echo $CI_BRANCH_PUSH
          echo $CI_BRANCH_WORKFLOW_RUN
          echo $CI_SHA_PUSH
          echo $CI_SHA_WORKFLOW_RUN
          echo "CI_BRANCH=${CI_BRANCH_PUSH:-$CI_BRANCH_WORKFLOW_RUN}" >> "$GITHUB_ENV"
          echo "CI_SHA=${CI_SHA_PUSH:-$CI_SHA_WORKFLOW_RUN}" >> "$GITHUB_ENV"

      # Shell variables rather than `${{ }}` interpolation: the branch name is
      # event-controlled data and must not become part of the script text.
      - name: print environment variables
        run: |
          echo "env.CI_BRANCH = $CI_BRANCH"
          echo "env.CI_SHA = $CI_SHA"

      # NOTE(review): the depth of 20 presumably leaves room for `CI_SHA` to
      # be a few commits behind the fetched HEAD when it is checked out in
      # the next step - confirm.
      - uses: actions/checkout@v4
        with:
          fetch-depth: 20

      - name: Update clone using environment variables
        run: |
          echo "original branch = $(git branch --show-current)"
          git fetch && git checkout "$CI_BRANCH"
          echo "updated branch = $(git branch --show-current)"
          git checkout "$CI_SHA"
          echo "log = $(git log -n 1)"

      # No `name` is given, so every artifact of this run is downloaded.
      - uses: actions/download-artifact@v4

      # The raw `matrix` output of `setup_gpu` (keys may still contain `/`) is
      # passed as the only argument to the notification script.
      - name: Send message to Slack
        env:
          CI_SLACK_BOT_TOKEN: ${{ secrets.CI_SLACK_BOT_TOKEN }}
          CI_SLACK_CHANNEL_ID: ${{ secrets.CI_SLACK_CHANNEL_ID }}
          CI_SLACK_CHANNEL_ID_DAILY: ${{ secrets.CI_SLACK_CHANNEL_ID_DAILY }}
          CI_SLACK_CHANNEL_ID_AMD: ${{ secrets.CI_SLACK_CHANNEL_ID_AMD }}
          CI_SLACK_CHANNEL_DUMMY_TESTS: ${{ secrets.CI_SLACK_CHANNEL_DUMMY_TESTS }}
          CI_SLACK_REPORT_CHANNEL_ID: ${{ secrets.CI_SLACK_CHANNEL_ID_AMD }}
          ACCESS_REPO_INFO_TOKEN: ${{ secrets.ACCESS_REPO_INFO_TOKEN }}
          CI_EVENT: Push CI (AMD) - ${{ inputs.gpu_flavor }}
          CI_TITLE_PUSH: ${{ github.event.head_commit.message }}
          CI_TITLE_WORKFLOW_RUN: ${{ github.event.workflow_run.head_commit.message }}
          CI_SHA: ${{ env.CI_SHA }}
          RUNNER_STATUS: ${{ needs.check_runner_status.result }}
          RUNNER_ENV_STATUS: ${{ needs.check_runners.result }}
          SETUP_STATUS: ${{ needs.setup_gpu.result }}
        run: |
          pip install huggingface_hub
          pip install slack_sdk
          pip show slack_sdk
          python utils/notification_service.py "${{ needs.setup_gpu.outputs.matrix }}"