aitbc/.github/workflows/gpu-benchmark.yml
dependabot[bot] fa1f16555c ci(deps): bump actions/setup-python from 4 to 6
Bumps [actions/setup-python](https://github.com/actions/setup-python) from 4 to 6.
- [Release notes](https://github.com/actions/setup-python/releases)
- [Commits](https://github.com/actions/setup-python/compare/v4...v6)

---
updated-dependencies:
- dependency-name: actions/setup-python
  dependency-version: '6'
  dependency-type: direct:production
  update-type: version-update:semver-major
...

Signed-off-by: dependabot[bot] <support@github.com>
2026-03-26 07:53:45 +00:00

name: GPU Benchmark CI

on:
  push:
    branches: [ main, develop ]
  pull_request:
    branches: [ main ]
  schedule:
    # Run benchmarks daily at 2 AM UTC
    - cron: '0 2 * * *'

jobs:
  gpu-benchmark:
    runs-on: ubuntu-latest
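    # NOTE: standard GitHub-hosted ubuntu-latest runners do not expose a GPU.
    # This job presumably targets a CUDA-capable (self-hosted or GPU-enabled)
    # runner; on a CPU-only machine the verification step below will simply
    # report CUDA as unavailable rather than fail.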
    strategy:
      matrix:
        python-version: [ "3.13.5" ]
    steps:
      - uses: actions/checkout@v4

      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v6
        with:
          python-version: ${{ matrix.python-version }}

      - name: Install system dependencies
        run: |
          sudo apt-get update
          sudo apt-get install -y \
            build-essential \
            python3-dev \
            pkg-config \
            libnvidia-compute-515 \
            cuda-toolkit-12-2 \
            nvidia-driver-515
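      # The pinned NVIDIA packages are only installable if the runner image's
      # apt sources carry them, so treat these versions as an assumption tied
      # to the current base image. Note also that the 515 driver series
      # predates CUDA 12.x (12.2 generally wants >= 535), so this
      # driver/toolkit pairing may need revisiting.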

      - name: Cache pip dependencies
        uses: actions/cache@v5
        with:
          path: ~/.cache/pip
          key: ${{ runner.os }}-pip-${{ hashFiles('**/pyproject.toml') }}
          restore-keys: |
            ${{ runner.os }}-pip-
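      # The cache key changes whenever any pyproject.toml changes; the
      # restore-keys prefix lets a run fall back to the most recent cache
      # for this OS when there is no exact key match.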

      - name: Install Python dependencies
        run: |
          python -m pip install --upgrade pip
          pip install -e .
          pip install pytest pytest-benchmark torch torchvision torchaudio
          pip install cupy-cuda12x
          pip install nvidia-ml-py3
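      # cupy-cuda12x is the CuPy wheel built against CUDA 12.x, matching the
      # toolkit installed above; nvidia-ml-py3 provides NVML bindings,
      # presumably used by the benchmark scripts for GPU telemetry.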

      - name: Verify GPU availability
        run: |
          python -c "
          import torch
          print(f'PyTorch version: {torch.__version__}')
          print(f'CUDA available: {torch.cuda.is_available()}')
          if torch.cuda.is_available():
              print(f'CUDA version: {torch.version.cuda}')
              print(f'GPU count: {torch.cuda.device_count()}')
              print(f'GPU name: {torch.cuda.get_device_name(0)}')
          "

      - name: Run GPU benchmarks
        run: |
          python -m pytest dev/gpu/test_gpu_performance.py \
            --benchmark-only \
            --benchmark-json=benchmark_results.json \
            --benchmark-sort=mean \
            -v
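      # --benchmark-only skips regular (non-benchmark) tests, and the JSON
      # output is what the report, comparison, and gating steps below consume.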

      - name: Generate benchmark report
        run: |
          python dev/gpu/generate_benchmark_report.py \
            --input benchmark_results.json \
            --output benchmark_report.html \
            --history-file benchmark_history.json

      - name: Upload benchmark results
        uses: actions/upload-artifact@v4
        with:
          name: benchmark-results-${{ matrix.python-version }}
          path: |
            benchmark_results.json
            benchmark_report.html
            benchmark_history.json
          retention-days: 30
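      # upload-artifact v4 artifacts are immutable and names must be unique
      # per run; suffixing with the matrix Python version keeps matrix
      # entries from colliding.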

      - name: Compare with baseline
        run: |
          python dev/gpu/compare_benchmarks.py \
            --current benchmark_results.json \
            --baseline .github/baselines/gpu_baseline.json \
            --threshold 5.0 \
            --output comparison_report.json
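      # The 5.0 here is presumably a percent tolerance for flagging changes
      # in the report; the hard gate at the end of the job uses a looser 10.0.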

      - name: Comment PR with results
        if: github.event_name == 'pull_request'
        uses: actions/github-script@v7
        with:
          script: |
            const fs = require('fs');
            try {
              const results = JSON.parse(fs.readFileSync('comparison_report.json', 'utf8'));
              const comment = `
            ## 🚀 GPU Benchmark Results

            **Performance Summary:**
            - **Mean Performance**: ${results.mean_performance.toFixed(2)} ops/sec
            - **Performance Change**: ${results.performance_change > 0 ? '+' : ''}${results.performance_change.toFixed(2)}%
            - **Status**: ${results.status}

            **Key Metrics:**
            ${results.metrics.map(m => `- **${m.name}**: ${m.value.toFixed(2)} ops/sec (${m.change > 0 ? '+' : ''}${m.change.toFixed(2)}%)`).join('\n')}

            ${results.regressions.length > 0 ? '⚠️ **Performance Regressions Detected**' : '✅ **No Performance Regressions**'}

            [View detailed report](${process.env.GITHUB_SERVER_URL}/${process.env.GITHUB_REPOSITORY}/actions/runs/${process.env.GITHUB_RUN_ID})
            `;
              await github.rest.issues.createComment({
                issue_number: context.issue.number,
                owner: context.repo.owner,
                repo: context.repo.repo,
                body: comment
              });
            } catch (error) {
              console.log('Could not generate benchmark comment:', error.message);
            }

      - name: Update benchmark history
        run: |
          python dev/gpu/update_benchmark_history.py \
            --results benchmark_results.json \
            --history-file .github/baselines/benchmark_history.json \
            --max-entries 100
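      # Note: this writes into the checked-out workspace, which is discarded
      # when the job ends; the history only persists if the script commits it
      # back or a later step uploads/pushes it.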

      - name: Fail on performance regression
        run: |
          python dev/gpu/check_performance_regression.py \
            --results benchmark_results.json \
            --baseline .github/baselines/gpu_baseline.json \
            --threshold 10.0
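
---

For context, the gating step's contract is fully visible in its flags (--results, --baseline, --threshold): a non-zero exit from the script fails the step and hence the job. Below is a minimal sketch of what such a gate might look like, assuming pytest-benchmark's JSON layout and a percent-slowdown threshold; the repository's actual dev/gpu/check_performance_regression.py is not shown on this page and may differ.

#!/usr/bin/env python3
"""Hypothetical sketch of a regression gate matching the CLI above.

Everything here (JSON handling, aggregation, exit codes) is an
assumption, not the repository's actual implementation.
"""
import argparse
import json
import sys


def mean_ops_per_sec(benchmarks):
    # pytest-benchmark JSON stores per-test stats under "stats";
    # "mean" is seconds per operation, so invert it for ops/sec.
    rates = [1.0 / b["stats"]["mean"] for b in benchmarks]
    return sum(rates) / len(rates)


def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("--results", required=True)
    parser.add_argument("--baseline", required=True)
    parser.add_argument("--threshold", type=float, default=10.0,
                        help="maximum tolerated slowdown, in percent")
    args = parser.parse_args()

    with open(args.results) as f:
        current = mean_ops_per_sec(json.load(f)["benchmarks"])
    with open(args.baseline) as f:
        baseline = mean_ops_per_sec(json.load(f)["benchmarks"])

    change_pct = (current - baseline) / baseline * 100.0
    print(f"baseline={baseline:.2f} ops/s  current={current:.2f} ops/s  "
          f"change={change_pct:+.2f}%")

    # A non-zero exit fails the workflow step, which fails the job.
    if change_pct < -args.threshold:
        print(f"FAIL: slowdown exceeds {args.threshold}% threshold")
        sys.exit(1)


if __name__ == "__main__":
    main()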