diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
index 8f8d0c4..cb754b6 100644
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -29,7 +29,6 @@ jobs:
         - 2.5.9
         - 2.6.3
         - Python
-        - 3.10
         - 3.11
         - 3.12
         - PyTorch
@@ -52,7 +51,7 @@ jobs:
       fail-fast: false
       matrix:
         flash-attn-version: ["2.4.3", "2.5.6", "2.5.9", "2.6.3"]
-        python-version: ["3.10", "3.11", "3.12"]
+        python-version: ["3.11", "3.12"]
         torch-version: ["2.0.1", "2.1.2", "2.2.2", "2.3.1", "2.4.1", "2.5.0"]
         cuda-version: ["11.8.0", "12.1.1", "12.4.1"]
         exclude:
@@ -134,10 +133,11 @@ jobs:
           pip install setuptools==68.0.0 ninja packaging wheel
           export PATH=/usr/local/nvidia/bin:/usr/local/nvidia/lib64:$PATH
           export LD_LIBRARY_PATH=/usr/local/nvidia/lib64:/usr/local/cuda/lib64:$LD_LIBRARY_PATH
-          export MAX_JOBS=3
+          export MAX_JOBS=$(($(nproc) - 1))
           cd flash-attention
           FLASH_ATTENTION_FORCE_BUILD="TRUE" python setup.py bdist_wheel --dist-dir=dist
-          wheel_name=$(basename $(ls dist/*.whl | head -n 1))
+          base_wheel_name=$(basename $(ls dist/*.whl | head -n 1))
+          wheel_name=$(echo "$base_wheel_name" | sed 's/${{ matrix.flash-attn-version }}/${{ matrix.flash-attn-version }}+cu${{ env.MATRIX_CUDA_VERSION }}torch${{ env.MATRIX_TORCH_VERSION }}/')
           echo "wheel_name=$wheel_name" >> $GITHUB_ENV
 
       - name: Install Test