Recompute KV cache for Phi3 when switching from short to long factor #3814
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
name: "Linux CPU ARM64 Build" | |
on: | |
workflow_dispatch: | |
push: | |
branches: | |
- main | |
- rel-* | |
pull_request: | |
concurrency: | |
group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} | |
cancel-in-progress: true | |
env: | |
ORT_NIGHTLY_REST_API: "https://feeds.dev.azure.com/aiinfra/PublicPackages/_apis/packaging/Feeds/ORT-Nightly/packages?packageNameQuery=Microsoft.ML.OnnxRuntime&api-version=6.0-preview.1" | |
ORT_PACKAGE_NAME: "Microsoft.ML.OnnxRuntime" | |
ORT_NIGHTLY_SOURCE: "https://aiinfra.pkgs.visualstudio.com/PublicPackages/_packaging/ORT-Nightly/nuget/v3/index.json" | |
DOTNET_INSTALL_DIR: "${{ github.workspace }}/dotnet" | |
jobs: | |
linux-cpu-arm64-build: | |
runs-on: [ "self-hosted", "1ES.Pool=onnxruntime-genai-Ubuntu2004-ARM-CPU" ] | |
steps: | |
- name: Checkout OnnxRuntime GenAI repo | |
uses: actions/checkout@v4 | |
with: | |
submodules: 'true' | |
- uses: actions/setup-dotnet@v4 | |
with: | |
dotnet-version: '8.0.x' | |
- name: Get the Latest OnnxRuntime Nightly Version | |
shell: pwsh | |
run: | | |
$resp = Invoke-RestMethod "${{ env.ORT_NIGHTLY_REST_API }}" | |
$ORT_NIGHTLY_VERSION = $resp.value[0].versions[0].normalizedVersion | |
Write-Host "$ORT_NIGHTLY_VERSION" | |
"ORT_NIGHTLY_VERSION=$ORT_NIGHTLY_VERSION" | Out-File -FilePath $env:GITHUB_ENV -Append | |
- name: Download OnnxRuntime Nightly | |
run: | | |
dotnet new console | |
dotnet add package ${{ env.ORT_PACKAGE_NAME }} --version ${{ env.ORT_NIGHTLY_VERSION }} --source ${{ env.ORT_NIGHTLY_SOURCE }} --package-directory . | |
dotnet build | |
continue-on-error: true | |
- name: list files | |
shell: bash | |
run: | | |
ls -l | |
ls -R microsoft.ml.onnxruntime | |
continue-on-error: true | |
# TODO: Find out why do we need to to have libonnxruntime.so.$ort_version | |
- name: Extract OnnxRuntime library and header files | |
run: | | |
mkdir -p ort/lib | |
mv microsoft.ml.onnxruntime/**/build/native/include ort/ | |
mv microsoft.ml.onnxruntime/**/runtimes/linux-arm64/native/* ort/lib/ | |
cp ort/lib/libonnxruntime.so ort/lib/libonnxruntime.so.1 | |
- name: Download Docker Image | |
run: | | |
set -e -x | |
az login --identity --username 63b63039-6328-442f-954b-5a64d124e5b4 | |
az acr login --name onnxruntimebuildcache --subscription 00c06639-6ee4-454e-8058-8d8b1703bd87 | |
python3 tools/ci_build/get_docker_image.py --dockerfile tools/ci_build/github/linux/docker/inference/aarch64/default/cpu/Dockerfile \ | |
--context tools/ci_build/github/linux/docker/inference/aarch64/default/cpu \ | |
--docker-build-args "--build-arg BUILD_UID=$( id -u )" \ | |
--container-registry onnxruntimebuildcache \ | |
--repository ort_genai_linux_arm64_gha | |
- name: Docker -- Configure with CMake and GCC | |
run: | | |
docker run --rm \ | |
--volume $GITHUB_WORKSPACE:/onnxruntime_src \ | |
-w /onnxruntime_src ort_genai_linux_arm64_gha bash -c "/usr/bin/cmake --preset linux_gcc_cpu_release" | |
- name: Docker -- Build with CMake and GCC | |
run: | | |
docker run --rm \ | |
--volume $GITHUB_WORKSPACE:/onnxruntime_src \ | |
-w /onnxruntime_src ort_genai_linux_arm64_gha bash -c "/usr/bin/cmake --build --preset linux_gcc_cpu_release" | |
- name: Docker -- Check test directory | |
run: | | |
docker run --rm \ | |
--volume $GITHUB_WORKSPACE:/onnxruntime_src \ | |
-w /onnxruntime_src ort_genai_linux_arm64_gha bash -c "ls -l /onnxruntime_src/build/cpu/test/" | |
- name: Docker -- Run tests | |
run: | | |
docker run --rm \ | |
--volume $GITHUB_WORKSPACE:/onnxruntime_src \ | |
-w /onnxruntime_src ort_genai_linux_arm64_gha bash -c "LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/onnxruntime_src/ort/lib/ /onnxruntime_src/build/cpu/unit_tests" |