Skip to content

Recompute KV cache for Phi3 when switching from short to long factor #3807

Recompute KV cache for Phi3 when switching from short to long factor

Recompute KV cache for Phi3 when switching from short to long factor #3807

name: "Linux CPU ARM64 Build"
on:
workflow_dispatch:
push:
branches:
- main
- rel-*
pull_request:
concurrency:
group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
cancel-in-progress: true
env:
ORT_NIGHTLY_REST_API: "https://feeds.dev.azure.com/aiinfra/PublicPackages/_apis/packaging/Feeds/ORT-Nightly/packages?packageNameQuery=Microsoft.ML.OnnxRuntime&api-version=6.0-preview.1"
ORT_PACKAGE_NAME: "Microsoft.ML.OnnxRuntime"
ORT_NIGHTLY_SOURCE: "https://aiinfra.pkgs.visualstudio.com/PublicPackages/_packaging/ORT-Nightly/nuget/v3/index.json"
DOTNET_INSTALL_DIR: "${{ github.workspace }}/dotnet"
jobs:
linux-cpu-arm64-build:
runs-on: [ "self-hosted", "1ES.Pool=onnxruntime-genai-Ubuntu2004-ARM-CPU" ]
steps:
- name: Checkout OnnxRuntime GenAI repo
uses: actions/checkout@v4
with:
submodules: 'true'
- uses: actions/setup-dotnet@v4
with:
dotnet-version: '8.0.x'
- name: Get the Latest OnnxRuntime Nightly Version
shell: pwsh
run: |
$resp = Invoke-RestMethod "${{ env.ORT_NIGHTLY_REST_API }}"
$ORT_NIGHTLY_VERSION = $resp.value[0].versions[0].normalizedVersion
Write-Host "$ORT_NIGHTLY_VERSION"
"ORT_NIGHTLY_VERSION=$ORT_NIGHTLY_VERSION" | Out-File -FilePath $env:GITHUB_ENV -Append
- name: Download OnnxRuntime Nightly
run: |
dotnet new console
dotnet add package ${{ env.ORT_PACKAGE_NAME }} --version ${{ env.ORT_NIGHTLY_VERSION }} --source ${{ env.ORT_NIGHTLY_SOURCE }} --package-directory .
dotnet build
continue-on-error: true
- name: list files
shell: bash
run: |
ls -l
ls -R microsoft.ml.onnxruntime
continue-on-error: true
# TODO: Find out why do we need to to have libonnxruntime.so.$ort_version
- name: Extract OnnxRuntime library and header files
run: |
mkdir -p ort/lib
mv microsoft.ml.onnxruntime/**/build/native/include ort/
mv microsoft.ml.onnxruntime/**/runtimes/linux-arm64/native/* ort/lib/
cp ort/lib/libonnxruntime.so ort/lib/libonnxruntime.so.1
- name: Download Docker Image
run: |
set -e -x
az login --identity --username 63b63039-6328-442f-954b-5a64d124e5b4
az acr login --name onnxruntimebuildcache --subscription 00c06639-6ee4-454e-8058-8d8b1703bd87
python3 tools/ci_build/get_docker_image.py --dockerfile tools/ci_build/github/linux/docker/inference/aarch64/default/cpu/Dockerfile \
--context tools/ci_build/github/linux/docker/inference/aarch64/default/cpu \
--docker-build-args "--build-arg BUILD_UID=$( id -u )" \
--container-registry onnxruntimebuildcache \
--repository ort_genai_linux_arm64_gha
- name: Docker -- Configure with CMake and GCC
run: |
docker run --rm \
--volume $GITHUB_WORKSPACE:/onnxruntime_src \
-w /onnxruntime_src ort_genai_linux_arm64_gha bash -c "/usr/bin/cmake --preset linux_gcc_cpu_release"
- name: Docker -- Build with CMake and GCC
run: |
docker run --rm \
--volume $GITHUB_WORKSPACE:/onnxruntime_src \
-w /onnxruntime_src ort_genai_linux_arm64_gha bash -c "/usr/bin/cmake --build --preset linux_gcc_cpu_release"
- name: Docker -- Check test directory
run: |
docker run --rm \
--volume $GITHUB_WORKSPACE:/onnxruntime_src \
-w /onnxruntime_src ort_genai_linux_arm64_gha bash -c "ls -l /onnxruntime_src/build/cpu/test/"
- name: Docker -- Run tests
run: |
docker run --rm \
--volume $GITHUB_WORKSPACE:/onnxruntime_src \
-w /onnxruntime_src ort_genai_linux_arm64_gha bash -c "LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/onnxruntime_src/ort/lib/ /onnxruntime_src/build/cpu/unit_tests"