Skip to content

Commit

Permalink
Added test of vector arguments.
Browse files Browse the repository at this point in the history
  • Loading branch information
yugr committed Apr 8, 2024
1 parent 53f7393 commit 6ea3972
Show file tree
Hide file tree
Showing 9 changed files with 229 additions and 6 deletions.
16 changes: 16 additions & 0 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,22 @@ jobs:
fi
done
codecov --required
# Runs the test script (scripts/travis.sh) with CFLAGS=-mavx so that the
# code under test is built with AVX enabled.
avx:
runs-on: ubuntu-latest
env:
CFLAGS: -mavx
steps:
- uses: actions/checkout@v3
- name: Run tests
run: scripts/travis.sh
# NOTE(review): the AVX2 + AVX-512F job below is commented out and the reason
# is not recorded here -- presumably the hosted runner cannot execute AVX-512
# code; confirm before re-enabling.
# avx2:
# runs-on: ubuntu-latest
# env:
# CFLAGS: -mavx2 -mavx512f
# steps:
# - uses: actions/checkout@v3
# - name: Run tests
# run: scripts/travis.sh
x86:
runs-on: ubuntu-latest
env:
Expand Down
73 changes: 71 additions & 2 deletions arch/x86_64/table.S.tpl
Original file line number Diff line number Diff line change
Expand Up @@ -34,18 +34,27 @@ _${lib_suffix}_save_regs_and_resolve:
// Grow (DEC_STACK) or shrink (INC_STACK) the stack by d bytes and adjust the
// CFA offset by the same amount so that unwind info stays in sync.
// NOTE(review): the doubled dollar sign presumably is the template escape for
// a single literal one -- confirm against the template engine.
#define DEC_STACK(d) subq $$d, %rsp; .cfi_adjust_cfa_offset d
#define INC_STACK(d) addq $$d, %rsp; .cfi_adjust_cfa_offset -d

// Save/restore one MMX register through a fresh 8-byte stack slot,
// recording the save location in CFI.
#define PUSH_MMX_REG(reg) DEC_STACK(8); movq %reg, (%rsp); .cfi_rel_offset reg, 0
#define POP_MMX_REG(reg) movq (%rsp), %reg; .cfi_restore reg; INC_STACK(8)

// Save/restore one XMM register through a fresh 16-byte stack slot.
// movdqa is the aligned move, so %rsp has to be 16-byte aligned at this point.
#define PUSH_XMM_REG(reg) DEC_STACK(16); movdqa %reg, (%rsp); .cfi_rel_offset reg, 0
#define POP_XMM_REG(reg) movdqa (%rsp), %reg; .cfi_restore reg; INC_STACK(16)

// Save/restore one YMM register through a fresh 32-byte stack slot.
// The unaligned move (vmovdqu) is used; no CFI is emitted for the register yet.
// TODO: cfi_offset/cfi_restore
#define PUSH_YMM_REG(reg) DEC_STACK(32); vmovdqu %reg, (%rsp)
#define POP_YMM_REG(reg) vmovdqu (%rsp), %reg; INC_STACK(32)

// Save/restore one ZMM register through a fresh 64-byte stack slot.
// The unaligned move (vmovdqu32) is used; no CFI is emitted for the register yet.
// TODO: cfi_offset/cfi_restore
#define PUSH_ZMM_REG(reg) DEC_STACK(64); vmovdqu32 %reg, (%rsp)
#define POP_ZMM_REG(reg) vmovdqu32 (%rsp), %reg; INC_STACK(64)

// Slow path which calls dlsym, taken only on first call.
// All registers are stored to handle arbitrary calling conventions
// (except x87 FPU registers which do not have to be preserved).
// For Dwarf directives, read https://www.imperialviolet.org/2017/01/18/cfi.html.

.cfi_def_cfa_offset 8 // Return address

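// Only the first eight vector registers are saved, using the widest register
// class selected by the preprocessor checks below (ZMM, YMM, XMM or MMX).
// NOTE(review): the ZMM path is keyed on __AVX2__, but vmovdqu32 on ZMM
// registers is an AVX-512F instruction; consider keying it on __AVX512F__.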

PUSH_REG(rdi) // 16
mov 0x10(%rsp), %rdi
PUSH_REG(rax)
Expand All @@ -62,6 +71,26 @@ _${lib_suffix}_save_regs_and_resolve:
PUSH_REG(r13) // 16
PUSH_REG(r14)
PUSH_REG(r15) // 16

#ifdef __AVX2__
PUSH_ZMM_REG(zmm0)
PUSH_ZMM_REG(zmm1)
PUSH_ZMM_REG(zmm2)
PUSH_ZMM_REG(zmm3)
PUSH_ZMM_REG(zmm4)
PUSH_ZMM_REG(zmm5)
PUSH_ZMM_REG(zmm6)
PUSH_ZMM_REG(zmm7)
#elif defined __AVX__
PUSH_YMM_REG(ymm0)
PUSH_YMM_REG(ymm1)
PUSH_YMM_REG(ymm2)
PUSH_YMM_REG(ymm3)
PUSH_YMM_REG(ymm4)
PUSH_YMM_REG(ymm5)
PUSH_YMM_REG(ymm6)
PUSH_YMM_REG(ymm7)
#elif defined __SSE__
PUSH_XMM_REG(xmm0)
PUSH_XMM_REG(xmm1)
PUSH_XMM_REG(xmm2)
Expand All @@ -70,10 +99,39 @@ _${lib_suffix}_save_regs_and_resolve:
PUSH_XMM_REG(xmm5)
PUSH_XMM_REG(xmm6)
PUSH_XMM_REG(xmm7)
#elif defined __MMX__
PUSH_MMX_REG(mm0)
PUSH_MMX_REG(mm1)
PUSH_MMX_REG(mm2)
PUSH_MMX_REG(mm3)
PUSH_MMX_REG(mm4)
PUSH_MMX_REG(mm5)
PUSH_MMX_REG(mm6)
PUSH_MMX_REG(mm7)
#endif

// Stack is just 8-byte aligned but callee will re-align to 16
call _${lib_suffix}_tramp_resolve

#ifdef __AVX2__
POP_ZMM_REG(zmm7)
POP_ZMM_REG(zmm6)
POP_ZMM_REG(zmm5)
POP_ZMM_REG(zmm4)
POP_ZMM_REG(zmm3)
POP_ZMM_REG(zmm2)
POP_ZMM_REG(zmm1)
POP_ZMM_REG(zmm0) // 16
#elif defined __AVX__
POP_YMM_REG(ymm7)
POP_YMM_REG(ymm6)
POP_YMM_REG(ymm5)
POP_YMM_REG(ymm4)
POP_YMM_REG(ymm3)
POP_YMM_REG(ymm2)
POP_YMM_REG(ymm1)
POP_YMM_REG(ymm0) // 16
#elif defined __SSE__
POP_XMM_REG(xmm7)
POP_XMM_REG(xmm6)
POP_XMM_REG(xmm5)
Expand All @@ -82,6 +140,17 @@ _${lib_suffix}_save_regs_and_resolve:
POP_XMM_REG(xmm2)
POP_XMM_REG(xmm1)
POP_XMM_REG(xmm0) // 16
#elif defined __MMX__
POP_MMX_REG(mm7)
POP_MMX_REG(mm6)
POP_MMX_REG(mm5)
POP_MMX_REG(mm4)
POP_MMX_REG(mm3)
POP_MMX_REG(mm2)
POP_MMX_REG(mm1)
POP_MMX_REG(mm0) // 16
#endif

POP_REG(r15)
POP_REG(r14) // 16
POP_REG(r13)
Expand Down
4 changes: 4 additions & 0 deletions scripts/travis.sh
Original file line number Diff line number Diff line change
Expand Up @@ -41,3 +41,7 @@ if ! echo "$ARCH" | grep -q powerpc; then
tests/many-functions/run.sh $ARCH
fi
tests/stack-args/run.sh $ARCH
# Run the vector-argument test on every target except powerpc and mips.
# grep -E is used because alternation spelled as \| in a basic regular
# expression is a GNU extension and is not portable to other grep versions.
if ! echo "$ARCH" | grep -Eq 'powerpc|mips'; then
  # TODO: support vector types for remaining platforms
  tests/vector-args/run.sh $ARCH
fi
5 changes: 1 addition & 4 deletions tests/stack-args/run.sh
Original file line number Diff line number Diff line change
Expand Up @@ -7,12 +7,9 @@
# Use of this source code is governed by MIT license that can be
# found in the LICENSE.txt file.

# This is a simple test for Implib.so functionality.
# This is a simple test that verifies that parameters are correctly passed on stack.
# Run it like
# ./run.sh ARCH
# where ARCH stands for any supported arch (arm, x86_64, etc., see `implib-gen -h' for full list).
# Note that you may need to install qemu-user for respective platform
# (i386 also needs gcc-multilib).

set -eu

Expand Down
30 changes: 30 additions & 0 deletions tests/vector-args/interposed.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
/*
* Copyright 2024 Yury Gribov
*
* The MIT License (MIT)
*
* Use of this source code is governed by MIT license that can be
* found in the LICENSE.txt file.
*/

#include "interposed.h"

// Exported entry point of the interposed library: returns its vector
// argument with every lane multiplied by three.
__attribute__((visibility("default")))
vector_type foo(vector_type x) {
  const vector_type tripled = x * 3;
  return tripled;
}

// Helper that takes eight vector arguments and returns their lane-wise sum.
// It has to remain a real out-of-line function, so inlining is disabled on
// every compiler and cloning is additionally disabled on GCC.
// Clang documents noinline but neither noipa nor noclone (both are GCC
// attributes), so requesting noipa there would only produce an
// "unknown attribute ignored" warning and leave the function unprotected.
static
#ifdef __clang__
__attribute__((noinline))
#else
__attribute__((noinline,noclone))
#endif
vector_type dummy(vector_type x0, vector_type x1, vector_type x2, vector_type x3, vector_type x4, vector_type x5, vector_type x6, vector_type x7) {
  return x0 + x1 + x2 + x3 + x4 + x5 + x6 + x7;
}

// Constructor (runs when the library is loaded) that passes eight zero
// vectors to dummy().
// NOTE(review): presumably the point is to overwrite the vector argument
// registers while the library is being loaded, so that a trampoline which
// fails to save/restore them is detected by the test -- confirm.
__attribute__((constructor)) void touch_vector_regs() {
  vector_type zero = {0};
  // dummy() has no side effects, so a call whose result is unused may be
  // removed by the optimizer even when inlining is disabled. Storing the
  // result into a volatile object keeps the call (and the argument setup).
  volatile vector_type sink = dummy(zero, zero, zero, zero, zero, zero, zero, zero);
  (void)sink;
}
33 changes: 33 additions & 0 deletions tests/vector-args/interposed.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
/*
* Copyright 2024 Yury Gribov
*
* The MIT License (MIT)
*
* Use of this source code is governed by MIT license that can be
* found in the LICENSE.txt file.
*/

#ifndef INTERPOSED_H
#define INTERPOSED_H

// Determine number of 32-bit ints in native vector type
// for each supported platform.
//
// 512-bit ZMM registers are only available with AVX-512F. AVX2 alone still
// has 256-bit YMM registers at most, so it is covered by the __AVX__ branch
// (AVX2 implies AVX).
#if defined __AVX512F__ /* ZMM regs */
# define VECTOR_SIZE 16
#elif defined __AVX__ /* YMM regs */
# define VECTOR_SIZE 8
#elif defined __SSE__ /* XMM regs */ \
  || defined __aarch64__ /* NEON regs */
# define VECTOR_SIZE 4
#elif defined __MMX__ /* MMX regs */ \
  || defined __arm__ /* NEON regs */
# define VECTOR_SIZE 2
#else
# error "Unknown platform"
#endif

// GCC/Clang vector-extension type holding VECTOR_SIZE ints.
typedef int vector_type __attribute__((vector_size(sizeof(int) * VECTOR_SIZE)));

// Function exported by the interposed library; takes and returns a vector by value.
extern vector_type foo(vector_type x);

#endif
36 changes: 36 additions & 0 deletions tests/vector-args/main.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
/*
* Copyright 2024 Yury Gribov
*
* The MIT License (MIT)
*
* Use of this source code is governed by MIT license that can be
* found in the LICENSE.txt file.
*/

#include <stdio.h>

#include "interposed.h"

// Initializer for a vector_type with exactly VECTOR_SIZE elements
// (1, 2, ..., VECTOR_SIZE), so that every lane carries a distinct nonzero
// value and a corrupted last lane cannot go unnoticed.
#if VECTOR_SIZE == 2
# define VECTOR_INIT {1, 2}
#elif VECTOR_SIZE == 4
# define VECTOR_INIT {1, 2, 3, 4}
#elif VECTOR_SIZE == 8
# define VECTOR_INIT {1, 2, 3, 4, 5, 6, 7, 8}
#elif VECTOR_SIZE == 16
# define VECTOR_INIT {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}
#else
# error "Unsupported vector size"
#endif

// Calls foo() with a known vector and compares the result lane by lane with
// 3 * x computed locally. Prints "NOT OK" and returns 1 on the first
// mismatching lane; prints nothing and returns 0 when all lanes agree.
int main() {
  vector_type x = VECTOR_INIT;
  vector_type res = foo(x);
  vector_type ref = 3 * x;

  int lane = 0;
  while (lane < VECTOR_SIZE) {
    if (res[lane] != ref[lane]) {
      printf("NOT OK\n");
      return 1;
    }
    ++lane;
  }

  return 0;
}
38 changes: 38 additions & 0 deletions tests/vector-args/run.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
#!/bin/sh

# Copyright 2024 Yury Gribov
#
# The MIT License (MIT)
#
# Use of this source code is governed by MIT license that can be
# found in the LICENSE.txt file.

# This test verifies that Implib trampolines save/restore vector regs correctly.
# Run it like
# ./run.sh ARCH

set -eu

# Work from the directory of this script; quoted so that paths containing
# whitespace do not get split into several words.
cd "$(dirname "$0")"

# Optional first argument selects the architecture.
if test -n "${1:-}"; then
  ARCH="$1"
fi

. ../common.sh

CFLAGS="-g -O2 $CFLAGS"

# Build shlib to test against
$CC $CFLAGS -shared -fPIC interposed.c -o libinterposed.so

# Prepare implib
${PYTHON:-} ../../implib-gen.py -q --target $TARGET libinterposed.so

# Build app
$CC $CFLAGS main.c libinterposed.so.tramp.S libinterposed.so.init.c $LIBS

# Run the app and compare its output with the reference output
LD_LIBRARY_PATH=.:${LD_LIBRARY_PATH:-} $INTERP ./a.out > a.out.log
diff test.ref a.out.log

echo SUCCESS
Empty file added tests/vector-args/test.ref
Empty file.

0 comments on commit 6ea3972

Please sign in to comment.