Skip to content

Commit

Permalink
Merge branch 'dev'
Browse files Browse the repository at this point in the history
  • Loading branch information
herumi committed Nov 11, 2024
2 parents 2c02730 + 2d6794c commit 4e44f46
Show file tree
Hide file tree
Showing 11 changed files with 110 additions and 45 deletions.
11 changes: 7 additions & 4 deletions .github/workflows/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -19,11 +19,14 @@ jobs:
steps:
- uses: actions/checkout@v4
- run: apt -y update
- run: apt -y install g++-multilib libboost-dev make nasm yasm wget xz-utils python3
- run: apt -y install g++-multilib libboost-dev make nasm yasm wget python3 #xz-utils
- run: make test
- run: make -C sample CXXFLAGS="-DXBYAK_NO_EXCEPTION"
- run: |
cd test
wget https://downloadmirror.intel.com/831748/sde-external-9.44.0-2024-08-22-lin.tar.xz
tar xvf sde-external-9.44.0-2024-08-22-lin.tar.xz
env XED=sde-external-9.44.0-2024-08-22-lin/xed64 make xed_test
#wget https://downloadmirror.intel.com/831748/sde-external-9.44.0-2024-08-22-lin.tar.xz
#tar xvf sde-external-9.44.0-2024-08-22-lin.tar.xz
wget https://github.com/herumi/xed-bin/raw/refs/heads/main/xed.tgz
tar xvf xed.tgz
./xed -version
env XED=./xed make xed_test
2 changes: 1 addition & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
cmake_minimum_required(VERSION 3.5)

project(xbyak LANGUAGES CXX VERSION 7.21)
project(xbyak LANGUAGES CXX VERSION 7.22)

file(GLOB headers xbyak/*.h)

Expand Down
1 change: 1 addition & 0 deletions doc/changelog.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
# History

* 2024/Nov/11 ver 7.22 Add Reg::cvt{128,256,512}(). Tested with xed 2024.11.04
* 2024/Oct/31 ver 7.21 Enhance XMM register validation in SSE instructions
* 2024/Oct/17 ver 7.20.1 Updated to comply with AVX10.2 specification rev 2.0
* 2024/Oct/15 ver 7.20 Fixed the specification of setDefaultEncoding, setDefaultEncodingAVX10.
Expand Down
2 changes: 1 addition & 1 deletion meson.build
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
project(
'xbyak',
'cpp',
version: '7.21',
version: '7.22',
license: 'BSD-3-Clause',
default_options: 'b_ndebug=if-release'
)
Expand Down
2 changes: 1 addition & 1 deletion readme.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@

# Xbyak 7.21 [![Badge Build]][Build Status]
# Xbyak 7.22 [![Badge Build]][Build Status]

*A JIT assembler for x86/x64 architectures supporting advanced instruction sets up to AVX10.2*

Expand Down
3 changes: 2 additions & 1 deletion readme.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@

C++用x86(IA-32), x64(AMD64, x86-64) JITアセンブラ Xbyak 7.21
C++用x86(IA-32), x64(AMD64, x86-64) JITアセンブラ Xbyak 7.22

-----------------------------------------------------------------------------
◎概要
Expand Down Expand Up @@ -404,6 +404,7 @@ sample/{echo,hello}.bfは http://www.kmonos.net/alang/etc/brainfuck.php から
-----------------------------------------------------------------------------
◎履歴

2024/11/11 ver 7.22 Reg::cvt{128,256,512}()を追加. xed 2024.11.04でテスト
2024/10/31 ver 7.21 SSE命令のXMMレジスタのチェックを厳密化
2024/10/17 ver 7.20.1 AVX10.2 rev 2.0仕様書の変更に追従
2024/10/15 ver 7.20 setDefaultEncoding/setDefaultEncodingAVX10の仕様確定
Expand Down
3 changes: 1 addition & 2 deletions test/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -60,8 +60,7 @@ apx: apx.cpp $(XBYAK_INC)
avx10_test: avx10_test.cpp $(XBYAK_INC)
$(CXX) $(CFLAGS) avx10_test.cpp -o $@ -DXBYAK64

#TEST_FILES=old.txt new-ymm.txt bf16.txt comp.txt misc.txt convert.txt minmax.txt saturation.txt
TEST_FILES=old.txt new-ymm.txt bf16.txt misc.txt convert.txt minmax.txt saturation.txt
TEST_FILES=old.txt new-ymm.txt bf16.txt comp.txt misc.txt convert.txt minmax.txt saturation.txt
xed_test:
@set -e; \
for target in $(addprefix avx10/, $(TEST_FILES)); do \
Expand Down
18 changes: 9 additions & 9 deletions test/avx10/bf16.txt
Original file line number Diff line number Diff line change
Expand Up @@ -113,17 +113,17 @@ vfpclasspbf16(k7|k5, zword_b[rax+128], 13);
vcomsbf16(xm2, xm3);
vcomsbf16(xm2, ptr[rax+128]);

//vgetexppbf16(xm1|k3, xmm2);
//vgetexppbf16(xm1|k3, ptr[rax+128]);
//vgetexppbf16(xm1|k3, ptr_b[rax+128]);
vgetexppbf16(xm1|k3, xmm2);
vgetexppbf16(xm1|k3, ptr[rax+128]);
vgetexppbf16(xm1|k3, ptr_b[rax+128]);

//vgetexppbf16(ym1|k3, ymm2);
//vgetexppbf16(ym1|k3, ptr[rax+128]);
//vgetexppbf16(ym1|k3, ptr_b[rax+128]);
vgetexppbf16(ym1|k3, ymm2);
vgetexppbf16(ym1|k3, ptr[rax+128]);
vgetexppbf16(ym1|k3, ptr_b[rax+128]);

//vgetexppbf16(zm1|k3, zmm2);
//vgetexppbf16(zm1|k3, ptr[rax+128]);
//vgetexppbf16(zm1|k3, ptr_b[rax+128]);
vgetexppbf16(zm1|k3, zmm2);
vgetexppbf16(zm1|k3, ptr[rax+128]);
vgetexppbf16(zm1|k3, ptr_b[rax+128]);

vgetmantpbf16(xm1|k3, xmm2, 3);
vgetmantpbf16(xm1|k3, ptr[rax+128], 5);
Expand Down
88 changes: 64 additions & 24 deletions test/cvt_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,38 +12,45 @@ const struct Ptn {
Reg16 reg16;
Reg32 reg32;
Reg64 reg64;
Xmm x;
Ymm y;
Zmm z;
} tbl[] = {
{ &al, ax, eax, rax },
{ &bl, bx, ebx, rbx },
{ &cl, cx, ecx, rcx },
{ &dl, dx, edx, rdx },
{ &sil, si, esi, rsi },
{ &dil, di, edi, rdi },
{ &bpl, bp, ebp, rbp },
{ &spl, sp, esp, rsp },
{ &r8b, r8w, r8d, r8 },
{ &r9b, r9w, r9d, r9 },
{ &r10b, r10w, r10d, r10 },
{ &r11b, r11w, r11d, r11 },
{ &r12b, r12w, r12d, r12 },
{ &r13b, r13w, r13d, r13 },
{ &r14b, r14w, r14d, r14 },
{ &r15b, r15w, r15d, r15 },
{ &al, ax, eax, rax, xmm0, ymm0, zmm0 },
{ &bl, bx, ebx, rbx, xmm3, ymm3, zmm3 },
{ &cl, cx, ecx, rcx, xmm1, ymm1, zmm1 },
{ &dl, dx, edx, rdx, xmm2, ymm2, zmm2 },
{ &sil, si, esi, rsi, xmm6, ymm6, zmm6 },
{ &dil, di, edi, rdi, xmm7, ymm7, zmm7 },
{ &bpl, bp, ebp, rbp, xmm5, ymm5, zmm5 },
{ &spl, sp, esp, rsp, xmm4, ymm4, zmm4 },
{ &r8b, r8w, r8d, r8, xmm8, ymm8, zmm8 },
{ &r9b, r9w, r9d, r9, xmm9, ymm9, zmm9 },
{ &r10b, r10w, r10d, r10, xmm10, ymm10, zmm10 },
{ &r11b, r11w, r11d, r11, xmm11, ymm11, zmm11 },
{ &r12b, r12w, r12d, r12, xmm12, ymm12, zmm12 },
{ &r13b, r13w, r13d, r13, xmm13, ymm13, zmm13 },
{ &r14b, r14w, r14d, r14, xmm14, ymm14, zmm14 },
{ &r15b, r15w, r15d, r15, xmm15, ymm15, zmm15 },
{ &r31b, r31w, r31d, r31, xmm31, ymm31, zmm31 },
};
#else
// Table variant used in the #else branch (no Reg64 column).
// Each row groups the registers that the cvt*() conversions are expected to
// map onto one another; the cvt test compares every conversion result against
// the other members of the same row.
const struct Ptn {
// Pointer rather than value so that a row can have no 8-bit entry (0);
// the test checks this pointer before dereferencing it.
const Reg8 *reg8;
Reg16 reg16;
Reg32 reg32;
// Expected results of cvt128() / cvt256() / cvt512() for this row.
Xmm x;
Ymm y;
Zmm z;
} tbl[] = {
{ &al, ax, eax },
{ &bl, bx, ebx },
{ &cl, cx, ecx },
{ &dl, dx, edx },
{ 0, si, esi },
{ 0, di, edi },
{ 0, bp, ebp },
{ 0, sp, esp },
// NOTE(review): the vector register numbers below are not in row order
// (bl pairs with xmm3, cl with xmm1, ...); presumably they follow the
// hardware register numbering, since cvt128() builds its result from getIdx().
{ &al, ax, eax, xmm0, ymm0, zmm0 },
{ &bl, bx, ebx, xmm3, ymm3, zmm3 },
{ &cl, cx, ecx, xmm1, ymm1, zmm1 },
{ &dl, dx, edx, xmm2, ymm2, zmm2 },
{ 0, si, esi, xmm6, ymm6, zmm6 },
{ 0, di, edi, xmm7, ymm7, zmm7 },
{ 0, bp, ebp, xmm5, ymm5, zmm5 },
{ 0, sp, esp, xmm4, ymm4, zmm4 },
};
#endif

Expand All @@ -54,13 +61,40 @@ CYBOZU_TEST_AUTO(cvt)
CYBOZU_TEST_ASSERT(tbl[i].reg8->cvt8() == *tbl[i].reg8);
CYBOZU_TEST_ASSERT(tbl[i].reg8->cvt16() == tbl[i].reg16);
CYBOZU_TEST_ASSERT(tbl[i].reg8->cvt32() == tbl[i].reg32);
CYBOZU_TEST_ASSERT(tbl[i].reg8->cvt128() == tbl[i].x);
CYBOZU_TEST_ASSERT(tbl[i].reg8->cvt256() == tbl[i].y);
CYBOZU_TEST_ASSERT(tbl[i].reg8->cvt512() == tbl[i].z);
CYBOZU_TEST_ASSERT(tbl[i].reg16.cvt8() == *tbl[i].reg8);
CYBOZU_TEST_ASSERT(tbl[i].reg32.cvt8() == *tbl[i].reg8);
CYBOZU_TEST_ASSERT(tbl[i].x.cvt8() == *tbl[i].reg8);
CYBOZU_TEST_ASSERT(tbl[i].y.cvt8() == *tbl[i].reg8);
CYBOZU_TEST_ASSERT(tbl[i].z.cvt8() == *tbl[i].reg8);
}
CYBOZU_TEST_ASSERT(tbl[i].reg16.cvt16() == tbl[i].reg16);
CYBOZU_TEST_ASSERT(tbl[i].reg16.cvt32() == tbl[i].reg32);
CYBOZU_TEST_ASSERT(tbl[i].reg16.cvt128() == tbl[i].x);
CYBOZU_TEST_ASSERT(tbl[i].reg16.cvt256() == tbl[i].y);
CYBOZU_TEST_ASSERT(tbl[i].reg16.cvt512() == tbl[i].z);
CYBOZU_TEST_ASSERT(tbl[i].reg32.cvt16() == tbl[i].reg16);
CYBOZU_TEST_ASSERT(tbl[i].reg32.cvt32() == tbl[i].reg32);
CYBOZU_TEST_ASSERT(tbl[i].reg32.cvt128() == tbl[i].x);
CYBOZU_TEST_ASSERT(tbl[i].reg32.cvt256() == tbl[i].y);
CYBOZU_TEST_ASSERT(tbl[i].reg32.cvt512() == tbl[i].z);
CYBOZU_TEST_ASSERT(tbl[i].x.cvt16() == tbl[i].reg16);
CYBOZU_TEST_ASSERT(tbl[i].x.cvt32() == tbl[i].reg32);
CYBOZU_TEST_ASSERT(tbl[i].x.cvt128() == tbl[i].x);
CYBOZU_TEST_ASSERT(tbl[i].x.cvt256() == tbl[i].y);
CYBOZU_TEST_ASSERT(tbl[i].x.cvt512() == tbl[i].z);
CYBOZU_TEST_ASSERT(tbl[i].y.cvt16() == tbl[i].reg16);
CYBOZU_TEST_ASSERT(tbl[i].y.cvt32() == tbl[i].reg32);
CYBOZU_TEST_ASSERT(tbl[i].y.cvt128() == tbl[i].x);
CYBOZU_TEST_ASSERT(tbl[i].y.cvt256() == tbl[i].y);
CYBOZU_TEST_ASSERT(tbl[i].y.cvt512() == tbl[i].z);
// Zmm as the source register: every conversion must land on the matching
// member of the same table row.
CYBOZU_TEST_ASSERT(tbl[i].z.cvt16() == tbl[i].reg16);
CYBOZU_TEST_ASSERT(tbl[i].z.cvt32() == tbl[i].reg32);
CYBOZU_TEST_ASSERT(tbl[i].z.cvt128() == tbl[i].x);
CYBOZU_TEST_ASSERT(tbl[i].z.cvt256() == tbl[i].y);
// Identity conversion Zmm -> Zmm. This line previously repeated the
// y.cvt512() check from the Ymm group, leaving z.cvt512() untested.
CYBOZU_TEST_ASSERT(tbl[i].z.cvt512() == tbl[i].z);
#ifdef XBYAK64
if (tbl[i].reg8) {
CYBOZU_TEST_ASSERT(tbl[i].reg64.cvt8() == *tbl[i].reg8);
Expand All @@ -69,8 +103,14 @@ CYBOZU_TEST_AUTO(cvt)
CYBOZU_TEST_ASSERT(tbl[i].reg64.cvt16() == tbl[i].reg16);
CYBOZU_TEST_ASSERT(tbl[i].reg64.cvt32() == tbl[i].reg32);
CYBOZU_TEST_ASSERT(tbl[i].reg64.cvt64() == tbl[i].reg64);
CYBOZU_TEST_ASSERT(tbl[i].reg64.cvt128() == tbl[i].x);
CYBOZU_TEST_ASSERT(tbl[i].reg64.cvt256() == tbl[i].y);
CYBOZU_TEST_ASSERT(tbl[i].reg64.cvt512() == tbl[i].z);
CYBOZU_TEST_ASSERT(tbl[i].reg16.cvt64() == tbl[i].reg64);
CYBOZU_TEST_ASSERT(tbl[i].reg32.cvt64() == tbl[i].reg64);
CYBOZU_TEST_ASSERT(tbl[i].x.cvt64() == tbl[i].reg64);
CYBOZU_TEST_ASSERT(tbl[i].y.cvt64() == tbl[i].reg64);
CYBOZU_TEST_ASSERT(tbl[i].z.cvt64() == tbl[i].reg64);
#endif
}
{
Expand Down
23 changes: 22 additions & 1 deletion xbyak/xbyak.h
Original file line number Diff line number Diff line change
Expand Up @@ -155,7 +155,7 @@ namespace Xbyak {

enum {
DEFAULT_MAX_CODE_SIZE = 4096,
VERSION = 0x7210 /* 0xABCD = A.BC(.D) */
VERSION = 0x7220 /* 0xABCD = A.BC(.D) */
};

#ifndef MIE_INTEGER_TYPE_DEFINED
Expand Down Expand Up @@ -786,6 +786,9 @@ class Label;
struct Reg8;
struct Reg16;
struct Reg32;
struct Xmm;
struct Ymm;
struct Zmm;
#ifdef XBYAK64
struct Reg64;
#endif
Expand All @@ -801,6 +804,9 @@ class Reg : public Operand {
#ifdef XBYAK64
Reg64 cvt64() const;
#endif
Xmm cvt128() const;
Ymm cvt256() const;
Zmm cvt512() const;
Reg operator|(const ApxFlagNF&) const { Reg r(*this); r.setNF(); return r; }
Reg operator|(const ApxFlagZU&) const { Reg r(*this); r.setZU(); return r; }
};
Expand Down Expand Up @@ -938,6 +944,21 @@ inline Reg64 Reg::cvt64() const
}
#endif

// Convert this register to an Xmm.
// The result is constructed from the index of changeBit(128) only; nothing
// else from this register is passed to the Xmm constructor.
inline Xmm Reg::cvt128() const
{
return Xmm(changeBit(128).getIdx());
}

// Convert this register to a Ymm.
// The result is constructed from the index of changeBit(256) only; nothing
// else from this register is passed to the Ymm constructor.
inline Ymm Reg::cvt256() const
{
return Ymm(changeBit(256).getIdx());
}

// Convert this register to a Zmm.
// The result is constructed from the index of changeBit(512) only; nothing
// else from this register is passed to the Zmm constructor.
inline Zmm Reg::cvt512() const
{
return Zmm(changeBit(512).getIdx());
}

#ifndef XBYAK_DISABLE_SEGMENT
// not derived from Reg
class Segment {
Expand Down
2 changes: 1 addition & 1 deletion xbyak/xbyak_mnemonic.h
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
const char *getVersionString() const { return "7.21"; }
const char *getVersionString() const { return "7.22"; }
void aadd(const Address& addr, const Reg32e &reg) { opMR(addr, reg, T_0F38, 0x0FC, T_APX); }
void aand(const Address& addr, const Reg32e &reg) { opMR(addr, reg, T_0F38|T_66, 0x0FC, T_APX|T_66); }
void adc(const Operand& op, uint32_t imm) { opOI(op, imm, 0x10, 2); }
Expand Down

0 comments on commit 4e44f46

Please sign in to comment.